Merge tag 'linux-kselftest-5.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel...
[sfrench/cifs-2.6.git] / net / openvswitch / flow_netlink.c
1 /*
2  * Copyright (c) 2007-2017 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include "flow.h"
22 #include "datapath.h"
23 #include <linux/uaccess.h>
24 #include <linux/netdevice.h>
25 #include <linux/etherdevice.h>
26 #include <linux/if_ether.h>
27 #include <linux/if_vlan.h>
28 #include <net/llc_pdu.h>
29 #include <linux/kernel.h>
30 #include <linux/jhash.h>
31 #include <linux/jiffies.h>
32 #include <linux/llc.h>
33 #include <linux/module.h>
34 #include <linux/in.h>
35 #include <linux/rcupdate.h>
36 #include <linux/if_arp.h>
37 #include <linux/ip.h>
38 #include <linux/ipv6.h>
39 #include <linux/sctp.h>
40 #include <linux/tcp.h>
41 #include <linux/udp.h>
42 #include <linux/icmp.h>
43 #include <linux/icmpv6.h>
44 #include <linux/rculist.h>
45 #include <net/geneve.h>
46 #include <net/ip.h>
47 #include <net/ipv6.h>
48 #include <net/ndisc.h>
49 #include <net/mpls.h>
50 #include <net/vxlan.h>
51 #include <net/tun_proto.h>
52 #include <net/erspan.h>
53
54 #include "flow_netlink.h"
55
/* Expected payload length for one Netlink attribute type.
 *
 * @len:  exact payload size in bytes, or one of the OVS_ATTR_* sentinels
 *        defined below, for which check_attr_len() accepts any length.
 * @next: length table describing the attributes nested inside this one.
 *        Entries that do not name a nested table leave it unset.
 */
struct ovs_len_tbl {
        int len;
        const struct ovs_len_tbl *next;
};

/* Sentinel values for ovs_len_tbl.len.  Both are negative so they cannot
 * collide with a real payload size.
 */
#define OVS_ATTR_NESTED -1
#define OVS_ATTR_VARIABLE -2
63
64 static bool actions_may_change_flow(const struct nlattr *actions)
65 {
66         struct nlattr *nla;
67         int rem;
68
69         nla_for_each_nested(nla, actions, rem) {
70                 u16 action = nla_type(nla);
71
72                 switch (action) {
73                 case OVS_ACTION_ATTR_OUTPUT:
74                 case OVS_ACTION_ATTR_RECIRC:
75                 case OVS_ACTION_ATTR_TRUNC:
76                 case OVS_ACTION_ATTR_USERSPACE:
77                         break;
78
79                 case OVS_ACTION_ATTR_CT:
80                 case OVS_ACTION_ATTR_CT_CLEAR:
81                 case OVS_ACTION_ATTR_HASH:
82                 case OVS_ACTION_ATTR_POP_ETH:
83                 case OVS_ACTION_ATTR_POP_MPLS:
84                 case OVS_ACTION_ATTR_POP_NSH:
85                 case OVS_ACTION_ATTR_POP_VLAN:
86                 case OVS_ACTION_ATTR_PUSH_ETH:
87                 case OVS_ACTION_ATTR_PUSH_MPLS:
88                 case OVS_ACTION_ATTR_PUSH_NSH:
89                 case OVS_ACTION_ATTR_PUSH_VLAN:
90                 case OVS_ACTION_ATTR_SAMPLE:
91                 case OVS_ACTION_ATTR_SET:
92                 case OVS_ACTION_ATTR_SET_MASKED:
93                 case OVS_ACTION_ATTR_METER:
94                 default:
95                         return true;
96                 }
97         }
98         return false;
99 }
100
101 static void update_range(struct sw_flow_match *match,
102                          size_t offset, size_t size, bool is_mask)
103 {
104         struct sw_flow_key_range *range;
105         size_t start = rounddown(offset, sizeof(long));
106         size_t end = roundup(offset + size, sizeof(long));
107
108         if (!is_mask)
109                 range = &match->range;
110         else
111                 range = &match->mask->range;
112
113         if (range->start == range->end) {
114                 range->start = start;
115                 range->end = end;
116                 return;
117         }
118
119         if (range->start > start)
120                 range->start = start;
121
122         if (range->end < end)
123                 range->end = end;
124 }
125
/* Assign @value to @field of the flow key (or of the mask's key when
 * @is_mask is true) and extend the corresponding range to cover the field.
 * Note that @match and @is_mask are expanded more than once.
 */
#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
        do { \
                update_range(match, offsetof(struct sw_flow_key, field),    \
                             sizeof((match)->key->field), is_mask);         \
                if (is_mask)                                                \
                        (match)->mask->key.field = value;                   \
                else                                                        \
                        (match)->key->field = value;                        \
        } while (0)

/* Copy @len bytes from @value_p to byte offset @offset inside the flow key
 * (or the mask's key when @is_mask is true) and extend the corresponding
 * range to cover the copied bytes.
 */
#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask)     \
        do {                                                                \
                update_range(match, offset, len, is_mask);                  \
                if (is_mask)                                                \
                        memcpy((u8 *)&(match)->mask->key + offset, value_p, \
                               len);                                       \
                else                                                        \
                        memcpy((u8 *)(match)->key + offset, value_p, len);  \
        } while (0)

/* Same as SW_FLOW_KEY_MEMCPY_OFFSET(), with the destination given as a
 * struct sw_flow_key field name instead of a byte offset.
 */
#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask)               \
        SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
                                  value_p, len, is_mask)

/* Fill every byte of @field in the flow key (or the mask's key when
 * @is_mask is true) with @value and extend the corresponding range to
 * cover the field.
 */
#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask)              \
        do {                                                                \
                update_range(match, offsetof(struct sw_flow_key, field),    \
                             sizeof((match)->key->field), is_mask);         \
                if (is_mask)                                                \
                        memset((u8 *)&(match)->mask->key.field, value,      \
                               sizeof((match)->mask->key.field));           \
                else                                                        \
                        memset((u8 *)&(match)->key->field, value,           \
                               sizeof((match)->key->field));                \
        } while (0)
161
162 static bool match_validate(const struct sw_flow_match *match,
163                            u64 key_attrs, u64 mask_attrs, bool log)
164 {
165         u64 key_expected = 0;
166         u64 mask_allowed = key_attrs;  /* At most allow all key attributes */
167
168         /* The following mask attributes allowed only if they
169          * pass the validation tests. */
170         mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4)
171                         | (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)
172                         | (1 << OVS_KEY_ATTR_IPV6)
173                         | (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)
174                         | (1 << OVS_KEY_ATTR_TCP)
175                         | (1 << OVS_KEY_ATTR_TCP_FLAGS)
176                         | (1 << OVS_KEY_ATTR_UDP)
177                         | (1 << OVS_KEY_ATTR_SCTP)
178                         | (1 << OVS_KEY_ATTR_ICMP)
179                         | (1 << OVS_KEY_ATTR_ICMPV6)
180                         | (1 << OVS_KEY_ATTR_ARP)
181                         | (1 << OVS_KEY_ATTR_ND)
182                         | (1 << OVS_KEY_ATTR_MPLS)
183                         | (1 << OVS_KEY_ATTR_NSH));
184
185         /* Always allowed mask fields. */
186         mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
187                        | (1 << OVS_KEY_ATTR_IN_PORT)
188                        | (1 << OVS_KEY_ATTR_ETHERTYPE));
189
190         /* Check key attributes. */
191         if (match->key->eth.type == htons(ETH_P_ARP)
192                         || match->key->eth.type == htons(ETH_P_RARP)) {
193                 key_expected |= 1 << OVS_KEY_ATTR_ARP;
194                 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
195                         mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
196         }
197
198         if (eth_p_mpls(match->key->eth.type)) {
199                 key_expected |= 1 << OVS_KEY_ATTR_MPLS;
200                 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
201                         mask_allowed |= 1 << OVS_KEY_ATTR_MPLS;
202         }
203
204         if (match->key->eth.type == htons(ETH_P_IP)) {
205                 key_expected |= 1 << OVS_KEY_ATTR_IPV4;
206                 if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
207                         mask_allowed |= 1 << OVS_KEY_ATTR_IPV4;
208                         mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4;
209                 }
210
211                 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
212                         if (match->key->ip.proto == IPPROTO_UDP) {
213                                 key_expected |= 1 << OVS_KEY_ATTR_UDP;
214                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
215                                         mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
216                         }
217
218                         if (match->key->ip.proto == IPPROTO_SCTP) {
219                                 key_expected |= 1 << OVS_KEY_ATTR_SCTP;
220                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
221                                         mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
222                         }
223
224                         if (match->key->ip.proto == IPPROTO_TCP) {
225                                 key_expected |= 1 << OVS_KEY_ATTR_TCP;
226                                 key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
227                                 if (match->mask && (match->mask->key.ip.proto == 0xff)) {
228                                         mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
229                                         mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
230                                 }
231                         }
232
233                         if (match->key->ip.proto == IPPROTO_ICMP) {
234                                 key_expected |= 1 << OVS_KEY_ATTR_ICMP;
235                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
236                                         mask_allowed |= 1 << OVS_KEY_ATTR_ICMP;
237                         }
238                 }
239         }
240
241         if (match->key->eth.type == htons(ETH_P_IPV6)) {
242                 key_expected |= 1 << OVS_KEY_ATTR_IPV6;
243                 if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
244                         mask_allowed |= 1 << OVS_KEY_ATTR_IPV6;
245                         mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6;
246                 }
247
248                 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
249                         if (match->key->ip.proto == IPPROTO_UDP) {
250                                 key_expected |= 1 << OVS_KEY_ATTR_UDP;
251                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
252                                         mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
253                         }
254
255                         if (match->key->ip.proto == IPPROTO_SCTP) {
256                                 key_expected |= 1 << OVS_KEY_ATTR_SCTP;
257                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
258                                         mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
259                         }
260
261                         if (match->key->ip.proto == IPPROTO_TCP) {
262                                 key_expected |= 1 << OVS_KEY_ATTR_TCP;
263                                 key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
264                                 if (match->mask && (match->mask->key.ip.proto == 0xff)) {
265                                         mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
266                                         mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
267                                 }
268                         }
269
270                         if (match->key->ip.proto == IPPROTO_ICMPV6) {
271                                 key_expected |= 1 << OVS_KEY_ATTR_ICMPV6;
272                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
273                                         mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6;
274
275                                 if (match->key->tp.src ==
276                                                 htons(NDISC_NEIGHBOUR_SOLICITATION) ||
277                                     match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
278                                         key_expected |= 1 << OVS_KEY_ATTR_ND;
279                                         /* Original direction conntrack tuple
280                                          * uses the same space as the ND fields
281                                          * in the key, so both are not allowed
282                                          * at the same time.
283                                          */
284                                         mask_allowed &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
285                                         if (match->mask && (match->mask->key.tp.src == htons(0xff)))
286                                                 mask_allowed |= 1 << OVS_KEY_ATTR_ND;
287                                 }
288                         }
289                 }
290         }
291
292         if (match->key->eth.type == htons(ETH_P_NSH)) {
293                 key_expected |= 1 << OVS_KEY_ATTR_NSH;
294                 if (match->mask &&
295                     match->mask->key.eth.type == htons(0xffff)) {
296                         mask_allowed |= 1 << OVS_KEY_ATTR_NSH;
297                 }
298         }
299
300         if ((key_attrs & key_expected) != key_expected) {
301                 /* Key attributes check failed. */
302                 OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)",
303                           (unsigned long long)key_attrs,
304                           (unsigned long long)key_expected);
305                 return false;
306         }
307
308         if ((mask_attrs & mask_allowed) != mask_attrs) {
309                 /* Mask attributes check failed. */
310                 OVS_NLERR(log, "Unexpected mask (mask=%llx, allowed=%llx)",
311                           (unsigned long long)mask_attrs,
312                           (unsigned long long)mask_allowed);
313                 return false;
314         }
315
316         return true;
317 }
318
/* Return the Netlink space budgeted for the attributes nested inside
 * OVS_KEY_ATTR_TUNNEL.
 *
 * Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider
 * updating this function.
 */
size_t ovs_tun_key_attr_size(void)
{
        size_t total = 0;

        total += nla_total_size_64bit(8); /* OVS_TUNNEL_KEY_ATTR_ID */
        total += nla_total_size(16);      /* OVS_TUNNEL_KEY_ATTR_IPV[46]_SRC */
        total += nla_total_size(16);      /* OVS_TUNNEL_KEY_ATTR_IPV[46]_DST */
        total += nla_total_size(1);       /* OVS_TUNNEL_KEY_ATTR_TOS */
        total += nla_total_size(1);       /* OVS_TUNNEL_KEY_ATTR_TTL */
        total += nla_total_size(0);       /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
        total += nla_total_size(0);       /* OVS_TUNNEL_KEY_ATTR_CSUM */
        total += nla_total_size(0);       /* OVS_TUNNEL_KEY_ATTR_OAM */

        /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS and
         * OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS is mutually exclusive with
         * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
         */
        total += nla_total_size(256);     /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */

        total += nla_total_size(2);       /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
        total += nla_total_size(2);       /* OVS_TUNNEL_KEY_ATTR_TP_DST */

        return total;
}
340
341 static size_t ovs_nsh_key_attr_size(void)
342 {
343         /* Whenever adding new OVS_NSH_KEY_ FIELDS, we should consider
344          * updating this function.
345          */
346         return  nla_total_size(NSH_BASE_HDR_LEN) /* OVS_NSH_KEY_ATTR_BASE */
347                 /* OVS_NSH_KEY_ATTR_MD1 and OVS_NSH_KEY_ATTR_MD2 are
348                  * mutually exclusive, so the bigger one can cover
349                  * the small one.
350                  */
351                 + nla_total_size(NSH_CTX_HDRS_MAX_LEN);
352 }
353
354 size_t ovs_key_attr_size(void)
355 {
356         /* Whenever adding new OVS_KEY_ FIELDS, we should consider
357          * updating this function.
358          */
359         BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 29);
360
361         return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
362                 + nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
363                   + ovs_tun_key_attr_size()
364                 + nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
365                 + nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
366                 + nla_total_size(4)   /* OVS_KEY_ATTR_DP_HASH */
367                 + nla_total_size(4)   /* OVS_KEY_ATTR_RECIRC_ID */
368                 + nla_total_size(4)   /* OVS_KEY_ATTR_CT_STATE */
369                 + nla_total_size(2)   /* OVS_KEY_ATTR_CT_ZONE */
370                 + nla_total_size(4)   /* OVS_KEY_ATTR_CT_MARK */
371                 + nla_total_size(16)  /* OVS_KEY_ATTR_CT_LABELS */
372                 + nla_total_size(40)  /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */
373                 + nla_total_size(0)   /* OVS_KEY_ATTR_NSH */
374                   + ovs_nsh_key_attr_size()
375                 + nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
376                 + nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
377                 + nla_total_size(4)   /* OVS_KEY_ATTR_VLAN */
378                 + nla_total_size(0)   /* OVS_KEY_ATTR_ENCAP */
379                 + nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
380                 + nla_total_size(40)  /* OVS_KEY_ATTR_IPV6 */
381                 + nla_total_size(2)   /* OVS_KEY_ATTR_ICMPV6 */
382                 + nla_total_size(28); /* OVS_KEY_ATTR_ND */
383 }
384
/* Expected payload length of each OVS_VXLAN_EXT_* attribute, indexed by
 * attribute type.  Types without an entry are zero-initialised (len 0).
 */
static const struct ovs_len_tbl ovs_vxlan_ext_key_lens[OVS_VXLAN_EXT_MAX + 1] = {
        [OVS_VXLAN_EXT_GBP]         = { .len = sizeof(u32) },
};
388
/* Expected payload length of each OVS_TUNNEL_KEY_ATTR_* attribute, indexed
 * by attribute type.  Flag-style attributes carry no payload (len 0);
 * the Geneve and ERSPAN options are variable length, and the VXLAN
 * options are a nested set described by ovs_vxlan_ext_key_lens.
 */
static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
        [OVS_TUNNEL_KEY_ATTR_ID]            = { .len = sizeof(u64) },
        [OVS_TUNNEL_KEY_ATTR_IPV4_SRC]      = { .len = sizeof(u32) },
        [OVS_TUNNEL_KEY_ATTR_IPV4_DST]      = { .len = sizeof(u32) },
        [OVS_TUNNEL_KEY_ATTR_TOS]           = { .len = 1 },
        [OVS_TUNNEL_KEY_ATTR_TTL]           = { .len = 1 },
        [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 },
        [OVS_TUNNEL_KEY_ATTR_CSUM]          = { .len = 0 },
        [OVS_TUNNEL_KEY_ATTR_TP_SRC]        = { .len = sizeof(u16) },
        [OVS_TUNNEL_KEY_ATTR_TP_DST]        = { .len = sizeof(u16) },
        [OVS_TUNNEL_KEY_ATTR_OAM]           = { .len = 0 },
        [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS]   = { .len = OVS_ATTR_VARIABLE },
        [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS]    = { .len = OVS_ATTR_NESTED,
                                                .next = ovs_vxlan_ext_key_lens },
        [OVS_TUNNEL_KEY_ATTR_IPV6_SRC]      = { .len = sizeof(struct in6_addr) },
        [OVS_TUNNEL_KEY_ATTR_IPV6_DST]      = { .len = sizeof(struct in6_addr) },
        [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS]   = { .len = OVS_ATTR_VARIABLE },
};
407
/* Expected payload length of each OVS_NSH_KEY_ATTR_* attribute, indexed by
 * attribute type.  BASE and MD1 are fixed-size structures; MD2 is variable
 * length.
 */
static const struct ovs_len_tbl
ovs_nsh_key_attr_lens[OVS_NSH_KEY_ATTR_MAX + 1] = {
        [OVS_NSH_KEY_ATTR_BASE] = { .len = sizeof(struct ovs_nsh_key_base) },
        [OVS_NSH_KEY_ATTR_MD1]  = { .len = sizeof(struct ovs_nsh_key_md1) },
        [OVS_NSH_KEY_ATTR_MD2]  = { .len = OVS_ATTR_VARIABLE },
};
414
/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.
 *
 * Indexed by attribute type.  Nested attributes use OVS_ATTR_NESTED; the
 * TUNNEL and NSH entries additionally point at the table describing their
 * nested attributes, while ENCAP names no nested table here.
 */
static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
        [OVS_KEY_ATTR_ENCAP]     = { .len = OVS_ATTR_NESTED },
        [OVS_KEY_ATTR_PRIORITY]  = { .len = sizeof(u32) },
        [OVS_KEY_ATTR_IN_PORT]   = { .len = sizeof(u32) },
        [OVS_KEY_ATTR_SKB_MARK]  = { .len = sizeof(u32) },
        [OVS_KEY_ATTR_ETHERNET]  = { .len = sizeof(struct ovs_key_ethernet) },
        [OVS_KEY_ATTR_VLAN]      = { .len = sizeof(__be16) },
        [OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) },
        [OVS_KEY_ATTR_IPV4]      = { .len = sizeof(struct ovs_key_ipv4) },
        [OVS_KEY_ATTR_IPV6]      = { .len = sizeof(struct ovs_key_ipv6) },
        [OVS_KEY_ATTR_TCP]       = { .len = sizeof(struct ovs_key_tcp) },
        [OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) },
        [OVS_KEY_ATTR_UDP]       = { .len = sizeof(struct ovs_key_udp) },
        [OVS_KEY_ATTR_SCTP]      = { .len = sizeof(struct ovs_key_sctp) },
        [OVS_KEY_ATTR_ICMP]      = { .len = sizeof(struct ovs_key_icmp) },
        [OVS_KEY_ATTR_ICMPV6]    = { .len = sizeof(struct ovs_key_icmpv6) },
        [OVS_KEY_ATTR_ARP]       = { .len = sizeof(struct ovs_key_arp) },
        [OVS_KEY_ATTR_ND]        = { .len = sizeof(struct ovs_key_nd) },
        [OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) },
        [OVS_KEY_ATTR_DP_HASH]   = { .len = sizeof(u32) },
        [OVS_KEY_ATTR_TUNNEL]    = { .len = OVS_ATTR_NESTED,
                                     .next = ovs_tunnel_key_lens, },
        [OVS_KEY_ATTR_MPLS]      = { .len = sizeof(struct ovs_key_mpls) },
        [OVS_KEY_ATTR_CT_STATE]  = { .len = sizeof(u32) },
        [OVS_KEY_ATTR_CT_ZONE]   = { .len = sizeof(u16) },
        [OVS_KEY_ATTR_CT_MARK]   = { .len = sizeof(u32) },
        [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) },
        [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4] = {
                .len = sizeof(struct ovs_key_ct_tuple_ipv4) },
        [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = {
                .len = sizeof(struct ovs_key_ct_tuple_ipv6) },
        [OVS_KEY_ATTR_NSH]       = { .len = OVS_ATTR_NESTED,
                                     .next = ovs_nsh_key_attr_lens, },
};
450
451 static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
452 {
453         return expected_len == attr_len ||
454                expected_len == OVS_ATTR_NESTED ||
455                expected_len == OVS_ATTR_VARIABLE;
456 }
457
/* Return true when the @size bytes starting at @fp are all zero.
 *
 * A NULL @fp yields false.  A @size of zero with a non-NULL @fp yields
 * true, since there is no non-zero byte to find.
 */
static bool is_all_zero(const u8 *fp, size_t size)
{
        size_t i;       /* same type as @size: no signed/unsigned compare */

        if (!fp)
                return false;

        for (i = 0; i < size; i++)
                if (fp[i])
                        return false;

        return true;
}
471
472 static int __parse_flow_nlattrs(const struct nlattr *attr,
473                                 const struct nlattr *a[],
474                                 u64 *attrsp, bool log, bool nz)
475 {
476         const struct nlattr *nla;
477         u64 attrs;
478         int rem;
479
480         attrs = *attrsp;
481         nla_for_each_nested(nla, attr, rem) {
482                 u16 type = nla_type(nla);
483                 int expected_len;
484
485                 if (type > OVS_KEY_ATTR_MAX) {
486                         OVS_NLERR(log, "Key type %d is out of range max %d",
487                                   type, OVS_KEY_ATTR_MAX);
488                         return -EINVAL;
489                 }
490
491                 if (attrs & (1 << type)) {
492                         OVS_NLERR(log, "Duplicate key (type %d).", type);
493                         return -EINVAL;
494                 }
495
496                 expected_len = ovs_key_lens[type].len;
497                 if (!check_attr_len(nla_len(nla), expected_len)) {
498                         OVS_NLERR(log, "Key %d has unexpected len %d expected %d",
499                                   type, nla_len(nla), expected_len);
500                         return -EINVAL;
501                 }
502
503                 if (!nz || !is_all_zero(nla_data(nla), nla_len(nla))) {
504                         attrs |= 1 << type;
505                         a[type] = nla;
506                 }
507         }
508         if (rem) {
509                 OVS_NLERR(log, "Message has %d unknown bytes.", rem);
510                 return -EINVAL;
511         }
512
513         *attrsp = attrs;
514         return 0;
515 }
516
/* Parse the nested attributes of a flow mask into @a and @attrsp.
 *
 * Calls __parse_flow_nlattrs() with nz set, so attributes whose payload is
 * entirely zero are not recorded.  Returns that function's result.
 */
static int parse_flow_mask_nlattrs(const struct nlattr *attr,
                                   const struct nlattr *a[], u64 *attrsp,
                                   bool log)
{
        return __parse_flow_nlattrs(attr, a, attrsp, log, true);
}
523
/* Parse the nested attributes of a flow key into @a and @attrsp.
 *
 * Calls __parse_flow_nlattrs() with nz clear, so every well-formed
 * attribute is recorded, including ones whose payload is all zero.
 * Returns that function's result.
 */
int parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[],
                       u64 *attrsp, bool log)
{
        return __parse_flow_nlattrs(attr, a, attrsp, log, false);
}
529
530 static int genev_tun_opt_from_nlattr(const struct nlattr *a,
531                                      struct sw_flow_match *match, bool is_mask,
532                                      bool log)
533 {
534         unsigned long opt_key_offset;
535
536         if (nla_len(a) > sizeof(match->key->tun_opts)) {
537                 OVS_NLERR(log, "Geneve option length err (len %d, max %zu).",
538                           nla_len(a), sizeof(match->key->tun_opts));
539                 return -EINVAL;
540         }
541
542         if (nla_len(a) % 4 != 0) {
543                 OVS_NLERR(log, "Geneve opt len %d is not a multiple of 4.",
544                           nla_len(a));
545                 return -EINVAL;
546         }
547
548         /* We need to record the length of the options passed
549          * down, otherwise packets with the same format but
550          * additional options will be silently matched.
551          */
552         if (!is_mask) {
553                 SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
554                                 false);
555         } else {
556                 /* This is somewhat unusual because it looks at
557                  * both the key and mask while parsing the
558                  * attributes (and by extension assumes the key
559                  * is parsed first). Normally, we would verify
560                  * that each is the correct length and that the
561                  * attributes line up in the validate function.
562                  * However, that is difficult because this is
563                  * variable length and we won't have the
564                  * information later.
565                  */
566                 if (match->key->tun_opts_len != nla_len(a)) {
567                         OVS_NLERR(log, "Geneve option len %d != mask len %d",
568                                   match->key->tun_opts_len, nla_len(a));
569                         return -EINVAL;
570                 }
571
572                 SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
573         }
574
575         opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
576         SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
577                                   nla_len(a), is_mask);
578         return 0;
579 }
580
581 static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
582                                      struct sw_flow_match *match, bool is_mask,
583                                      bool log)
584 {
585         struct nlattr *a;
586         int rem;
587         unsigned long opt_key_offset;
588         struct vxlan_metadata opts;
589
590         BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
591
592         memset(&opts, 0, sizeof(opts));
593         nla_for_each_nested(a, attr, rem) {
594                 int type = nla_type(a);
595
596                 if (type > OVS_VXLAN_EXT_MAX) {
597                         OVS_NLERR(log, "VXLAN extension %d out of range max %d",
598                                   type, OVS_VXLAN_EXT_MAX);
599                         return -EINVAL;
600                 }
601
602                 if (!check_attr_len(nla_len(a),
603                                     ovs_vxlan_ext_key_lens[type].len)) {
604                         OVS_NLERR(log, "VXLAN extension %d has unexpected len %d expected %d",
605                                   type, nla_len(a),
606                                   ovs_vxlan_ext_key_lens[type].len);
607                         return -EINVAL;
608                 }
609
610                 switch (type) {
611                 case OVS_VXLAN_EXT_GBP:
612                         opts.gbp = nla_get_u32(a);
613                         break;
614                 default:
615                         OVS_NLERR(log, "Unknown VXLAN extension attribute %d",
616                                   type);
617                         return -EINVAL;
618                 }
619         }
620         if (rem) {
621                 OVS_NLERR(log, "VXLAN extension message has %d unknown bytes.",
622                           rem);
623                 return -EINVAL;
624         }
625
626         if (!is_mask)
627                 SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false);
628         else
629                 SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
630
631         opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
632         SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
633                                   is_mask);
634         return 0;
635 }
636
637 static int erspan_tun_opt_from_nlattr(const struct nlattr *a,
638                                       struct sw_flow_match *match, bool is_mask,
639                                       bool log)
640 {
641         unsigned long opt_key_offset;
642
643         BUILD_BUG_ON(sizeof(struct erspan_metadata) >
644                      sizeof(match->key->tun_opts));
645
646         if (nla_len(a) > sizeof(match->key->tun_opts)) {
647                 OVS_NLERR(log, "ERSPAN option length err (len %d, max %zu).",
648                           nla_len(a), sizeof(match->key->tun_opts));
649                 return -EINVAL;
650         }
651
652         if (!is_mask)
653                 SW_FLOW_KEY_PUT(match, tun_opts_len,
654                                 sizeof(struct erspan_metadata), false);
655         else
656                 SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
657
658         opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
659         SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
660                                   nla_len(a), is_mask);
661         return 0;
662 }
663
664 static int ip_tun_from_nlattr(const struct nlattr *attr,
665                               struct sw_flow_match *match, bool is_mask,
666                               bool log)
667 {
668         bool ttl = false, ipv4 = false, ipv6 = false;
669         __be16 tun_flags = 0;
670         int opts_type = 0;
671         struct nlattr *a;
672         int rem;
673
674         nla_for_each_nested(a, attr, rem) {
675                 int type = nla_type(a);
676                 int err;
677
678                 if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
679                         OVS_NLERR(log, "Tunnel attr %d out of range max %d",
680                                   type, OVS_TUNNEL_KEY_ATTR_MAX);
681                         return -EINVAL;
682                 }
683
684                 if (!check_attr_len(nla_len(a),
685                                     ovs_tunnel_key_lens[type].len)) {
686                         OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d",
687                                   type, nla_len(a), ovs_tunnel_key_lens[type].len);
688                         return -EINVAL;
689                 }
690
691                 switch (type) {
692                 case OVS_TUNNEL_KEY_ATTR_ID:
693                         SW_FLOW_KEY_PUT(match, tun_key.tun_id,
694                                         nla_get_be64(a), is_mask);
695                         tun_flags |= TUNNEL_KEY;
696                         break;
697                 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
698                         SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src,
699                                         nla_get_in_addr(a), is_mask);
700                         ipv4 = true;
701                         break;
702                 case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
703                         SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst,
704                                         nla_get_in_addr(a), is_mask);
705                         ipv4 = true;
706                         break;
707                 case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
708                         SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.src,
709                                         nla_get_in6_addr(a), is_mask);
710                         ipv6 = true;
711                         break;
712                 case OVS_TUNNEL_KEY_ATTR_IPV6_DST:
713                         SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst,
714                                         nla_get_in6_addr(a), is_mask);
715                         ipv6 = true;
716                         break;
717                 case OVS_TUNNEL_KEY_ATTR_TOS:
718                         SW_FLOW_KEY_PUT(match, tun_key.tos,
719                                         nla_get_u8(a), is_mask);
720                         break;
721                 case OVS_TUNNEL_KEY_ATTR_TTL:
722                         SW_FLOW_KEY_PUT(match, tun_key.ttl,
723                                         nla_get_u8(a), is_mask);
724                         ttl = true;
725                         break;
726                 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
727                         tun_flags |= TUNNEL_DONT_FRAGMENT;
728                         break;
729                 case OVS_TUNNEL_KEY_ATTR_CSUM:
730                         tun_flags |= TUNNEL_CSUM;
731                         break;
732                 case OVS_TUNNEL_KEY_ATTR_TP_SRC:
733                         SW_FLOW_KEY_PUT(match, tun_key.tp_src,
734                                         nla_get_be16(a), is_mask);
735                         break;
736                 case OVS_TUNNEL_KEY_ATTR_TP_DST:
737                         SW_FLOW_KEY_PUT(match, tun_key.tp_dst,
738                                         nla_get_be16(a), is_mask);
739                         break;
740                 case OVS_TUNNEL_KEY_ATTR_OAM:
741                         tun_flags |= TUNNEL_OAM;
742                         break;
743                 case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
744                         if (opts_type) {
745                                 OVS_NLERR(log, "Multiple metadata blocks provided");
746                                 return -EINVAL;
747                         }
748
749                         err = genev_tun_opt_from_nlattr(a, match, is_mask, log);
750                         if (err)
751                                 return err;
752
753                         tun_flags |= TUNNEL_GENEVE_OPT;
754                         opts_type = type;
755                         break;
756                 case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
757                         if (opts_type) {
758                                 OVS_NLERR(log, "Multiple metadata blocks provided");
759                                 return -EINVAL;
760                         }
761
762                         err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log);
763                         if (err)
764                                 return err;
765
766                         tun_flags |= TUNNEL_VXLAN_OPT;
767                         opts_type = type;
768                         break;
769                 case OVS_TUNNEL_KEY_ATTR_PAD:
770                         break;
771                 case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
772                         if (opts_type) {
773                                 OVS_NLERR(log, "Multiple metadata blocks provided");
774                                 return -EINVAL;
775                         }
776
777                         err = erspan_tun_opt_from_nlattr(a, match, is_mask,
778                                                          log);
779                         if (err)
780                                 return err;
781
782                         tun_flags |= TUNNEL_ERSPAN_OPT;
783                         opts_type = type;
784                         break;
785                 default:
786                         OVS_NLERR(log, "Unknown IP tunnel attribute %d",
787                                   type);
788                         return -EINVAL;
789                 }
790         }
791
792         SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
793         if (is_mask)
794                 SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true);
795         else
796                 SW_FLOW_KEY_PUT(match, tun_proto, ipv6 ? AF_INET6 : AF_INET,
797                                 false);
798
799         if (rem > 0) {
800                 OVS_NLERR(log, "IP tunnel attribute has %d unknown bytes.",
801                           rem);
802                 return -EINVAL;
803         }
804
805         if (ipv4 && ipv6) {
806                 OVS_NLERR(log, "Mixed IPv4 and IPv6 tunnel attributes");
807                 return -EINVAL;
808         }
809
810         if (!is_mask) {
811                 if (!ipv4 && !ipv6) {
812                         OVS_NLERR(log, "IP tunnel dst address not specified");
813                         return -EINVAL;
814                 }
815                 if (ipv4 && !match->key->tun_key.u.ipv4.dst) {
816                         OVS_NLERR(log, "IPv4 tunnel dst address is zero");
817                         return -EINVAL;
818                 }
819                 if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) {
820                         OVS_NLERR(log, "IPv6 tunnel dst address is zero");
821                         return -EINVAL;
822                 }
823
824                 if (!ttl) {
825                         OVS_NLERR(log, "IP tunnel TTL not specified.");
826                         return -EINVAL;
827                 }
828         }
829
830         return opts_type;
831 }
832
833 static int vxlan_opt_to_nlattr(struct sk_buff *skb,
834                                const void *tun_opts, int swkey_tun_opts_len)
835 {
836         const struct vxlan_metadata *opts = tun_opts;
837         struct nlattr *nla;
838
839         nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS);
840         if (!nla)
841                 return -EMSGSIZE;
842
843         if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0)
844                 return -EMSGSIZE;
845
846         nla_nest_end(skb, nla);
847         return 0;
848 }
849
850 static int __ip_tun_to_nlattr(struct sk_buff *skb,
851                               const struct ip_tunnel_key *output,
852                               const void *tun_opts, int swkey_tun_opts_len,
853                               unsigned short tun_proto)
854 {
855         if (output->tun_flags & TUNNEL_KEY &&
856             nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id,
857                          OVS_TUNNEL_KEY_ATTR_PAD))
858                 return -EMSGSIZE;
859         switch (tun_proto) {
860         case AF_INET:
861                 if (output->u.ipv4.src &&
862                     nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
863                                     output->u.ipv4.src))
864                         return -EMSGSIZE;
865                 if (output->u.ipv4.dst &&
866                     nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
867                                     output->u.ipv4.dst))
868                         return -EMSGSIZE;
869                 break;
870         case AF_INET6:
871                 if (!ipv6_addr_any(&output->u.ipv6.src) &&
872                     nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_SRC,
873                                      &output->u.ipv6.src))
874                         return -EMSGSIZE;
875                 if (!ipv6_addr_any(&output->u.ipv6.dst) &&
876                     nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_DST,
877                                      &output->u.ipv6.dst))
878                         return -EMSGSIZE;
879                 break;
880         }
881         if (output->tos &&
882             nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos))
883                 return -EMSGSIZE;
884         if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ttl))
885                 return -EMSGSIZE;
886         if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
887             nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
888                 return -EMSGSIZE;
889         if ((output->tun_flags & TUNNEL_CSUM) &&
890             nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
891                 return -EMSGSIZE;
892         if (output->tp_src &&
893             nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src))
894                 return -EMSGSIZE;
895         if (output->tp_dst &&
896             nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst))
897                 return -EMSGSIZE;
898         if ((output->tun_flags & TUNNEL_OAM) &&
899             nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
900                 return -EMSGSIZE;
901         if (swkey_tun_opts_len) {
902                 if (output->tun_flags & TUNNEL_GENEVE_OPT &&
903                     nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
904                             swkey_tun_opts_len, tun_opts))
905                         return -EMSGSIZE;
906                 else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
907                          vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
908                         return -EMSGSIZE;
909                 else if (output->tun_flags & TUNNEL_ERSPAN_OPT &&
910                          nla_put(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
911                                  swkey_tun_opts_len, tun_opts))
912                         return -EMSGSIZE;
913         }
914
915         return 0;
916 }
917
918 static int ip_tun_to_nlattr(struct sk_buff *skb,
919                             const struct ip_tunnel_key *output,
920                             const void *tun_opts, int swkey_tun_opts_len,
921                             unsigned short tun_proto)
922 {
923         struct nlattr *nla;
924         int err;
925
926         nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
927         if (!nla)
928                 return -EMSGSIZE;
929
930         err = __ip_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len,
931                                  tun_proto);
932         if (err)
933                 return err;
934
935         nla_nest_end(skb, nla);
936         return 0;
937 }
938
939 int ovs_nla_put_tunnel_info(struct sk_buff *skb,
940                             struct ip_tunnel_info *tun_info)
941 {
942         return __ip_tun_to_nlattr(skb, &tun_info->key,
943                                   ip_tunnel_info_opts(tun_info),
944                                   tun_info->options_len,
945                                   ip_tunnel_info_af(tun_info));
946 }
947
948 static int encode_vlan_from_nlattrs(struct sw_flow_match *match,
949                                     const struct nlattr *a[],
950                                     bool is_mask, bool inner)
951 {
952         __be16 tci = 0;
953         __be16 tpid = 0;
954
955         if (a[OVS_KEY_ATTR_VLAN])
956                 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
957
958         if (a[OVS_KEY_ATTR_ETHERTYPE])
959                 tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
960
961         if (likely(!inner)) {
962                 SW_FLOW_KEY_PUT(match, eth.vlan.tpid, tpid, is_mask);
963                 SW_FLOW_KEY_PUT(match, eth.vlan.tci, tci, is_mask);
964         } else {
965                 SW_FLOW_KEY_PUT(match, eth.cvlan.tpid, tpid, is_mask);
966                 SW_FLOW_KEY_PUT(match, eth.cvlan.tci, tci, is_mask);
967         }
968         return 0;
969 }
970
971 static int validate_vlan_from_nlattrs(const struct sw_flow_match *match,
972                                       u64 key_attrs, bool inner,
973                                       const struct nlattr **a, bool log)
974 {
975         __be16 tci = 0;
976
977         if (!((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
978               (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
979                eth_type_vlan(nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE])))) {
980                 /* Not a VLAN. */
981                 return 0;
982         }
983
984         if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
985               (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
986                 OVS_NLERR(log, "Invalid %s frame", (inner) ? "C-VLAN" : "VLAN");
987                 return -EINVAL;
988         }
989
990         if (a[OVS_KEY_ATTR_VLAN])
991                 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
992
993         if (!(tci & htons(VLAN_CFI_MASK))) {
994                 if (tci) {
995                         OVS_NLERR(log, "%s TCI does not have VLAN_CFI_MASK bit set.",
996                                   (inner) ? "C-VLAN" : "VLAN");
997                         return -EINVAL;
998                 } else if (nla_len(a[OVS_KEY_ATTR_ENCAP])) {
999                         /* Corner case for truncated VLAN header. */
1000                         OVS_NLERR(log, "Truncated %s header has non-zero encap attribute.",
1001                                   (inner) ? "C-VLAN" : "VLAN");
1002                         return -EINVAL;
1003                 }
1004         }
1005
1006         return 1;
1007 }
1008
1009 static int validate_vlan_mask_from_nlattrs(const struct sw_flow_match *match,
1010                                            u64 key_attrs, bool inner,
1011                                            const struct nlattr **a, bool log)
1012 {
1013         __be16 tci = 0;
1014         __be16 tpid = 0;
1015         bool encap_valid = !!(match->key->eth.vlan.tci &
1016                               htons(VLAN_CFI_MASK));
1017         bool i_encap_valid = !!(match->key->eth.cvlan.tci &
1018                                 htons(VLAN_CFI_MASK));
1019
1020         if (!(key_attrs & (1 << OVS_KEY_ATTR_ENCAP))) {
1021                 /* Not a VLAN. */
1022                 return 0;
1023         }
1024
1025         if ((!inner && !encap_valid) || (inner && !i_encap_valid)) {
1026                 OVS_NLERR(log, "Encap mask attribute is set for non-%s frame.",
1027                           (inner) ? "C-VLAN" : "VLAN");
1028                 return -EINVAL;
1029         }
1030
1031         if (a[OVS_KEY_ATTR_VLAN])
1032                 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
1033
1034         if (a[OVS_KEY_ATTR_ETHERTYPE])
1035                 tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
1036
1037         if (tpid != htons(0xffff)) {
1038                 OVS_NLERR(log, "Must have an exact match on %s TPID (mask=%x).",
1039                           (inner) ? "C-VLAN" : "VLAN", ntohs(tpid));
1040                 return -EINVAL;
1041         }
1042         if (!(tci & htons(VLAN_CFI_MASK))) {
1043                 OVS_NLERR(log, "%s TCI mask does not have exact match for VLAN_CFI_MASK bit.",
1044                           (inner) ? "C-VLAN" : "VLAN");
1045                 return -EINVAL;
1046         }
1047
1048         return 1;
1049 }
1050
1051 static int __parse_vlan_from_nlattrs(struct sw_flow_match *match,
1052                                      u64 *key_attrs, bool inner,
1053                                      const struct nlattr **a, bool is_mask,
1054                                      bool log)
1055 {
1056         int err;
1057         const struct nlattr *encap;
1058
1059         if (!is_mask)
1060                 err = validate_vlan_from_nlattrs(match, *key_attrs, inner,
1061                                                  a, log);
1062         else
1063                 err = validate_vlan_mask_from_nlattrs(match, *key_attrs, inner,
1064                                                       a, log);
1065         if (err <= 0)
1066                 return err;
1067
1068         err = encode_vlan_from_nlattrs(match, a, is_mask, inner);
1069         if (err)
1070                 return err;
1071
1072         *key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
1073         *key_attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
1074         *key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
1075
1076         encap = a[OVS_KEY_ATTR_ENCAP];
1077
1078         if (!is_mask)
1079                 err = parse_flow_nlattrs(encap, a, key_attrs, log);
1080         else
1081                 err = parse_flow_mask_nlattrs(encap, a, key_attrs, log);
1082
1083         return err;
1084 }
1085
1086 static int parse_vlan_from_nlattrs(struct sw_flow_match *match,
1087                                    u64 *key_attrs, const struct nlattr **a,
1088                                    bool is_mask, bool log)
1089 {
1090         int err;
1091         bool encap_valid = false;
1092
1093         err = __parse_vlan_from_nlattrs(match, key_attrs, false, a,
1094                                         is_mask, log);
1095         if (err)
1096                 return err;
1097
1098         encap_valid = !!(match->key->eth.vlan.tci & htons(VLAN_CFI_MASK));
1099         if (encap_valid) {
1100                 err = __parse_vlan_from_nlattrs(match, key_attrs, true, a,
1101                                                 is_mask, log);
1102                 if (err)
1103                         return err;
1104         }
1105
1106         return 0;
1107 }
1108
1109 static int parse_eth_type_from_nlattrs(struct sw_flow_match *match,
1110                                        u64 *attrs, const struct nlattr **a,
1111                                        bool is_mask, bool log)
1112 {
1113         __be16 eth_type;
1114
1115         eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
1116         if (is_mask) {
1117                 /* Always exact match EtherType. */
1118                 eth_type = htons(0xffff);
1119         } else if (!eth_proto_is_802_3(eth_type)) {
1120                 OVS_NLERR(log, "EtherType %x is less than min %x",
1121                                 ntohs(eth_type), ETH_P_802_3_MIN);
1122                 return -EINVAL;
1123         }
1124
1125         SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
1126         *attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
1127         return 0;
1128 }
1129
1130 static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
1131                                  u64 *attrs, const struct nlattr **a,
1132                                  bool is_mask, bool log)
1133 {
1134         u8 mac_proto = MAC_PROTO_ETHERNET;
1135
1136         if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) {
1137                 u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
1138
1139                 SW_FLOW_KEY_PUT(match, ovs_flow_hash, hash_val, is_mask);
1140                 *attrs &= ~(1 << OVS_KEY_ATTR_DP_HASH);
1141         }
1142
1143         if (*attrs & (1 << OVS_KEY_ATTR_RECIRC_ID)) {
1144                 u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]);
1145
1146                 SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask);
1147                 *attrs &= ~(1 << OVS_KEY_ATTR_RECIRC_ID);
1148         }
1149
1150         if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
1151                 SW_FLOW_KEY_PUT(match, phy.priority,
1152                           nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
1153                 *attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
1154         }
1155
1156         if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
1157                 u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
1158
1159                 if (is_mask) {
1160                         in_port = 0xffffffff; /* Always exact match in_port. */
1161                 } else if (in_port >= DP_MAX_PORTS) {
1162                         OVS_NLERR(log, "Port %d exceeds max allowable %d",
1163                                   in_port, DP_MAX_PORTS);
1164                         return -EINVAL;
1165                 }
1166
1167                 SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
1168                 *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
1169         } else if (!is_mask) {
1170                 SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
1171         }
1172
1173         if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
1174                 uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
1175
1176                 SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
1177                 *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
1178         }
1179         if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
1180                 if (ip_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
1181                                        is_mask, log) < 0)
1182                         return -EINVAL;
1183                 *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
1184         }
1185
1186         if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) &&
1187             ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) {
1188                 u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]);
1189
1190                 if (ct_state & ~CT_SUPPORTED_MASK) {
1191                         OVS_NLERR(log, "ct_state flags %08x unsupported",
1192                                   ct_state);
1193                         return -EINVAL;
1194                 }
1195
1196                 SW_FLOW_KEY_PUT(match, ct_state, ct_state, is_mask);
1197                 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE);
1198         }
1199         if (*attrs & (1 << OVS_KEY_ATTR_CT_ZONE) &&
1200             ovs_ct_verify(net, OVS_KEY_ATTR_CT_ZONE)) {
1201                 u16 ct_zone = nla_get_u16(a[OVS_KEY_ATTR_CT_ZONE]);
1202
1203                 SW_FLOW_KEY_PUT(match, ct_zone, ct_zone, is_mask);
1204                 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE);
1205         }
1206         if (*attrs & (1 << OVS_KEY_ATTR_CT_MARK) &&
1207             ovs_ct_verify(net, OVS_KEY_ATTR_CT_MARK)) {
1208                 u32 mark = nla_get_u32(a[OVS_KEY_ATTR_CT_MARK]);
1209
1210                 SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask);
1211                 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK);
1212         }
1213         if (*attrs & (1 << OVS_KEY_ATTR_CT_LABELS) &&
1214             ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABELS)) {
1215                 const struct ovs_key_ct_labels *cl;
1216
1217                 cl = nla_data(a[OVS_KEY_ATTR_CT_LABELS]);
1218                 SW_FLOW_KEY_MEMCPY(match, ct.labels, cl->ct_labels,
1219                                    sizeof(*cl), is_mask);
1220                 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS);
1221         }
1222         if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)) {
1223                 const struct ovs_key_ct_tuple_ipv4 *ct;
1224
1225                 ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4]);
1226
1227                 SW_FLOW_KEY_PUT(match, ipv4.ct_orig.src, ct->ipv4_src, is_mask);
1228                 SW_FLOW_KEY_PUT(match, ipv4.ct_orig.dst, ct->ipv4_dst, is_mask);
1229                 SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
1230                 SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
1231                 SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv4_proto, is_mask);
1232                 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4);
1233         }
1234         if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)) {
1235                 const struct ovs_key_ct_tuple_ipv6 *ct;
1236
1237                 ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6]);
1238
1239                 SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.src, &ct->ipv6_src,
1240                                    sizeof(match->key->ipv6.ct_orig.src),
1241                                    is_mask);
1242                 SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.dst, &ct->ipv6_dst,
1243                                    sizeof(match->key->ipv6.ct_orig.dst),
1244                                    is_mask);
1245                 SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
1246                 SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
1247                 SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv6_proto, is_mask);
1248                 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
1249         }
1250
1251         /* For layer 3 packets the Ethernet type is provided
1252          * and treated as metadata but no MAC addresses are provided.
1253          */
1254         if (!(*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) &&
1255             (*attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)))
1256                 mac_proto = MAC_PROTO_NONE;
1257
1258         /* Always exact match mac_proto */
1259         SW_FLOW_KEY_PUT(match, mac_proto, is_mask ? 0xff : mac_proto, is_mask);
1260
1261         if (mac_proto == MAC_PROTO_NONE)
1262                 return parse_eth_type_from_nlattrs(match, attrs, a, is_mask,
1263                                                    log);
1264
1265         return 0;
1266 }
1267
1268 int nsh_hdr_from_nlattr(const struct nlattr *attr,
1269                         struct nshhdr *nh, size_t size)
1270 {
1271         struct nlattr *a;
1272         int rem;
1273         u8 flags = 0;
1274         u8 ttl = 0;
1275         int mdlen = 0;
1276
1277         /* validate_nsh has check this, so we needn't do duplicate check here
1278          */
1279         if (size < NSH_BASE_HDR_LEN)
1280                 return -ENOBUFS;
1281
1282         nla_for_each_nested(a, attr, rem) {
1283                 int type = nla_type(a);
1284
1285                 switch (type) {
1286                 case OVS_NSH_KEY_ATTR_BASE: {
1287                         const struct ovs_nsh_key_base *base = nla_data(a);
1288
1289                         flags = base->flags;
1290                         ttl = base->ttl;
1291                         nh->np = base->np;
1292                         nh->mdtype = base->mdtype;
1293                         nh->path_hdr = base->path_hdr;
1294                         break;
1295                 }
1296                 case OVS_NSH_KEY_ATTR_MD1:
1297                         mdlen = nla_len(a);
1298                         if (mdlen > size - NSH_BASE_HDR_LEN)
1299                                 return -ENOBUFS;
1300                         memcpy(&nh->md1, nla_data(a), mdlen);
1301                         break;
1302
1303                 case OVS_NSH_KEY_ATTR_MD2:
1304                         mdlen = nla_len(a);
1305                         if (mdlen > size - NSH_BASE_HDR_LEN)
1306                                 return -ENOBUFS;
1307                         memcpy(&nh->md2, nla_data(a), mdlen);
1308                         break;
1309
1310                 default:
1311                         return -EINVAL;
1312                 }
1313         }
1314
1315         /* nsh header length  = NSH_BASE_HDR_LEN + mdlen */
1316         nh->ver_flags_ttl_len = 0;
1317         nsh_set_flags_ttl_len(nh, flags, ttl, NSH_BASE_HDR_LEN + mdlen);
1318
1319         return 0;
1320 }
1321
1322 int nsh_key_from_nlattr(const struct nlattr *attr,
1323                         struct ovs_key_nsh *nsh, struct ovs_key_nsh *nsh_mask)
1324 {
1325         struct nlattr *a;
1326         int rem;
1327
1328         /* validate_nsh has check this, so we needn't do duplicate check here
1329          */
1330         nla_for_each_nested(a, attr, rem) {
1331                 int type = nla_type(a);
1332
1333                 switch (type) {
1334                 case OVS_NSH_KEY_ATTR_BASE: {
1335                         const struct ovs_nsh_key_base *base = nla_data(a);
1336                         const struct ovs_nsh_key_base *base_mask = base + 1;
1337
1338                         nsh->base = *base;
1339                         nsh_mask->base = *base_mask;
1340                         break;
1341                 }
1342                 case OVS_NSH_KEY_ATTR_MD1: {
1343                         const struct ovs_nsh_key_md1 *md1 = nla_data(a);
1344                         const struct ovs_nsh_key_md1 *md1_mask = md1 + 1;
1345
1346                         memcpy(nsh->context, md1->context, sizeof(*md1));
1347                         memcpy(nsh_mask->context, md1_mask->context,
1348                                sizeof(*md1_mask));
1349                         break;
1350                 }
1351                 case OVS_NSH_KEY_ATTR_MD2:
1352                         /* Not supported yet */
1353                         return -ENOTSUPP;
1354                 default:
1355                         return -EINVAL;
1356                 }
1357         }
1358
1359         return 0;
1360 }
1361
1362 static int nsh_key_put_from_nlattr(const struct nlattr *attr,
1363                                    struct sw_flow_match *match, bool is_mask,
1364                                    bool is_push_nsh, bool log)
1365 {
1366         struct nlattr *a;
1367         int rem;
1368         bool has_base = false;
1369         bool has_md1 = false;
1370         bool has_md2 = false;
1371         u8 mdtype = 0;
1372         int mdlen = 0;
1373
1374         if (WARN_ON(is_push_nsh && is_mask))
1375                 return -EINVAL;
1376
1377         nla_for_each_nested(a, attr, rem) {
1378                 int type = nla_type(a);
1379                 int i;
1380
1381                 if (type > OVS_NSH_KEY_ATTR_MAX) {
1382                         OVS_NLERR(log, "nsh attr %d is out of range max %d",
1383                                   type, OVS_NSH_KEY_ATTR_MAX);
1384                         return -EINVAL;
1385                 }
1386
1387                 if (!check_attr_len(nla_len(a),
1388                                     ovs_nsh_key_attr_lens[type].len)) {
1389                         OVS_NLERR(
1390                             log,
1391                             "nsh attr %d has unexpected len %d expected %d",
1392                             type,
1393                             nla_len(a),
1394                             ovs_nsh_key_attr_lens[type].len
1395                         );
1396                         return -EINVAL;
1397                 }
1398
1399                 switch (type) {
1400                 case OVS_NSH_KEY_ATTR_BASE: {
1401                         const struct ovs_nsh_key_base *base = nla_data(a);
1402
1403                         has_base = true;
1404                         mdtype = base->mdtype;
1405                         SW_FLOW_KEY_PUT(match, nsh.base.flags,
1406                                         base->flags, is_mask);
1407                         SW_FLOW_KEY_PUT(match, nsh.base.ttl,
1408                                         base->ttl, is_mask);
1409                         SW_FLOW_KEY_PUT(match, nsh.base.mdtype,
1410                                         base->mdtype, is_mask);
1411                         SW_FLOW_KEY_PUT(match, nsh.base.np,
1412                                         base->np, is_mask);
1413                         SW_FLOW_KEY_PUT(match, nsh.base.path_hdr,
1414                                         base->path_hdr, is_mask);
1415                         break;
1416                 }
1417                 case OVS_NSH_KEY_ATTR_MD1: {
1418                         const struct ovs_nsh_key_md1 *md1 = nla_data(a);
1419
1420                         has_md1 = true;
1421                         for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++)
1422                                 SW_FLOW_KEY_PUT(match, nsh.context[i],
1423                                                 md1->context[i], is_mask);
1424                         break;
1425                 }
1426                 case OVS_NSH_KEY_ATTR_MD2:
1427                         if (!is_push_nsh) /* Not supported MD type 2 yet */
1428                                 return -ENOTSUPP;
1429
1430                         has_md2 = true;
1431                         mdlen = nla_len(a);
1432                         if (mdlen > NSH_CTX_HDRS_MAX_LEN || mdlen <= 0) {
1433                                 OVS_NLERR(
1434                                     log,
1435                                     "Invalid MD length %d for MD type %d",
1436                                     mdlen,
1437                                     mdtype
1438                                 );
1439                                 return -EINVAL;
1440                         }
1441                         break;
1442                 default:
1443                         OVS_NLERR(log, "Unknown nsh attribute %d",
1444                                   type);
1445                         return -EINVAL;
1446                 }
1447         }
1448
1449         if (rem > 0) {
1450                 OVS_NLERR(log, "nsh attribute has %d unknown bytes.", rem);
1451                 return -EINVAL;
1452         }
1453
1454         if (has_md1 && has_md2) {
1455                 OVS_NLERR(
1456                     1,
1457                     "invalid nsh attribute: md1 and md2 are exclusive."
1458                 );
1459                 return -EINVAL;
1460         }
1461
1462         if (!is_mask) {
1463                 if ((has_md1 && mdtype != NSH_M_TYPE1) ||
1464                     (has_md2 && mdtype != NSH_M_TYPE2)) {
1465                         OVS_NLERR(1, "nsh attribute has unmatched MD type %d.",
1466                                   mdtype);
1467                         return -EINVAL;
1468                 }
1469
1470                 if (is_push_nsh &&
1471                     (!has_base || (!has_md1 && !has_md2))) {
1472                         OVS_NLERR(
1473                             1,
1474                             "push_nsh: missing base or metadata attributes"
1475                         );
1476                         return -EINVAL;
1477                 }
1478         }
1479
1480         return 0;
1481 }
1482
1483 static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
1484                                 u64 attrs, const struct nlattr **a,
1485                                 bool is_mask, bool log)
1486 {
1487         int err;
1488
1489         err = metadata_from_nlattrs(net, match, &attrs, a, is_mask, log);
1490         if (err)
1491                 return err;
1492
1493         if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) {
1494                 const struct ovs_key_ethernet *eth_key;
1495
1496                 eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
1497                 SW_FLOW_KEY_MEMCPY(match, eth.src,
1498                                 eth_key->eth_src, ETH_ALEN, is_mask);
1499                 SW_FLOW_KEY_MEMCPY(match, eth.dst,
1500                                 eth_key->eth_dst, ETH_ALEN, is_mask);
1501                 attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
1502
1503                 if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
1504                         /* VLAN attribute is always parsed before getting here since it
1505                          * may occur multiple times.
1506                          */
1507                         OVS_NLERR(log, "VLAN attribute unexpected.");
1508                         return -EINVAL;
1509                 }
1510
1511                 if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
1512                         err = parse_eth_type_from_nlattrs(match, &attrs, a, is_mask,
1513                                                           log);
1514                         if (err)
1515                                 return err;
1516                 } else if (!is_mask) {
1517                         SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
1518                 }
1519         } else if (!match->key->eth.type) {
1520                 OVS_NLERR(log, "Either Ethernet header or EtherType is required.");
1521                 return -EINVAL;
1522         }
1523
1524         if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
1525                 const struct ovs_key_ipv4 *ipv4_key;
1526
1527                 ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
1528                 if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
1529                         OVS_NLERR(log, "IPv4 frag type %d is out of range max %d",
1530                                   ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
1531                         return -EINVAL;
1532                 }
1533                 SW_FLOW_KEY_PUT(match, ip.proto,
1534                                 ipv4_key->ipv4_proto, is_mask);
1535                 SW_FLOW_KEY_PUT(match, ip.tos,
1536                                 ipv4_key->ipv4_tos, is_mask);
1537                 SW_FLOW_KEY_PUT(match, ip.ttl,
1538                                 ipv4_key->ipv4_ttl, is_mask);
1539                 SW_FLOW_KEY_PUT(match, ip.frag,
1540                                 ipv4_key->ipv4_frag, is_mask);
1541                 SW_FLOW_KEY_PUT(match, ipv4.addr.src,
1542                                 ipv4_key->ipv4_src, is_mask);
1543                 SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
1544                                 ipv4_key->ipv4_dst, is_mask);
1545                 attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
1546         }
1547
1548         if (attrs & (1 << OVS_KEY_ATTR_IPV6)) {
1549                 const struct ovs_key_ipv6 *ipv6_key;
1550
1551                 ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
1552                 if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
1553                         OVS_NLERR(log, "IPv6 frag type %d is out of range max %d",
1554                                   ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
1555                         return -EINVAL;
1556                 }
1557
1558                 if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) {
1559                         OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x)",
1560                                   ntohl(ipv6_key->ipv6_label), (1 << 20) - 1);
1561                         return -EINVAL;
1562                 }
1563
1564                 SW_FLOW_KEY_PUT(match, ipv6.label,
1565                                 ipv6_key->ipv6_label, is_mask);
1566                 SW_FLOW_KEY_PUT(match, ip.proto,
1567                                 ipv6_key->ipv6_proto, is_mask);
1568                 SW_FLOW_KEY_PUT(match, ip.tos,
1569                                 ipv6_key->ipv6_tclass, is_mask);
1570                 SW_FLOW_KEY_PUT(match, ip.ttl,
1571                                 ipv6_key->ipv6_hlimit, is_mask);
1572                 SW_FLOW_KEY_PUT(match, ip.frag,
1573                                 ipv6_key->ipv6_frag, is_mask);
1574                 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
1575                                 ipv6_key->ipv6_src,
1576                                 sizeof(match->key->ipv6.addr.src),
1577                                 is_mask);
1578                 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
1579                                 ipv6_key->ipv6_dst,
1580                                 sizeof(match->key->ipv6.addr.dst),
1581                                 is_mask);
1582
1583                 attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
1584         }
1585
1586         if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
1587                 const struct ovs_key_arp *arp_key;
1588
1589                 arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
1590                 if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
1591                         OVS_NLERR(log, "Unknown ARP opcode (opcode=%d).",
1592                                   arp_key->arp_op);
1593                         return -EINVAL;
1594                 }
1595
1596                 SW_FLOW_KEY_PUT(match, ipv4.addr.src,
1597                                 arp_key->arp_sip, is_mask);
1598                 SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
1599                         arp_key->arp_tip, is_mask);
1600                 SW_FLOW_KEY_PUT(match, ip.proto,
1601                                 ntohs(arp_key->arp_op), is_mask);
1602                 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
1603                                 arp_key->arp_sha, ETH_ALEN, is_mask);
1604                 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
1605                                 arp_key->arp_tha, ETH_ALEN, is_mask);
1606
1607                 attrs &= ~(1 << OVS_KEY_ATTR_ARP);
1608         }
1609
1610         if (attrs & (1 << OVS_KEY_ATTR_NSH)) {
1611                 if (nsh_key_put_from_nlattr(a[OVS_KEY_ATTR_NSH], match,
1612                                             is_mask, false, log) < 0)
1613                         return -EINVAL;
1614                 attrs &= ~(1 << OVS_KEY_ATTR_NSH);
1615         }
1616
1617         if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
1618                 const struct ovs_key_mpls *mpls_key;
1619
1620                 mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
1621                 SW_FLOW_KEY_PUT(match, mpls.top_lse,
1622                                 mpls_key->mpls_lse, is_mask);
1623
1624                 attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
1625          }
1626
1627         if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
1628                 const struct ovs_key_tcp *tcp_key;
1629
1630                 tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
1631                 SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask);
1632                 SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask);
1633                 attrs &= ~(1 << OVS_KEY_ATTR_TCP);
1634         }
1635
1636         if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) {
1637                 SW_FLOW_KEY_PUT(match, tp.flags,
1638                                 nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
1639                                 is_mask);
1640                 attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS);
1641         }
1642
1643         if (attrs & (1 << OVS_KEY_ATTR_UDP)) {
1644                 const struct ovs_key_udp *udp_key;
1645
1646                 udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
1647                 SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask);
1648                 SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask);
1649                 attrs &= ~(1 << OVS_KEY_ATTR_UDP);
1650         }
1651
1652         if (attrs & (1 << OVS_KEY_ATTR_SCTP)) {
1653                 const struct ovs_key_sctp *sctp_key;
1654
1655                 sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
1656                 SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask);
1657                 SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask);
1658                 attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
1659         }
1660
1661         if (attrs & (1 << OVS_KEY_ATTR_ICMP)) {
1662                 const struct ovs_key_icmp *icmp_key;
1663
1664                 icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
1665                 SW_FLOW_KEY_PUT(match, tp.src,
1666                                 htons(icmp_key->icmp_type), is_mask);
1667                 SW_FLOW_KEY_PUT(match, tp.dst,
1668                                 htons(icmp_key->icmp_code), is_mask);
1669                 attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
1670         }
1671
1672         if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) {
1673                 const struct ovs_key_icmpv6 *icmpv6_key;
1674
1675                 icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
1676                 SW_FLOW_KEY_PUT(match, tp.src,
1677                                 htons(icmpv6_key->icmpv6_type), is_mask);
1678                 SW_FLOW_KEY_PUT(match, tp.dst,
1679                                 htons(icmpv6_key->icmpv6_code), is_mask);
1680                 attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
1681         }
1682
1683         if (attrs & (1 << OVS_KEY_ATTR_ND)) {
1684                 const struct ovs_key_nd *nd_key;
1685
1686                 nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
1687                 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
1688                         nd_key->nd_target,
1689                         sizeof(match->key->ipv6.nd.target),
1690                         is_mask);
1691                 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
1692                         nd_key->nd_sll, ETH_ALEN, is_mask);
1693                 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
1694                                 nd_key->nd_tll, ETH_ALEN, is_mask);
1695                 attrs &= ~(1 << OVS_KEY_ATTR_ND);
1696         }
1697
1698         if (attrs != 0) {
1699                 OVS_NLERR(log, "Unknown key attributes %llx",
1700                           (unsigned long long)attrs);
1701                 return -EINVAL;
1702         }
1703
1704         return 0;
1705 }
1706
1707 static void nlattr_set(struct nlattr *attr, u8 val,
1708                        const struct ovs_len_tbl *tbl)
1709 {
1710         struct nlattr *nla;
1711         int rem;
1712
1713         /* The nlattr stream should already have been validated */
1714         nla_for_each_nested(nla, attr, rem) {
1715                 if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED)
1716                         nlattr_set(nla, val, tbl[nla_type(nla)].next ? : tbl);
1717                 else
1718                         memset(nla_data(nla), val, nla_len(nla));
1719
1720                 if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE)
1721                         *(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK;
1722         }
1723 }
1724
1725 static void mask_set_nlattr(struct nlattr *attr, u8 val)
1726 {
1727         nlattr_set(attr, val, ovs_key_lens);
1728 }
1729
1730 /**
1731  * ovs_nla_get_match - parses Netlink attributes into a flow key and
1732  * mask. In case the 'mask' is NULL, the flow is treated as exact match
1733  * flow. Otherwise, it is treated as a wildcarded flow, except the mask
1734  * does not include any don't care bit.
1735  * @net: Used to determine per-namespace field support.
1736  * @match: receives the extracted flow match information.
1737  * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
1738  * sequence. The fields should of the packet that triggered the creation
1739  * of this flow.
1740  * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
1741  * attribute specifies the mask field of the wildcarded flow.
1742  * @log: Boolean to allow kernel error logging.  Normally true, but when
1743  * probing for feature compatibility this should be passed in as false to
1744  * suppress unnecessary error logging.
1745  */
1746 int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
1747                       const struct nlattr *nla_key,
1748                       const struct nlattr *nla_mask,
1749                       bool log)
1750 {
1751         const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
1752         struct nlattr *newmask = NULL;
1753         u64 key_attrs = 0;
1754         u64 mask_attrs = 0;
1755         int err;
1756
1757         err = parse_flow_nlattrs(nla_key, a, &key_attrs, log);
1758         if (err)
1759                 return err;
1760
1761         err = parse_vlan_from_nlattrs(match, &key_attrs, a, false, log);
1762         if (err)
1763                 return err;
1764
1765         err = ovs_key_from_nlattrs(net, match, key_attrs, a, false, log);
1766         if (err)
1767                 return err;
1768
1769         if (match->mask) {
1770                 if (!nla_mask) {
1771                         /* Create an exact match mask. We need to set to 0xff
1772                          * all the 'match->mask' fields that have been touched
1773                          * in 'match->key'. We cannot simply memset
1774                          * 'match->mask', because padding bytes and fields not
1775                          * specified in 'match->key' should be left to 0.
1776                          * Instead, we use a stream of netlink attributes,
1777                          * copied from 'key' and set to 0xff.
1778                          * ovs_key_from_nlattrs() will take care of filling
1779                          * 'match->mask' appropriately.
1780                          */
1781                         newmask = kmemdup(nla_key,
1782                                           nla_total_size(nla_len(nla_key)),
1783                                           GFP_KERNEL);
1784                         if (!newmask)
1785                                 return -ENOMEM;
1786
1787                         mask_set_nlattr(newmask, 0xff);
1788
1789                         /* The userspace does not send tunnel attributes that
1790                          * are 0, but we should not wildcard them nonetheless.
1791                          */
1792                         if (match->key->tun_proto)
1793                                 SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
1794                                                          0xff, true);
1795
1796                         nla_mask = newmask;
1797                 }
1798
1799                 err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs, log);
1800                 if (err)
1801                         goto free_newmask;
1802
1803                 /* Always match on tci. */
1804                 SW_FLOW_KEY_PUT(match, eth.vlan.tci, htons(0xffff), true);
1805                 SW_FLOW_KEY_PUT(match, eth.cvlan.tci, htons(0xffff), true);
1806
1807                 err = parse_vlan_from_nlattrs(match, &mask_attrs, a, true, log);
1808                 if (err)
1809                         goto free_newmask;
1810
1811                 err = ovs_key_from_nlattrs(net, match, mask_attrs, a, true,
1812                                            log);
1813                 if (err)
1814                         goto free_newmask;
1815         }
1816
1817         if (!match_validate(match, key_attrs, mask_attrs, log))
1818                 err = -EINVAL;
1819
1820 free_newmask:
1821         kfree(newmask);
1822         return err;
1823 }
1824
1825 static size_t get_ufid_len(const struct nlattr *attr, bool log)
1826 {
1827         size_t len;
1828
1829         if (!attr)
1830                 return 0;
1831
1832         len = nla_len(attr);
1833         if (len < 1 || len > MAX_UFID_LENGTH) {
1834                 OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)",
1835                           nla_len(attr), MAX_UFID_LENGTH);
1836                 return 0;
1837         }
1838
1839         return len;
1840 }
1841
1842 /* Initializes 'flow->ufid', returning true if 'attr' contains a valid UFID,
1843  * or false otherwise.
1844  */
1845 bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr,
1846                       bool log)
1847 {
1848         sfid->ufid_len = get_ufid_len(attr, log);
1849         if (sfid->ufid_len)
1850                 memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len);
1851
1852         return sfid->ufid_len;
1853 }
1854
1855 int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
1856                            const struct sw_flow_key *key, bool log)
1857 {
1858         struct sw_flow_key *new_key;
1859
1860         if (ovs_nla_get_ufid(sfid, ufid, log))
1861                 return 0;
1862
1863         /* If UFID was not provided, use unmasked key. */
1864         new_key = kmalloc(sizeof(*new_key), GFP_KERNEL);
1865         if (!new_key)
1866                 return -ENOMEM;
1867         memcpy(new_key, key, sizeof(*key));
1868         sfid->unmasked_key = new_key;
1869
1870         return 0;
1871 }
1872
1873 u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
1874 {
1875         return attr ? nla_get_u32(attr) : 0;
1876 }
1877
1878 /**
1879  * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
1880  * @net: Network namespace.
1881  * @key: Receives extracted in_port, priority, tun_key, skb_mark and conntrack
1882  * metadata.
1883  * @a: Array of netlink attributes holding parsed %OVS_KEY_ATTR_* Netlink
1884  * attributes.
1885  * @attrs: Bit mask for the netlink attributes included in @a.
1886  * @log: Boolean to allow kernel error logging.  Normally true, but when
1887  * probing for feature compatibility this should be passed in as false to
1888  * suppress unnecessary error logging.
1889  *
1890  * This parses a series of Netlink attributes that form a flow key, which must
1891  * take the same form accepted by flow_from_nlattrs(), but only enough of it to
1892  * get the metadata, that is, the parts of the flow key that cannot be
1893  * extracted from the packet itself.
1894  *
1895  * This must be called before the packet key fields are filled in 'key'.
1896  */
1897
1898 int ovs_nla_get_flow_metadata(struct net *net,
1899                               const struct nlattr *a[OVS_KEY_ATTR_MAX + 1],
1900                               u64 attrs, struct sw_flow_key *key, bool log)
1901 {
1902         struct sw_flow_match match;
1903
1904         memset(&match, 0, sizeof(match));
1905         match.key = key;
1906
1907         key->ct_state = 0;
1908         key->ct_zone = 0;
1909         key->ct_orig_proto = 0;
1910         memset(&key->ct, 0, sizeof(key->ct));
1911         memset(&key->ipv4.ct_orig, 0, sizeof(key->ipv4.ct_orig));
1912         memset(&key->ipv6.ct_orig, 0, sizeof(key->ipv6.ct_orig));
1913
1914         key->phy.in_port = DP_MAX_PORTS;
1915
1916         return metadata_from_nlattrs(net, &match, &attrs, a, false, log);
1917 }
1918
1919 static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh,
1920                             bool is_mask)
1921 {
1922         __be16 eth_type = !is_mask ? vh->tpid : htons(0xffff);
1923
1924         if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
1925             nla_put_be16(skb, OVS_KEY_ATTR_VLAN, vh->tci))
1926                 return -EMSGSIZE;
1927         return 0;
1928 }
1929
1930 static int nsh_key_to_nlattr(const struct ovs_key_nsh *nsh, bool is_mask,
1931                              struct sk_buff *skb)
1932 {
1933         struct nlattr *start;
1934
1935         start = nla_nest_start(skb, OVS_KEY_ATTR_NSH);
1936         if (!start)
1937                 return -EMSGSIZE;
1938
1939         if (nla_put(skb, OVS_NSH_KEY_ATTR_BASE, sizeof(nsh->base), &nsh->base))
1940                 goto nla_put_failure;
1941
1942         if (is_mask || nsh->base.mdtype == NSH_M_TYPE1) {
1943                 if (nla_put(skb, OVS_NSH_KEY_ATTR_MD1,
1944                             sizeof(nsh->context), nsh->context))
1945                         goto nla_put_failure;
1946         }
1947
1948         /* Don't support MD type 2 yet */
1949
1950         nla_nest_end(skb, start);
1951
1952         return 0;
1953
1954 nla_put_failure:
1955         return -EMSGSIZE;
1956 }
1957
1958 static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
1959                              const struct sw_flow_key *output, bool is_mask,
1960                              struct sk_buff *skb)
1961 {
1962         struct ovs_key_ethernet *eth_key;
1963         struct nlattr *nla;
1964         struct nlattr *encap = NULL;
1965         struct nlattr *in_encap = NULL;
1966
1967         if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
1968                 goto nla_put_failure;
1969
1970         if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash))
1971                 goto nla_put_failure;
1972
1973         if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
1974                 goto nla_put_failure;
1975
1976         if ((swkey->tun_proto || is_mask)) {
1977                 const void *opts = NULL;
1978
1979                 if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
1980                         opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);
1981
1982                 if (ip_tun_to_nlattr(skb, &output->tun_key, opts,
1983                                      swkey->tun_opts_len, swkey->tun_proto))
1984                         goto nla_put_failure;
1985         }
1986
1987         if (swkey->phy.in_port == DP_MAX_PORTS) {
1988                 if (is_mask && (output->phy.in_port == 0xffff))
1989                         if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
1990                                 goto nla_put_failure;
1991         } else {
1992                 u16 upper_u16;
1993                 upper_u16 = !is_mask ? 0 : 0xffff;
1994
1995                 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
1996                                 (upper_u16 << 16) | output->phy.in_port))
1997                         goto nla_put_failure;
1998         }
1999
2000         if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
2001                 goto nla_put_failure;
2002
2003         if (ovs_ct_put_key(swkey, output, skb))
2004                 goto nla_put_failure;
2005
2006         if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) {
2007                 nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
2008                 if (!nla)
2009                         goto nla_put_failure;
2010
2011                 eth_key = nla_data(nla);
2012                 ether_addr_copy(eth_key->eth_src, output->eth.src);
2013                 ether_addr_copy(eth_key->eth_dst, output->eth.dst);
2014
2015                 if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
2016                         if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
2017                                 goto nla_put_failure;
2018                         encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
2019                         if (!swkey->eth.vlan.tci)
2020                                 goto unencap;
2021
2022                         if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
2023                                 if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask))
2024                                         goto nla_put_failure;
2025                                 in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
2026                                 if (!swkey->eth.cvlan.tci)
2027                                         goto unencap;
2028                         }
2029                 }
2030
2031                 if (swkey->eth.type == htons(ETH_P_802_2)) {
2032                         /*
2033                         * Ethertype 802.2 is represented in the netlink with omitted
2034                         * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
2035                         * 0xffff in the mask attribute.  Ethertype can also
2036                         * be wildcarded.
2037                         */
2038                         if (is_mask && output->eth.type)
2039                                 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
2040                                                         output->eth.type))
2041                                         goto nla_put_failure;
2042                         goto unencap;
2043                 }
2044         }
2045
2046         if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
2047                 goto nla_put_failure;
2048
2049         if (eth_type_vlan(swkey->eth.type)) {
2050                 /* There are 3 VLAN tags, we don't know anything about the rest
2051                  * of the packet, so truncate here.
2052                  */
2053                 WARN_ON_ONCE(!(encap && in_encap));
2054                 goto unencap;
2055         }
2056
2057         if (swkey->eth.type == htons(ETH_P_IP)) {
2058                 struct ovs_key_ipv4 *ipv4_key;
2059
2060                 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
2061                 if (!nla)
2062                         goto nla_put_failure;
2063                 ipv4_key = nla_data(nla);
2064                 ipv4_key->ipv4_src = output->ipv4.addr.src;
2065                 ipv4_key->ipv4_dst = output->ipv4.addr.dst;
2066                 ipv4_key->ipv4_proto = output->ip.proto;
2067                 ipv4_key->ipv4_tos = output->ip.tos;
2068                 ipv4_key->ipv4_ttl = output->ip.ttl;
2069                 ipv4_key->ipv4_frag = output->ip.frag;
2070         } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
2071                 struct ovs_key_ipv6 *ipv6_key;
2072
2073                 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
2074                 if (!nla)
2075                         goto nla_put_failure;
2076                 ipv6_key = nla_data(nla);
2077                 memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
2078                                 sizeof(ipv6_key->ipv6_src));
2079                 memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
2080                                 sizeof(ipv6_key->ipv6_dst));
2081                 ipv6_key->ipv6_label = output->ipv6.label;
2082                 ipv6_key->ipv6_proto = output->ip.proto;
2083                 ipv6_key->ipv6_tclass = output->ip.tos;
2084                 ipv6_key->ipv6_hlimit = output->ip.ttl;
2085                 ipv6_key->ipv6_frag = output->ip.frag;
2086         } else if (swkey->eth.type == htons(ETH_P_NSH)) {
2087                 if (nsh_key_to_nlattr(&output->nsh, is_mask, skb))
2088                         goto nla_put_failure;
2089         } else if (swkey->eth.type == htons(ETH_P_ARP) ||
2090                    swkey->eth.type == htons(ETH_P_RARP)) {
2091                 struct ovs_key_arp *arp_key;
2092
2093                 nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
2094                 if (!nla)
2095                         goto nla_put_failure;
2096                 arp_key = nla_data(nla);
2097                 memset(arp_key, 0, sizeof(struct ovs_key_arp));
2098                 arp_key->arp_sip = output->ipv4.addr.src;
2099                 arp_key->arp_tip = output->ipv4.addr.dst;
2100                 arp_key->arp_op = htons(output->ip.proto);
2101                 ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
2102                 ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
2103         } else if (eth_p_mpls(swkey->eth.type)) {
2104                 struct ovs_key_mpls *mpls_key;
2105
2106                 nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
2107                 if (!nla)
2108                         goto nla_put_failure;
2109                 mpls_key = nla_data(nla);
2110                 mpls_key->mpls_lse = output->mpls.top_lse;
2111         }
2112
2113         if ((swkey->eth.type == htons(ETH_P_IP) ||
2114              swkey->eth.type == htons(ETH_P_IPV6)) &&
2115              swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
2116
2117                 if (swkey->ip.proto == IPPROTO_TCP) {
2118                         struct ovs_key_tcp *tcp_key;
2119
2120                         nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
2121                         if (!nla)
2122                                 goto nla_put_failure;
2123                         tcp_key = nla_data(nla);
2124                         tcp_key->tcp_src = output->tp.src;
2125                         tcp_key->tcp_dst = output->tp.dst;
2126                         if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
2127                                          output->tp.flags))
2128                                 goto nla_put_failure;
2129                 } else if (swkey->ip.proto == IPPROTO_UDP) {
2130                         struct ovs_key_udp *udp_key;
2131
2132                         nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
2133                         if (!nla)
2134                                 goto nla_put_failure;
2135                         udp_key = nla_data(nla);
2136                         udp_key->udp_src = output->tp.src;
2137                         udp_key->udp_dst = output->tp.dst;
2138                 } else if (swkey->ip.proto == IPPROTO_SCTP) {
2139                         struct ovs_key_sctp *sctp_key;
2140
2141                         nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
2142                         if (!nla)
2143                                 goto nla_put_failure;
2144                         sctp_key = nla_data(nla);
2145                         sctp_key->sctp_src = output->tp.src;
2146                         sctp_key->sctp_dst = output->tp.dst;
2147                 } else if (swkey->eth.type == htons(ETH_P_IP) &&
2148                            swkey->ip.proto == IPPROTO_ICMP) {
2149                         struct ovs_key_icmp *icmp_key;
2150
2151                         nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
2152                         if (!nla)
2153                                 goto nla_put_failure;
2154                         icmp_key = nla_data(nla);
2155                         icmp_key->icmp_type = ntohs(output->tp.src);
2156                         icmp_key->icmp_code = ntohs(output->tp.dst);
2157                 } else if (swkey->eth.type == htons(ETH_P_IPV6) &&
2158                            swkey->ip.proto == IPPROTO_ICMPV6) {
2159                         struct ovs_key_icmpv6 *icmpv6_key;
2160
2161                         nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
2162                                                 sizeof(*icmpv6_key));
2163                         if (!nla)
2164                                 goto nla_put_failure;
2165                         icmpv6_key = nla_data(nla);
2166                         icmpv6_key->icmpv6_type = ntohs(output->tp.src);
2167                         icmpv6_key->icmpv6_code = ntohs(output->tp.dst);
2168
2169                         if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
2170                             icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
2171                                 struct ovs_key_nd *nd_key;
2172
2173                                 nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
2174                                 if (!nla)
2175                                         goto nla_put_failure;
2176                                 nd_key = nla_data(nla);
2177                                 memcpy(nd_key->nd_target, &output->ipv6.nd.target,
2178                                                         sizeof(nd_key->nd_target));
2179                                 ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll);
2180                                 ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll);
2181                         }
2182                 }
2183         }
2184
2185 unencap:
2186         if (in_encap)
2187                 nla_nest_end(skb, in_encap);
2188         if (encap)
2189                 nla_nest_end(skb, encap);
2190
2191         return 0;
2192
2193 nla_put_failure:
2194         return -EMSGSIZE;
2195 }
2196
2197 int ovs_nla_put_key(const struct sw_flow_key *swkey,
2198                     const struct sw_flow_key *output, int attr, bool is_mask,
2199                     struct sk_buff *skb)
2200 {
2201         int err;
2202         struct nlattr *nla;
2203
2204         nla = nla_nest_start(skb, attr);
2205         if (!nla)
2206                 return -EMSGSIZE;
2207         err = __ovs_nla_put_key(swkey, output, is_mask, skb);
2208         if (err)
2209                 return err;
2210         nla_nest_end(skb, nla);
2211
2212         return 0;
2213 }
2214
2215 /* Called with ovs_mutex or RCU read lock. */
2216 int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb)
2217 {
2218         if (ovs_identifier_is_ufid(&flow->id))
2219                 return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len,
2220                                flow->id.ufid);
2221
2222         return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key,
2223                                OVS_FLOW_ATTR_KEY, false, skb);
2224 }
2225
2226 /* Called with ovs_mutex or RCU read lock. */
2227 int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb)
2228 {
2229         return ovs_nla_put_key(&flow->key, &flow->key,
2230                                 OVS_FLOW_ATTR_KEY, false, skb);
2231 }
2232
2233 /* Called with ovs_mutex or RCU read lock. */
2234 int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb)
2235 {
2236         return ovs_nla_put_key(&flow->key, &flow->mask->key,
2237                                 OVS_FLOW_ATTR_MASK, true, skb);
2238 }
2239
2240 #define MAX_ACTIONS_BUFSIZE     (32 * 1024)
2241
2242 static struct sw_flow_actions *nla_alloc_flow_actions(int size)
2243 {
2244         struct sw_flow_actions *sfa;
2245
2246         WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE);
2247
2248         sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
2249         if (!sfa)
2250                 return ERR_PTR(-ENOMEM);
2251
2252         sfa->actions_len = 0;
2253         return sfa;
2254 }
2255
2256 static void ovs_nla_free_set_action(const struct nlattr *a)
2257 {
2258         const struct nlattr *ovs_key = nla_data(a);
2259         struct ovs_tunnel_info *ovs_tun;
2260
2261         switch (nla_type(ovs_key)) {
2262         case OVS_KEY_ATTR_TUNNEL_INFO:
2263                 ovs_tun = nla_data(ovs_key);
2264                 dst_release((struct dst_entry *)ovs_tun->tun_dst);
2265                 break;
2266         }
2267 }
2268
2269 void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
2270 {
2271         const struct nlattr *a;
2272         int rem;
2273
2274         if (!sf_acts)
2275                 return;
2276
2277         nla_for_each_attr(a, sf_acts->actions, sf_acts->actions_len, rem) {
2278                 switch (nla_type(a)) {
2279                 case OVS_ACTION_ATTR_SET:
2280                         ovs_nla_free_set_action(a);
2281                         break;
2282                 case OVS_ACTION_ATTR_CT:
2283                         ovs_ct_free_action(a);
2284                         break;
2285                 }
2286         }
2287
2288         kfree(sf_acts);
2289 }
2290
2291 static void __ovs_nla_free_flow_actions(struct rcu_head *head)
2292 {
2293         ovs_nla_free_flow_actions(container_of(head, struct sw_flow_actions, rcu));
2294 }
2295
2296 /* Schedules 'sf_acts' to be freed after the next RCU grace period.
2297  * The caller must hold rcu_read_lock for this to be sensible. */
2298 void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *sf_acts)
2299 {
2300         call_rcu(&sf_acts->rcu, __ovs_nla_free_flow_actions);
2301 }
2302
2303 static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
2304                                        int attr_len, bool log)
2305 {
2306
2307         struct sw_flow_actions *acts;
2308         int new_acts_size;
2309         int req_size = NLA_ALIGN(attr_len);
2310         int next_offset = offsetof(struct sw_flow_actions, actions) +
2311                                         (*sfa)->actions_len;
2312
2313         if (req_size <= (ksize(*sfa) - next_offset))
2314                 goto out;
2315
2316         new_acts_size = ksize(*sfa) * 2;
2317
2318         if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
2319                 if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) {
2320                         OVS_NLERR(log, "Flow action size exceeds max %u",
2321                                   MAX_ACTIONS_BUFSIZE);
2322                         return ERR_PTR(-EMSGSIZE);
2323                 }
2324                 new_acts_size = MAX_ACTIONS_BUFSIZE;
2325         }
2326
2327         acts = nla_alloc_flow_actions(new_acts_size);
2328         if (IS_ERR(acts))
2329                 return (void *)acts;
2330
2331         memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
2332         acts->actions_len = (*sfa)->actions_len;
2333         acts->orig_len = (*sfa)->orig_len;
2334         kfree(*sfa);
2335         *sfa = acts;
2336
2337 out:
2338         (*sfa)->actions_len += req_size;
2339         return  (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
2340 }
2341
2342 static struct nlattr *__add_action(struct sw_flow_actions **sfa,
2343                                    int attrtype, void *data, int len, bool log)
2344 {
2345         struct nlattr *a;
2346
2347         a = reserve_sfa_size(sfa, nla_attr_size(len), log);
2348         if (IS_ERR(a))
2349                 return a;
2350
2351         a->nla_type = attrtype;
2352         a->nla_len = nla_attr_size(len);
2353
2354         if (data)
2355                 memcpy(nla_data(a), data, len);
2356         memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
2357
2358         return a;
2359 }
2360
/* Append an attribute to '*sfa' (see __add_action()) and report only
 * success or failure: 0, or a negative errno.
 */
int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype, void *data,
                       int len, bool log)
{
        return PTR_ERR_OR_ZERO(__add_action(sfa, attrtype, data, len, log));
}
2370
2371 static inline int add_nested_action_start(struct sw_flow_actions **sfa,
2372                                           int attrtype, bool log)
2373 {
2374         int used = (*sfa)->actions_len;
2375         int err;
2376
2377         err = ovs_nla_add_action(sfa, attrtype, NULL, 0, log);
2378         if (err)
2379                 return err;
2380
2381         return used;
2382 }
2383
2384 static inline void add_nested_action_end(struct sw_flow_actions *sfa,
2385                                          int st_offset)
2386 {
2387         struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions +
2388                                                                st_offset);
2389
2390         a->nla_len = sfa->actions_len - st_offset;
2391 }
2392
2393 static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2394                                   const struct sw_flow_key *key,
2395                                   struct sw_flow_actions **sfa,
2396                                   __be16 eth_type, __be16 vlan_tci, bool log);
2397
2398 static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
2399                                     const struct sw_flow_key *key,
2400                                     struct sw_flow_actions **sfa,
2401                                     __be16 eth_type, __be16 vlan_tci,
2402                                     bool log, bool last)
2403 {
2404         const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
2405         const struct nlattr *probability, *actions;
2406         const struct nlattr *a;
2407         int rem, start, err;
2408         struct sample_arg arg;
2409
2410         memset(attrs, 0, sizeof(attrs));
2411         nla_for_each_nested(a, attr, rem) {
2412                 int type = nla_type(a);
2413                 if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
2414                         return -EINVAL;
2415                 attrs[type] = a;
2416         }
2417         if (rem)
2418                 return -EINVAL;
2419
2420         probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
2421         if (!probability || nla_len(probability) != sizeof(u32))
2422                 return -EINVAL;
2423
2424         actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
2425         if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
2426                 return -EINVAL;
2427
2428         /* validation done, copy sample action. */
2429         start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
2430         if (start < 0)
2431                 return start;
2432
2433         /* When both skb and flow may be changed, put the sample
2434          * into a deferred fifo. On the other hand, if only skb
2435          * may be modified, the actions can be executed in place.
2436          *
2437          * Do this analysis at the flow installation time.
2438          * Set 'clone_action->exec' to true if the actions can be
2439          * executed without being deferred.
2440          *
2441          * If the sample is the last action, it can always be excuted
2442          * rather than deferred.
2443          */
2444         arg.exec = last || !actions_may_change_flow(actions);
2445         arg.probability = nla_get_u32(probability);
2446
2447         err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
2448                                  log);
2449         if (err)
2450                 return err;
2451
2452         err = __ovs_nla_copy_actions(net, actions, key, sfa,
2453                                      eth_type, vlan_tci, log);
2454
2455         if (err)
2456                 return err;
2457
2458         add_nested_action_end(*sfa, start);
2459
2460         return 0;
2461 }
2462
2463 static int validate_and_copy_clone(struct net *net,
2464                                    const struct nlattr *attr,
2465                                    const struct sw_flow_key *key,
2466                                    struct sw_flow_actions **sfa,
2467                                    __be16 eth_type, __be16 vlan_tci,
2468                                    bool log, bool last)
2469 {
2470         int start, err;
2471         u32 exec;
2472
2473         if (nla_len(attr) && nla_len(attr) < NLA_HDRLEN)
2474                 return -EINVAL;
2475
2476         start = add_nested_action_start(sfa, OVS_ACTION_ATTR_CLONE, log);
2477         if (start < 0)
2478                 return start;
2479
2480         exec = last || !actions_may_change_flow(attr);
2481
2482         err = ovs_nla_add_action(sfa, OVS_CLONE_ATTR_EXEC, &exec,
2483                                  sizeof(exec), log);
2484         if (err)
2485                 return err;
2486
2487         err = __ovs_nla_copy_actions(net, attr, key, sfa,
2488                                      eth_type, vlan_tci, log);
2489         if (err)
2490                 return err;
2491
2492         add_nested_action_end(*sfa, start);
2493
2494         return 0;
2495 }
2496
2497 void ovs_match_init(struct sw_flow_match *match,
2498                     struct sw_flow_key *key,
2499                     bool reset_key,
2500                     struct sw_flow_mask *mask)
2501 {
2502         memset(match, 0, sizeof(*match));
2503         match->key = key;
2504         match->mask = mask;
2505
2506         if (reset_key)
2507                 memset(key, 0, sizeof(*key));
2508
2509         if (mask) {
2510                 memset(&mask->key, 0, sizeof(mask->key));
2511                 mask->range.start = mask->range.end = 0;
2512         }
2513 }
2514
2515 static int validate_geneve_opts(struct sw_flow_key *key)
2516 {
2517         struct geneve_opt *option;
2518         int opts_len = key->tun_opts_len;
2519         bool crit_opt = false;
2520
2521         option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len);
2522         while (opts_len > 0) {
2523                 int len;
2524
2525                 if (opts_len < sizeof(*option))
2526                         return -EINVAL;
2527
2528                 len = sizeof(*option) + option->length * 4;
2529                 if (len > opts_len)
2530                         return -EINVAL;
2531
2532                 crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);
2533
2534                 option = (struct geneve_opt *)((u8 *)option + len);
2535                 opts_len -= len;
2536         }
2537
2538         key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
2539
2540         return 0;
2541 }
2542
2543 static int validate_and_copy_set_tun(const struct nlattr *attr,
2544                                      struct sw_flow_actions **sfa, bool log)
2545 {
2546         struct sw_flow_match match;
2547         struct sw_flow_key key;
2548         struct metadata_dst *tun_dst;
2549         struct ip_tunnel_info *tun_info;
2550         struct ovs_tunnel_info *ovs_tun;
2551         struct nlattr *a;
2552         int err = 0, start, opts_type;
2553         __be16 dst_opt_type;
2554
2555         dst_opt_type = 0;
2556         ovs_match_init(&match, &key, true, NULL);
2557         opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);
2558         if (opts_type < 0)
2559                 return opts_type;
2560
2561         if (key.tun_opts_len) {
2562                 switch (opts_type) {
2563                 case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
2564                         err = validate_geneve_opts(&key);
2565                         if (err < 0)
2566                                 return err;
2567                         dst_opt_type = TUNNEL_GENEVE_OPT;
2568                         break;
2569                 case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
2570                         dst_opt_type = TUNNEL_VXLAN_OPT;
2571                         break;
2572                 case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
2573                         dst_opt_type = TUNNEL_ERSPAN_OPT;
2574                         break;
2575                 }
2576         }
2577
2578         start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log);
2579         if (start < 0)
2580                 return start;
2581
2582         tun_dst = metadata_dst_alloc(key.tun_opts_len, METADATA_IP_TUNNEL,
2583                                      GFP_KERNEL);
2584
2585         if (!tun_dst)
2586                 return -ENOMEM;
2587
2588         err = dst_cache_init(&tun_dst->u.tun_info.dst_cache, GFP_KERNEL);
2589         if (err) {
2590                 dst_release((struct dst_entry *)tun_dst);
2591                 return err;
2592         }
2593
2594         a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
2595                          sizeof(*ovs_tun), log);
2596         if (IS_ERR(a)) {
2597                 dst_release((struct dst_entry *)tun_dst);
2598                 return PTR_ERR(a);
2599         }
2600
2601         ovs_tun = nla_data(a);
2602         ovs_tun->tun_dst = tun_dst;
2603
2604         tun_info = &tun_dst->u.tun_info;
2605         tun_info->mode = IP_TUNNEL_INFO_TX;
2606         if (key.tun_proto == AF_INET6)
2607                 tun_info->mode |= IP_TUNNEL_INFO_IPV6;
2608         tun_info->key = key.tun_key;
2609
2610         /* We need to store the options in the action itself since
2611          * everything else will go away after flow setup. We can append
2612          * it to tun_info and then point there.
2613          */
2614         ip_tunnel_info_opts_set(tun_info,
2615                                 TUN_METADATA_OPTS(&key, key.tun_opts_len),
2616                                 key.tun_opts_len, dst_opt_type);
2617         add_nested_action_end(*sfa, start);
2618
2619         return err;
2620 }
2621
2622 static bool validate_nsh(const struct nlattr *attr, bool is_mask,
2623                          bool is_push_nsh, bool log)
2624 {
2625         struct sw_flow_match match;
2626         struct sw_flow_key key;
2627         int ret = 0;
2628
2629         ovs_match_init(&match, &key, true, NULL);
2630         ret = nsh_key_put_from_nlattr(attr, &match, is_mask,
2631                                       is_push_nsh, log);
2632         return !ret;
2633 }
2634
2635 /* Return false if there are any non-masked bits set.
2636  * Mask follows data immediately, before any netlink padding.
2637  */
2638 static bool validate_masked(u8 *data, int len)
2639 {
2640         u8 *mask = data + len;
2641
2642         while (len--)
2643                 if (*data++ & ~*mask++)
2644                         return false;
2645
2646         return true;
2647 }
2648
2649 static int validate_set(const struct nlattr *a,
2650                         const struct sw_flow_key *flow_key,
2651                         struct sw_flow_actions **sfa, bool *skip_copy,
2652                         u8 mac_proto, __be16 eth_type, bool masked, bool log)
2653 {
2654         const struct nlattr *ovs_key = nla_data(a);
2655         int key_type = nla_type(ovs_key);
2656         size_t key_len;
2657
2658         /* There can be only one key in a action */
2659         if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
2660                 return -EINVAL;
2661
2662         key_len = nla_len(ovs_key);
2663         if (masked)
2664                 key_len /= 2;
2665
2666         if (key_type > OVS_KEY_ATTR_MAX ||
2667             !check_attr_len(key_len, ovs_key_lens[key_type].len))
2668                 return -EINVAL;
2669
2670         if (masked && !validate_masked(nla_data(ovs_key), key_len))
2671                 return -EINVAL;
2672
2673         switch (key_type) {
2674         const struct ovs_key_ipv4 *ipv4_key;
2675         const struct ovs_key_ipv6 *ipv6_key;
2676         int err;
2677
2678         case OVS_KEY_ATTR_PRIORITY:
2679         case OVS_KEY_ATTR_SKB_MARK:
2680         case OVS_KEY_ATTR_CT_MARK:
2681         case OVS_KEY_ATTR_CT_LABELS:
2682                 break;
2683
2684         case OVS_KEY_ATTR_ETHERNET:
2685                 if (mac_proto != MAC_PROTO_ETHERNET)
2686                         return -EINVAL;
2687                 break;
2688
2689         case OVS_KEY_ATTR_TUNNEL:
2690                 if (masked)
2691                         return -EINVAL; /* Masked tunnel set not supported. */
2692
2693                 *skip_copy = true;
2694                 err = validate_and_copy_set_tun(a, sfa, log);
2695                 if (err)
2696                         return err;
2697                 break;
2698
2699         case OVS_KEY_ATTR_IPV4:
2700                 if (eth_type != htons(ETH_P_IP))
2701                         return -EINVAL;
2702
2703                 ipv4_key = nla_data(ovs_key);
2704
2705                 if (masked) {
2706                         const struct ovs_key_ipv4 *mask = ipv4_key + 1;
2707
2708                         /* Non-writeable fields. */
2709                         if (mask->ipv4_proto || mask->ipv4_frag)
2710                                 return -EINVAL;
2711                 } else {
2712                         if (ipv4_key->ipv4_proto != flow_key->ip.proto)
2713                                 return -EINVAL;
2714
2715                         if (ipv4_key->ipv4_frag != flow_key->ip.frag)
2716                                 return -EINVAL;
2717                 }
2718                 break;
2719
2720         case OVS_KEY_ATTR_IPV6:
2721                 if (eth_type != htons(ETH_P_IPV6))
2722                         return -EINVAL;
2723
2724                 ipv6_key = nla_data(ovs_key);
2725
2726                 if (masked) {
2727                         const struct ovs_key_ipv6 *mask = ipv6_key + 1;
2728
2729                         /* Non-writeable fields. */
2730                         if (mask->ipv6_proto || mask->ipv6_frag)
2731                                 return -EINVAL;
2732
2733                         /* Invalid bits in the flow label mask? */
2734                         if (ntohl(mask->ipv6_label) & 0xFFF00000)
2735                                 return -EINVAL;
2736                 } else {
2737                         if (ipv6_key->ipv6_proto != flow_key->ip.proto)
2738                                 return -EINVAL;
2739
2740                         if (ipv6_key->ipv6_frag != flow_key->ip.frag)
2741                                 return -EINVAL;
2742                 }
2743                 if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
2744                         return -EINVAL;
2745
2746                 break;
2747
2748         case OVS_KEY_ATTR_TCP:
2749                 if ((eth_type != htons(ETH_P_IP) &&
2750                      eth_type != htons(ETH_P_IPV6)) ||
2751                     flow_key->ip.proto != IPPROTO_TCP)
2752                         return -EINVAL;
2753
2754                 break;
2755
2756         case OVS_KEY_ATTR_UDP:
2757                 if ((eth_type != htons(ETH_P_IP) &&
2758                      eth_type != htons(ETH_P_IPV6)) ||
2759                     flow_key->ip.proto != IPPROTO_UDP)
2760                         return -EINVAL;
2761
2762                 break;
2763
2764         case OVS_KEY_ATTR_MPLS:
2765                 if (!eth_p_mpls(eth_type))
2766                         return -EINVAL;
2767                 break;
2768
2769         case OVS_KEY_ATTR_SCTP:
2770                 if ((eth_type != htons(ETH_P_IP) &&
2771                      eth_type != htons(ETH_P_IPV6)) ||
2772                     flow_key->ip.proto != IPPROTO_SCTP)
2773                         return -EINVAL;
2774
2775                 break;
2776
2777         case OVS_KEY_ATTR_NSH:
2778                 if (eth_type != htons(ETH_P_NSH))
2779                         return -EINVAL;
2780                 if (!validate_nsh(nla_data(a), masked, false, log))
2781                         return -EINVAL;
2782                 break;
2783
2784         default:
2785                 return -EINVAL;
2786         }
2787
2788         /* Convert non-masked non-tunnel set actions to masked set actions. */
2789         if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) {
2790                 int start, len = key_len * 2;
2791                 struct nlattr *at;
2792
2793                 *skip_copy = true;
2794
2795                 start = add_nested_action_start(sfa,
2796                                                 OVS_ACTION_ATTR_SET_TO_MASKED,
2797                                                 log);
2798                 if (start < 0)
2799                         return start;
2800
2801                 at = __add_action(sfa, key_type, NULL, len, log);
2802                 if (IS_ERR(at))
2803                         return PTR_ERR(at);
2804
2805                 memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */
2806                 memset(nla_data(at) + key_len, 0xff, key_len);    /* Mask. */
2807                 /* Clear non-writeable bits from otherwise writeable fields. */
2808                 if (key_type == OVS_KEY_ATTR_IPV6) {
2809                         struct ovs_key_ipv6 *mask = nla_data(at) + key_len;
2810
2811                         mask->ipv6_label &= htonl(0x000FFFFF);
2812                 }
2813                 add_nested_action_end(*sfa, start);
2814         }
2815
2816         return 0;
2817 }
2818
2819 static int validate_userspace(const struct nlattr *attr)
2820 {
2821         static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
2822                 [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
2823                 [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
2824                 [OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 },
2825         };
2826         struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
2827         int error;
2828
2829         error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, attr,
2830                                  userspace_policy, NULL);
2831         if (error)
2832                 return error;
2833
2834         if (!a[OVS_USERSPACE_ATTR_PID] ||
2835             !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
2836                 return -EINVAL;
2837
2838         return 0;
2839 }
2840
2841 static int copy_action(const struct nlattr *from,
2842                        struct sw_flow_actions **sfa, bool log)
2843 {
2844         int totlen = NLA_ALIGN(from->nla_len);
2845         struct nlattr *to;
2846
2847         to = reserve_sfa_size(sfa, from->nla_len, log);
2848         if (IS_ERR(to))
2849                 return PTR_ERR(to);
2850
2851         memcpy(to, from, totlen);
2852         return 0;
2853 }
2854
2855 static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2856                                   const struct sw_flow_key *key,
2857                                   struct sw_flow_actions **sfa,
2858                                   __be16 eth_type, __be16 vlan_tci, bool log)
2859 {
2860         u8 mac_proto = ovs_key_mac_proto(key);
2861         const struct nlattr *a;
2862         int rem, err;
2863
2864         nla_for_each_nested(a, attr, rem) {
2865                 /* Expected argument lengths, (u32)-1 for variable length. */
2866                 static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
2867                         [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
2868                         [OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
2869                         [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
2870                         [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls),
2871                         [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
2872                         [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
2873                         [OVS_ACTION_ATTR_POP_VLAN] = 0,
2874                         [OVS_ACTION_ATTR_SET] = (u32)-1,
2875                         [OVS_ACTION_ATTR_SET_MASKED] = (u32)-1,
2876                         [OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
2877                         [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
2878                         [OVS_ACTION_ATTR_CT] = (u32)-1,
2879                         [OVS_ACTION_ATTR_CT_CLEAR] = 0,
2880                         [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc),
2881                         [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
2882                         [OVS_ACTION_ATTR_POP_ETH] = 0,
2883                         [OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1,
2884                         [OVS_ACTION_ATTR_POP_NSH] = 0,
2885                         [OVS_ACTION_ATTR_METER] = sizeof(u32),
2886                         [OVS_ACTION_ATTR_CLONE] = (u32)-1,
2887                 };
2888                 const struct ovs_action_push_vlan *vlan;
2889                 int type = nla_type(a);
2890                 bool skip_copy;
2891
2892                 if (type > OVS_ACTION_ATTR_MAX ||
2893                     (action_lens[type] != nla_len(a) &&
2894                      action_lens[type] != (u32)-1))
2895                         return -EINVAL;
2896
2897                 skip_copy = false;
2898                 switch (type) {
2899                 case OVS_ACTION_ATTR_UNSPEC:
2900                         return -EINVAL;
2901
2902                 case OVS_ACTION_ATTR_USERSPACE:
2903                         err = validate_userspace(a);
2904                         if (err)
2905                                 return err;
2906                         break;
2907
2908                 case OVS_ACTION_ATTR_OUTPUT:
2909                         if (nla_get_u32(a) >= DP_MAX_PORTS)
2910                                 return -EINVAL;
2911                         break;
2912
2913                 case OVS_ACTION_ATTR_TRUNC: {
2914                         const struct ovs_action_trunc *trunc = nla_data(a);
2915
2916                         if (trunc->max_len < ETH_HLEN)
2917                                 return -EINVAL;
2918                         break;
2919                 }
2920
2921                 case OVS_ACTION_ATTR_HASH: {
2922                         const struct ovs_action_hash *act_hash = nla_data(a);
2923
2924                         switch (act_hash->hash_alg) {
2925                         case OVS_HASH_ALG_L4:
2926                                 break;
2927                         default:
2928                                 return  -EINVAL;
2929                         }
2930
2931                         break;
2932                 }
2933
2934                 case OVS_ACTION_ATTR_POP_VLAN:
2935                         if (mac_proto != MAC_PROTO_ETHERNET)
2936                                 return -EINVAL;
2937                         vlan_tci = htons(0);
2938                         break;
2939
2940                 case OVS_ACTION_ATTR_PUSH_VLAN:
2941                         if (mac_proto != MAC_PROTO_ETHERNET)
2942                                 return -EINVAL;
2943                         vlan = nla_data(a);
2944                         if (!eth_type_vlan(vlan->vlan_tpid))
2945                                 return -EINVAL;
2946                         if (!(vlan->vlan_tci & htons(VLAN_CFI_MASK)))
2947                                 return -EINVAL;
2948                         vlan_tci = vlan->vlan_tci;
2949                         break;
2950
2951                 case OVS_ACTION_ATTR_RECIRC:
2952                         break;
2953
2954                 case OVS_ACTION_ATTR_PUSH_MPLS: {
2955                         const struct ovs_action_push_mpls *mpls = nla_data(a);
2956
2957                         if (!eth_p_mpls(mpls->mpls_ethertype))
2958                                 return -EINVAL;
2959                         /* Prohibit push MPLS other than to a white list
2960                          * for packets that have a known tag order.
2961                          */
2962                         if (vlan_tci & htons(VLAN_CFI_MASK) ||
2963                             (eth_type != htons(ETH_P_IP) &&
2964                              eth_type != htons(ETH_P_IPV6) &&
2965                              eth_type != htons(ETH_P_ARP) &&
2966                              eth_type != htons(ETH_P_RARP) &&
2967                              !eth_p_mpls(eth_type)))
2968                                 return -EINVAL;
2969                         eth_type = mpls->mpls_ethertype;
2970                         break;
2971                 }
2972
2973                 case OVS_ACTION_ATTR_POP_MPLS:
2974                         if (vlan_tci & htons(VLAN_CFI_MASK) ||
2975                             !eth_p_mpls(eth_type))
2976                                 return -EINVAL;
2977
2978                         /* Disallow subsequent L2.5+ set and mpls_pop actions
2979                          * as there is no check here to ensure that the new
2980                          * eth_type is valid and thus set actions could
2981                          * write off the end of the packet or otherwise
2982                          * corrupt it.
2983                          *
2984                          * Support for these actions is planned using packet
2985                          * recirculation.
2986                          */
2987                         eth_type = htons(0);
2988                         break;
2989
2990                 case OVS_ACTION_ATTR_SET:
2991                         err = validate_set(a, key, sfa,
2992                                            &skip_copy, mac_proto, eth_type,
2993                                            false, log);
2994                         if (err)
2995                                 return err;
2996                         break;
2997
2998                 case OVS_ACTION_ATTR_SET_MASKED:
2999                         err = validate_set(a, key, sfa,
3000                                            &skip_copy, mac_proto, eth_type,
3001                                            true, log);
3002                         if (err)
3003                                 return err;
3004                         break;
3005
3006                 case OVS_ACTION_ATTR_SAMPLE: {
3007                         bool last = nla_is_last(a, rem);
3008
3009                         err = validate_and_copy_sample(net, a, key, sfa,
3010                                                        eth_type, vlan_tci,
3011                                                        log, last);
3012                         if (err)
3013                                 return err;
3014                         skip_copy = true;
3015                         break;
3016                 }
3017
3018                 case OVS_ACTION_ATTR_CT:
3019                         err = ovs_ct_copy_action(net, a, key, sfa, log);
3020                         if (err)
3021                                 return err;
3022                         skip_copy = true;
3023                         break;
3024
3025                 case OVS_ACTION_ATTR_CT_CLEAR:
3026                         break;
3027
3028                 case OVS_ACTION_ATTR_PUSH_ETH:
3029                         /* Disallow pushing an Ethernet header if one
3030                          * is already present */
3031                         if (mac_proto != MAC_PROTO_NONE)
3032                                 return -EINVAL;
3033                         mac_proto = MAC_PROTO_ETHERNET;
3034                         break;
3035
3036                 case OVS_ACTION_ATTR_POP_ETH:
3037                         if (mac_proto != MAC_PROTO_ETHERNET)
3038                                 return -EINVAL;
3039                         if (vlan_tci & htons(VLAN_CFI_MASK))
3040                                 return -EINVAL;
3041                         mac_proto = MAC_PROTO_NONE;
3042                         break;
3043
3044                 case OVS_ACTION_ATTR_PUSH_NSH:
3045                         if (mac_proto != MAC_PROTO_ETHERNET) {
3046                                 u8 next_proto;
3047
3048                                 next_proto = tun_p_from_eth_p(eth_type);
3049                                 if (!next_proto)
3050                                         return -EINVAL;
3051                         }
3052                         mac_proto = MAC_PROTO_NONE;
3053                         if (!validate_nsh(nla_data(a), false, true, true))
3054                                 return -EINVAL;
3055                         break;
3056
3057                 case OVS_ACTION_ATTR_POP_NSH: {
3058                         __be16 inner_proto;
3059
3060                         if (eth_type != htons(ETH_P_NSH))
3061                                 return -EINVAL;
3062                         inner_proto = tun_p_to_eth_p(key->nsh.base.np);
3063                         if (!inner_proto)
3064                                 return -EINVAL;
3065                         if (key->nsh.base.np == TUN_P_ETHERNET)
3066                                 mac_proto = MAC_PROTO_ETHERNET;
3067                         else
3068                                 mac_proto = MAC_PROTO_NONE;
3069                         break;
3070                 }
3071
3072                 case OVS_ACTION_ATTR_METER:
3073                         /* Non-existent meters are simply ignored.  */
3074                         break;
3075
3076                 case OVS_ACTION_ATTR_CLONE: {
3077                         bool last = nla_is_last(a, rem);
3078
3079                         err = validate_and_copy_clone(net, a, key, sfa,
3080                                                       eth_type, vlan_tci,
3081                                                       log, last);
3082                         if (err)
3083                                 return err;
3084                         skip_copy = true;
3085                         break;
3086                 }
3087
3088                 default:
3089                         OVS_NLERR(log, "Unknown Action type %d", type);
3090                         return -EINVAL;
3091                 }
3092                 if (!skip_copy) {
3093                         err = copy_action(a, sfa, log);
3094                         if (err)
3095                                 return err;
3096                 }
3097         }
3098
3099         if (rem > 0)
3100                 return -EINVAL;
3101
3102         return 0;
3103 }
3104
3105 /* 'key' must be the masked key. */
3106 int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
3107                          const struct sw_flow_key *key,
3108                          struct sw_flow_actions **sfa, bool log)
3109 {
3110         int err;
3111
3112         *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE));
3113         if (IS_ERR(*sfa))
3114                 return PTR_ERR(*sfa);
3115
3116         (*sfa)->orig_len = nla_len(attr);
3117         err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type,
3118                                      key->eth.vlan.tci, log);
3119         if (err)
3120                 ovs_nla_free_flow_actions(*sfa);
3121
3122         return err;
3123 }
3124
3125 static int sample_action_to_attr(const struct nlattr *attr,
3126                                  struct sk_buff *skb)
3127 {
3128         struct nlattr *start, *ac_start = NULL, *sample_arg;
3129         int err = 0, rem = nla_len(attr);
3130         const struct sample_arg *arg;
3131         struct nlattr *actions;
3132
3133         start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
3134         if (!start)
3135                 return -EMSGSIZE;
3136
3137         sample_arg = nla_data(attr);
3138         arg = nla_data(sample_arg);
3139         actions = nla_next(sample_arg, &rem);
3140
3141         if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PROBABILITY, arg->probability)) {
3142                 err = -EMSGSIZE;
3143                 goto out;
3144         }
3145
3146         ac_start = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
3147         if (!ac_start) {
3148                 err = -EMSGSIZE;
3149                 goto out;
3150         }
3151
3152         err = ovs_nla_put_actions(actions, rem, skb);
3153
3154 out:
3155         if (err) {
3156                 nla_nest_cancel(skb, ac_start);
3157                 nla_nest_cancel(skb, start);
3158         } else {
3159                 nla_nest_end(skb, ac_start);
3160                 nla_nest_end(skb, start);
3161         }
3162
3163         return err;
3164 }
3165
3166 static int clone_action_to_attr(const struct nlattr *attr,
3167                                 struct sk_buff *skb)
3168 {
3169         struct nlattr *start;
3170         int err = 0, rem = nla_len(attr);
3171
3172         start = nla_nest_start(skb, OVS_ACTION_ATTR_CLONE);
3173         if (!start)
3174                 return -EMSGSIZE;
3175
3176         err = ovs_nla_put_actions(nla_data(attr), rem, skb);
3177
3178         if (err)
3179                 nla_nest_cancel(skb, start);
3180         else
3181                 nla_nest_end(skb, start);
3182
3183         return err;
3184 }
3185
3186 static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
3187 {
3188         const struct nlattr *ovs_key = nla_data(a);
3189         int key_type = nla_type(ovs_key);
3190         struct nlattr *start;
3191         int err;
3192
3193         switch (key_type) {
3194         case OVS_KEY_ATTR_TUNNEL_INFO: {
3195                 struct ovs_tunnel_info *ovs_tun = nla_data(ovs_key);
3196                 struct ip_tunnel_info *tun_info = &ovs_tun->tun_dst->u.tun_info;
3197
3198                 start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
3199                 if (!start)
3200                         return -EMSGSIZE;
3201
3202                 err =  ip_tun_to_nlattr(skb, &tun_info->key,
3203                                         ip_tunnel_info_opts(tun_info),
3204                                         tun_info->options_len,
3205                                         ip_tunnel_info_af(tun_info));
3206                 if (err)
3207                         return err;
3208                 nla_nest_end(skb, start);
3209                 break;
3210         }
3211         default:
3212                 if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
3213                         return -EMSGSIZE;
3214                 break;
3215         }
3216
3217         return 0;
3218 }
3219
3220 static int masked_set_action_to_set_action_attr(const struct nlattr *a,
3221                                                 struct sk_buff *skb)
3222 {
3223         const struct nlattr *ovs_key = nla_data(a);
3224         struct nlattr *nla;
3225         size_t key_len = nla_len(ovs_key) / 2;
3226
3227         /* Revert the conversion we did from a non-masked set action to
3228          * masked set action.
3229          */
3230         nla = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
3231         if (!nla)
3232                 return -EMSGSIZE;
3233
3234         if (nla_put(skb, nla_type(ovs_key), key_len, nla_data(ovs_key)))
3235                 return -EMSGSIZE;
3236
3237         nla_nest_end(skb, nla);
3238         return 0;
3239 }
3240
3241 int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
3242 {
3243         const struct nlattr *a;
3244         int rem, err;
3245
3246         nla_for_each_attr(a, attr, len, rem) {
3247                 int type = nla_type(a);
3248
3249                 switch (type) {
3250                 case OVS_ACTION_ATTR_SET:
3251                         err = set_action_to_attr(a, skb);
3252                         if (err)
3253                                 return err;
3254                         break;
3255
3256                 case OVS_ACTION_ATTR_SET_TO_MASKED:
3257                         err = masked_set_action_to_set_action_attr(a, skb);
3258                         if (err)
3259                                 return err;
3260                         break;
3261
3262                 case OVS_ACTION_ATTR_SAMPLE:
3263                         err = sample_action_to_attr(a, skb);
3264                         if (err)
3265                                 return err;
3266                         break;
3267
3268                 case OVS_ACTION_ATTR_CT:
3269                         err = ovs_ct_action_to_attr(nla_data(a), skb);
3270                         if (err)
3271                                 return err;
3272                         break;
3273
3274                 case OVS_ACTION_ATTR_CLONE:
3275                         err = clone_action_to_attr(a, skb);
3276                         if (err)
3277                                 return err;
3278                         break;
3279
3280                 default:
3281                         if (nla_put(skb, type, nla_len(a), nla_data(a)))
3282                                 return -EMSGSIZE;
3283                         break;
3284                 }
3285         }
3286
3287         return 0;
3288 }