[PATCHv5 net-next 06/10] openvswitch: Allow matching on conntrack mark
From: Joe Stringer
Date: Mon Aug 24 2015 - 20:33:47 EST
Allow matching and setting the ct_mark field. As with ct_state and
ct_zone, these fields are populated when the CT action is executed. To
write to this field, a value and mask can be specified as a nested
attribute under the CT action. This data is stored with the conntrack
entry, and is executed after the lookup occurs for the CT action. The
conntrack entry itself must be committed using the COMMIT flag in the CT
action flags for this change to persist.
Signed-off-by: Justin Pettit <jpettit@xxxxxxxxxx>
Signed-off-by: Joe Stringer <joestringer@xxxxxxxxxx>
---
v1-v3: No change.
v4: Only allow setting conntrack mark via ct action.
Documentation tweaks.
v5: Rebase against conntrack zone changes.
Add ct_mark to ct action serialization
Replace some #ifdefs with IS_ENABLED.
---
include/uapi/linux/openvswitch.h | 5 ++++
net/openvswitch/actions.c | 1 +
net/openvswitch/conntrack.c | 63 ++++++++++++++++++++++++++++++++++++++--
net/openvswitch/conntrack.h | 1 +
net/openvswitch/flow.h | 1 +
net/openvswitch/flow_netlink.c | 15 +++++++++-
6 files changed, 82 insertions(+), 4 deletions(-)
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 55f5997..7a185b5 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -325,6 +325,7 @@ enum ovs_key_attr {
* the accepted length of the array. */
OVS_KEY_ATTR_CT_STATE, /* u8 bitmask of OVS_CS_F_* */
OVS_KEY_ATTR_CT_ZONE, /* u16 connection tracking zone. */
+ OVS_KEY_ATTR_CT_MARK, /* u32 connection tracking mark */
#ifdef __KERNEL__
OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info */
@@ -613,11 +614,15 @@ struct ovs_action_hash {
* enum ovs_ct_attr - Attributes for %OVS_ACTION_ATTR_CT action.
* @OVS_CT_ATTR_FLAGS: u32 connection tracking flags.
* @OVS_CT_ATTR_ZONE: u16 connection tracking zone.
+ * @OVS_CT_ATTR_MARK: u32 value followed by u32 mask. For each bit set in the
+ * mask, the corresponding bit in the value is copied to the connection
+ * tracking mark field in the connection.
*/
enum ovs_ct_attr {
OVS_CT_ATTR_UNSPEC,
OVS_CT_ATTR_FLAGS, /* u8 bitmask of OVS_CT_F_*. */
OVS_CT_ATTR_ZONE, /* u16 zone id. */
+ OVS_CT_ATTR_MARK, /* mark to associate with this connection. */
__OVS_CT_ATTR_MAX
};
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 72ca2c4..9741d2c 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -968,6 +968,7 @@ static int execute_masked_set_action(struct sk_buff *skb,
case OVS_KEY_ATTR_CT_STATE:
case OVS_KEY_ATTR_CT_ZONE:
+ case OVS_KEY_ATTR_CT_MARK:
err = -EINVAL;
break;
}
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 4b7c4d7..daea29e 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -28,12 +28,19 @@ struct ovs_ct_len_tbl {
size_t minlen;
};
+/* Metadata mark for masked write to conntrack mark */
+struct md_mark {
+ u32 value;
+ u32 mask;
+};
+
/* Conntrack action context for execution. */
struct ovs_conntrack_info {
struct nf_conntrack_zone zone;
struct nf_conn *ct;
u32 flags;
u16 family;
+ struct md_mark mark;
};
static u16 key_to_nfproto(const struct sw_flow_key *key)
@@ -84,10 +91,12 @@ static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo)
}
static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
- const struct nf_conntrack_zone *zone)
+ const struct nf_conntrack_zone *zone,
+ const struct nf_conn *ct)
{
key->ct.state = state;
key->ct.zone = zone->id;
+ key->ct.mark = ct ? ct->mark : 0;
}
/* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has
@@ -110,7 +119,7 @@ static void ovs_ct_update_key(const struct sk_buff *skb,
} else if (post_ct) {
state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID;
}
- __ovs_ct_update_key(key, state, zone);
+ __ovs_ct_update_key(key, state, zone, ct);
}
void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)
@@ -118,6 +127,31 @@ void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)
ovs_ct_update_key(skb, key, false);
}
+static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key,
+ u32 ct_mark, u32 mask)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+ u32 new_mark;
+
+ if (!IS_ENABLED(CONFIG_NF_CONNTRACK_MARK))
+ return -ENOTSUPP;
+
+ /* The connection could be invalid, in which case set_mark is no-op. */
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct)
+ return 0;
+
+ new_mark = ct_mark | (ct->mark & ~(mask));
+ if (ct->mark != new_mark) {
+ ct->mark = new_mark;
+ nf_conntrack_event_cache(IPCT_MARK, ct);
+ key->ct.mark = new_mark;
+ }
+
+ return 0;
+}
+
static int handle_fragments(struct net *net, struct sw_flow_key *key,
u16 zone, struct sk_buff *skb)
{
@@ -235,7 +269,7 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
u8 state;
state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED;
- __ovs_ct_update_key(key, state, &info->zone);
+ __ovs_ct_update_key(key, state, &info->zone, exp->master);
} else {
int err;
@@ -298,7 +332,13 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
err = ovs_ct_commit(net, key, info, skb);
else
err = ovs_ct_lookup(net, key, info, skb);
+ if (err)
+ goto err;
+ if (info->mark.mask)
+ err = ovs_ct_set_mark(skb, key, info->mark.value,
+ info->mark.mask);
+err:
skb_push(skb, nh_ofs);
return err;
}
@@ -308,6 +348,8 @@ static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = {
.maxlen = sizeof(u32) },
[OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16),
.maxlen = sizeof(u16) },
+ [OVS_CT_ATTR_MARK] = { .minlen = sizeof(struct md_mark),
+ .maxlen = sizeof(struct md_mark) },
};
static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
@@ -343,6 +385,14 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
info->zone.id = nla_get_u16(a);
break;
#endif
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ case OVS_CT_ATTR_MARK: {
+ struct md_mark *mark = nla_data(a);
+
+ info->mark = *mark;
+ break;
+ }
+#endif
default:
OVS_NLERR(log, "Unknown conntrack attr (%d)",
type);
@@ -365,6 +415,9 @@ bool ovs_ct_verify(enum ovs_key_attr attr)
if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
attr == OVS_KEY_ATTR_CT_ZONE)
return true;
+ if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
+ attr == OVS_KEY_ATTR_CT_MARK)
+ return true;
return false;
}
@@ -427,6 +480,10 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id))
return -EMSGSIZE;
+ if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
+ nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark),
+ &ct_info->mark))
+ return -EMSGSIZE;
nla_nest_end(skb, start);
diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h
index 40e0349..8c4e32e 100644
--- a/net/openvswitch/conntrack.h
+++ b/net/openvswitch/conntrack.h
@@ -63,6 +63,7 @@ void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)
{
key->ct.state = 0;
key->ct.zone = 0;
+ key->ct.mark = 0;
}
static inline void ovs_ct_free_action(const struct nlattr *a) { }
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 312c7d7..e05e697 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -114,6 +114,7 @@ struct sw_flow_key {
struct {
/* Connection tracking fields. */
u16 zone;
+ u32 mark;
u8 state;
} ct;
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 35a2b9d..5e3b489 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -281,7 +281,7 @@ size_t ovs_key_attr_size(void)
/* Whenever adding new OVS_KEY_ FIELDS, we should consider
* updating this function.
*/
- BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 24);
+ BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 25);
return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
+ nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
@@ -292,6 +292,7 @@ size_t ovs_key_attr_size(void)
+ nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */
+ nla_total_size(1) /* OVS_KEY_ATTR_CT_STATE */
+ nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */
+ + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */
+ nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
+ nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
+ nla_total_size(4) /* OVS_KEY_ATTR_VLAN */
@@ -343,6 +344,7 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
[OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) },
[OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u8) },
[OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) },
+ [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) },
};
static bool is_all_zero(const u8 *fp, size_t size)
@@ -787,6 +789,13 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
SW_FLOW_KEY_PUT(match, ct.zone, ct_zone, is_mask);
*attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE);
}
+ if (*attrs & (1 << OVS_KEY_ATTR_CT_MARK) &&
+ ovs_ct_verify(OVS_KEY_ATTR_CT_MARK)) {
+ u32 mark = nla_get_u32(a[OVS_KEY_ATTR_CT_MARK]);
+
+ SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask);
+ *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK);
+ }
return 0;
}
@@ -1340,6 +1349,9 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
if (nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, output->ct.zone))
goto nla_put_failure;
+ if (nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, output->ct.mark))
+ goto nla_put_failure;
+
nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
if (!nla)
goto nla_put_failure;
@@ -1922,6 +1934,7 @@ static int validate_set(const struct nlattr *a,
case OVS_KEY_ATTR_PRIORITY:
case OVS_KEY_ATTR_SKB_MARK:
+ case OVS_KEY_ATTR_CT_MARK:
case OVS_KEY_ATTR_ETHERNET:
break;
--
2.1.4
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/