Re: [PATCH net-next] openvswitch: add TTL decrement action
From: Simon Horman
Date: Tue Nov 12 2019 - 10:00:54 EST
On Tue, Nov 12, 2019 at 11:25:18AM +0100, Matteo Croce wrote:
> New action to decrement TTL instead of setting it to a fixed value.
> This action will decrement the TTL and, in case of expired TTL, send the
> packet to userspace via output_userspace() to take care of it.
>
> Supports both IPv4 and IPv6 via the ttl and hop_limit fields, respectively.
>
> Tested with a corresponding change in the userspace:
>
> # ovs-dpctl dump-flows
> in_port(2),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl,1
> in_port(1),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl,2
> in_port(1),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:2
> in_port(2),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:1
>
> # ping -c1 192.168.0.2 -t 42
> IP (tos 0x0, ttl 41, id 61647, offset 0, flags [DF], proto ICMP (1), length 84)
> 192.168.0.1 > 192.168.0.2: ICMP echo request, id 386, seq 1, length 64
> # ping -c1 192.168.0.2 -t 120
> IP (tos 0x0, ttl 119, id 62070, offset 0, flags [DF], proto ICMP (1), length 84)
> 192.168.0.1 > 192.168.0.2: ICMP echo request, id 388, seq 1, length 64
> # ping -c1 192.168.0.2 -t 1
> #
>
> Co-authored-by: Bindiya Kurle <bindiyakurle@xxxxxxxxx>
> Signed-off-by: Bindiya Kurle <bindiyakurle@xxxxxxxxx>
> Signed-off-by: Matteo Croce <mcroce@xxxxxxxxxx>
Usually OVS achieves this behaviour by matching on the TTL and
setting it to the desired value, pre-calculated as TTL -1.
With that in mind could you explain the motivation for this
change?
> ---
> include/uapi/linux/openvswitch.h | 2 ++
> net/openvswitch/actions.c | 46 ++++++++++++++++++++++++++++++++
> net/openvswitch/flow_netlink.c | 6 +++++
> 3 files changed, 54 insertions(+)
>
> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
> index 1887a451c388..a3bdb1ecd1e7 100644
> --- a/include/uapi/linux/openvswitch.h
> +++ b/include/uapi/linux/openvswitch.h
> @@ -890,6 +890,7 @@ struct check_pkt_len_arg {
> * @OVS_ACTION_ATTR_CHECK_PKT_LEN: Check the packet length and execute a set
> * of actions if greater than the specified packet length, else execute
> * another set of actions.
> + * @OVS_ACTION_ATTR_DEC_TTL: Decrement the IP TTL.
> *
> * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all
> * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
> @@ -925,6 +926,7 @@ enum ovs_action_attr {
> OVS_ACTION_ATTR_METER, /* u32 meter ID. */
> OVS_ACTION_ATTR_CLONE, /* Nested OVS_CLONE_ATTR_*. */
> OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */
> + OVS_ACTION_ATTR_DEC_TTL, /* Decrement ttl action */
>
> __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted
> * from userspace. */
> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
> index 12936c151cc0..077b7f309c93 100644
> --- a/net/openvswitch/actions.c
> +++ b/net/openvswitch/actions.c
> @@ -1174,6 +1174,43 @@ static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
> nla_len(actions), last, clone_flow_key);
> }
>
> +static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key)
> +{
> + int err;
> +
> + if (skb->protocol == htons(ETH_P_IPV6)) {
> + struct ipv6hdr *nh = ipv6_hdr(skb);
> +
> + err = skb_ensure_writable(skb, skb_network_offset(skb) +
> + sizeof(*nh));
> + if (unlikely(err))
> + return err;
> +
> + if (nh->hop_limit <= 1)
> + return -EHOSTUNREACH;
> +
> + key->ip.ttl = --nh->hop_limit;
> + } else {
> + struct iphdr *nh = ip_hdr(skb);
> + u8 old_ttl;
> +
> + err = skb_ensure_writable(skb, skb_network_offset(skb) +
> + sizeof(*nh));
> + if (unlikely(err))
> + return err;
> +
> + if (nh->ttl <= 1)
> + return -EHOSTUNREACH;
> +
> + old_ttl = nh->ttl--;
> + csum_replace2(&nh->check, htons(old_ttl << 8),
> + htons(nh->ttl << 8));
> + key->ip.ttl = nh->ttl;
> + }
The above may send packets with TTL = 0, is that desired?
> +
> + return 0;
> +}
> +
> /* Execute a list of actions against 'skb'. */
> static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
> struct sw_flow_key *key,
> @@ -1345,6 +1382,15 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
>
> break;
> }
> +
> + case OVS_ACTION_ATTR_DEC_TTL:
> + err = execute_dec_ttl(skb, key);
> + if (err == -EHOSTUNREACH) {
> + output_userspace(dp, skb, key, a, attr,
> + len, OVS_CB(skb)->cutlen);
> + OVS_CB(skb)->cutlen = 0;
> + }
> + break;
> }
>
> if (unlikely(err)) {
> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
> index 65c2e3458ff5..d17f2d4b420f 100644
> --- a/net/openvswitch/flow_netlink.c
> +++ b/net/openvswitch/flow_netlink.c
> @@ -79,6 +79,7 @@ static bool actions_may_change_flow(const struct nlattr *actions)
> case OVS_ACTION_ATTR_SET_MASKED:
> case OVS_ACTION_ATTR_METER:
> case OVS_ACTION_ATTR_CHECK_PKT_LEN:
> + case OVS_ACTION_ATTR_DEC_TTL:
> default:
> return true;
> }
> @@ -3005,6 +3006,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
> [OVS_ACTION_ATTR_METER] = sizeof(u32),
> [OVS_ACTION_ATTR_CLONE] = (u32)-1,
> [OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1,
> + [OVS_ACTION_ATTR_DEC_TTL] = 0,
> };
> const struct ovs_action_push_vlan *vlan;
> int type = nla_type(a);
> @@ -3233,6 +3235,10 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
> break;
> }
>
> + case OVS_ACTION_ATTR_DEC_TTL:
> + /* Nothing to do. */
> + break;
> +
> default:
> OVS_NLERR(log, "Unknown Action type %d", type);
> return -EINVAL;
> --
> 2.23.0
>