aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2023-08-27 06:56:54 +0100
committerDavid S. Miller <davem@davemloft.net>2023-08-27 06:56:54 +0100
commit2cc88bbcbb612d478e73bcc6b8d8bc1f0612665d (patch)
tree06a5cb6681b12a6de186dd8c4a081327f7c9c042
parentb32add2d20ea6e62f30a3c0a7c2fb306ec5ceb3d (diff)
parente8e0bd60e4833e5bcfa220d7b4c07456c83c1ea2 (diff)
downloadcloudkernel-2cc88bbcbb612d478e73bcc6b8d8bc1f0612665d.tar.gz
Merge branch 'sfc-pedit-offloads'
Pieter Jansen van Vuuren says: ==================== sfc: introduce eth, ipv4 and ipv6 pedit offloads This set introduces mac source and destination pedit set action offloads. It also adds offload for ipv4 ttl and ipv6 hop limit pedit set action as well pedit add actions that would result in the same semantics as decrementing the ttl and hop limit. v2: - fix 'efx_tc_mangle' kdoc which was orphaned when adding 'efx_tc_pedit_add'. - add description of 'match' in 'efx_tc_mangle' kdoc. - correct some inconsistent kdoc indentation. v1: https://lore.kernel.org/netdev/20230823111725.28090-1-pieter.jansen-van-vuuren@amd.com/ ==================== Reviewed-by: Simon Horman <horms@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/ethernet/sfc/mae.c89
-rw-r--r--drivers/net/ethernet/sfc/mae.h4
-rw-r--r--drivers/net/ethernet/sfc/tc.c476
-rw-r--r--drivers/net/ethernet/sfc/tc.h58
4 files changed, 614 insertions, 13 deletions
diff --git a/drivers/net/ethernet/sfc/mae.c b/drivers/net/ethernet/sfc/mae.c
index 3b8780c76b6e24..c3e2b4a21d1055 100644
--- a/drivers/net/ethernet/sfc/mae.c
+++ b/drivers/net/ethernet/sfc/mae.c
@@ -1219,6 +1219,71 @@ fail:
return rc;
}
+/**
+ * efx_mae_allocate_pedit_mac() - allocate pedit MAC address in HW.
+ * @efx: NIC we're installing a pedit MAC address on
+ * @ped: pedit MAC action to be installed
+ *
+ * Attempts to install @ped in HW and populates its id with an index of this
+ * entry in the firmware MAC address table on success.
+ *
+ * Return: negative value on error, 0 in success.
+ */
+int efx_mae_allocate_pedit_mac(struct efx_nic *efx,
+ struct efx_tc_mac_pedit_action *ped)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_MAE_MAC_ADDR_ALLOC_OUT_LEN);
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_MAE_MAC_ADDR_ALLOC_IN_LEN);
+ size_t outlen;
+ int rc;
+
+ BUILD_BUG_ON(MC_CMD_MAE_MAC_ADDR_ALLOC_IN_MAC_ADDR_LEN !=
+ sizeof(ped->h_addr));
+ memcpy(MCDI_PTR(inbuf, MAE_MAC_ADDR_ALLOC_IN_MAC_ADDR), ped->h_addr,
+ sizeof(ped->h_addr));
+ rc = efx_mcdi_rpc(efx, MC_CMD_MAE_MAC_ADDR_ALLOC, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ return rc;
+ if (outlen < sizeof(outbuf))
+ return -EIO;
+ ped->fw_id = MCDI_DWORD(outbuf, MAE_MAC_ADDR_ALLOC_OUT_MAC_ID);
+ return 0;
+}
+
+/**
+ * efx_mae_free_pedit_mac() - free pedit MAC address in HW.
+ * @efx: NIC we're installing a pedit MAC address on
+ * @ped: pedit MAC action that needs to be freed
+ *
+ * Frees @ped in HW, check that firmware did not free a different one and clears
+ * the id (which denotes the index of the entry in the MAC address table).
+ */
+void efx_mae_free_pedit_mac(struct efx_nic *efx,
+ struct efx_tc_mac_pedit_action *ped)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_MAE_MAC_ADDR_FREE_OUT_LEN(1));
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_MAE_MAC_ADDR_FREE_IN_LEN(1));
+ size_t outlen;
+ int rc;
+
+ MCDI_SET_DWORD(inbuf, MAE_MAC_ADDR_FREE_IN_MAC_ID, ped->fw_id);
+ rc = efx_mcdi_rpc(efx, MC_CMD_MAE_MAC_ADDR_FREE, inbuf,
+ sizeof(inbuf), outbuf, sizeof(outbuf), &outlen);
+ if (rc || outlen < sizeof(outbuf))
+ return;
+ /* FW freed a different ID than we asked for, should also never happen.
+ * Warn because it means we've now got a different idea to the FW of
+ * what MAC addresses exist, which could cause mayhem later.
+ */
+ if (WARN_ON(MCDI_DWORD(outbuf, MAE_MAC_ADDR_FREE_OUT_FREED_MAC_ID) != ped->fw_id))
+ return;
+ /* We're probably about to free @ped, but let's just make sure its
+ * fw_id is blatted so that it won't look valid if it leaks out.
+ */
+ ped->fw_id = MC_CMD_MAE_MAC_ADDR_ALLOC_OUT_MAC_ID_NULL;
+}
+
int efx_mae_alloc_action_set(struct efx_nic *efx, struct efx_tc_action_set *act)
{
MCDI_DECLARE_BUF(outbuf, MC_CMD_MAE_ACTION_SET_ALLOC_OUT_LEN);
@@ -1226,15 +1291,27 @@ int efx_mae_alloc_action_set(struct efx_nic *efx, struct efx_tc_action_set *act)
size_t outlen;
int rc;
- MCDI_POPULATE_DWORD_3(inbuf, MAE_ACTION_SET_ALLOC_IN_FLAGS,
+ MCDI_POPULATE_DWORD_4(inbuf, MAE_ACTION_SET_ALLOC_IN_FLAGS,
MAE_ACTION_SET_ALLOC_IN_VLAN_PUSH, act->vlan_push,
MAE_ACTION_SET_ALLOC_IN_VLAN_POP, act->vlan_pop,
- MAE_ACTION_SET_ALLOC_IN_DECAP, act->decap);
+ MAE_ACTION_SET_ALLOC_IN_DECAP, act->decap,
+ MAE_ACTION_SET_ALLOC_IN_DO_DECR_IP_TTL,
+ act->do_ttl_dec);
+
+ if (act->src_mac)
+ MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_SRC_MAC_ID,
+ act->src_mac->fw_id);
+ else
+ MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_SRC_MAC_ID,
+ MC_CMD_MAE_MAC_ADDR_ALLOC_OUT_MAC_ID_NULL);
+
+ if (act->dst_mac)
+ MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_DST_MAC_ID,
+ act->dst_mac->fw_id);
+ else
+ MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_DST_MAC_ID,
+ MC_CMD_MAE_MAC_ADDR_ALLOC_OUT_MAC_ID_NULL);
- MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_SRC_MAC_ID,
- MC_CMD_MAE_MAC_ADDR_ALLOC_OUT_MAC_ID_NULL);
- MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_DST_MAC_ID,
- MC_CMD_MAE_MAC_ADDR_ALLOC_OUT_MAC_ID_NULL);
if (act->count && !WARN_ON(!act->count->cnt))
MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_COUNTER_ID,
act->count->cnt->fw_id);
diff --git a/drivers/net/ethernet/sfc/mae.h b/drivers/net/ethernet/sfc/mae.h
index e88e80574f1559..8df30bc4f3ba73 100644
--- a/drivers/net/ethernet/sfc/mae.h
+++ b/drivers/net/ethernet/sfc/mae.h
@@ -103,6 +103,10 @@ int efx_mae_update_encap_md(struct efx_nic *efx,
int efx_mae_free_encap_md(struct efx_nic *efx,
struct efx_tc_encap_action *encap);
+int efx_mae_allocate_pedit_mac(struct efx_nic *efx,
+ struct efx_tc_mac_pedit_action *ped);
+void efx_mae_free_pedit_mac(struct efx_nic *efx,
+ struct efx_tc_mac_pedit_action *ped);
int efx_mae_alloc_action_set(struct efx_nic *efx, struct efx_tc_action_set *act);
int efx_mae_free_action_set(struct efx_nic *efx, u32 fw_id);
diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c
index 039180c61c8301..047322b04d4f52 100644
--- a/drivers/net/ethernet/sfc/tc.c
+++ b/drivers/net/ethernet/sfc/tc.c
@@ -31,6 +31,9 @@ enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev)
return EFX_ENCAP_TYPE_NONE;
}
+#define EFX_TC_HDR_TYPE_TTL_MASK ((u32)0xff)
+/* Hoplimit is stored in the most significant byte in the pedit ipv6 header action */
+#define EFX_TC_HDR_TYPE_HLIMIT_MASK ~((u32)0xff000000)
#define EFX_EFV_PF NULL
/* Look up the representor information (efv) for a device.
* May return NULL for the PF (us), or an error pointer for a device that
@@ -86,6 +89,12 @@ s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv)
return mport;
}
+static const struct rhashtable_params efx_tc_mac_ht_params = {
+ .key_len = offsetofend(struct efx_tc_mac_pedit_action, h_addr),
+ .key_offset = 0,
+ .head_offset = offsetof(struct efx_tc_mac_pedit_action, linkage),
+};
+
static const struct rhashtable_params efx_tc_encap_match_ht_params = {
.key_len = offsetof(struct efx_tc_encap_match, linkage),
.key_offset = 0,
@@ -110,6 +119,56 @@ static const struct rhashtable_params efx_tc_recirc_ht_params = {
.head_offset = offsetof(struct efx_tc_recirc_id, linkage),
};
+static struct efx_tc_mac_pedit_action *efx_tc_flower_get_mac(struct efx_nic *efx,
+ unsigned char h_addr[ETH_ALEN],
+ struct netlink_ext_ack *extack)
+{
+ struct efx_tc_mac_pedit_action *ped, *old;
+ int rc;
+
+ ped = kzalloc(sizeof(*ped), GFP_USER);
+ if (!ped)
+ return ERR_PTR(-ENOMEM);
+ memcpy(ped->h_addr, h_addr, ETH_ALEN);
+ old = rhashtable_lookup_get_insert_fast(&efx->tc->mac_ht,
+ &ped->linkage,
+ efx_tc_mac_ht_params);
+ if (old) {
+ /* don't need our new entry */
+ kfree(ped);
+ if (!refcount_inc_not_zero(&old->ref))
+ return ERR_PTR(-EAGAIN);
+ /* existing entry found, ref taken */
+ return old;
+ }
+
+ rc = efx_mae_allocate_pedit_mac(efx, ped);
+ if (rc < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to store pedit MAC address in hw");
+ goto out_remove;
+ }
+
+ /* ref and return */
+ refcount_set(&ped->ref, 1);
+ return ped;
+out_remove:
+ rhashtable_remove_fast(&efx->tc->mac_ht, &ped->linkage,
+ efx_tc_mac_ht_params);
+ kfree(ped);
+ return ERR_PTR(rc);
+}
+
+static void efx_tc_flower_put_mac(struct efx_nic *efx,
+ struct efx_tc_mac_pedit_action *ped)
+{
+ if (!refcount_dec_and_test(&ped->ref))
+ return; /* still in use */
+ rhashtable_remove_fast(&efx->tc->mac_ht, &ped->linkage,
+ efx_tc_mac_ht_params);
+ efx_mae_free_pedit_mac(efx, ped);
+ kfree(ped);
+}
+
static void efx_tc_free_action_set(struct efx_nic *efx,
struct efx_tc_action_set *act, bool in_hw)
{
@@ -135,6 +194,10 @@ static void efx_tc_free_action_set(struct efx_nic *efx,
list_del(&act->encap_user);
efx_tc_flower_release_encap_md(efx, act->encap_md);
}
+ if (act->src_mac)
+ efx_tc_flower_put_mac(efx, act->src_mac);
+ if (act->dst_mac)
+ efx_tc_flower_put_mac(efx, act->dst_mac);
kfree(act);
}
@@ -697,6 +760,8 @@ static const char *efx_tc_encap_type_name(enum efx_encap_type typ)
/* For details of action order constraints refer to SF-123102-TC-1ยง12.6.1 */
enum efx_tc_action_order {
EFX_TC_AO_DECAP,
+ EFX_TC_AO_DEC_TTL,
+ EFX_TC_AO_PEDIT_MAC_ADDRS,
EFX_TC_AO_VLAN_POP,
EFX_TC_AO_VLAN_PUSH,
EFX_TC_AO_COUNT,
@@ -711,6 +776,15 @@ static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act,
case EFX_TC_AO_DECAP:
if (act->decap)
return false;
+ /* PEDIT_MAC_ADDRS must not happen before DECAP, though it
+ * can wait until much later
+ */
+ if (act->dst_mac || act->src_mac)
+ return false;
+
+ /* Decrementing ttl must not happen before DECAP */
+ if (act->do_ttl_dec)
+ return false;
fallthrough;
case EFX_TC_AO_VLAN_POP:
if (act->vlan_pop >= 2)
@@ -730,12 +804,17 @@ static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act,
if (act->count)
return false;
fallthrough;
+ case EFX_TC_AO_PEDIT_MAC_ADDRS:
case EFX_TC_AO_ENCAP:
if (act->encap_md)
return false;
fallthrough;
case EFX_TC_AO_DELIVER:
return !act->deliver;
+ case EFX_TC_AO_DEC_TTL:
+ if (act->encap_md)
+ return false;
+ return !act->do_ttl_dec;
default:
/* Bad caller. Whatever they wanted to do, say they can't. */
WARN_ON_ONCE(1);
@@ -900,6 +979,375 @@ static void efx_tc_flower_release_lhs_actions(struct efx_nic *efx,
efx_tc_flower_put_counter_index(efx, act->count);
}
+/**
+ * struct efx_tc_mangler_state - accumulates 32-bit pedits into fields
+ *
+ * @dst_mac_32: dst_mac[0:3] has been populated
+ * @dst_mac_16: dst_mac[4:5] has been populated
+ * @src_mac_16: src_mac[0:1] has been populated
+ * @src_mac_32: src_mac[2:5] has been populated
+ * @dst_mac: h_dest field of ethhdr
+ * @src_mac: h_source field of ethhdr
+ *
+ * Since FLOW_ACTION_MANGLE comes in 32-bit chunks that do not
+ * necessarily equate to whole fields of the packet header, this
+ * structure is used to hold the cumulative effect of the partial
+ * field pedits that have been processed so far.
+ */
+struct efx_tc_mangler_state {
+ u8 dst_mac_32:1; /* eth->h_dest[0:3] */
+ u8 dst_mac_16:1; /* eth->h_dest[4:5] */
+ u8 src_mac_16:1; /* eth->h_source[0:1] */
+ u8 src_mac_32:1; /* eth->h_source[2:5] */
+ unsigned char dst_mac[ETH_ALEN];
+ unsigned char src_mac[ETH_ALEN];
+};
+
+/** efx_tc_complete_mac_mangle() - pull complete field pedits out of @mung
+ * @efx: NIC we're installing a flow rule on
+ * @act: action set (cursor) to update
+ * @mung: accumulated partial mangles
+ * @extack: netlink extended ack for reporting errors
+ *
+ * Check @mung to find any combinations of partial mangles that can be
+ * combined into a complete packet field edit, add that edit to @act,
+ * and consume the partial mangles from @mung.
+ */
+
+static int efx_tc_complete_mac_mangle(struct efx_nic *efx,
+ struct efx_tc_action_set *act,
+ struct efx_tc_mangler_state *mung,
+ struct netlink_ext_ack *extack)
+{
+ struct efx_tc_mac_pedit_action *ped;
+
+ if (mung->dst_mac_32 && mung->dst_mac_16) {
+ ped = efx_tc_flower_get_mac(efx, mung->dst_mac, extack);
+ if (IS_ERR(ped))
+ return PTR_ERR(ped);
+
+ /* Check that we have not already populated dst_mac */
+ if (act->dst_mac)
+ efx_tc_flower_put_mac(efx, act->dst_mac);
+
+ act->dst_mac = ped;
+
+ /* consume the incomplete state */
+ mung->dst_mac_32 = 0;
+ mung->dst_mac_16 = 0;
+ }
+ if (mung->src_mac_16 && mung->src_mac_32) {
+ ped = efx_tc_flower_get_mac(efx, mung->src_mac, extack);
+ if (IS_ERR(ped))
+ return PTR_ERR(ped);
+
+ /* Check that we have not already populated src_mac */
+ if (act->src_mac)
+ efx_tc_flower_put_mac(efx, act->src_mac);
+
+ act->src_mac = ped;
+
+ /* consume the incomplete state */
+ mung->src_mac_32 = 0;
+ mung->src_mac_16 = 0;
+ }
+ return 0;
+}
+
+static int efx_tc_pedit_add(struct efx_nic *efx, struct efx_tc_action_set *act,
+ const struct flow_action_entry *fa,
+ struct netlink_ext_ack *extack)
+{
+ switch (fa->mangle.htype) {
+ case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
+ switch (fa->mangle.offset) {
+ case offsetof(struct iphdr, ttl):
+ /* check that pedit applies to ttl only */
+ if (fa->mangle.mask != ~EFX_TC_HDR_TYPE_TTL_MASK)
+ break;
+
+ /* Adding 0xff is equivalent to decrementing the ttl.
+ * Other added values are not supported.
+ */
+ if ((fa->mangle.val & EFX_TC_HDR_TYPE_TTL_MASK) != U8_MAX)
+ break;
+
+ /* check that we do not decrement ttl twice */
+ if (!efx_tc_flower_action_order_ok(act,
+ EFX_TC_AO_DEC_TTL)) {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported: multiple dec ttl");
+ return -EOPNOTSUPP;
+ }
+ act->do_ttl_dec = 1;
+ return 0;
+ default:
+ break;
+ }
+ break;
+ case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
+ switch (fa->mangle.offset) {
+ case round_down(offsetof(struct ipv6hdr, hop_limit), 4):
+ /* check that pedit applies to hoplimit only */
+ if (fa->mangle.mask != EFX_TC_HDR_TYPE_HLIMIT_MASK)
+ break;
+
+ /* Adding 0xff is equivalent to decrementing the hoplimit.
+ * Other added values are not supported.
+ */
+ if ((fa->mangle.val >> 24) != U8_MAX)
+ break;
+
+ /* check that we do not decrement hoplimit twice */
+ if (!efx_tc_flower_action_order_ok(act,
+ EFX_TC_AO_DEC_TTL)) {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported: multiple dec ttl");
+ return -EOPNOTSUPP;
+ }
+ act->do_ttl_dec = 1;
+ return 0;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Unsupported: ttl add action type %x %x %x/%x",
+ fa->mangle.htype, fa->mangle.offset,
+ fa->mangle.val, fa->mangle.mask);
+ return -EOPNOTSUPP;
+}
+
+/**
+ * efx_tc_mangle() - handle a single 32-bit (or less) pedit
+ * @efx: NIC we're installing a flow rule on
+ * @act: action set (cursor) to update
+ * @fa: FLOW_ACTION_MANGLE action metadata
+ * @mung: accumulator for partial mangles
+ * @extack: netlink extended ack for reporting errors
+ * @match: original match used along with the mangle action
+ *
+ * Identify the fields written by a FLOW_ACTION_MANGLE, and record
+ * the partial mangle state in @mung. If this mangle completes an
+ * earlier partial mangle, consume and apply to @act by calling
+ * efx_tc_complete_mac_mangle().
+ */
+
+static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act,
+ const struct flow_action_entry *fa,
+ struct efx_tc_mangler_state *mung,
+ struct netlink_ext_ack *extack,
+ struct efx_tc_match *match)
+{
+ __le32 mac32;
+ __le16 mac16;
+ u8 tr_ttl;
+
+ switch (fa->mangle.htype) {
+ case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
+ BUILD_BUG_ON(offsetof(struct ethhdr, h_dest) != 0);
+ BUILD_BUG_ON(offsetof(struct ethhdr, h_source) != 6);
+ if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_PEDIT_MAC_ADDRS)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Pedit mangle mac action violates action order");
+ return -EOPNOTSUPP;
+ }
+ switch (fa->mangle.offset) {
+ case 0:
+ if (fa->mangle.mask) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Unsupported: mask (%#x) of eth.dst32 mangle",
+ fa->mangle.mask);
+ return -EOPNOTSUPP;
+ }
+ /* Ethernet address is little-endian */
+ mac32 = cpu_to_le32(fa->mangle.val);
+ memcpy(mung->dst_mac, &mac32, sizeof(mac32));
+ mung->dst_mac_32 = 1;
+ return efx_tc_complete_mac_mangle(efx, act, mung, extack);
+ case 4:
+ if (fa->mangle.mask == 0xffff) {
+ mac16 = cpu_to_le16(fa->mangle.val >> 16);
+ memcpy(mung->src_mac, &mac16, sizeof(mac16));
+ mung->src_mac_16 = 1;
+ } else if (fa->mangle.mask == 0xffff0000) {
+ mac16 = cpu_to_le16((u16)fa->mangle.val);
+ memcpy(mung->dst_mac + 4, &mac16, sizeof(mac16));
+ mung->dst_mac_16 = 1;
+ } else {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Unsupported: mask (%#x) of eth+4 mangle is not high or low 16b",
+ fa->mangle.mask);
+ return -EOPNOTSUPP;
+ }
+ return efx_tc_complete_mac_mangle(efx, act, mung, extack);
+ case 8:
+ if (fa->mangle.mask) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Unsupported: mask (%#x) of eth.src32 mangle",
+ fa->mangle.mask);
+ return -EOPNOTSUPP;
+ }
+ mac32 = cpu_to_le32(fa->mangle.val);
+ memcpy(mung->src_mac + 2, &mac32, sizeof(mac32));
+ mung->src_mac_32 = 1;
+ return efx_tc_complete_mac_mangle(efx, act, mung, extack);
+ default:
+ NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported: mangle eth+%u %x/%x",
+ fa->mangle.offset, fa->mangle.val, fa->mangle.mask);
+ return -EOPNOTSUPP;
+ }
+ break;
+ case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
+ switch (fa->mangle.offset) {
+ case offsetof(struct iphdr, ttl):
+ /* we currently only support pedit IP4 when it applies
+ * to TTL and then only when it can be achieved with a
+ * decrement ttl action
+ */
+
+ /* check that pedit applies to ttl only */
+ if (fa->mangle.mask != ~EFX_TC_HDR_TYPE_TTL_MASK) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Unsupported: mask (%#x) out of range, only support mangle action on ipv4.ttl",
+ fa->mangle.mask);
+ return -EOPNOTSUPP;
+ }
+
+ /* we can only convert to a dec ttl when we have an
+ * exact match on the ttl field
+ */
+ if (match->mask.ip_ttl != U8_MAX) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Unsupported: only support mangle ipv4.ttl when we have an exact match on ttl, mask used for match (%#x)",
+ match->mask.ip_ttl);
+ return -EOPNOTSUPP;
+ }
+
+ /* check that we don't try to decrement 0, which equates
+ * to setting the ttl to 0xff
+ */
+ if (match->value.ip_ttl == 0) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Unsupported: we cannot decrement ttl past 0");
+ return -EOPNOTSUPP;
+ }
+
+ /* check that we do not decrement ttl twice */
+ if (!efx_tc_flower_action_order_ok(act,
+ EFX_TC_AO_DEC_TTL)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Unsupported: multiple dec ttl");
+ return -EOPNOTSUPP;
+ }
+
+ /* check pedit can be achieved with decrement action */
+ tr_ttl = match->value.ip_ttl - 1;
+ if ((fa->mangle.val & EFX_TC_HDR_TYPE_TTL_MASK) == tr_ttl) {
+ act->do_ttl_dec = 1;
+ return 0;
+ }
+
+ fallthrough;
+ default:
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Unsupported: only support mangle on the ttl field (offset is %u)",
+ fa->mangle.offset);
+ return -EOPNOTSUPP;
+ }
+ break;
+ case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
+ switch (fa->mangle.offset) {
+ case round_down(offsetof(struct ipv6hdr, hop_limit), 4):
+ /* we currently only support pedit IP6 when it applies
+ * to the hoplimit and then only when it can be achieved
+ * with a decrement hoplimit action
+ */
+
+ /* check that pedit applies to ttl only */
+ if (fa->mangle.mask != EFX_TC_HDR_TYPE_HLIMIT_MASK) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Unsupported: mask (%#x) out of range, only support mangle action on ipv6.hop_limit",
+ fa->mangle.mask);
+
+ return -EOPNOTSUPP;
+ }
+
+ /* we can only convert to a dec ttl when we have an
+ * exact match on the ttl field
+ */
+ if (match->mask.ip_ttl != U8_MAX) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Unsupported: only support mangle ipv6.hop_limit when we have an exact match on ttl, mask used for match (%#x)",
+ match->mask.ip_ttl);
+ return -EOPNOTSUPP;
+ }
+
+ /* check that we don't try to decrement 0, which equates
+ * to setting the ttl to 0xff
+ */
+ if (match->value.ip_ttl == 0) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Unsupported: we cannot decrement hop_limit past 0");
+ return -EOPNOTSUPP;
+ }
+
+ /* check that we do not decrement hoplimit twice */
+ if (!efx_tc_flower_action_order_ok(act,
+ EFX_TC_AO_DEC_TTL)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Unsupported: multiple dec ttl");
+ return -EOPNOTSUPP;
+ }
+
+ /* check pedit can be achieved with decrement action */
+ tr_ttl = match->value.ip_ttl - 1;
+ if ((fa->mangle.val >> 24) == tr_ttl) {
+ act->do_ttl_dec = 1;
+ return 0;
+ }
+
+ fallthrough;
+ default:
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Unsupported: only support mangle on the hop_limit field");
+ return -EOPNOTSUPP;
+ }
+ default:
+ NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled mangle htype %u for action rule",
+ fa->mangle.htype);
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+/**
+ * efx_tc_incomplete_mangle() - check for leftover partial pedits
+ * @mung: accumulator for partial mangles
+ * @extack: netlink extended ack for reporting errors
+ *
+ * Since the MAE can only overwrite whole fields, any partial
+ * field mangle left over on reaching packet delivery (mirred or
+ * end of TC actions) cannot be offloaded. Check for any such
+ * and reject them with -%EOPNOTSUPP.
+ */
+
+static int efx_tc_incomplete_mangle(struct efx_tc_mangler_state *mung,
+ struct netlink_ext_ack *extack)
+{
+ if (mung->dst_mac_32 || mung->dst_mac_16) {
+ NL_SET_ERR_MSG_MOD(extack, "Incomplete pedit of destination MAC address");
+ return -EOPNOTSUPP;
+ }
+ if (mung->src_mac_16 || mung->src_mac_32) {
+ NL_SET_ERR_MSG_MOD(extack, "Incomplete pedit of source MAC address");
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
static int efx_tc_flower_replace_foreign(struct efx_nic *efx,
struct net_device *net_dev,
struct flow_cls_offload *tc)
@@ -1295,6 +1743,7 @@ static int efx_tc_flower_replace(struct efx_nic *efx,
struct netlink_ext_ack *extack = tc->common.extack;
const struct ip_tunnel_info *encap_info = NULL;
struct efx_tc_flow_rule *rule = NULL, *old;
+ struct efx_tc_mangler_state mung = {};
struct efx_tc_action_set *act = NULL;
const struct flow_action_entry *fa;
struct efx_rep *from_efv, *to_efv;
@@ -1631,6 +2080,16 @@ static int efx_tc_flower_replace(struct efx_nic *efx,
act->vlan_proto[act->vlan_push] = fa->vlan.proto;
act->vlan_push++;
break;
+ case FLOW_ACTION_ADD:
+ rc = efx_tc_pedit_add(efx, act, fa, extack);
+ if (rc < 0)
+ goto release;
+ break;
+ case FLOW_ACTION_MANGLE:
+ rc = efx_tc_mangle(efx, act, fa, &mung, extack, &match);
+ if (rc < 0)
+ goto release;
+ break;
case FLOW_ACTION_TUNNEL_ENCAP:
if (encap_info) {
/* Can't specify encap multiple times.
@@ -1670,6 +2129,9 @@ static int efx_tc_flower_replace(struct efx_nic *efx,
}
}
+ rc = efx_tc_incomplete_mangle(&mung, extack);
+ if (rc < 0)
+ goto release;
if (act) {
/* Not shot/redirected, so deliver to default dest */
if (from_efv == EFX_EFV_PF)
@@ -2156,6 +2618,14 @@ static void efx_tc_lhs_free(void *ptr, void *arg)
kfree(rule);
}
+static void efx_tc_mac_free(void *ptr, void *__unused)
+{
+ struct efx_tc_mac_pedit_action *ped = ptr;
+
+ WARN_ON(refcount_read(&ped->ref));
+ kfree(ped);
+}
+
static void efx_tc_flow_free(void *ptr, void *arg)
{
struct efx_tc_flow_rule *rule = ptr;
@@ -2196,6 +2666,9 @@ int efx_init_struct_tc(struct efx_nic *efx)
rc = efx_tc_init_counters(efx);
if (rc < 0)
goto fail_counters;
+ rc = rhashtable_init(&efx->tc->mac_ht, &efx_tc_mac_ht_params);
+ if (rc < 0)
+ goto fail_mac_ht;
rc = rhashtable_init(&efx->tc->encap_match_ht, &efx_tc_encap_match_ht_params);
if (rc < 0)
goto fail_encap_match_ht;
@@ -2233,6 +2706,8 @@ fail_lhs_rule_ht:
fail_match_action_ht:
rhashtable_destroy(&efx->tc->encap_match_ht);
fail_encap_match_ht:
+ rhashtable_destroy(&efx->tc->mac_ht);
+fail_mac_ht:
efx_tc_destroy_counters(efx);
fail_counters:
efx_tc_destroy_encap_actions(efx);
@@ -2268,6 +2743,7 @@ void efx_fini_struct_tc(struct efx_nic *efx)
rhashtable_free_and_destroy(&efx->tc->recirc_ht, efx_tc_recirc_free, efx);
WARN_ON(!ida_is_empty(&efx->tc->recirc_ida));
ida_destroy(&efx->tc->recirc_ida);
+ rhashtable_free_and_destroy(&efx->tc->mac_ht, efx_tc_mac_free, NULL);
efx_tc_fini_counters(efx);
efx_tc_fini_encap_actions(efx);
mutex_unlock(&efx->tc->mutex);
diff --git a/drivers/net/ethernet/sfc/tc.h b/drivers/net/ethernet/sfc/tc.h
index 40d2c803fca827..4dd2c378fd9f5c 100644
--- a/drivers/net/ethernet/sfc/tc.h
+++ b/drivers/net/ethernet/sfc/tc.h
@@ -18,6 +18,23 @@
#define IS_ALL_ONES(v) (!(typeof (v))~(v))
+/**
+ * struct efx_tc_mac_pedit_action - mac pedit action fields
+ *
+ * @h_addr: mac address field of ethernet header
+ * @linkage: rhashtable reference
+ * @ref: reference count
+ * @fw_id: index of this entry in firmware MAC address table
+ *
+ * MAC address edits are indirected through a table in the hardware
+ */
+struct efx_tc_mac_pedit_action {
+ u8 h_addr[ETH_ALEN];
+ struct rhash_head linkage;
+ refcount_t ref;
+ u32 fw_id; /* index of this entry in firmware MAC address table */
+};
+
static inline bool efx_ipv6_addr_all_ones(struct in6_addr *addr)
{
return !memchr_inv(addr, 0xff, sizeof(*addr));
@@ -25,20 +42,45 @@ static inline bool efx_ipv6_addr_all_ones(struct in6_addr *addr)
struct efx_tc_encap_action; /* see tc_encap_actions.h */
+/**
+ * struct efx_tc_action_set - collection of tc action fields
+ *
+ * @vlan_push: the number of vlan headers to push
+ * @vlan_pop: the number of vlan headers to pop
+ * @decap: used to indicate a tunnel header decapsulation should take place
+ * @do_ttl_dec: used to indicate IP TTL / Hop Limit should be decremented
+ * @deliver: used to indicate a deliver action should take place
+ * @vlan_tci: tci fields for vlan push actions
+ * @vlan_proto: ethernet types for vlan push actions
+ * @count: counter mapping
+ * @encap_md: encap entry in tc_encap_ht table
+ * @encap_user: linked list of encap users (encap_md->users)
+ * @user: owning action-set-list. Only populated if @encap_md is; used by efx_tc_update_encap() fallback handling
+ * @count_user: linked list of counter users (counter->users)
+ * @dest_mport: destination mport
+ * @src_mac: source mac entry in tc_mac_ht table
+ * @dst_mac: destination mac entry in tc_mac_ht table
+ * @fw_id: index of this entry in firmware actions table
+ * @list: linked list of tc actions
+ *
+ */
struct efx_tc_action_set {
u16 vlan_push:2;
u16 vlan_pop:2;
u16 decap:1;
+ u16 do_ttl_dec:1;
u16 deliver:1;
- __be16 vlan_tci[2]; /* TCIs for vlan_push */
- __be16 vlan_proto[2]; /* Ethertypes for vlan_push */
+ __be16 vlan_tci[2];
+ __be16 vlan_proto[2];
struct efx_tc_counter_index *count;
- struct efx_tc_encap_action *encap_md; /* entry in tc_encap_ht table */
- struct list_head encap_user; /* entry on encap_md->users list */
- struct efx_tc_action_set_list *user; /* Only populated if encap_md */
- struct list_head count_user; /* entry on counter->users list, if encap */
+ struct efx_tc_encap_action *encap_md;
+ struct list_head encap_user;
+ struct efx_tc_action_set_list *user;
+ struct list_head count_user;
u32 dest_mport;
- u32 fw_id; /* index of this entry in firmware actions table */
+ struct efx_tc_mac_pedit_action *src_mac;
+ struct efx_tc_mac_pedit_action *dst_mac;
+ u32 fw_id;
struct list_head list;
};
@@ -220,6 +262,7 @@ struct efx_tc_table_ct { /* TABLE_ID_CONNTRACK_TABLE */
* @counter_ht: Hashtable of TC counters (FW IDs and counter values)
* @counter_id_ht: Hashtable mapping TC counter cookies to counters
* @encap_ht: Hashtable of TC encap actions
+ * @mac_ht: Hashtable of MAC address entries (for pedits)
* @encap_match_ht: Hashtable of TC encap matches
* @match_action_ht: Hashtable of TC match-action rules
* @lhs_rule_ht: Hashtable of TC left-hand (act ct & goto chain) rules
@@ -257,6 +300,7 @@ struct efx_tc_state {
struct rhashtable counter_ht;
struct rhashtable counter_id_ht;
struct rhashtable encap_ht;
+ struct rhashtable mac_ht;
struct rhashtable encap_match_ht;
struct rhashtable match_action_ht;
struct rhashtable lhs_rule_ht;