XDP hardware hints discussion mail archive
 help / color / mirror / Atom feed
From: Larysa Zaremba <larysa.zaremba@intel.com>
To: bpf@vger.kernel.org
Cc: Larysa Zaremba <larysa.zaremba@intel.com>,
	ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
	martin.lau@linux.dev, song@kernel.org, yhs@fb.com,
	john.fastabend@gmail.com, kpsingh@kernel.org, sdf@google.com,
	haoluo@google.com, jolsa@kernel.org,
	David Ahern <dsahern@gmail.com>, Jakub Kicinski <kuba@kernel.org>,
	Willem de Bruijn <willemb@google.com>,
	Jesper Dangaard Brouer <brouer@redhat.com>,
	Anatoly Burakov <anatoly.burakov@intel.com>,
	Alexander Lobakin <alexandr.lobakin@intel.com>,
	Magnus Karlsson <magnus.karlsson@gmail.com>,
	Maryam Tahhan <mtahhan@redhat.com>,
	xdp-hints@xdp-project.net, netdev@vger.kernel.org,
	Willem de Bruijn <willemdebruijn.kernel@gmail.com>,
	Alexei Starovoitov <alexei.starovoitov@gmail.com>,
	Simon Horman <simon.horman@corigine.com>,
	Tariq Toukan <tariqt@mellanox.com>,
	Saeed Mahameed <saeedm@mellanox.com>
Subject: [xdp-hints] [RFC bpf-next 23/23] mlx5: implement RX checksum XDP hint
Date: Thu, 24 Aug 2023 21:27:02 +0200	[thread overview]
Message-ID: <20230824192703.712881-24-larysa.zaremba@intel.com> (raw)
In-Reply-To: <20230824192703.712881-1-larysa.zaremba@intel.com>

Implement .xmo_rx_csum() callback to expose checksum information
to XDP code.

This version contains a lot of logic duplicated from the skb path, because
refactoring would be much more complex than the implementation itself: the
checksum code is too coupled with the skb concept.

Intended logic differences from the skb path:
- when the checksum does not cover the whole packet, no fixups are
  performed; such a packet is treated as one without a complete checksum.
  This is just to keep the patch from ballooning with hints-unrelated code.
- with hints API, we can now inform about both complete and validated
  checksum statuses, that is why XDP_CHECKSUM_VERIFIED is ORed to the
  status. I hope this represents HW logic well.

Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
---
 .../net/ethernet/mellanox/mlx5/core/en/txrx.h |  10 ++
 .../net/ethernet/mellanox/mlx5/core/en/xdp.c  | 100 ++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/en_rx.c   |  12 +--
 include/linux/mlx5/device.h                   |   2 +-
 4 files changed, 112 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index 879d698b6119..9467a0dea6ae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -506,4 +506,14 @@ static inline struct mlx5e_mpw_info *mlx5e_get_mpw_info(struct mlx5e_rq *rq, int
 
 	return (struct mlx5e_mpw_info *)((char *)rq->mpwqe.info + array_size(i, isz));
 }
+
+static inline u8 get_ip_proto(void *data, int network_depth, __be16 proto)
+{
+	void *ip_p = data + network_depth;
+
+	return (proto == htons(ETH_P_IP)) ? ((struct iphdr *)ip_p)->protocol :
+					    ((struct ipv6hdr *)ip_p)->nexthdr;
+}
+
+#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN)
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index e8319ab0fa85..e08b2ad56442 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -270,10 +270,110 @@ static int mlx5e_xdp_rx_vlan_tag(const struct xdp_md *ctx, u16 *vlan_tci,
 	return 0;
 }
 
+static __be16 xdp_buff_last_ethertype(const struct xdp_buff *xdp,
+				      int *network_offset)
+{
+	__be16 proto = ((struct ethhdr *)xdp->data)->h_proto;
+	struct vlan_hdr *remaining_data = xdp->data + ETH_HLEN;
+	u8 allowed_depth = VLAN_MAX_DEPTH;
+
+	while (eth_type_vlan(proto)) {
+		struct vlan_hdr *next_data = remaining_data + 1;
+
+		if ((void *)next_data > xdp->data_end || !--allowed_depth)
+			return 0;
+		proto = remaining_data->h_vlan_encapsulated_proto;
+		remaining_data = next_data;
+	}
+
+	*network_offset = (void *)remaining_data - xdp->data;
+	return proto;
+}
+
+static bool xdp_csum_needs_fixup(const struct xdp_buff *xdp, int network_depth,
+				 __be16 proto)
+{
+	struct ipv6hdr *ip6;
+	struct iphdr   *ip4;
+	int pkt_len;
+
+	if (network_depth > ETH_HLEN)
+		return true;
+
+	switch (proto) {
+	case htons(ETH_P_IP):
+		ip4 = (struct iphdr *)(xdp->data + network_depth);
+		pkt_len = network_depth + ntohs(ip4->tot_len);
+		break;
+	case htons(ETH_P_IPV6):
+		ip6 = (struct ipv6hdr *)(xdp->data + network_depth);
+		pkt_len = network_depth + sizeof(*ip6) + ntohs(ip6->payload_len);
+		break;
+	default:
+		return true;
+	}
+
+	if (likely(pkt_len >= xdp->data_end - xdp->data))
+		return false;
+
+	return true;
+}
+
+static int mlx5e_xdp_rx_csum(const struct xdp_md *ctx,
+			     enum xdp_csum_status *csum_status,
+			     __wsum *csum)
+{
+	const struct mlx5e_xdp_buff *_ctx = (void *)ctx;
+	const struct mlx5_cqe64 *cqe = _ctx->cqe;
+	const struct mlx5e_rq *rq = _ctx->rq;
+	__be16 last_ethertype;
+	int network_offset;
+	u8 lro_num_seg;
+
+	lro_num_seg = be32_to_cpu(cqe->srqn) >> 24;
+	if (lro_num_seg) {
+		*csum_status = XDP_CHECKSUM_VERIFIED;
+		return 0;
+	}
+
+	if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state) ||
+	    get_cqe_tls_offload(cqe))
+		goto csum_unnecessary;
+
+	if (short_frame(ctx->data_end - ctx->data))
+		goto csum_unnecessary;
+
+	last_ethertype = xdp_buff_last_ethertype(&_ctx->xdp, &network_offset);
+	if (last_ethertype != htons(ETH_P_IP) && last_ethertype != htons(ETH_P_IPV6))
+		goto csum_unnecessary;
+	if (unlikely(get_ip_proto(_ctx->xdp.data, network_offset,
+				  last_ethertype) == IPPROTO_SCTP))
+		goto csum_unnecessary;
+
+	*csum_status = XDP_CHECKSUM_COMPLETE;
+	*csum = csum_unfold((__force __sum16)cqe->check_sum);
+
+	if (test_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state))
+		goto csum_unnecessary;
+
+	if (unlikely(xdp_csum_needs_fixup(&_ctx->xdp, network_offset,
+					  last_ethertype)))
+		*csum_status = 0;
+
+csum_unnecessary:
+	if (likely((cqe->hds_ip_ext & CQE_L3_OK) &&
+		   (cqe->hds_ip_ext & CQE_L4_OK))) {
+		*csum_status |= XDP_CHECKSUM_VERIFIED;
+	}
+
+	return *csum_status ? 0 : -ENODATA;
+}
+
 const struct xdp_metadata_ops mlx5e_xdp_metadata_ops = {
 	.xmo_rx_timestamp		= mlx5e_xdp_rx_timestamp,
 	.xmo_rx_hash			= mlx5e_xdp_rx_hash,
 	.xmo_rx_vlan_tag		= mlx5e_xdp_rx_vlan_tag,
+	.xmo_rx_csum			= mlx5e_xdp_rx_csum,
 };
 
 /* returns true if packet was consumed by xdp */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 3fd11b0761e0..c303ab8b928c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -1374,16 +1374,6 @@ static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb)
 	rq->stats->ecn_mark += !!rc;
 }
 
-static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto)
-{
-	void *ip_p = skb->data + network_depth;
-
-	return (proto == htons(ETH_P_IP)) ? ((struct iphdr *)ip_p)->protocol :
-					    ((struct ipv6hdr *)ip_p)->nexthdr;
-}
-
-#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN)
-
 #define MAX_PADDING 8
 
 static void
@@ -1493,7 +1483,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
 		goto csum_unnecessary;
 
 	if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) {
-		if (unlikely(get_ip_proto(skb, network_depth, proto) == IPPROTO_SCTP))
+		if (unlikely(get_ip_proto(skb->data, network_depth, proto) == IPPROTO_SCTP))
 			goto csum_unnecessary;
 
 		stats->csum_complete++;
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 95ffd78546a7..82813efae79d 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -908,7 +908,7 @@ static inline bool cqe_is_tunneled(struct mlx5_cqe64 *cqe)
 	return cqe->tls_outer_l3_tunneled & 0x1;
 }
 
-static inline u8 get_cqe_tls_offload(struct mlx5_cqe64 *cqe)
+static inline u8 get_cqe_tls_offload(const struct mlx5_cqe64 *cqe)
 {
 	return (cqe->tls_outer_l3_tunneled >> 3) & 0x3;
 }
-- 
2.41.0


  parent reply	other threads:[~2023-08-24 19:35 UTC|newest]

Thread overview: 70+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-08-24 19:26 [xdp-hints] [RFC bpf-next 00/23] XDP metadata via kfuncs for ice + mlx5 Larysa Zaremba
2023-08-24 19:26 ` [xdp-hints] [RFC bpf-next 01/23] ice: make RX hash reading code more reusable Larysa Zaremba
2023-09-04 14:37   ` [xdp-hints] " Maciej Fijalkowski
2023-09-06 12:23     ` Alexander Lobakin
2023-09-14 16:12   ` Alexander Lobakin
2023-09-14 16:15     ` Larysa Zaremba
2023-08-24 19:26 ` [xdp-hints] [RFC bpf-next 02/23] ice: make RX HW timestamp " Larysa Zaremba
2023-09-04 14:56   ` [xdp-hints] " Maciej Fijalkowski
2023-09-04 16:29     ` Larysa Zaremba
2023-09-05 15:22       ` Maciej Fijalkowski
2023-08-24 19:26 ` [xdp-hints] [RFC bpf-next 03/23] ice: make RX checksum checking " Larysa Zaremba
2023-09-04 15:02   ` [xdp-hints] " Maciej Fijalkowski
2023-09-04 18:01     ` Larysa Zaremba
2023-09-05 15:37       ` Maciej Fijalkowski
2023-09-05 16:53         ` Larysa Zaremba
2023-09-05 17:44           ` Maciej Fijalkowski
2023-09-06  9:28             ` Larysa Zaremba
2023-08-24 19:26 ` [xdp-hints] [RFC bpf-next 04/23] ice: Make ptype internal to descriptor info processing Larysa Zaremba
2023-09-04 15:04   ` [xdp-hints] " Maciej Fijalkowski
2023-08-24 19:26 ` [xdp-hints] [RFC bpf-next 05/23] ice: Introduce ice_xdp_buff Larysa Zaremba
2023-09-04 15:32   ` [xdp-hints] " Maciej Fijalkowski
2023-09-04 18:11     ` Larysa Zaremba
2023-09-05 17:53       ` Maciej Fijalkowski
2023-09-07 14:21         ` Larysa Zaremba
2023-09-07 16:33           ` Stanislav Fomichev
2023-09-07 16:42             ` Maciej Fijalkowski
2023-09-07 16:43               ` Maciej Fijalkowski
2023-09-13 15:40                 ` Larysa Zaremba
2023-08-24 19:26 ` [xdp-hints] [RFC bpf-next 06/23] ice: Support HW timestamp hint Larysa Zaremba
2023-09-04 15:38   ` [xdp-hints] " Maciej Fijalkowski
2023-09-04 18:12     ` Larysa Zaremba
2023-08-24 19:26 ` [xdp-hints] [RFC bpf-next 07/23] ice: Support RX hash XDP hint Larysa Zaremba
2023-09-05 15:42   ` [xdp-hints] " Maciej Fijalkowski
2023-09-05 17:09     ` Larysa Zaremba
2023-09-06 12:03     ` Alexander Lobakin
2023-09-14 16:54   ` Alexander Lobakin
2023-09-14 16:59     ` Larysa Zaremba
2023-08-24 19:26 ` [xdp-hints] [RFC bpf-next 08/23] ice: Support XDP hints in AF_XDP ZC mode Larysa Zaremba
2023-09-04 15:42   ` [xdp-hints] " Maciej Fijalkowski
2023-09-04 18:14     ` Larysa Zaremba
2023-08-24 19:26 ` [xdp-hints] [RFC bpf-next 09/23] xdp: Add VLAN tag hint Larysa Zaremba
2023-09-14 16:18   ` [xdp-hints] " Alexander Lobakin
2023-09-14 16:21     ` Larysa Zaremba
2023-08-24 19:26 ` [xdp-hints] [RFC bpf-next 10/23] ice: Implement " Larysa Zaremba
2023-09-04 16:00   ` [xdp-hints] " Maciej Fijalkowski
2023-09-04 18:18     ` Larysa Zaremba
2023-09-14 16:25   ` Alexander Lobakin
2023-09-14 16:28     ` Larysa Zaremba
2023-09-14 16:38       ` Alexander Lobakin
2023-09-14 17:02         ` Larysa Zaremba
2023-09-18 14:07         ` Larysa Zaremba
2023-08-24 19:26 ` [xdp-hints] [RFC bpf-next 11/23] ice: use VLAN proto from ring packet context in skb path Larysa Zaremba
2023-09-14 16:30   ` [xdp-hints] " Alexander Lobakin
2023-09-14 16:30     ` Larysa Zaremba
2023-08-24 19:26 ` [xdp-hints] [RFC bpf-next 12/23] xdp: Add checksum hint Larysa Zaremba
2023-09-14 16:34   ` [xdp-hints] " Alexander Lobakin
2023-08-24 19:26 ` [xdp-hints] [RFC bpf-next 13/23] ice: Implement " Larysa Zaremba
2023-08-24 19:26 ` [xdp-hints] [RFC bpf-next 14/23] selftests/bpf: Allow VLAN packets in xdp_hw_metadata Larysa Zaremba
2023-08-24 19:26 ` [xdp-hints] [RFC bpf-next 15/23] net, xdp: allow metadata > 32 Larysa Zaremba
2023-08-24 19:26 ` [xdp-hints] [RFC bpf-next 16/23] selftests/bpf: Add flags and new hints to xdp_hw_metadata Larysa Zaremba
2023-08-24 19:26 ` [xdp-hints] [RFC bpf-next 17/23] veth: Implement VLAN tag and checksum XDP hint Larysa Zaremba
2023-08-24 19:26 ` [xdp-hints] [RFC bpf-next 18/23] net: make vlan_get_tag() return -ENODATA instead of -EINVAL Larysa Zaremba
2023-08-24 19:26 ` [xdp-hints] [RFC bpf-next 19/23] selftests/bpf: Use AF_INET for TX in xdp_metadata Larysa Zaremba
2023-08-24 19:26 ` [xdp-hints] [RFC bpf-next 20/23] selftests/bpf: Check VLAN tag and proto " Larysa Zaremba
2023-08-24 19:27 ` [xdp-hints] [RFC bpf-next 21/23] selftests/bpf: check checksum state " Larysa Zaremba
2023-08-24 19:27 ` [xdp-hints] [RFC bpf-next 22/23] mlx5: implement VLAN tag XDP hint Larysa Zaremba
2023-08-24 19:27 ` Larysa Zaremba [this message]
2023-08-31 14:50 ` [xdp-hints] Re: [RFC bpf-next 00/23] XDP metadata via kfuncs for ice + mlx5 Larysa Zaremba
2023-09-04 16:06 ` Maciej Fijalkowski
2023-09-06 14:09   ` Larysa Zaremba

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://lists.xdp-project.net/postorius/lists/xdp-hints.xdp-project.net/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230824192703.712881-24-larysa.zaremba@intel.com \
    --to=larysa.zaremba@intel.com \
    --cc=alexandr.lobakin@intel.com \
    --cc=alexei.starovoitov@gmail.com \
    --cc=anatoly.burakov@intel.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=brouer@redhat.com \
    --cc=daniel@iogearbox.net \
    --cc=dsahern@gmail.com \
    --cc=haoluo@google.com \
    --cc=john.fastabend@gmail.com \
    --cc=jolsa@kernel.org \
    --cc=kpsingh@kernel.org \
    --cc=kuba@kernel.org \
    --cc=magnus.karlsson@gmail.com \
    --cc=martin.lau@linux.dev \
    --cc=mtahhan@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=saeedm@mellanox.com \
    --cc=sdf@google.com \
    --cc=simon.horman@corigine.com \
    --cc=song@kernel.org \
    --cc=tariqt@mellanox.com \
    --cc=willemb@google.com \
    --cc=willemdebruijn.kernel@gmail.com \
    --cc=xdp-hints@xdp-project.net \
    --cc=yhs@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox