XDP hardware hints discussion mail archive
 help / color / mirror / Atom feed
From: Stanislav Fomichev <sdf@google.com>
To: bpf@vger.kernel.org
Cc: ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
	martin.lau@linux.dev, song@kernel.org, yhs@fb.com,
	john.fastabend@gmail.com, kpsingh@kernel.org, sdf@google.com,
	haoluo@google.com, jolsa@kernel.org, kuba@kernel.org,
	toke@kernel.org, willemb@google.com, dsahern@kernel.org,
	magnus.karlsson@intel.com, bjorn@kernel.org,
	maciej.fijalkowski@intel.com, hawk@kernel.org,
	netdev@vger.kernel.org, xdp-hints@xdp-project.net
Subject: [xdp-hints] [RFC net-next v4 1/8] xsk: Support XDP_TX_METADATA_LEN
Date: Mon, 24 Jul 2023 16:59:50 -0700	[thread overview]
Message-ID: <20230724235957.1953861-2-sdf@google.com> (raw)
In-Reply-To: <20230724235957.1953861-1-sdf@google.com>

For zerocopy mode, tx_desc->addr can point to the arbitrary offset
and carry some TX metadata in the headroom. For copy mode, there
is no way currently to populate skb metadata.

Introduce new XDP_TX_METADATA_LEN that indicates how many bytes
to treat as metadata. Metadata bytes come prior to tx_desc address
(same as in RX case).

The size of the metadata has the same constraints as XDP:
- less than 256 bytes
- 4-byte aligned
- non-zero

This data is not interpreted in any way right now.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 include/net/xdp_sock.h      |  1 +
 include/net/xsk_buff_pool.h |  1 +
 include/uapi/linux/if_xdp.h |  1 +
 net/xdp/xsk.c               | 20 ++++++++++++++++++++
 net/xdp/xsk_buff_pool.c     |  1 +
 net/xdp/xsk_queue.h         | 17 ++++++++++-------
 6 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 1617af380162..467b9fb56827 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -51,6 +51,7 @@ struct xdp_sock {
 	struct list_head flush_node;
 	struct xsk_buff_pool *pool;
 	u16 queue_id;
+	u8 tx_metadata_len;
 	bool zc;
 	bool sg;
 	enum {
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index b0bdff26fc88..9c31e8d1e198 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -77,6 +77,7 @@ struct xsk_buff_pool {
 	u32 chunk_size;
 	u32 chunk_shift;
 	u32 frame_len;
+	u8 tx_metadata_len; /* inherited from xsk_sock */
 	u8 cached_need_wakeup;
 	bool uses_need_wakeup;
 	bool dma_need_sync;
diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
index 8d48863472b9..b37b50102e1c 100644
--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
@@ -69,6 +69,7 @@ struct xdp_mmap_offsets {
 #define XDP_UMEM_COMPLETION_RING	6
 #define XDP_STATISTICS			7
 #define XDP_OPTIONS			8
+#define XDP_TX_METADATA_LEN		9
 
 struct xdp_umem_reg {
 	__u64 addr; /* Start of packet data area */
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 4f1e0599146e..81106e4d6e0f 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -1337,6 +1337,26 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
 		mutex_unlock(&xs->mutex);
 		return err;
 	}
+	case XDP_TX_METADATA_LEN:
+	{
+		int val;
+
+		if (optlen < sizeof(val))
+			return -EINVAL;
+		if (copy_from_sockptr(&val, optval, sizeof(val)))
+			return -EFAULT;
+		if (!val || val > 256 || val % 4)
+			return -EINVAL;
+
+		mutex_lock(&xs->mutex);
+		if (xs->state != XSK_READY) {
+			mutex_unlock(&xs->mutex);
+			return -EBUSY;
+		}
+		xs->tx_metadata_len = val;
+		mutex_unlock(&xs->mutex);
+		return 0;
+	}
 	default:
 		break;
 	}
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index b3f7b310811e..b351732f1032 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -85,6 +85,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
 		XDP_PACKET_HEADROOM;
 	pool->umem = umem;
 	pool->addrs = umem->addrs;
+	pool->tx_metadata_len = xs->tx_metadata_len;
 	INIT_LIST_HEAD(&pool->free_list);
 	INIT_LIST_HEAD(&pool->xskb_list);
 	INIT_LIST_HEAD(&pool->xsk_tx_list);
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 13354a1e4280..c74a1372bcb9 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -143,15 +143,17 @@ static inline bool xp_unused_options_set(u32 options)
 static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
 					    struct xdp_desc *desc)
 {
-	u64 offset = desc->addr & (pool->chunk_size - 1);
+	u64 addr = desc->addr - pool->tx_metadata_len;
+	u64 len = desc->len + pool->tx_metadata_len;
+	u64 offset = addr & (pool->chunk_size - 1);
 
 	if (!desc->len)
 		return false;
 
-	if (offset + desc->len > pool->chunk_size)
+	if (offset + len > pool->chunk_size)
 		return false;
 
-	if (desc->addr >= pool->addrs_cnt)
+	if (addr >= pool->addrs_cnt)
 		return false;
 
 	if (xp_unused_options_set(desc->options))
@@ -162,16 +164,17 @@ static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
 static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
 					      struct xdp_desc *desc)
 {
-	u64 addr = xp_unaligned_add_offset_to_addr(desc->addr);
+	u64 addr = xp_unaligned_add_offset_to_addr(desc->addr) - pool->tx_metadata_len;
+	u64 len = desc->len + pool->tx_metadata_len;
 
 	if (!desc->len)
 		return false;
 
-	if (desc->len > pool->chunk_size)
+	if (len > pool->chunk_size)
 		return false;
 
-	if (addr >= pool->addrs_cnt || addr + desc->len > pool->addrs_cnt ||
-	    xp_desc_crosses_non_contig_pg(pool, addr, desc->len))
+	if (addr >= pool->addrs_cnt || addr + len > pool->addrs_cnt ||
+	    xp_desc_crosses_non_contig_pg(pool, addr, len))
 		return false;
 
 	if (xp_unused_options_set(desc->options))
-- 
2.41.0.487.g6d72f3e995-goog


  reply	other threads:[~2023-07-25  0:00 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-07-24 23:59 [xdp-hints] [RFC net-next v4 0/8] xsk: TX metadata Stanislav Fomichev
2023-07-24 23:59 ` Stanislav Fomichev [this message]
2023-07-24 23:59 ` [xdp-hints] [RFC net-next v4 2/8] xsk: add TX timestamp and TX checksum offload support Stanislav Fomichev
2023-07-25 19:28   ` [xdp-hints] " Simon Horman
2023-07-25 20:30     ` Stanislav Fomichev
2023-07-25 21:28       ` Jakub Kicinski
2023-07-25 22:40         ` Stanislav Fomichev
2023-07-26  7:47           ` Simon Horman
2023-07-25 20:54   ` Willem de Bruijn
2023-07-25 22:39     ` Stanislav Fomichev
2023-07-25 23:10       ` Willem de Bruijn
2023-07-25 23:48         ` Stanislav Fomichev
2023-07-27 14:11         ` Jesper Dangaard Brouer
2023-07-27 16:37           ` Stanislav Fomichev
2023-07-25 20:58   ` Willem de Bruijn
2023-07-28 11:56     ` Jesper Dangaard Brouer
2023-07-28 13:19       ` Willem de Bruijn
2023-07-26 19:25   ` Jesper Dangaard Brouer
2023-07-26 21:25     ` Stanislav Fomichev
2023-07-27 13:50       ` Jesper Dangaard Brouer
2023-07-27 16:34         ` Stanislav Fomichev
2023-07-24 23:59 ` [xdp-hints] [RFC net-next v4 3/8] net/mlx5e: Implement AF_XDP TX timestamp and checksum offload Stanislav Fomichev
2023-07-24 23:59 ` [xdp-hints] [RFC net-next v4 4/8] tools: ynl: update netdev sample to dump xsk-features Stanislav Fomichev
2023-07-24 23:59 ` [xdp-hints] [RFC net-next v4 5/8] selftests/xsk: Support XDP_TX_METADATA_LEN Stanislav Fomichev
2023-07-24 23:59 ` [xdp-hints] [RFC net-next v4 6/8] selftests/bpf: Add csum helpers Stanislav Fomichev
2023-07-24 23:59 ` [xdp-hints] [RFC net-next v4 7/8] selftests/bpf: Add TX side to xdp_metadata Stanislav Fomichev
2023-07-24 23:59 ` [xdp-hints] [RFC net-next v4 8/8] selftests/bpf: Add TX side to xdp_hw_metadata Stanislav Fomichev
2023-07-25 20:59   ` [xdp-hints] " Willem de Bruijn
2023-07-25 22:36     ` Stanislav Fomichev
2023-07-25 22:55       ` Willem de Bruijn

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://lists.xdp-project.net/postorius/lists/xdp-hints.xdp-project.net/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230724235957.1953861-2-sdf@google.com \
    --to=sdf@google.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bjorn@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=dsahern@kernel.org \
    --cc=haoluo@google.com \
    --cc=hawk@kernel.org \
    --cc=john.fastabend@gmail.com \
    --cc=jolsa@kernel.org \
    --cc=kpsingh@kernel.org \
    --cc=kuba@kernel.org \
    --cc=maciej.fijalkowski@intel.com \
    --cc=magnus.karlsson@intel.com \
    --cc=martin.lau@linux.dev \
    --cc=netdev@vger.kernel.org \
    --cc=song@kernel.org \
    --cc=toke@kernel.org \
    --cc=willemb@google.com \
    --cc=xdp-hints@xdp-project.net \
    --cc=yhs@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox