From: Stanislav Fomichev <sdf@google.com>
To: bpf@vger.kernel.org
Cc: ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
martin.lau@linux.dev, song@kernel.org, yhs@fb.com,
john.fastabend@gmail.com, kpsingh@kernel.org, sdf@google.com,
haoluo@google.com, jolsa@kernel.org, kuba@kernel.org,
toke@kernel.org, willemb@google.com, dsahern@kernel.org,
magnus.karlsson@intel.com, bjorn@kernel.org,
maciej.fijalkowski@intel.com, hawk@kernel.org,
netdev@vger.kernel.org, xdp-hints@xdp-project.net
Subject: [xdp-hints] [RFC bpf-next v3 03/14] xsk: Support XDP_TX_METADATA_LEN
Date: Fri, 7 Jul 2023 12:29:55 -0700 [thread overview]
Message-ID: <20230707193006.1309662-4-sdf@google.com> (raw)
In-Reply-To: <20230707193006.1309662-1-sdf@google.com>
For zerocopy mode, tx_desc->addr can point to an arbitrary offset
and carry some TX metadata in the headroom. For copy mode, there
is currently no way to populate skb metadata.
Introduce a new XDP_TX_METADATA_LEN socket option that indicates how many
bytes to treat as metadata. Metadata bytes come prior to the tx_desc address
(same as in the RX case).
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
include/net/xdp_sock.h | 1 +
include/net/xsk_buff_pool.h | 1 +
include/uapi/linux/if_xdp.h | 1 +
net/xdp/xsk.c | 35 +++++++++++++++++++++++++++++++++--
net/xdp/xsk_buff_pool.c | 1 +
net/xdp/xsk_queue.h | 7 ++++---
6 files changed, 41 insertions(+), 5 deletions(-)
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index e96a1151ec75..30018b3b862d 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -51,6 +51,7 @@ struct xdp_sock {
struct list_head flush_node;
struct xsk_buff_pool *pool;
u16 queue_id;
+ u8 tx_metadata_len;
bool zc;
enum {
XSK_READY = 0,
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index a8d7b8a3688a..751fea51a6af 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -75,6 +75,7 @@ struct xsk_buff_pool {
u32 chunk_size;
u32 chunk_shift;
u32 frame_len;
+ u8 tx_metadata_len; /* inherited from xdp_sock */
u8 cached_need_wakeup;
bool uses_need_wakeup;
bool dma_need_sync;
diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
index a78a8096f4ce..2374eafff7db 100644
--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
@@ -63,6 +63,7 @@ struct xdp_mmap_offsets {
#define XDP_UMEM_COMPLETION_RING 6
#define XDP_STATISTICS 7
#define XDP_OPTIONS 8
+#define XDP_TX_METADATA_LEN 9
struct xdp_umem_reg {
__u64 addr; /* Start of packet data area */
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 5a8c0dd250af..6ef7bc4f514c 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -485,6 +485,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
int err;
hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom));
+ hr = max(hr, L1_CACHE_ALIGN((u32)xs->tx_metadata_len));
tr = dev->needed_tailroom;
len = desc->len;
@@ -493,14 +494,21 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
return ERR_PTR(err);
skb_reserve(skb, hr);
- skb_put(skb, len);
+ skb_put(skb, len + xs->tx_metadata_len);
buffer = xsk_buff_raw_get_data(xs->pool, desc->addr);
- err = skb_store_bits(skb, 0, buffer, len);
+ buffer -= xs->tx_metadata_len;
+
+ err = skb_store_bits(skb, 0, buffer, len + xs->tx_metadata_len);
if (unlikely(err)) {
kfree_skb(skb);
return ERR_PTR(err);
}
+
+ if (xs->tx_metadata_len) {
+ skb_metadata_set(skb, xs->tx_metadata_len);
+ __skb_pull(skb, xs->tx_metadata_len);
+ }
}
skb->dev = dev;
@@ -1137,6 +1145,29 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
mutex_unlock(&xs->mutex);
return err;
}
+ case XDP_TX_METADATA_LEN:
+ {
+ int val;
+
+ if (optlen < sizeof(val))
+ return -EINVAL;
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
+ return -EFAULT;
+
+ if (val >= 256)
+ return -EINVAL;
+ if (val % 4)
+ return -EINVAL;
+
+ mutex_lock(&xs->mutex);
+ if (xs->state != XSK_READY) {
+ mutex_unlock(&xs->mutex);
+ return -EBUSY;
+ }
+ xs->tx_metadata_len = val;
+ mutex_unlock(&xs->mutex);
+ return 0;
+ }
default:
break;
}
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 26f6d304451e..66ff9c345a67 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -85,6 +85,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
XDP_PACKET_HEADROOM;
pool->umem = umem;
pool->addrs = umem->addrs;
+ pool->tx_metadata_len = xs->tx_metadata_len;
INIT_LIST_HEAD(&pool->free_list);
INIT_LIST_HEAD(&pool->xsk_tx_list);
spin_lock_init(&pool->xsk_tx_list_lock);
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 6d40a77fccbe..c8d287c18d64 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -133,12 +133,13 @@ static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr)
static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
- u64 offset = desc->addr & (pool->chunk_size - 1);
+ u64 addr = desc->addr - pool->tx_metadata_len;
+ u64 offset = addr & (pool->chunk_size - 1);
if (offset + desc->len > pool->chunk_size)
return false;
- if (desc->addr >= pool->addrs_cnt)
+ if (addr >= pool->addrs_cnt)
return false;
if (desc->options)
@@ -149,7 +150,7 @@ static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
- u64 addr = xp_unaligned_add_offset_to_addr(desc->addr);
+ u64 addr = xp_unaligned_add_offset_to_addr(desc->addr) - pool->tx_metadata_len;
if (desc->len > pool->chunk_size)
return false;
--
2.41.0.255.g8b1d071c50-goog
next prev parent reply other threads:[~2023-07-07 19:30 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-07-07 19:29 [xdp-hints] [RFC bpf-next v3 00/14] bpf: Netdev TX metadata Stanislav Fomichev
2023-07-07 19:29 ` [xdp-hints] [RFC bpf-next v3 01/14] bpf: Rename some xdp-metadata functions into dev-bound Stanislav Fomichev
2023-07-07 19:29 ` [xdp-hints] [RFC bpf-next v3 02/14] bpf: Make it easier to add new metadata kfunc Stanislav Fomichev
2023-07-07 19:29 ` Stanislav Fomichev [this message]
2023-07-07 19:29 ` [xdp-hints] [RFC bpf-next v3 04/14] bpf: Implement devtx hook points Stanislav Fomichev
2023-07-07 19:29 ` [xdp-hints] [RFC bpf-next v3 05/14] bpf: Implement devtx timestamp kfunc Stanislav Fomichev
2023-07-07 19:29 ` [xdp-hints] [RFC bpf-next v3 06/14] net: veth: Implement devtx timestamp kfuncs Stanislav Fomichev
2023-07-07 19:29 ` [xdp-hints] [RFC bpf-next v3 07/14] bpf: Introduce tx checksum devtx kfuncs Stanislav Fomichev
2023-07-07 19:30 ` [xdp-hints] [RFC bpf-next v3 08/14] net: veth: Implement devtx tx checksum Stanislav Fomichev
2023-07-07 19:30 ` [xdp-hints] [RFC bpf-next v3 09/14] net/mlx5e: Implement devtx kfuncs Stanislav Fomichev
2023-07-11 22:56 ` [xdp-hints] " Alexei Starovoitov
2023-07-11 23:24 ` Stanislav Fomichev
2023-07-11 23:45 ` Alexei Starovoitov
2023-07-12 0:14 ` Stanislav Fomichev
2023-07-12 2:50 ` Alexei Starovoitov
2023-07-12 3:29 ` Stanislav Fomichev
2023-07-12 4:59 ` Alexei Starovoitov
2023-07-12 5:36 ` Stanislav Fomichev
2023-07-12 15:16 ` Willem de Bruijn
2023-07-12 16:28 ` Willem de Bruijn
2023-07-12 19:03 ` Alexei Starovoitov
2023-07-12 19:11 ` Willem de Bruijn
2023-07-12 19:42 ` Alexei Starovoitov
2023-07-12 20:09 ` Jakub Kicinski
2023-07-12 20:53 ` Stanislav Fomichev
2023-07-12 0:32 ` Jakub Kicinski
2023-07-12 2:37 ` Alexei Starovoitov
2023-07-12 3:07 ` Jakub Kicinski
2023-07-12 3:23 ` Alexei Starovoitov
2023-07-07 19:30 ` [xdp-hints] [RFC bpf-next v3 10/14] selftests/xsk: Support XDP_TX_METADATA_LEN Stanislav Fomichev
2023-07-07 19:30 ` [xdp-hints] [RFC bpf-next v3 11/14] selftests/bpf: Add helper to query current netns cookie Stanislav Fomichev
2023-07-07 19:30 ` [xdp-hints] [RFC bpf-next v3 12/14] selftests/bpf: Add csum helpers Stanislav Fomichev
2023-07-07 19:30 ` [xdp-hints] [RFC bpf-next v3 13/14] selftests/bpf: Extend xdp_metadata with devtx kfuncs Stanislav Fomichev
2023-07-07 19:30 ` [xdp-hints] [RFC bpf-next v3 14/14] selftests/bpf: Extend xdp_hw_metadata " Stanislav Fomichev
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://lists.xdp-project.net/postorius/lists/xdp-hints.xdp-project.net/
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230707193006.1309662-4-sdf@google.com \
--to=sdf@google.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bjorn@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=dsahern@kernel.org \
--cc=haoluo@google.com \
--cc=hawk@kernel.org \
--cc=john.fastabend@gmail.com \
--cc=jolsa@kernel.org \
--cc=kpsingh@kernel.org \
--cc=kuba@kernel.org \
--cc=maciej.fijalkowski@intel.com \
--cc=magnus.karlsson@intel.com \
--cc=martin.lau@linux.dev \
--cc=netdev@vger.kernel.org \
--cc=song@kernel.org \
--cc=toke@kernel.org \
--cc=willemb@google.com \
--cc=xdp-hints@xdp-project.net \
--cc=yhs@fb.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox