From: "Jesper D. Brouer" <netdev@brouer.com>
To: Stanislav Fomichev <sdf@google.com>, bpf@vger.kernel.org
Cc: brouer@redhat.com, ast@kernel.org, daniel@iogearbox.net,
andrii@kernel.org, martin.lau@linux.dev, song@kernel.org,
yhs@fb.com, john.fastabend@gmail.com, kpsingh@kernel.org,
haoluo@google.com, jolsa@kernel.org,
"Björn Töpel" <bjorn@kernel.org>,
"Karlsson, Magnus" <magnus.karlsson@intel.com>,
"xdp-hints@xdp-project.net" <xdp-hints@xdp-project.net>
Subject: [xdp-hints] Re: [RFC bpf-next v2 03/11] xsk: Support XDP_TX_METADATA_LEN
Date: Thu, 22 Jun 2023 11:11:00 +0200 [thread overview]
Message-ID: <57b9fc14-c02e-f0e5-148d-a549ebab6cf6@brouer.com> (raw)
In-Reply-To: <20230621170244.1283336-4-sdf@google.com>
This needs to be reviewed by AF_XDP maintainers Magnus and Björn (Cc).
On 21/06/2023 19.02, Stanislav Fomichev wrote:
> For zerocopy mode, tx_desc->addr can point to the arbitrary offset
> and carry some TX metadata in the headroom. For copy mode, there
> is no way currently to populate skb metadata.
>
> Introduce new XDP_TX_METADATA_LEN that indicates how many bytes
> to treat as metadata. Metadata bytes come prior to tx_desc address
> (same as in RX case).
From looking at the code, this introduces a socket option for this TX
metadata length (tx_metadata_len).
This implies the same fixed TX metadata size is used for all packets.
Maybe describe this in the patch description.
What is the plan for dealing with cases that don't populate the same/full
TX metadata size?
>
> Signed-off-by: Stanislav Fomichev <sdf@google.com>
> ---
> include/net/xdp_sock.h | 1 +
> include/net/xsk_buff_pool.h | 1 +
> include/uapi/linux/if_xdp.h | 1 +
> net/xdp/xsk.c | 31 ++++++++++++++++++++++++++++++-
> net/xdp/xsk_buff_pool.c | 1 +
> net/xdp/xsk_queue.h | 7 ++++---
> 6 files changed, 38 insertions(+), 4 deletions(-)
>
> diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
> index e96a1151ec75..30018b3b862d 100644
> --- a/include/net/xdp_sock.h
> +++ b/include/net/xdp_sock.h
> @@ -51,6 +51,7 @@ struct xdp_sock {
> struct list_head flush_node;
> struct xsk_buff_pool *pool;
> u16 queue_id;
> + u8 tx_metadata_len;
> bool zc;
> enum {
> XSK_READY = 0,
> diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
> index a8d7b8a3688a..751fea51a6af 100644
> --- a/include/net/xsk_buff_pool.h
> +++ b/include/net/xsk_buff_pool.h
> @@ -75,6 +75,7 @@ struct xsk_buff_pool {
> u32 chunk_size;
> u32 chunk_shift;
> u32 frame_len;
> + u8 tx_metadata_len; /* inherited from xsk_sock */
> u8 cached_need_wakeup;
> bool uses_need_wakeup;
> bool dma_need_sync;
> diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
> index a78a8096f4ce..2374eafff7db 100644
> --- a/include/uapi/linux/if_xdp.h
> +++ b/include/uapi/linux/if_xdp.h
> @@ -63,6 +63,7 @@ struct xdp_mmap_offsets {
> #define XDP_UMEM_COMPLETION_RING 6
> #define XDP_STATISTICS 7
> #define XDP_OPTIONS 8
> +#define XDP_TX_METADATA_LEN 9
>
> struct xdp_umem_reg {
> __u64 addr; /* Start of packet data area */
> diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
> index cc1e7f15fa73..c9b2daba7b6d 100644
> --- a/net/xdp/xsk.c
> +++ b/net/xdp/xsk.c
> @@ -485,6 +485,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
> int err;
>
> hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom));
> + hr = max(hr, L1_CACHE_ALIGN((u32)xs->tx_metadata_len));
> tr = dev->needed_tailroom;
> len = desc->len;
>
> @@ -493,14 +494,21 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
> return ERR_PTR(err);
>
> skb_reserve(skb, hr);
> - skb_put(skb, len);
> + skb_put(skb, len + xs->tx_metadata_len);
>
> buffer = xsk_buff_raw_get_data(xs->pool, desc->addr);
> + buffer -= xs->tx_metadata_len;
> +
> err = skb_store_bits(skb, 0, buffer, len);
> if (unlikely(err)) {
> kfree_skb(skb);
> return ERR_PTR(err);
> }
> +
> + if (xs->tx_metadata_len) {
> + skb_metadata_set(skb, xs->tx_metadata_len);
> + __skb_pull(skb, xs->tx_metadata_len);
> + }
> }
>
> skb->dev = dev;
> @@ -1137,6 +1145,27 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
> mutex_unlock(&xs->mutex);
> return err;
> }
> + case XDP_TX_METADATA_LEN:
> + {
> + int val;
> +
> + if (optlen < sizeof(val))
> + return -EINVAL;
> + if (copy_from_sockptr(&val, optval, sizeof(val)))
> + return -EFAULT;
> +
> + if (val >= 256)
> + return -EINVAL;
> +
> + mutex_lock(&xs->mutex);
> + if (xs->state != XSK_READY) {
> + mutex_unlock(&xs->mutex);
> + return -EBUSY;
> + }
> + xs->tx_metadata_len = val;
> + mutex_unlock(&xs->mutex);
> + return err;
> + }
> default:
> break;
> }
> diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
> index 26f6d304451e..66ff9c345a67 100644
> --- a/net/xdp/xsk_buff_pool.c
> +++ b/net/xdp/xsk_buff_pool.c
> @@ -85,6 +85,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
> XDP_PACKET_HEADROOM;
> pool->umem = umem;
> pool->addrs = umem->addrs;
> + pool->tx_metadata_len = xs->tx_metadata_len;
> INIT_LIST_HEAD(&pool->free_list);
> INIT_LIST_HEAD(&pool->xsk_tx_list);
> spin_lock_init(&pool->xsk_tx_list_lock);
> diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
> index 6d40a77fccbe..c8d287c18d64 100644
> --- a/net/xdp/xsk_queue.h
> +++ b/net/xdp/xsk_queue.h
> @@ -133,12 +133,13 @@ static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr)
> static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
> struct xdp_desc *desc)
> {
> - u64 offset = desc->addr & (pool->chunk_size - 1);
> + u64 addr = desc->addr - pool->tx_metadata_len;
> + u64 offset = addr & (pool->chunk_size - 1);
>
> if (offset + desc->len > pool->chunk_size)
> return false;
>
> - if (desc->addr >= pool->addrs_cnt)
> + if (addr >= pool->addrs_cnt)
> return false;
>
> if (desc->options)
> @@ -149,7 +150,7 @@ static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
> static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
> struct xdp_desc *desc)
> {
> - u64 addr = xp_unaligned_add_offset_to_addr(desc->addr);
> + u64 addr = xp_unaligned_add_offset_to_addr(desc->addr) - pool->tx_metadata_len;
>
> if (desc->len > pool->chunk_size)
> return false;
next prev parent reply other threads:[~2023-06-22 9:11 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20230621170244.1283336-1-sdf@google.com>
2023-06-22 8:41 ` [xdp-hints] Re: [RFC bpf-next v2 00/11] bpf: Netdev TX metadata Jesper Dangaard Brouer
2023-06-22 17:55 ` Stanislav Fomichev
[not found] ` <20230621170244.1283336-4-sdf@google.com>
2023-06-22 9:11 ` Jesper D. Brouer [this message]
2023-06-22 17:55 ` [xdp-hints] Re: [RFC bpf-next v2 03/11] xsk: Support XDP_TX_METADATA_LEN Stanislav Fomichev
2023-06-23 10:24 ` Jesper Dangaard Brouer
2023-06-23 17:41 ` Stanislav Fomichev
2023-06-24 9:02 ` Jesper Dangaard Brouer
2023-06-26 17:00 ` Stanislav Fomichev
2023-06-28 8:09 ` Magnus Karlsson
2023-06-28 18:49 ` Stanislav Fomichev
2023-06-29 6:15 ` Magnus Karlsson
2023-06-29 11:30 ` Toke Høiland-Jørgensen
2023-06-29 11:48 ` Magnus Karlsson
2023-06-29 12:01 ` Toke Høiland-Jørgensen
2023-06-29 16:21 ` Stanislav Fomichev
2023-06-29 20:58 ` Toke Høiland-Jørgensen
2023-06-30 6:22 ` Magnus Karlsson
2023-06-30 9:19 ` Toke Høiland-Jørgensen
[not found] ` <20230621170244.1283336-10-sdf@google.com>
2023-06-23 11:12 ` [xdp-hints] Re: [RFC bpf-next v2 09/11] selftests/bpf: Extend xdp_metadata with devtx kfuncs Jesper D. Brouer
2023-06-23 17:40 ` Stanislav Fomichev
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://lists.xdp-project.net/postorius/lists/xdp-hints.xdp-project.net/
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=57b9fc14-c02e-f0e5-148d-a549ebab6cf6@brouer.com \
--to=netdev@brouer.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bjorn@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=brouer@redhat.com \
--cc=daniel@iogearbox.net \
--cc=haoluo@google.com \
--cc=john.fastabend@gmail.com \
--cc=jolsa@kernel.org \
--cc=kpsingh@kernel.org \
--cc=magnus.karlsson@intel.com \
--cc=martin.lau@linux.dev \
--cc=sdf@google.com \
--cc=song@kernel.org \
--cc=xdp-hints@xdp-project.net \
--cc=yhs@fb.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox