aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2023-03-14 15:20:05 -0700
committerAlexei Starovoitov <ast@kernel.org>2023-03-14 15:20:05 -0700
commit5584d9e63eee25c6ccf36aa5d09efc113d762a05 (patch)
treef796ff038dab22a2c934eca1bdf4ebda5af6c243
parent283b40c52d9ad850d204c447df69faaaf9d177f0 (diff)
parentd4e492338d11937c55841b1279287280d6e35894 (diff)
downloadbpf-5584d9e63eee25c6ccf36aa5d09efc113d762a05.tar.gz
Merge branch 'xdp: recycle Page Pool backed skbs built from XDP frames'
Alexander Lobakin says: ==================== Yeah, I still remember that "Who needs cpumap nowadays" (c), but anyway. __xdp_build_skb_from_frame() missed the moment when the networking stack became able to recycle skb pages backed by a page_pool. This was making e.g. cpumap redirect even less effective than simple %XDP_PASS. veth was also affected in some scenarios. A lot of drivers use skb_mark_for_recycle() already, it's been almost two years and seems like there are no issues in using it in the generic code too. {__,}xdp_release_frame() can be then removed as it losts its last user. Page Pool becomes then zero-alloc (or almost) in the abovementioned cases, too. Other memory type models (who needs them at this point) have no changes. Some numbers on 1 Xeon Platinum core bombed with 27 Mpps of 64-byte IPv6 UDP, iavf w/XDP[0] (CONFIG_PAGE_POOL_STATS is enabled): Plain %XDP_PASS on baseline, Page Pool driver: src cpu Rx drops dst cpu Rx 2.1 Mpps N/A 2.1 Mpps cpumap redirect (cross-core, w/o leaving its NUMA node) on baseline: 6.8 Mpps 5.0 Mpps 1.8 Mpps cpumap redirect with skb PP recycling: 7.9 Mpps 5.7 Mpps 2.2 Mpps +22% (from cpumap redir on baseline) [0] https://github.com/alobakin/linux/commits/iavf-xdp ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r--include/linux/skbuff.h4
-rw-r--r--include/net/xdp.h29
-rw-r--r--net/core/xdp.c19
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c36
4 files changed, 30 insertions, 58 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index fe661011644b8f..3f3a2a82a86b30 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -5069,12 +5069,12 @@ static inline u64 skb_get_kcov_handle(struct sk_buff *skb)
#endif
}
-#ifdef CONFIG_PAGE_POOL
static inline void skb_mark_for_recycle(struct sk_buff *skb)
{
+#ifdef CONFIG_PAGE_POOL
skb->pp_recycle = 1;
-}
#endif
+}
#endif /* __KERNEL__ */
#endif /* _LINUX_SKBUFF_H */
diff --git a/include/net/xdp.h b/include/net/xdp.h
index d517bfac937b0a..5393b3ebe56e57 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -317,35 +317,6 @@ void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq);
void xdp_return_frame_bulk(struct xdp_frame *xdpf,
struct xdp_frame_bulk *bq);
-/* When sending xdp_frame into the network stack, then there is no
- * return point callback, which is needed to release e.g. DMA-mapping
- * resources with page_pool. Thus, have explicit function to release
- * frame resources.
- */
-void __xdp_release_frame(void *data, struct xdp_mem_info *mem);
-static inline void xdp_release_frame(struct xdp_frame *xdpf)
-{
- struct xdp_mem_info *mem = &xdpf->mem;
- struct skb_shared_info *sinfo;
- int i;
-
- /* Curr only page_pool needs this */
- if (mem->type != MEM_TYPE_PAGE_POOL)
- return;
-
- if (likely(!xdp_frame_has_frags(xdpf)))
- goto out;
-
- sinfo = xdp_get_shared_info_from_frame(xdpf);
- for (i = 0; i < sinfo->nr_frags; i++) {
- struct page *page = skb_frag_page(&sinfo->frags[i]);
-
- __xdp_release_frame(page_address(page), mem);
- }
-out:
- __xdp_release_frame(xdpf->data, mem);
-}
-
static __always_inline unsigned int xdp_get_frame_len(struct xdp_frame *xdpf)
{
struct skb_shared_info *sinfo;
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 8c92fc55331771..8d3ad315f18df9 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -531,21 +531,6 @@ out:
}
EXPORT_SYMBOL_GPL(xdp_return_buff);
-/* Only called for MEM_TYPE_PAGE_POOL see xdp.h */
-void __xdp_release_frame(void *data, struct xdp_mem_info *mem)
-{
- struct xdp_mem_allocator *xa;
- struct page *page;
-
- rcu_read_lock();
- xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
- page = virt_to_head_page(data);
- if (xa)
- page_pool_release_page(xa->page_pool, page);
- rcu_read_unlock();
-}
-EXPORT_SYMBOL_GPL(__xdp_release_frame);
-
void xdp_attachment_setup(struct xdp_attachment_info *info,
struct netdev_bpf *bpf)
{
@@ -658,8 +643,8 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
* - RX ring dev queue index (skb_record_rx_queue)
*/
- /* Until page_pool get SKB return path, release DMA here */
- xdp_release_frame(xdpf);
+ if (xdpf->mem.type == MEM_TYPE_PAGE_POOL)
+ skb_mark_for_recycle(skb);
/* Allow SKB to reuse area used by xdp_frame */
xdp_scrub_frame(xdpf);
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
index 77a123071940ae..cd2d4e3258b899 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
@@ -4,6 +4,19 @@
#define ETH_ALEN 6
#define HDR_SZ (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct udphdr))
+
+/**
+ * enum frame_mark - magics to distinguish page/packet paths
+ * @MARK_XMIT: page was recycled due to the frame being "xmitted" by the NIC.
+ * @MARK_IN: frame is being processed by the input XDP prog.
+ * @MARK_SKB: frame did hit the TC ingress hook as an skb.
+ */
+enum frame_mark {
+ MARK_XMIT = 0U,
+ MARK_IN = 0x42,
+ MARK_SKB = 0x45,
+};
+
const volatile int ifindex_out;
const volatile int ifindex_in;
const volatile __u8 expect_dst[ETH_ALEN];
@@ -34,10 +47,10 @@ int xdp_redirect(struct xdp_md *xdp)
if (*metadata != 0x42)
return XDP_ABORTED;
- if (*payload == 0) {
- *payload = 0x42;
+ if (*payload == MARK_XMIT)
pkts_seen_zero++;
- }
+
+ *payload = MARK_IN;
if (bpf_xdp_adjust_meta(xdp, 4))
return XDP_ABORTED;
@@ -51,7 +64,7 @@ int xdp_redirect(struct xdp_md *xdp)
return ret;
}
-static bool check_pkt(void *data, void *data_end)
+static bool check_pkt(void *data, void *data_end, const __u32 mark)
{
struct ipv6hdr *iph = data + sizeof(struct ethhdr);
__u8 *payload = data + HDR_SZ;
@@ -59,13 +72,13 @@ static bool check_pkt(void *data, void *data_end)
if (payload + 1 > data_end)
return false;
- if (iph->nexthdr != IPPROTO_UDP || *payload != 0x42)
+ if (iph->nexthdr != IPPROTO_UDP || *payload != MARK_IN)
return false;
/* reset the payload so the same packet doesn't get counted twice when
* it cycles back through the kernel path and out the dst veth
*/
- *payload = 0;
+ *payload = mark;
return true;
}
@@ -75,11 +88,11 @@ int xdp_count_pkts(struct xdp_md *xdp)
void *data = (void *)(long)xdp->data;
void *data_end = (void *)(long)xdp->data_end;
- if (check_pkt(data, data_end))
+ if (check_pkt(data, data_end, MARK_XMIT))
pkts_seen_xdp++;
- /* Return XDP_DROP to make sure the data page is recycled, like when it
- * exits a physical NIC. Recycled pages will be counted in the
+ /* Return %XDP_DROP to recycle the data page with %MARK_XMIT, like
+ * it exited a physical NIC. Those pages will be counted in the
* pkts_seen_zero counter above.
*/
return XDP_DROP;
@@ -91,9 +104,12 @@ int tc_count_pkts(struct __sk_buff *skb)
void *data = (void *)(long)skb->data;
void *data_end = (void *)(long)skb->data_end;
- if (check_pkt(data, data_end))
+ if (check_pkt(data, data_end, MARK_SKB))
pkts_seen_tc++;
+ /* Will be either recycled or freed, %MARK_SKB makes sure it won't
+ * hit any of the counters above.
+ */
return 0;
}