aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSaeed Mahameed <saeedm@mellanox.com>2019-04-18 01:01:52 -0700
committerSaeed Mahameed <saeed@kernel.org>2020-12-08 19:20:43 -0800
commit959b1079e4797c67387a28dc61712b64a94e4627 (patch)
tree90b143e9c8b45ce21084843e9bfe7ada7ffb5d7e
parentd666e81121ff96eb8cf10d7b054a0b22ef0ce0fe (diff)
downloadlinux-topic/xdp_metadata4.tar.gz
samples/bpf: xdp_tx_iptunnel accelerate using flow mark meta datatopic/xdp_metadata4
Using XDP meta data acceleration, packet parsing can be avoided via flow mark offload, when compiled with XDP meta data btf: $ XDP_MD_BTF=1 make M=samples/bpf xdp_tx_iptunnel will load a XDP program which is aware of the XDP metadata offload, it will also create a new lookup bpf map with u32 flow_mark key and will assign a unique flow_mark for each tunnel. The flow mark can be offloaded via a tc command and will be printed on the screen when the sample is run: tc filter add dev mlx0 protocol ip parent ffff: flower skip_sw dst_ip \ 172.50.0.2 ip_proto tcp dst_port 10 action skbedit mark The new XDP prog will use the metadata to avoid any packet parsing: struct xdp_md_desc *md = (void *)(long)xdp->data_meta; tnl = bpf_map_lookup_elem(&flow2tnl, &md->flow_mark); and if tunnel is not found it will fallback to the slow parsing path. Issue: 2114293 Change-Id: If49317ba0c00dd739f0b74e47f7ceeb0d41a27e2 Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
-rw-r--r--samples/bpf/Makefile1
-rw-r--r--samples/bpf/xdp_tx_iptunnel_kern.c73
-rw-r--r--samples/bpf/xdp_tx_iptunnel_user.c83
3 files changed, 150 insertions, 7 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index a85850ea8f0f7d..5c56026340b1b9 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -335,3 +335,4 @@ endif
# Files that have #ifdef XDP_MD_BTF
$(obj)/xdp_sample_pkts_kern.o: $(obj)/xdp_sample_pkts_user.o
$(obj)/xdp_redirect_cpu_kern.o: $(obj)/xdp_redirect_cpu_user.o
+$(obj)/xdp_tx_iptunnel_kern.o: $(obj)/xdp_tx_iptunnel_user.o
diff --git a/samples/bpf/xdp_tx_iptunnel_kern.c b/samples/bpf/xdp_tx_iptunnel_kern.c
index a81fc7ff0f5c89..708e6073c0f4ec 100644
--- a/samples/bpf/xdp_tx_iptunnel_kern.c
+++ b/samples/bpf/xdp_tx_iptunnel_kern.c
@@ -247,8 +247,7 @@ static __always_inline int handle_ipv6(struct xdp_md *xdp)
return fwd_tnl_ipv6(xdp, tnl, proto, payload_len);
}
-SEC("xdp_tx_iptunnel")
-int _xdp_tx_iptunnel(struct xdp_md *xdp)
+static __always_inline int __xdp_tx_iptunnel(struct xdp_md *xdp)
{
void *data_end = (void *)(long)xdp->data_end;
void *data = (void *)(long)xdp->data;
@@ -269,4 +268,74 @@ int _xdp_tx_iptunnel(struct xdp_md *xdp)
return XDP_PASS;
}
+SEC("xdp_tx_iptunnel")
+int _xdp_tx_iptunnel(struct xdp_md *xdp)
+{
+ return __xdp_tx_iptunnel(xdp);
+}
+
+#ifdef XDP_MD_BTF
+#include "xdp_md_btf.h"
+
+struct bpf_map_def SEC("maps") flow2tnl = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct iptnl_info),
+ .max_entries = MAX_IPTNL_ENTRIES,
+};
+
+static __always_inline int
+fwd_tnl(struct xdp_md *xdp, struct iptnl_info *tnl)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+
+ if (tnl->family == AF_INET) {
+ struct iphdr *iph = data + sizeof(struct ethhdr);
+
+ if (iph + 1 > data_end)
+ return XDP_PASS;
+
+ return fwd_tnl_ipv4(xdp, tnl, iph->protocol, ntohs(iph->tot_len));
+ }
+
+ if (tnl->family == AF_INET6) {
+ struct ipv6hdr *ip6h = data + sizeof(struct ethhdr);
+
+ if (ip6h + 1 > data_end)
+ return XDP_PASS;
+
+ return fwd_tnl_ipv6(xdp, tnl, ip6h->nexthdr, ip6h->payload_len);
+ }
+
+ return XDP_DROP;
+}
+
+SEC("xdp_tx_iptunnel_md_flow_mark")
+int _xdp_tx_iptunnel_md_flow_mark(struct xdp_md *xdp)
+{
+ struct xdp_md_desc *md = (void *)(long)xdp->data_meta;
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct ethhdr *eth = data;
+ __u16 h_proto;
+
+ if (eth + 1 > data_end)
+ return XDP_DROP;
+
+ if (md + 1 <= data) {
+ struct iptnl_info *tnl = bpf_map_lookup_elem(&flow2tnl, &md->flow_mark);
+
+ /* Remove md to avoid memcpy on bpf_xdp_adjust_head */
+ bpf_xdp_adjust_meta(xdp, sizeof(*md));
+ if (tnl)
+ return fwd_tnl(xdp, tnl);
+ }
+
+ /* Fallback to slow path */
+ count_tx(0);
+ return __xdp_tx_iptunnel(xdp);
+}
+#endif
+
char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c
index 1d4f305d02aadc..ed6db8c36a3130 100644
--- a/samples/bpf/xdp_tx_iptunnel_user.c
+++ b/samples/bpf/xdp_tx_iptunnel_user.c
@@ -11,11 +11,13 @@
#include <string.h>
#include <net/if.h>
#include <sys/resource.h>
+#include <net/if.h>
#include <arpa/inet.h>
#include <netinet/ether.h>
#include <unistd.h>
#include <time.h>
#include <bpf/libbpf.h>
+#include <net/if.h>
#include <bpf/bpf.h>
#include "bpf_util.h"
#include "xdp_tx_iptunnel_common.h"
@@ -23,9 +25,12 @@
#define STATS_INTERVAL_S 2U
static int ifindex = -1;
+static char ifname[IF_NAMESIZE];
static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
static int rxcnt_map_fd;
static __u32 prog_id;
+static int min_port;
+static int max_port;
static void int_exit(int sig)
{
@@ -150,20 +155,63 @@ static int parse_ports(const char *port_str, int *min_port, int *max_port)
return 0;
}
+#ifdef XDP_MD_BTF
+static void flow_marks_prepare(struct bpf_object *obj, struct iptnl_info *tnl,
+ struct vip *vip, char *ipstr)
+{
+ int mport = min_port;
+ __u32 flow_mark = 1;
+ int flow_map_fd;
+
+ flow_map_fd = bpf_object__find_map_fd_by_name(obj, "flow2tnl");
+ if (flow_map_fd < 0) {
+ printf("bpf_object__find_map_fd_by_name(\"flow2tnl\") failed (%d)\n", flow_map_fd);
+ return;
+ }
+
+ printf("#XDP Meta data acceleration requested\n");
+ printf("#To activate HW flow mark, please run the following commands:\n");
+ printf("ethtool -K %s hw-tc-offload on\n", ifname);
+ printf("tc qdisc del dev %s ingress\n", ifname);
+ printf("tc qdisc add dev %s ingress\n", ifname);
+
+ while (mport <= max_port) {
+ vip->dport = htons(mport++);
+ /* Flow mark table */
+ if (bpf_map_update_elem(flow_map_fd, &flow_mark, tnl, BPF_NOEXIST)) {
+ perror("bpf_map_update_elem(&vip2tnl)");
+ return;
+ };
+ printf("tc filter add dev %s protocol %s parent ffff: flower skip_sw dst_ip %s"
+ " ip_proto %s dst_port %d action skbedit mark %d\n",
+ ifname, tnl->family == AF_INET ? "ip" : "ipv6", ipstr,
+ vip->protocol == 17 ? "udp" : "tcp", ntohs(vip->dport),
+ flow_mark++);
+ }
+}
+#else
+static void flow_marks_prepare(struct bpf_object *obj, struct iptnl_info *tnl,
+ struct vip *vip, char *ipstr)
+{ }
+#endif
+
int main(int argc, char **argv)
{
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
};
- int min_port = 0, max_port = 0, vip2tnl_map_fd;
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
const char *optstr = "i:a:p:s:d:m:T:P:FSNh";
unsigned char opt_flags[256] = {};
struct bpf_prog_info info = {};
+ int min_port = 0, max_port = 0;
__u32 info_len = sizeof(info);
unsigned int kill_after_s = 0;
struct iptnl_info tnl = {};
+ int mport, vip2tnl_map_fd;
struct bpf_object *obj;
struct vip vip = {};
+ char *ipstr = NULL;
char filename[256];
int opt, prog_fd;
int i, err;
@@ -182,11 +230,14 @@ int main(int argc, char **argv)
switch (opt) {
case 'i':
ifindex = if_nametoindex(optarg);
+ ifindex = ifindex ? : atoi(optarg);
if (!ifindex)
- ifindex = atoi(optarg);
+ return -1;
+ if_indextoname(ifindex, ifname);
break;
case 'a':
- vip.family = parse_ipstr(optarg, vip.daddr.v6);
+ ipstr = optarg;
+ vip.family = parse_ipstr(ipstr, vip.daddr.v6);
if (vip.family == AF_UNSPEC)
return 1;
break;
@@ -279,8 +330,9 @@ int main(int argc, char **argv)
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
- while (min_port <= max_port) {
- vip.dport = htons(min_port++);
+ mport = min_port;
+ while (mport <= max_port) {
+ vip.dport = htons(mport++);
if (bpf_map_update_elem(vip2tnl_map_fd, &vip, &tnl,
BPF_NOEXIST)) {
perror("bpf_map_update_elem(&vip2tnl)");
@@ -288,6 +340,27 @@ int main(int argc, char **argv)
}
}
+ flow_marks_prepare(obj, &tnl, &vip, ipstr);
+#ifdef XDP_MD_BTF
+ struct bpf_program *md_prog = NULL;
+ int md_prog_fd = -1;
+
+ md_prog = bpf_program__next(bpf_program__next(NULL, obj), obj);
+ if (md_prog) {
+ printf("Found xdp md prog\n");
+
+ md_prog_fd = bpf_program__fd(md_prog);
+ if (md_prog_fd < 0 || md_prog_fd == prog_fd) {
+ printf("bad md_prog_fd: %s\n", strerror(errno));
+ return 1;
+ }
+ /* Use the XDP meta data sample program */
+ prog_fd = md_prog_fd;
+ } else {
+ printf("XDP_MD_BTF is enabled but xdp md prog was not found\n");
+ }
+#endif
+
if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
printf("link set xdp fd failed\n");
return 1;