diff options
author | Saeed Mahameed <saeedm@mellanox.com> | 2019-04-18 01:01:52 -0700 |
---|---|---|
committer | Saeed Mahameed <saeed@kernel.org> | 2020-12-08 19:20:43 -0800 |
commit | 959b1079e4797c67387a28dc61712b64a94e4627 (patch) | |
tree | 90b143e9c8b45ce21084843e9bfe7ada7ffb5d7e | |
parent | d666e81121ff96eb8cf10d7b054a0b22ef0ce0fe (diff) | |
download | linux-topic/xdp_metadata4.tar.gz |
samples/bpf: xdp_tx_iptunnel accelerate using flow mark meta data (topic/xdp_metadata4)
Using XDP meta data acceleration, packet parsing can be avoided via flow
mark offload. To enable it, compile the samples with XDP meta data BTF:
$ XDP_MD_BTF=1 make M=samples/bpf
xdp_tx_iptunnel will load an XDP program which is aware of the XDP
metadata offload. It will also create a new lookup bpf map with a u32
flow_mark key and will assign a unique flow_mark to each tunnel.
The flow mark can be offloaded via a tc command, which is printed on
the screen when the sample is run:
tc filter add dev mlx0 protocol ip parent ffff: flower skip_sw dst_ip \
172.50.0.2 ip_proto tcp dst_port 10 action skbedit mark <flow_mark>
The new XDP prog will use the metadata to avoid any packet parsing:
struct xdp_md_desc *md = (void *)(long)xdp->data_meta;
tnl = bpf_map_lookup_elem(&flow2tnl, &md->flow_mark);
If the tunnel is not found, it will fall back to the slow parsing path.
Issue: 2114293
Change-Id: If49317ba0c00dd739f0b74e47f7ceeb0d41a27e2
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
-rw-r--r-- | samples/bpf/Makefile | 1 | ||||
-rw-r--r-- | samples/bpf/xdp_tx_iptunnel_kern.c | 73 | ||||
-rw-r--r-- | samples/bpf/xdp_tx_iptunnel_user.c | 83 |
3 files changed, 150 insertions, 7 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index a85850ea8f0f7d..5c56026340b1b9 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -335,3 +335,4 @@ endif # Files that have #ifdef XDP_MD_BTF $(obj)/xdp_sample_pkts_kern.o: $(obj)/xdp_sample_pkts_user.o $(obj)/xdp_redirect_cpu_kern.o: $(obj)/xdp_redirect_cpu_user.o +$(obj)/xdp_tx_iptunnel_kern.o: $(obj)/xdp_tx_iptunnel_user.o diff --git a/samples/bpf/xdp_tx_iptunnel_kern.c b/samples/bpf/xdp_tx_iptunnel_kern.c index a81fc7ff0f5c89..708e6073c0f4ec 100644 --- a/samples/bpf/xdp_tx_iptunnel_kern.c +++ b/samples/bpf/xdp_tx_iptunnel_kern.c @@ -247,8 +247,7 @@ static __always_inline int handle_ipv6(struct xdp_md *xdp) return fwd_tnl_ipv6(xdp, tnl, proto, payload_len); } -SEC("xdp_tx_iptunnel") -int _xdp_tx_iptunnel(struct xdp_md *xdp) +static __always_inline int __xdp_tx_iptunnel(struct xdp_md *xdp) { void *data_end = (void *)(long)xdp->data_end; void *data = (void *)(long)xdp->data; @@ -269,4 +268,74 @@ int _xdp_tx_iptunnel(struct xdp_md *xdp) return XDP_PASS; } +SEC("xdp_tx_iptunnel") +int _xdp_tx_iptunnel(struct xdp_md *xdp) +{ + return __xdp_tx_iptunnel(xdp); +} + +#ifdef XDP_MD_BTF +#include "xdp_md_btf.h" + +struct bpf_map_def SEC("maps") flow2tnl = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__u32), + .value_size = sizeof(struct iptnl_info), + .max_entries = MAX_IPTNL_ENTRIES, +}; + +static __always_inline int +fwd_tnl(struct xdp_md *xdp, struct iptnl_info *tnl) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + + if (tnl->family == AF_INET) { + struct iphdr *iph = data + sizeof(struct ethhdr); + + if (iph + 1 > data_end) + return XDP_PASS; + + return fwd_tnl_ipv4(xdp, tnl, iph->protocol, ntohs(iph->tot_len)); + } + + if (tnl->family == AF_INET6) { + struct ipv6hdr *ip6h = data + sizeof(struct ethhdr); + + if (ip6h + 1 > data_end) + return XDP_PASS; + + return fwd_tnl_ipv6(xdp, tnl, ip6h->nexthdr, ip6h->payload_len); + } + + return XDP_DROP; 
+} + +SEC("xdp_tx_iptunnel_md_flow_mark") +int _xdp_tx_iptunnel_md_flow_mark(struct xdp_md *xdp) +{ + struct xdp_md_desc *md = (void *)(long)xdp->data_meta; + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + struct ethhdr *eth = data; + __u16 h_proto; + + if (eth + 1 > data_end) + return XDP_DROP; + + if (md + 1 <= data) { + struct iptnl_info *tnl = bpf_map_lookup_elem(&flow2tnl, &md->flow_mark); + + /* Remove md to avoid memcpy on bpf_xdp_adjust_head */ + bpf_xdp_adjust_meta(xdp, sizeof(*md)); + if (tnl) + return fwd_tnl(xdp, tnl); + } + + /* Fallback to slow path */ + count_tx(0); + return __xdp_tx_iptunnel(xdp); +} +#endif + char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c index 1d4f305d02aadc..ed6db8c36a3130 100644 --- a/samples/bpf/xdp_tx_iptunnel_user.c +++ b/samples/bpf/xdp_tx_iptunnel_user.c @@ -11,11 +11,13 @@ #include <string.h> #include <net/if.h> #include <sys/resource.h> +#include <net/if.h> #include <arpa/inet.h> #include <netinet/ether.h> #include <unistd.h> #include <time.h> #include <bpf/libbpf.h> +#include <net/if.h> #include <bpf/bpf.h> #include "bpf_util.h" #include "xdp_tx_iptunnel_common.h" @@ -23,9 +25,12 @@ #define STATS_INTERVAL_S 2U static int ifindex = -1; +static char ifname[IF_NAMESIZE]; static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int rxcnt_map_fd; static __u32 prog_id; +static int min_port; +static int max_port; static void int_exit(int sig) { @@ -150,20 +155,63 @@ static int parse_ports(const char *port_str, int *min_port, int *max_port) return 0; } +#ifdef XDP_MD_BTF +static void flow_marks_prepare(struct bpf_object *obj, struct iptnl_info *tnl, + struct vip *vip, char *ipstr) +{ + int mport = min_port; + __u32 flow_mark = 1; + int flow_map_fd; + + flow_map_fd = bpf_object__find_map_fd_by_name(obj, "flow2tnl"); + if (flow_map_fd < 0) { + printf("bpf_object__find_map_fd_by_name(\"flow2tnl\") failed 
(%d)\n", flow_map_fd); + return; + } + + printf("#XDP Meta data acceleration requested\n"); + printf("#To activate HW flow mark, please run the following commands:\n"); + printf("ethtool -K %s hw-tc-offload on\n", ifname); + printf("tc qdisc del dev %s ingress\n", ifname); + printf("tc qdisc add dev %s ingress\n", ifname); + + while (mport <= max_port) { + vip->dport = htons(mport++); + /* Flow mark table */ + if (bpf_map_update_elem(flow_map_fd, &flow_mark, tnl, BPF_NOEXIST)) { + perror("bpf_map_update_elem(&vip2tnl)"); + return; + }; + printf("tc filter add dev %s protocol %s parent ffff: flower skip_sw dst_ip %s" + " ip_proto %s dst_port %d action skbedit mark %d\n", + ifname, tnl->family == AF_INET ? "ip" : "ipv6", ipstr, + vip->protocol == 17 ? "udp" : "tcp", ntohs(vip->dport), + flow_mark++); + } +} +#else +static void flow_marks_prepare(struct bpf_object *obj, struct iptnl_info *tnl, + struct vip *vip, char *ipstr) +{ } +#endif + int main(int argc, char **argv) { struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; - int min_port = 0, max_port = 0, vip2tnl_map_fd; + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; const char *optstr = "i:a:p:s:d:m:T:P:FSNh"; unsigned char opt_flags[256] = {}; struct bpf_prog_info info = {}; + int min_port = 0, max_port = 0; __u32 info_len = sizeof(info); unsigned int kill_after_s = 0; struct iptnl_info tnl = {}; + int mport, vip2tnl_map_fd; struct bpf_object *obj; struct vip vip = {}; + char *ipstr = NULL; char filename[256]; int opt, prog_fd; int i, err; @@ -182,11 +230,14 @@ int main(int argc, char **argv) switch (opt) { case 'i': ifindex = if_nametoindex(optarg); + ifindex = ifindex ? 
: atoi(optarg); if (!ifindex) - ifindex = atoi(optarg); + return -1; + if_indextoname(ifindex, ifname); break; case 'a': - vip.family = parse_ipstr(optarg, vip.daddr.v6); + ipstr = optarg; + vip.family = parse_ipstr(ipstr, vip.daddr.v6); if (vip.family == AF_UNSPEC) return 1; break; @@ -279,8 +330,9 @@ int main(int argc, char **argv) signal(SIGINT, int_exit); signal(SIGTERM, int_exit); - while (min_port <= max_port) { - vip.dport = htons(min_port++); + mport = min_port; + while (mport <= max_port) { + vip.dport = htons(mport++); if (bpf_map_update_elem(vip2tnl_map_fd, &vip, &tnl, BPF_NOEXIST)) { perror("bpf_map_update_elem(&vip2tnl)"); @@ -288,6 +340,27 @@ int main(int argc, char **argv) } } + flow_marks_prepare(obj, &tnl, &vip, ipstr); +#ifdef XDP_MD_BTF + struct bpf_program *md_prog = NULL; + int md_prog_fd = -1; + + md_prog = bpf_program__next(bpf_program__next(NULL, obj), obj); + if (md_prog) { + printf("Found xdp md prog\n"); + + md_prog_fd = bpf_program__fd(md_prog); + if (md_prog_fd < 0 || md_prog_fd == prog_fd) { + printf("bad md_prog_fd: %s\n", strerror(errno)); + return 1; + } + /* Use the XDP meta data sample program */ + prog_fd = md_prog_fd; + } else { + printf("XDP_MD_BTF is enabled but xdp md prog was not found\n"); + } +#endif + if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { printf("link set xdp fd failed\n"); return 1; |