aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStephen Hemminger <stephen@networkplumber.org>2024-03-11 16:39:39 -0700
committerStephen Hemminger <stephen@networkplumber.org>2024-03-11 16:39:39 -0700
commitade05d59c3dd09283e967f647ca2c52a0e40a3bf (patch)
tree7d3cd1629fde58bb893eb7ffedd6b55874c2cfcd
parent88f0b157e9963c020f29678c0004fc6bbca4bb46 (diff)
parenta9fce55334f7b8659079680007e518ce2beb34a8 (diff)
downloadiproute2-next-ade05d59c3dd09283e967f647ca2c52a0e40a3bf.tar.gz
Merge branch 'main' of git://git.kernel.org/pub/scm/network/iproute2/iproute2-next
-rw-r--r--include/uapi/linux/bpf.h103
-rw-r--r--include/uapi/linux/can.h9
-rw-r--r--include/uapi/linux/if_link.h1
-rw-r--r--include/uapi/linux/ioam6_genl.h20
-rw-r--r--include/uapi/linux/tc_act/tc_pedit.h2
-rw-r--r--include/utils.h2
-rw-r--r--ip/ipioam6.c78
-rw-r--r--ip/iplink_bond.c18
-rw-r--r--lib/libnetlink.c4
-rw-r--r--lib/utils.c14
-rw-r--r--man/man8/ip-ioam.85
-rw-r--r--man/man8/ss.89
-rw-r--r--man/man8/tc-mirred.824
-rw-r--r--man/man8/tc.86
-rw-r--r--misc/ifstat.c49
-rw-r--r--misc/nstat.c44
-rw-r--r--misc/ss.c447
-rw-r--r--tc/m_action.c25
-rw-r--r--tc/m_mirred.c61
-rw-r--r--tc/tc.c6
-rw-r--r--tc/tc_filter.c8
21 files changed, 838 insertions, 97 deletions
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0a2380a11..5156982e0 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -77,12 +77,29 @@ struct bpf_insn {
__s32 imm; /* signed immediate constant */
};
-/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */
+/* Deprecated: use struct bpf_lpm_trie_key_u8 (when the "data" member is needed for
+ * byte access) or struct bpf_lpm_trie_key_hdr (when using an alternative type for
+ * the trailing flexible array member) instead.
+ */
struct bpf_lpm_trie_key {
__u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */
__u8 data[0]; /* Arbitrary size */
};
+/* Header for bpf_lpm_trie_key structs */
+struct bpf_lpm_trie_key_hdr {
+ __u32 prefixlen;
+};
+
+/* Key of a BPF_MAP_TYPE_LPM_TRIE entry, with trailing byte array. */
+struct bpf_lpm_trie_key_u8 {
+ union {
+ struct bpf_lpm_trie_key_hdr hdr;
+ __u32 prefixlen;
+ };
+ __u8 data[]; /* Arbitrary size */
+};
+
struct bpf_cgroup_storage_key {
__u64 cgroup_inode_id; /* cgroup inode id */
__u32 attach_type; /* program attach type (enum bpf_attach_type) */
@@ -617,7 +634,11 @@ union bpf_iter_link_info {
* to NULL to begin the batched operation. After each subsequent
* **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant
* *out_batch* as the *in_batch* for the next operation to
- * continue iteration from the current point.
+ * continue iteration from the current point. Both *in_batch* and
+ * *out_batch* must point to memory large enough to hold a key,
+ * except for maps of type **BPF_MAP_TYPE_{HASH, PERCPU_HASH,
+ * LRU_HASH, LRU_PERCPU_HASH}**, for which batch parameters
+ * must be at least 4 bytes wide regardless of key size.
*
* The *keys* and *values* are output parameters which must point
* to memory large enough to hold *count* items based on the key
@@ -847,6 +868,36 @@ union bpf_iter_link_info {
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
+ * BPF_TOKEN_CREATE
+ * Description
+ * Create BPF token with embedded information about what
+ * BPF-related functionality it allows:
+ * - a set of allowed bpf() syscall commands;
+ * - a set of allowed BPF map types to be created with
+ * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed;
+ * - a set of allowed BPF program types and BPF program attach
+ * types to be loaded with BPF_PROG_LOAD command, if
+ * BPF_PROG_LOAD itself is allowed.
+ *
+ * BPF token is created (derived) from an instance of BPF FS,
+ * assuming it has necessary delegation mount options specified.
+ * This BPF token can be passed as an extra parameter to various
+ * bpf() syscall commands to grant BPF subsystem functionality to
+ * unprivileged processes.
+ *
+ * When created, BPF token is "associated" with the owning
+ * user namespace of BPF FS instance (super block) that it was
+ * derived from, and subsequent BPF operations performed with
+ * BPF token would be performing capabilities checks (i.e.,
+ * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within
+ * that user namespace. Without BPF token, such capabilities
+ * have to be granted in init user namespace, making bpf()
+ * syscall incompatible with user namespace, for the most part.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
* NOTES
* eBPF objects (maps and programs) can be shared between processes.
*
@@ -901,6 +952,8 @@ enum bpf_cmd {
BPF_ITER_CREATE,
BPF_LINK_DETACH,
BPF_PROG_BIND_MAP,
+ BPF_TOKEN_CREATE,
+ __MAX_BPF_CMD,
};
enum bpf_map_type {
@@ -951,6 +1004,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_BLOOM_FILTER,
BPF_MAP_TYPE_USER_RINGBUF,
BPF_MAP_TYPE_CGRP_STORAGE,
+ __MAX_BPF_MAP_TYPE
};
/* Note that tracing related programs such as
@@ -995,6 +1049,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_SK_LOOKUP,
BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
BPF_PROG_TYPE_NETFILTER,
+ __MAX_BPF_PROG_TYPE
};
enum bpf_attach_type {
@@ -1330,6 +1385,12 @@ enum {
/* Get path from provided FD in BPF_OBJ_PIN/BPF_OBJ_GET commands */
BPF_F_PATH_FD = (1U << 14),
+
+/* Flag for value_type_btf_obj_fd, the fd is available */
+ BPF_F_VTYPE_BTF_OBJ_FD = (1U << 15),
+
+/* BPF token FD is passed in a corresponding command's token_fd field */
+ BPF_F_TOKEN_FD = (1U << 16),
};
/* Flags for BPF_PROG_QUERY. */
@@ -1403,6 +1464,15 @@ union bpf_attr {
* to using 5 hash functions).
*/
__u64 map_extra;
+
+ __s32 value_type_btf_obj_fd; /* fd pointing to a BTF
+ * type data for
+ * btf_vmlinux_value_type_id.
+ */
+ /* BPF token FD to use with BPF_MAP_CREATE operation.
+ * If provided, map_flags should have BPF_F_TOKEN_FD flag set.
+ */
+ __s32 map_token_fd;
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -1472,6 +1542,10 @@ union bpf_attr {
* truncated), or smaller (if log buffer wasn't filled completely).
*/
__u32 log_true_size;
+ /* BPF token FD to use with BPF_PROG_LOAD operation.
+ * If provided, prog_flags should have BPF_F_TOKEN_FD flag set.
+ */
+ __s32 prog_token_fd;
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -1584,6 +1658,11 @@ union bpf_attr {
* truncated), or smaller (if log buffer wasn't filled completely).
*/
__u32 btf_log_true_size;
+ __u32 btf_flags;
+ /* BPF token FD to use with BPF_BTF_LOAD operation.
+ * If provided, btf_flags should have BPF_F_TOKEN_FD flag set.
+ */
+ __s32 btf_token_fd;
};
struct {
@@ -1714,6 +1793,11 @@ union bpf_attr {
__u32 flags; /* extra flags */
} prog_bind_map;
+ struct { /* struct used by BPF_TOKEN_CREATE command */
+ __u32 flags;
+ __u32 bpffs_fd;
+ } token_create;
+
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
@@ -4839,9 +4923,9 @@ union bpf_attr {
* going through the CPU's backlog queue.
*
* The *flags* argument is reserved and must be 0. The helper is
- * currently only supported for tc BPF program types at the ingress
- * hook and for veth device types. The peer device must reside in a
- * different network namespace.
+ * currently only supported for tc BPF program types at the
+ * ingress hook and for veth and netkit target device types. The
+ * peer device must reside in a different network namespace.
* Return
* The helper returns **TC_ACT_REDIRECT** on success or
* **TC_ACT_SHOT** on error.
@@ -6487,7 +6571,7 @@ struct bpf_map_info {
__u32 btf_id;
__u32 btf_key_type_id;
__u32 btf_value_type_id;
- __u32 :32; /* alignment pad */
+ __u32 btf_vmlinux_id;
__u64 map_extra;
} __attribute__((aligned(8)));
@@ -6563,6 +6647,7 @@ struct bpf_link_info {
__u32 count; /* in/out: kprobe_multi function count */
__u32 flags;
__u64 missed;
+ __aligned_u64 cookies;
} kprobe_multi;
struct {
__aligned_u64 path;
@@ -6582,6 +6667,7 @@ struct bpf_link_info {
__aligned_u64 file_name; /* in/out */
__u32 name_len;
__u32 offset; /* offset from file_name */
+ __u64 cookie;
} uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */
struct {
__aligned_u64 func_name; /* in/out */
@@ -6589,14 +6675,19 @@ struct bpf_link_info {
__u32 offset; /* offset from func_name */
__u64 addr;
__u64 missed;
+ __u64 cookie;
} kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */
struct {
__aligned_u64 tp_name; /* in/out */
__u32 name_len;
+ __u32 :32;
+ __u64 cookie;
} tracepoint; /* BPF_PERF_EVENT_TRACEPOINT */
struct {
__u64 config;
__u32 type;
+ __u32 :32;
+ __u64 cookie;
} event; /* BPF_PERF_EVENT_EVENT */
};
} perf_event;
diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h
index 23d5bf4a5..229a77ffa 100644
--- a/include/uapi/linux/can.h
+++ b/include/uapi/linux/can.h
@@ -193,9 +193,14 @@ struct canfd_frame {
#define CANXL_XLF 0x80 /* mandatory CAN XL frame flag (must always be set!) */
#define CANXL_SEC 0x01 /* Simple Extended Content (security/segmentation) */
+/* the 8-bit VCID is optionally placed in the canxl_frame.prio element */
+#define CANXL_VCID_OFFSET 16 /* bit offset of VCID in prio element */
+#define CANXL_VCID_VAL_MASK 0xFFUL /* VCID is an 8-bit value */
+#define CANXL_VCID_MASK (CANXL_VCID_VAL_MASK << CANXL_VCID_OFFSET)
+
/**
* struct canxl_frame - CAN with e'X'tended frame 'L'ength frame structure
- * @prio: 11 bit arbitration priority with zero'ed CAN_*_FLAG flags
+ * @prio: 11 bit arbitration priority with zero'ed CAN_*_FLAG flags / VCID
* @flags: additional flags for CAN XL
* @sdt: SDU (service data unit) type
* @len: frame payload length in byte (CANXL_MIN_DLEN .. CANXL_MAX_DLEN)
@@ -205,7 +210,7 @@ struct canfd_frame {
* @prio shares the same position as @can_id from struct can[fd]_frame.
*/
struct canxl_frame {
- canid_t prio; /* 11 bit priority for arbitration (canid_t) */
+ canid_t prio; /* 11 bit priority for arbitration / 8 bit VCID */
__u8 flags; /* additional flags for CAN XL */
__u8 sdt; /* SDU (service data unit) type */
__u16 len; /* frame payload length in byte */
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index d17271fbf..ff4ceeafc 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -1503,6 +1503,7 @@ enum {
IFLA_BOND_AD_LACP_ACTIVE,
IFLA_BOND_MISSED_MAX,
IFLA_BOND_NS_IP6_TARGET,
+ IFLA_BOND_COUPLED_CONTROL,
__IFLA_BOND_MAX,
};
diff --git a/include/uapi/linux/ioam6_genl.h b/include/uapi/linux/ioam6_genl.h
index 6043d9f60..3f89b530c 100644
--- a/include/uapi/linux/ioam6_genl.h
+++ b/include/uapi/linux/ioam6_genl.h
@@ -49,4 +49,24 @@ enum {
#define IOAM6_CMD_MAX (__IOAM6_CMD_MAX - 1)
+#define IOAM6_GENL_EV_GRP_NAME "ioam6_events"
+
+enum ioam6_event_type {
+ IOAM6_EVENT_UNSPEC,
+ IOAM6_EVENT_TRACE,
+};
+
+enum ioam6_event_attr {
+ IOAM6_EVENT_ATTR_UNSPEC,
+
+ IOAM6_EVENT_ATTR_TRACE_NAMESPACE, /* u16 */
+ IOAM6_EVENT_ATTR_TRACE_NODELEN, /* u8 */
+ IOAM6_EVENT_ATTR_TRACE_TYPE, /* u32 */
+ IOAM6_EVENT_ATTR_TRACE_DATA, /* Binary */
+
+ __IOAM6_EVENT_ATTR_MAX
+};
+
+#define IOAM6_EVENT_ATTR_MAX (__IOAM6_EVENT_ATTR_MAX - 1)
+
#endif /* _LINUX_IOAM6_GENL_H */
diff --git a/include/uapi/linux/tc_act/tc_pedit.h b/include/uapi/linux/tc_act/tc_pedit.h
index f3e61b04f..f5cab7fc9 100644
--- a/include/uapi/linux/tc_act/tc_pedit.h
+++ b/include/uapi/linux/tc_act/tc_pedit.h
@@ -62,7 +62,7 @@ struct tc_pedit_sel {
tc_gen;
unsigned char nkeys;
unsigned char flags;
- struct tc_pedit_key keys[0];
+ struct tc_pedit_key keys[] __counted_by(nkeys);
};
#define tc_pedit tc_pedit_sel
diff --git a/include/utils.h b/include/utils.h
index 9ba129b8f..a2a98b9bf 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -393,4 +393,6 @@ int proto_a2n(unsigned short *id, const char *buf,
const char *proto_n2a(unsigned short id, char *buf, int len,
const struct proto *proto_tb, size_t tb_len);
+FILE *generic_proc_open(const char *env, const char *name);
+
#endif /* __UTILS_H__ */
diff --git a/ip/ipioam6.c b/ip/ipioam6.c
index b63d7d5c9..188609892 100644
--- a/ip/ipioam6.c
+++ b/ip/ipioam6.c
@@ -13,6 +13,7 @@
#include <inttypes.h>
#include <linux/genetlink.h>
+#include <linux/ioam6.h>
#include <linux/ioam6_genl.h>
#include "utils.h"
@@ -30,7 +31,8 @@ static void usage(void)
" ip ioam schema show\n"
" ip ioam schema add ID DATA\n"
" ip ioam schema del ID\n"
- " ip ioam namespace set ID schema { ID | none }\n");
+ " ip ioam namespace set ID schema { ID | none }\n"
+ " ip ioam monitor\n");
exit(-1);
}
@@ -42,6 +44,7 @@ static int genl_family = -1;
IOAM6_GENL_VERSION, _cmd, _flags)
static struct {
+ bool monitor;
unsigned int cmd;
__u32 sc_id;
__u32 ns_data;
@@ -96,6 +99,37 @@ static void print_schema(struct rtattr *attrs[])
print_nl();
}
+/*
+ * Print one IOAM6 trace event on a single stdout line.
+ *
+ * @attrs: attribute table indexed by IOAM6_EVENT_ATTR_*; entries may be NULL.
+ *
+ * Namespace, NodeLen and Type are printed only when their attribute is
+ * present. The trace payload is dumped as lowercase hex after "Data=";
+ * when the data attribute is absent, "Data=" is printed with no bytes.
+ */
+static void print_trace(struct rtattr *attrs[])
+{
+	struct rtattr *trace = attrs[IOAM6_EVENT_ATTR_TRACE_DATA];
+
+	printf("[TRACE] ");
+
+	if (attrs[IOAM6_EVENT_ATTR_TRACE_NAMESPACE])
+		printf("Namespace=%u ",
+		       rta_getattr_u16(attrs[IOAM6_EVENT_ATTR_TRACE_NAMESPACE]));
+
+	if (attrs[IOAM6_EVENT_ATTR_TRACE_NODELEN])
+		printf("NodeLen=%u ",
+		       rta_getattr_u8(attrs[IOAM6_EVENT_ATTR_TRACE_NODELEN]));
+
+	if (attrs[IOAM6_EVENT_ATTR_TRACE_TYPE])
+		printf("Type=%#08x ",
+		       rta_getattr_u32(attrs[IOAM6_EVENT_ATTR_TRACE_TYPE]));
+
+	printf("Data=");
+	/*
+	 * The data attribute is optional like the others: check it before
+	 * dereferencing. Bytes are read in place from the attribute payload,
+	 * so no fixed-size stack copy can be overrun by a large payload.
+	 */
+	if (trace) {
+		const __u8 *data = RTA_DATA(trace);
+		int len = RTA_PAYLOAD(trace);
+		int i;
+
+		for (i = 0; i < len; i++)
+			printf("%02x", data[i]);
+	}
+
+	printf("\n");
+}
+
static int process_msg(struct nlmsghdr *n, void *arg)
{
struct rtattr *attrs[IOAM6_ATTR_MAX + 1];
@@ -126,6 +160,32 @@ static int process_msg(struct nlmsghdr *n, void *arg)
return 0;
}
+/*
+ * Handle one netlink message while monitoring IOAM6 events.
+ *
+ * @ctrl: unused here.
+ * @n:    the received netlink message.
+ * @arg:  unused here.
+ *
+ * Returns -1 when the message type is not the IOAM6 generic netlink family
+ * or the message is shorter than a generic netlink header; returns 0
+ * otherwise. Only IOAM6_EVENT_TRACE is printed; other commands are ignored.
+ */
+static int ioam6_monitor_msg(struct rtnl_ctrl_data *ctrl, struct nlmsghdr *n,
+ void *arg)
+{
+ struct rtattr *attrs[IOAM6_EVENT_ATTR_MAX + 1];
+ const struct genlmsghdr *ghdr = NLMSG_DATA(n);
+ int len = n->nlmsg_len;
+
+ if (n->nlmsg_type != genl_family)
+ return -1;
+
+ /* len becomes the size of the attribute area after the genl header */
+ len -= NLMSG_LENGTH(GENL_HDRLEN);
+ if (len < 0)
+ return -1;
+
+ /* attributes start right after the generic netlink header */
+ parse_rtattr(attrs, IOAM6_EVENT_ATTR_MAX,
+ (void *)ghdr + GENL_HDRLEN, len);
+
+ switch (ghdr->cmd) {
+ case IOAM6_EVENT_TRACE:
+ print_trace(attrs);
+ break;
+ }
+
+ return 0;
+}
+
static int ioam6_do_cmd(void)
{
IOAM6_REQUEST(req, 1056, opts.cmd, NLM_F_REQUEST);
@@ -134,6 +194,19 @@ static int ioam6_do_cmd(void)
if (genl_init_handle(&grth, IOAM6_GENL_NAME, &genl_family))
exit(1);
+ if (opts.monitor) {
+ if (genl_add_mcast_grp(&grth, genl_family,
+ IOAM6_GENL_EV_GRP_NAME) < 0) {
+ perror("can't subscribe to ioam6 events");
+ exit(1);
+ }
+
+ if (rtnl_listen(&grth, ioam6_monitor_msg, stdout) < 0)
+ exit(1);
+
+ return 0;
+ }
+
req.n.nlmsg_type = genl_family;
switch (opts.cmd) {
@@ -325,6 +398,9 @@ int do_ioam6(int argc, char **argv)
invarg("Unknown", *argv);
}
+ } else if (strcmp(*argv, "monitor") == 0) {
+ opts.monitor = true;
+
} else {
invarg("Unknown", *argv);
}
diff --git a/ip/iplink_bond.c b/ip/iplink_bond.c
index 214244daa..19af67d00 100644
--- a/ip/iplink_bond.c
+++ b/ip/iplink_bond.c
@@ -148,6 +148,7 @@ static void print_explain(FILE *f)
" [ tlb_dynamic_lb TLB_DYNAMIC_LB ]\n"
" [ lacp_rate LACP_RATE ]\n"
" [ lacp_active LACP_ACTIVE]\n"
+ " [ coupled_control COUPLED_CONTROL ]\n"
" [ ad_select AD_SELECT ]\n"
" [ ad_user_port_key PORTKEY ]\n"
" [ ad_actor_sys_prio SYSPRIO ]\n"
@@ -163,6 +164,7 @@ static void print_explain(FILE *f)
"LACP_ACTIVE := off|on\n"
"LACP_RATE := slow|fast\n"
"AD_SELECT := stable|bandwidth|count\n"
+ "COUPLED_CONTROL := off|on\n"
);
}
@@ -176,13 +178,14 @@ static int bond_parse_opt(struct link_util *lu, int argc, char **argv,
{
__u8 mode, use_carrier, primary_reselect, fail_over_mac;
__u8 xmit_hash_policy, num_peer_notif, all_slaves_active;
- __u8 lacp_active, lacp_rate, ad_select, tlb_dynamic_lb;
+ __u8 lacp_active, lacp_rate, ad_select, tlb_dynamic_lb, coupled_control;
__u16 ad_user_port_key, ad_actor_sys_prio;
__u32 miimon, updelay, downdelay, peer_notify_delay, arp_interval, arp_validate;
__u32 arp_all_targets, resend_igmp, min_links, lp_interval;
__u32 packets_per_slave;
__u8 missed_max;
unsigned int ifindex;
+ int ret;
while (argc > 0) {
if (matches(*argv, "mode") == 0) {
@@ -367,6 +370,12 @@ static int bond_parse_opt(struct link_util *lu, int argc, char **argv,
lacp_active = get_index(lacp_active_tbl, *argv);
addattr8(n, 1024, IFLA_BOND_AD_LACP_ACTIVE, lacp_active);
+ } else if (strcmp(*argv, "coupled_control") == 0) {
+ NEXT_ARG();
+ coupled_control = parse_on_off("coupled_control", *argv, &ret);
+ if (ret)
+ return ret;
+ addattr8(n, 1024, IFLA_BOND_COUPLED_CONTROL, coupled_control);
} else if (matches(*argv, "ad_select") == 0) {
NEXT_ARG();
if (get_index(ad_select_tbl, *argv) < 0)
@@ -659,6 +668,13 @@ static void bond_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[])
lacp_rate);
}
+ if (tb[IFLA_BOND_COUPLED_CONTROL]) {
+ print_on_off(PRINT_ANY,
+ "coupled_control",
+ "coupled_control %s ",
+ rta_getattr_u8(tb[IFLA_BOND_COUPLED_CONTROL]));
+ }
+
if (tb[IFLA_BOND_AD_SELECT]) {
const char *ad_select = get_name(ad_select_tbl,
rta_getattr_u8(tb[IFLA_BOND_AD_SELECT]));
diff --git a/lib/libnetlink.c b/lib/libnetlink.c
index 016482294..e2b284e69 100644
--- a/lib/libnetlink.c
+++ b/lib/libnetlink.c
@@ -111,6 +111,10 @@ int nl_dump_ext_ack(const struct nlmsghdr *nlh, nl_ext_ack_fn_t errfn)
err_nlh = &err->msg;
}
+ if (tb[NLMSGERR_ATTR_MISS_TYPE])
+ fprintf(stderr, "Missing required attribute type %u\n",
+ mnl_attr_get_u32(tb[NLMSGERR_ATTR_MISS_TYPE]));
+
if (errfn)
return errfn(msg, off, err_nlh);
diff --git a/lib/utils.c b/lib/utils.c
index 6c1c1a8d3..deb7654a0 100644
--- a/lib/utils.c
+++ b/lib/utils.c
@@ -2003,3 +2003,17 @@ int proto_a2n(unsigned short *id, const char *buf,
return 0;
}
+
+/*
+ * Open a proc-style statistics file for reading.
+ *
+ * @env:  name of an environment variable; when it is set, its value is
+ *        used verbatim as the path to open.
+ * @name: path relative to the proc root, used when @env is not set. The
+ *        root is $PROC_ROOT when that variable is set, "/proc" otherwise.
+ *
+ * Returns the stream from fopen(path, "r"), or NULL on failure. NULL is
+ * also returned, without touching errno, when the composed path does not
+ * fit in the internal buffer.
+ */
+FILE *generic_proc_open(const char *env, const char *name)
+{
+	const char *p = getenv(env);
+	char store[128];
+
+	if (!p) {
+		int n;
+
+		p = getenv("PROC_ROOT") ? : "/proc";
+		n = snprintf(store, sizeof(store), "%s/%s", p, name);
+		/* A truncated path names a different file: refuse to open it */
+		if (n < 0 || (size_t)n >= sizeof(store))
+			return NULL;
+		p = store;
+	}
+
+	return fopen(p, "r");
+}
diff --git a/man/man8/ip-ioam.8 b/man/man8/ip-ioam.8
index 1bdc0ece9..c723d7823 100644
--- a/man/man8/ip-ioam.8
+++ b/man/man8/ip-ioam.8
@@ -49,12 +49,17 @@ ip-ioam \- IPv6 In-situ OAM (IOAM)
.RI " { " ID " | "
.BR none " }"
+.ti -8
+.B ip ioam monitor
+
.SH DESCRIPTION
The \fBip ioam\fR command is used to configure IPv6 In-situ OAM (IOAM6)
internal parameters, namely IOAM namespaces and schemas.
.PP
Those parameters also include the mapping between an IOAM namespace and an IOAM
schema.
+.PP
+The \fBip ioam monitor\fR command displays IOAM data received.
.SH EXAMPLES
.PP
diff --git a/man/man8/ss.8 b/man/man8/ss.8
index 4ece41fa6..e23af826f 100644
--- a/man/man8/ss.8
+++ b/man/man8/ss.8
@@ -24,6 +24,9 @@ Output version information.
.B \-H, \-\-no-header
Suppress header line.
.TP
+.B \-Q, \-\-no-queues
+Suppress sending and receiving queue columns.
+.TP
.B \-O, \-\-oneline
Print each socket's data on a single line.
.TP
@@ -423,6 +426,12 @@ to FILE after applying filters. If FILE is - stdout is used.
Read filter information from FILE. Each line of FILE is interpreted
like single command line option. If FILE is - stdin is used.
.TP
+.B \-\-bpf-maps
+Pretty-print all the BPF socket-local data entries for each socket.
+.TP
+.B \-\-bpf-map-id=MAP_ID
+Pretty-print the BPF socket-local data entries for the requested map ID. Can be used more than once.
+.TP
.B FILTER := [ state STATE-FILTER ] [ EXPRESSION ]
Please take a look at the official documentation for details regarding filters.
diff --git a/man/man8/tc-mirred.8 b/man/man8/tc-mirred.8
index e529fa6a0..ea408467f 100644
--- a/man/man8/tc-mirred.8
+++ b/man/man8/tc-mirred.8
@@ -9,13 +9,24 @@ mirred - mirror/redirect action
.I DIRECTION ACTION
.RB "[ " index
.IR INDEX " ] "
-.BI dev " DEVICENAME"
+.I TARGET
.ti -8
.IR DIRECTION " := { "
.BR ingress " | " egress " }"
.ti -8
+.IR TARGET " := { " DEV " | " BLOCK " }"
+
+.ti -8
+.IR DEV " := "
+.BI dev " DEVICENAME"
+
+.ti -8
+.IR BLOCK " := "
+.BI blockid " BLOCKID"
+
+.ti -8
.IR ACTION " := { "
.BR mirror " | " redirect " }"
.SH DESCRIPTION
@@ -24,6 +35,12 @@ The
action allows packet mirroring (copying) or redirecting (stealing) the packet it
receives. Mirroring is what is sometimes referred to as Switch Port Analyzer
(SPAN) and is commonly used to analyze and/or debug flows.
+When mirroring to a tc block, the packet will be mirrored to all the ports in
+the block with the exception of the port where the packet ingressed, if that
+port is part of the tc block. Redirecting is similar to mirroring except that
+the behaviour is to mirror to the first N - 1 ports in the block and redirect to
+the last one (note that the port in which the packet arrived is not going to be
+mirrored or redirected to).
.SH OPTIONS
.TP
.B ingress
@@ -39,7 +56,7 @@ Define whether the packet should be copied
.RB ( mirror )
or moved
.RB ( redirect )
-to the destination interface.
+to the destination interface or block.
.TP
.BI index " INDEX"
Assign a unique ID to this action instead of letting the kernel choose one
@@ -49,6 +66,9 @@ is a 32bit unsigned integer greater than zero.
.TP
.BI dev " DEVICENAME"
Specify the network interface to redirect or mirror to.
+.TP
+.BI blockid " BLOCKID"
+Specify the tc block to redirect or mirror to.
.SH EXAMPLES
Limit ingress bandwidth on eth0 to 1mbit/s, redirect exceeding traffic to lo for
debugging purposes:
diff --git a/man/man8/tc.8 b/man/man8/tc.8
index 3175454b9..dce58af17 100644
--- a/man/man8/tc.8
+++ b/man/man8/tc.8
@@ -127,7 +127,7 @@ tc \- show / manipulate traffic control settings
\fB[ \fB-nm \fR| \fB-nam\fR[\fIes\fR] \fB] \fR|
\fB[ \fR{ \fB-cf \fR| \fB-c\fR[\fIonf\fR] \fR} \fB[ filename ] \fB] \fR
\fB[ -t\fR[imestamp\fR] \fB\] \fR| \fB[ -t\fR[short\fR] \fR| \fB[
--o\fR[neline\fR] \fB]\fR }
+-o\fR[neline\fR] \fB] \fR| \fB[ -echo ]\fR }
.ti 8
.IR FORMAT " := {"
@@ -743,6 +743,10 @@ When\fB\ tc monitor\fR\ runs, print timestamp before the event message in format
When\fB\ tc monitor\fR\ runs, prints short timestamp before the event message in format:
[<YYYY>-<MM>-<DD>T<hh:mm:ss>.<ms>]
+.TP
+.BR "\-echo"
+Request the kernel to send the applied configuration back.
+
.SH FORMAT
The show command has additional formatting options:
diff --git a/misc/ifstat.c b/misc/ifstat.c
index 72901097e..685e66c9c 100644
--- a/misc/ifstat.c
+++ b/misc/ifstat.c
@@ -51,7 +51,7 @@ int sub_type;
char info_source[128];
int source_mismatch;
-#define MAXS (sizeof(struct rtnl_link_stats)/sizeof(__u32))
+#define MAXS (sizeof(struct rtnl_link_stats64)/sizeof(__u64))
#define NO_SUB_TYPE 0xffff
struct ifstat_ent {
@@ -60,7 +60,7 @@ struct ifstat_ent {
int ifindex;
unsigned long long val[MAXS];
double rate[MAXS];
- __u32 ival[MAXS];
+ __u64 ival[MAXS];
};
static const char *stats[MAXS] = {
@@ -74,19 +74,25 @@ static const char *stats[MAXS] = {
"tx_dropped",
"multicast",
"collisions",
+
"rx_length_errors",
"rx_over_errors",
"rx_crc_errors",
"rx_frame_errors",
"rx_fifo_errors",
"rx_missed_errors",
+
"tx_aborted_errors",
"tx_carrier_errors",
"tx_fifo_errors",
"tx_heartbeat_errors",
"tx_window_errors",
+
"rx_compressed",
- "tx_compressed"
+ "tx_compressed",
+ "rx_nohandler",
+
+ "rx_otherhost_dropped",
};
struct ifstat_ent *kern_db;
@@ -174,7 +180,7 @@ static int get_nlmsg(struct nlmsghdr *m, void *arg)
return 0;
parse_rtattr(tb, IFLA_MAX, IFLA_RTA(ifi), len);
- if (tb[IFLA_IFNAME] == NULL || tb[IFLA_STATS] == NULL)
+ if (tb[IFLA_IFNAME] == NULL)
return 0;
n = malloc(sizeof(*n));
@@ -182,10 +188,31 @@ static int get_nlmsg(struct nlmsghdr *m, void *arg)
errno = ENOMEM;
return -1;
}
+
n->ifindex = ifi->ifi_index;
n->name = strdup(RTA_DATA(tb[IFLA_IFNAME]));
- memcpy(&n->ival, RTA_DATA(tb[IFLA_STATS]), sizeof(n->ival));
+ if (!n->name) {
+ free(n);
+ errno = ENOMEM;
+ return -1;
+ }
+
memset(&n->rate, 0, sizeof(n->rate));
+
+ if (tb[IFLA_STATS64]) {
+ memcpy(&n->ival, RTA_DATA(tb[IFLA_STATS64]), sizeof(n->ival));
+ } else if (tb[IFLA_STATS]) {
+ __u32 *stats = RTA_DATA(tb[IFLA_STATS]);
+
+ /* expand 32 bit values to 64 bit */
+ for (i = 0; i < MAXS; i++)
+ n->ival[i] = stats[i];
+ } else {
+ /* missing stats? */
+ free(n);
+ return 0;
+ }
+
for (i = 0; i < MAXS; i++)
n->val[i] = n->ival[i];
n->next = kern_db;
@@ -379,10 +406,10 @@ static void format_rate(FILE *fp, const unsigned long long *vals,
fprintf(fp, "%8llu ", vals[i]);
if (rates[i] > mega) {
- sprintf(temp, "%uM", (unsigned int)(rates[i]/mega));
+ snprintf(temp, sizeof(temp), "%uM", (unsigned int)(rates[i]/mega));
fprintf(fp, "%-6s ", temp);
} else if (rates[i] > kilo) {
- sprintf(temp, "%uK", (unsigned int)(rates[i]/kilo));
+ snprintf(temp, sizeof(temp), "%uK", (unsigned int)(rates[i]/kilo));
fprintf(fp, "%-6s ", temp);
} else
fprintf(fp, "%-6u ", (unsigned int)rates[i]);
@@ -400,10 +427,10 @@ static void format_pair(FILE *fp, const unsigned long long *vals, int i, int k)
fprintf(fp, "%8llu ", vals[i]);
if (vals[k] > giga) {
- sprintf(temp, "%uM", (unsigned int)(vals[k]/mega));
+ snprintf(temp, sizeof(temp), "%uM", (unsigned int)(vals[k]/mega));
fprintf(fp, "%-6s ", temp);
} else if (vals[k] > mega) {
- sprintf(temp, "%uK", (unsigned int)(vals[k]/kilo));
+ snprintf(temp, sizeof(temp), "%uK", (unsigned int)(vals[k]/kilo));
fprintf(fp, "%-6s ", temp);
} else
fprintf(fp, "%-6u ", (unsigned int)vals[k]);
@@ -675,7 +702,7 @@ static void server_loop(int fd)
p.fd = fd;
p.events = p.revents = POLLIN;
- sprintf(info_source, "%d.%lu sampling_interval=%d time_const=%d",
+ snprintf(info_source, sizeof(info_source), "%d.%lu sampling_interval=%d time_const=%d",
getpid(), (unsigned long)random(), scan_interval/1000, time_constant/1000);
load_info();
@@ -893,7 +920,7 @@ int main(int argc, char *argv[])
sun.sun_family = AF_UNIX;
sun.sun_path[0] = 0;
- sprintf(sun.sun_path+1, "ifstat%d", getuid());
+ snprintf(sun.sun_path + 1, sizeof(sun.sun_path) - 1, "ifstat%d", getuid());
if (scan_interval > 0) {
if (time_constant == 0)
diff --git a/misc/nstat.c b/misc/nstat.c
index 2c10feaa3..07d010dec 100644
--- a/misc/nstat.c
+++ b/misc/nstat.c
@@ -43,35 +43,22 @@ int npatterns;
char info_source[128];
int source_mismatch;
-static int generic_proc_open(const char *env, char *name)
-{
- char store[128];
- char *p = getenv(env);
-
- if (!p) {
- p = getenv("PROC_ROOT") ? : "/proc";
- snprintf(store, sizeof(store)-1, "%s/%s", p, name);
- p = store;
- }
- return open(p, O_RDONLY);
-}
-
-static int net_netstat_open(void)
+static FILE *net_netstat_open(void)
{
return generic_proc_open("PROC_NET_NETSTAT", "net/netstat");
}
-static int net_snmp_open(void)
+static FILE *net_snmp_open(void)
{
return generic_proc_open("PROC_NET_SNMP", "net/snmp");
}
-static int net_snmp6_open(void)
+static FILE *net_snmp6_open(void)
{
return generic_proc_open("PROC_NET_SNMP6", "net/snmp6");
}
-static int net_sctp_snmp_open(void)
+static FILE *net_sctp_snmp_open(void)
{
return generic_proc_open("PROC_NET_SCTP_SNMP", "net/sctp/snmp");
}
@@ -277,7 +264,7 @@ static void load_ugly_table(FILE *fp)
static void load_sctp_snmp(void)
{
- FILE *fp = fdopen(net_sctp_snmp_open(), "r");
+ FILE *fp = net_sctp_snmp_open();
if (fp) {
load_good_table(fp);
@@ -287,7 +274,7 @@ static void load_sctp_snmp(void)
static void load_snmp(void)
{
- FILE *fp = fdopen(net_snmp_open(), "r");
+ FILE *fp = net_snmp_open();
if (fp) {
load_ugly_table(fp);
@@ -297,7 +284,7 @@ static void load_snmp(void)
static void load_snmp6(void)
{
- FILE *fp = fdopen(net_snmp6_open(), "r");
+ FILE *fp = net_snmp6_open();
if (fp) {
load_good_table(fp);
@@ -307,7 +294,7 @@ static void load_snmp6(void)
static void load_netstat(void)
{
- FILE *fp = fdopen(net_netstat_open(), "r");
+ FILE *fp = net_netstat_open();
if (fp) {
load_ugly_table(fp);
@@ -483,7 +470,7 @@ static void server_loop(int fd)
p.fd = fd;
p.events = p.revents = POLLIN;
- sprintf(info_source, "%d.%lu sampling_interval=%d time_const=%d",
+ snprintf(info_source, sizeof(info_source), "%d.%lu sampling_interval=%d time_const=%d",
getpid(), (unsigned long)random(), scan_interval/1000, time_constant/1000);
load_netstat();
@@ -580,7 +567,7 @@ static const struct option longopts[] = {
int main(int argc, char *argv[])
{
- char *hist_name;
+ char hist_name[128];
struct sockaddr_un sun;
FILE *hist_fp = NULL;
int ch;
@@ -636,7 +623,7 @@ int main(int argc, char *argv[])
sun.sun_family = AF_UNIX;
sun.sun_path[0] = 0;
- sprintf(sun.sun_path+1, "nstat%d", getuid());
+ snprintf(sun.sun_path + 1, sizeof(sun.sun_path) - 1, "nstat%d", getuid());
if (scan_interval > 0) {
if (time_constant == 0)
@@ -668,10 +655,11 @@ int main(int argc, char *argv[])
patterns = argv;
npatterns = argc;
- if ((hist_name = getenv("NSTAT_HISTORY")) == NULL) {
- hist_name = malloc(128);
- sprintf(hist_name, "/tmp/.nstat.u%d", getuid());
- }
+ if (getenv("NSTAT_HISTORY"))
+ snprintf(hist_name, sizeof(hist_name),
+ "%s", getenv("NSTAT_HISTORY"));
+ else
+ snprintf(hist_name, sizeof(hist_name), "/tmp/.nstat.u%d", getuid());
if (reset_history)
unlink(hist_name);
diff --git a/misc/ss.c b/misc/ss.c
index fb560a55e..a6e638d29 100644
--- a/misc/ss.c
+++ b/misc/ss.c
@@ -52,6 +52,26 @@
#include <linux/tls.h>
#include <linux/mptcp.h>
+#ifdef HAVE_LIBBPF
+/* If libbpf is new enough (0.5+), support for pretty-printing BPF socket-local
+ * storage is enabled; otherwise we emit a warning and disable it.
+ * ENABLE_BPF_SKSTORAGE_SUPPORT only gates the socket-local storage feature,
+ * so disabling it does not prevent other features that rely on HAVE_LIBBPF
+ * from being usable.
+ */
+#define ENABLE_BPF_SKSTORAGE_SUPPORT
+
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
+#include <linux/btf.h>
+
+#if (LIBBPF_MAJOR_VERSION == 0) && (LIBBPF_MINOR_VERSION < 5)
+#warning "libbpf version 0.5 or later is required, disabling BPF socket-local storage support"
+#undef ENABLE_BPF_SKSTORAGE_SUPPORT
+#endif
+#endif
+
#if HAVE_RPC
#include <rpc/rpc.h>
#include <rpc/xdr.h>
@@ -76,6 +96,7 @@
int preferred_family = AF_UNSPEC;
static int show_options;
int show_details;
+static int show_queues = 1;
static int show_processes;
static int show_threads;
static int show_mem;
@@ -458,19 +479,6 @@ static void filter_merge_defaults(struct filter *f)
}
}
-static FILE *generic_proc_open(const char *env, const char *name)
-{
- const char *p = getenv(env);
- char store[128];
-
- if (!p) {
- p = getenv("PROC_ROOT") ? : "/proc";
- snprintf(store, sizeof(store)-1, "%s/%s", p, name);
- p = store;
- }
-
- return fopen(p, "r");
-}
#define net_tcp_open() generic_proc_open("PROC_NET_TCP", "net/tcp")
#define net_tcp6_open() generic_proc_open("PROC_NET_TCP6", "net/tcp6")
#define net_udp_open() generic_proc_open("PROC_NET_UDP", "net/udp")
@@ -1035,11 +1043,10 @@ static int buf_update(int len)
}
/* Append content to buffer as part of the current field */
-__attribute__((format(printf, 1, 2)))
-static void out(const char *fmt, ...)
+static void vout(const char *fmt, va_list args)
{
struct column *f = current_field;
- va_list args;
+ va_list _args;
char *pos;
int len;
@@ -1050,18 +1057,27 @@ static void out(const char *fmt, ...)
buffer.head = buf_chunk_new();
again: /* Append to buffer: if we have a new chunk, print again */
+ va_copy(_args, args);
pos = buffer.cur->data + buffer.cur->len;
- va_start(args, fmt);
/* Limit to tail room. If we hit the limit, buf_update() will tell us */
- len = vsnprintf(pos, buf_chunk_avail(buffer.tail), fmt, args);
- va_end(args);
+ len = vsnprintf(pos, buf_chunk_avail(buffer.tail), fmt, _args);
if (buf_update(len))
goto again;
}
+/* printf-style front end to vout(): collects the variadic arguments into a
+ * va_list and forwards them together with the format string.
+ */
+__attribute__((format(printf, 1, 2)))
+static void out(const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ vout(fmt, args);
+ va_end(args);
+}
+
static int print_left_spacing(struct column *f, int stored, int printed)
{
int s;
@@ -1426,10 +1442,13 @@ static void sock_state_print(struct sockstat *s)
out("%s", sstate_name[s->state]);
}
- field_set(COL_RECVQ);
- out("%-6d", s->rq);
- field_set(COL_SENDQ);
- out("%-6d", s->wq);
+ if (show_queues) {
+ field_set(COL_RECVQ);
+ out("%-6d", s->rq);
+ field_set(COL_SENDQ);
+ out("%-6d", s->wq);
+ }
+
field_set(COL_ADDR);
}
@@ -3395,6 +3414,318 @@ static void parse_diag_msg(struct nlmsghdr *nlh, struct sockstat *s)
memcpy(s->remote.data, r->id.idiag_dst, s->local.bytelen);
}
+#ifdef ENABLE_BPF_SKSTORAGE_SUPPORT
+
+#define MAX_NR_BPF_MAP_ID_OPTS 32
+
+struct btf;
+
+/* Everything ss keeps about one BPF socket-local storage map */
+struct bpf_sk_storage_map_info {
+	unsigned int id;		/* BPF map ID */
+	int fd;				/* fd obtained for that ID */
+	struct bpf_map_info info;	/* filled by bpf_obj_get_info_by_fd() */
+	struct btf *btf;		/* NULL if the map has no BTF value type */
+	struct btf_dump *dump;		/* dumper created from btf */
+};
+
+/* Maps given with --bpf-map-id or loaded lazily while printing;
+ * show_all is set by --bpf-maps.
+ */
+static struct bpf_map_opts {
+	unsigned int nr_maps;
+	struct bpf_sk_storage_map_info maps[MAX_NR_BPF_MAP_ID_OPTS];
+	bool show_all;
+} bpf_map_opts;
+
+/* Tell the user that --bpf-maps and --bpf-map-id were both given */
+static void bpf_map_opts_mixed_error(void)
+{
+	static const char msg[] =
+		"ss: --bpf-maps and --bpf-map-id cannot be used together\n";
+
+	fputs(msg, stderr);
+}
+
+/* Load the BTF object referenced by @info into *@btf.
+ *
+ * A map without a BTF value type yields *@btf == NULL and success.
+ * Returns 0 on success, -1 (after printing an error) if loading fails.
+ */
+static int bpf_maps_opts_load_btf(struct bpf_map_info *info, struct btf **btf)
+{
+	if (!info->btf_value_type_id) {
+		*btf = NULL;
+		return 0;
+	}
+
+	*btf = btf__load_from_kernel_by_id(info->btf_id);
+	if (*btf)
+		return 0;
+
+	fprintf(stderr, "ss: failed to load BTF for map ID %u\n",
+		info->id);
+	return -1;
+}
+
+/* Output hook handed to btf_dump__new(): forward the text to vout() */
+static void out_bpf_sk_storage_print_fn(void *ctx, const char *fmt, va_list args)
+{
+	(void)ctx;	/* no per-dump context is needed */
+
+	vout(fmt, args);
+}
+
+/* Open BPF map @map_id and append it, together with its info, BTF and BTF
+ * dumper, to bpf_map_opts.maps[].
+ *
+ * Returns -1 once an error has been reported on stderr. Returns 0 otherwise,
+ * which covers both "entry appended" and "map skipped" (table already full,
+ * or no map with that ID); callers compare bpf_map_opts.nr_maps before and
+ * after the call to tell the two apart.
+ */
+static int bpf_map_opts_load_info(unsigned int map_id)
+{
+	struct btf_dump_opts dopts = {
+		.sz = sizeof(struct btf_dump_opts)
+	};
+	struct bpf_map_info info = {};
+	uint32_t len = sizeof(info);
+	struct btf_dump *dump;
+	struct btf *btf;
+	int fd;
+	int r;
+
+	if (bpf_map_opts.nr_maps == MAX_NR_BPF_MAP_ID_OPTS) {
+		fprintf(stderr,
+			"ss: too many (> %u) BPF socket-local storage maps found, skipping map ID %u\n",
+			MAX_NR_BPF_MAP_ID_OPTS, map_id);
+		return 0;
+	}
+
+	fd = bpf_map_get_fd_by_id(map_id);
+	if (fd < 0) {
+		/* errno holds a positive code; a missing map is skipped
+		 * silently.
+		 */
+		if (errno == ENOENT)
+			return 0;
+
+		fprintf(stderr, "ss: cannot get fd for BPF map ID %u%s\n",
+			map_id, errno == EPERM ?
+			": missing root permissions, CAP_BPF, or CAP_SYS_ADMIN" : "");
+		return -1;
+	}
+
+	r = bpf_obj_get_info_by_fd(fd, &info, &len);
+	if (r) {
+		fprintf(stderr, "ss: failed to get info for BPF map ID %u\n",
+			map_id);
+		close(fd);
+		return -1;
+	}
+
+	if (info.type != BPF_MAP_TYPE_SK_STORAGE) {
+		/* Report the ID being loaded: this function also runs
+		 * outside of option parsing, where the getopt argument
+		 * string does not describe @map_id.
+		 */
+		fprintf(stderr,
+			"ss: BPF map with ID %u has type ID %d, expecting %d ('sk_storage')\n",
+			map_id, info.type, BPF_MAP_TYPE_SK_STORAGE);
+		close(fd);
+		return -1;
+	}
+
+	r = bpf_maps_opts_load_btf(&info, &btf);
+	if (r) {
+		close(fd);
+		return -1;
+	}
+
+	dump = btf_dump__new(btf, out_bpf_sk_storage_print_fn, NULL, &dopts);
+	if (!dump) {
+		btf__free(btf);
+		close(fd);
+		fprintf(stderr, "Failed to create btf_dump object\n");
+		return -1;
+	}
+
+	bpf_map_opts.maps[bpf_map_opts.nr_maps].id = map_id;
+	bpf_map_opts.maps[bpf_map_opts.nr_maps].fd = fd;
+	bpf_map_opts.maps[bpf_map_opts.nr_maps].info = info;
+	bpf_map_opts.maps[bpf_map_opts.nr_maps].btf = btf;
+	bpf_map_opts.maps[bpf_map_opts.nr_maps++].dump = dump;
+
+	return 0;
+}
+
+/* Return the already loaded entry for @map_id, or NULL if there is none */
+static struct bpf_sk_storage_map_info *bpf_map_opts_find(unsigned int map_id)
+{
+	unsigned int i;
+
+	for (i = 0; i < bpf_map_opts.nr_maps; ++i) {
+		if (bpf_map_opts.maps[i].id == map_id)
+			return &bpf_map_opts.maps[i];
+	}
+
+	return NULL;
+}
+
+/* Return the entry for @map_id, loading it on first use.
+ *
+ * Returns NULL when the map could not be loaded, including when the loader
+ * skipped it without appending an entry.
+ */
+static struct bpf_sk_storage_map_info *bpf_map_opts_get_info(
+	unsigned int map_id)
+{
+	unsigned int nr_before = bpf_map_opts.nr_maps;
+	struct bpf_sk_storage_map_info *found;
+
+	found = bpf_map_opts_find(map_id);
+	if (found)
+		return found;
+
+	if (bpf_map_opts_load_info(map_id))
+		return NULL;
+
+	/* A skipped map leaves the table untouched: the last slot then
+	 * belongs to some other map (or does not exist at all), so it must
+	 * not be handed out.
+	 */
+	if (bpf_map_opts.nr_maps == nr_before)
+		return NULL;
+
+	return &bpf_map_opts.maps[bpf_map_opts.nr_maps - 1];
+}
+
+/* Handle one --bpf-map-id argument: validate the ID and load the map.
+ *
+ * Returns 0 if the map is loaded (or was skipped with a warning because the
+ * table is full), -1 after reporting an error otherwise.
+ */
+static int bpf_map_opts_add_id(const char *optarg)
+{
+	unsigned int nr_before = bpf_map_opts.nr_maps;
+	size_t optarg_len;
+	unsigned long id;
+	char *end;
+
+	if (bpf_map_opts.show_all) {
+		bpf_map_opts_mixed_error();
+		return -1;
+	}
+
+	optarg_len = strlen(optarg);
+	id = strtoul(optarg, &end, 0);
+	if (end != optarg + optarg_len || id == 0 || id >= UINT32_MAX) {
+		fprintf(stderr, "ss: invalid BPF map ID %s\n", optarg);
+		return -1;
+	}
+
+	/* The same ID given twice is not an error. */
+	if (bpf_map_opts_find(id))
+		return 0;
+
+	/* Load the map's data now; failures were reported by the loader. */
+	if (bpf_map_opts_load_info(id))
+		return -1;
+
+	if (bpf_map_opts.nr_maps > nr_before)
+		return 0;
+
+	/* Nothing was appended. With a full table the loader has already
+	 * warned that it skips this ID, so carry on without it.
+	 */
+	if (nr_before == MAX_NR_BPF_MAP_ID_OPTS)
+		return 0;
+
+	/* Otherwise the loader found no map with this ID and stayed quiet. */
+	fprintf(stderr, "ss: BPF map ID %s does not exist\n", optarg);
+	return -1;
+}
+
+/* Release the BTF dumper, the BTF object and the fd of every loaded map */
+static void bpf_map_opts_destroy(void)
+{
+	/* nr_maps is unsigned; use a matching index type */
+	unsigned int i;
+
+	for (i = 0; i < bpf_map_opts.nr_maps; ++i) {
+		btf_dump__free(bpf_map_opts.maps[i].dump);
+		btf__free(bpf_map_opts.maps[i].btf);
+		close(bpf_map_opts.maps[i].fd);
+	}
+}
+
+/* Build the nested INET_DIAG_REQ_SK_BPF_STORAGES request attribute.
+ *
+ * Returns a malloc()ed attribute the caller must free(), or NULL if the
+ * allocation fails.
+ */
+static struct rtattr *bpf_map_opts_alloc_rta(void)
+{
+	const size_t fd_attr_len = RTA_LENGTH(sizeof(int));
+	struct rtattr *nest;
+	size_t payload = 0;
+	unsigned int idx;
+	char *cursor;
+
+	/* With show_all the nest stays empty: the kernel then returns all
+	 * the socket-local data attached to a socket, no matter the map ID.
+	 */
+	if (!bpf_map_opts.show_all)
+		payload = fd_attr_len * bpf_map_opts.nr_maps;
+
+	nest = malloc(RTA_LENGTH(payload));
+	if (!nest)
+		return NULL;
+
+	nest->rta_type = INET_DIAG_REQ_SK_BPF_STORAGES | NLA_F_NESTED;
+	nest->rta_len = RTA_LENGTH(payload);
+
+	/* nr_maps may be non-zero even with show_all (maps get loaded while
+	 * printing and inet_show_netlink() may retry), so stop here rather
+	 * than put specific map fds into the request.
+	 */
+	if (bpf_map_opts.show_all)
+		return nest;
+
+	/* One SK_DIAG_BPF_STORAGE_REQ_MAP_FD attribute per loaded map */
+	cursor = RTA_DATA(nest);
+	for (idx = 0; idx < bpf_map_opts.nr_maps; idx++) {
+		struct rtattr *fd_attr = (struct rtattr *)cursor;
+
+		fd_attr->rta_type = SK_DIAG_BPF_STORAGE_REQ_MAP_FD;
+		fd_attr->rta_len = fd_attr_len;
+		*(int *)RTA_DATA(fd_attr) = bpf_map_opts.maps[idx].fd;
+
+		cursor += fd_attr_len;
+	}
+
+	return nest;
+}
+
+/* Print " map_id:<id>" followed by a compact BTF dump of @data on the current
+ * line; a negative dump result is reported inline.
+ */
+static void out_bpf_sk_storage_oneline(struct bpf_sk_storage_map_info *info,
+				       const void *data, size_t len)
+{
+	struct btf_dump_type_data_opts dump_opts = {
+		.sz = sizeof(struct btf_dump_type_data_opts),
+		.compact = 1,
+		.emit_zeroes = 1
+	};
+	int err;
+
+	out(" map_id:%d", info->id);
+
+	err = btf_dump__dump_type_data(info->dump,
+				       info->info.btf_value_type_id,
+				       data, len, &dump_opts);
+	if (err >= 0)
+		return;
+
+	out("failed to dump data: %d", err);
+}
+
+/* Print @data as an indented, bracketed "map_id:<id> [ ... ]" section using
+ * the map's BTF dumper; a negative dump result is reported inside the
+ * brackets.
+ */
+static void out_bpf_sk_storage_multiline(struct bpf_sk_storage_map_info *info,
+					 const void *data, size_t len)
+{
+	struct btf_dump_type_data_opts dump_opts = {
+		.sz = sizeof(struct btf_dump_type_data_opts),
+		.emit_zeroes = 1,
+		.indent_level = 2
+	};
+	int err;
+
+	out("\n\tmap_id:%d [\n", info->id);
+
+	err = btf_dump__dump_type_data(info->dump,
+				       info->info.btf_value_type_id,
+				       data, len, &dump_opts);
+	if (err < 0)
+		out("\t\tfailed to dump data: %d", err);
+
+	out("\n\t]");
+}
+
+/* Print one socket-local storage value.
+ *
+ * @map_id: ID of the map the value belongs to
+ * @data:   raw value bytes
+ * @len:    number of bytes at @data; must equal the map's value_size
+ *
+ * The map's info is looked up (and loaded on first use); the value is then
+ * dumped in one-line or multi-line form depending on the oneline option.
+ */
+static void out_bpf_sk_storage(int map_id, const void *data, size_t len)
+{
+	struct bpf_sk_storage_map_info *map_info;
+
+	map_info = bpf_map_opts_get_info(map_id);
+	if (!map_info) {
+		/* The kernel might return a map we can't get info for, skip
+		 * it but print the other ones.
+		 */
+		out("\n\tmap_id: %d failed to fetch info, skipping\n", map_id);
+		return;
+	}
+
+	if (map_info->info.value_size != len) {
+		/* len is a size_t: print it with %zu, not %lu */
+		fprintf(stderr,
+			"map_id: %d: invalid value size, expecting %u, got %zu\n",
+			map_id, map_info->info.value_size, len);
+		return;
+	}
+
+	if (oneline)
+		out_bpf_sk_storage_oneline(map_info, data, len);
+	else
+		out_bpf_sk_storage_multiline(map_info, data, len);
+}
+
+/* Walk the nested attribute @bpf_stgs and print every SK_DIAG_BPF_STORAGE
+ * entry that carries both a map ID and a map value. Entries of another type,
+ * or missing either attribute, are skipped.
+ */
+static void show_sk_bpf_storages(struct rtattr *bpf_stgs)
+{
+	struct rtattr *tb[SK_DIAG_BPF_STORAGE_MAX + 1], *bpf_stg;
+	unsigned int rem, map_id;
+	struct rtattr *value;
+
+	for (bpf_stg = RTA_DATA(bpf_stgs), rem = RTA_PAYLOAD(bpf_stgs);
+	     RTA_OK(bpf_stg, rem); bpf_stg = RTA_NEXT(bpf_stg, rem)) {
+
+		if ((bpf_stg->rta_type & NLA_TYPE_MASK) != SK_DIAG_BPF_STORAGE)
+			continue;
+
+		parse_rtattr_nested(tb, SK_DIAG_BPF_STORAGE_MAX, bpf_stg);
+
+		/* Both attributes are needed: reading the payload of an
+		 * absent value attribute would dereference NULL.
+		 */
+		value = tb[SK_DIAG_BPF_STORAGE_MAP_VALUE];
+		if (!tb[SK_DIAG_BPF_STORAGE_MAP_ID] || !value)
+			continue;
+
+		map_id = rta_getattr_u32(tb[SK_DIAG_BPF_STORAGE_MAP_ID]);
+		out_bpf_sk_storage(map_id, RTA_DATA(value),
+				   RTA_PAYLOAD(value));
+	}
+}
+
+/* True if --bpf-maps was given or at least one map has been loaded */
+static bool bpf_map_opts_is_enabled(void)
+{
+	if (bpf_map_opts.show_all)
+		return true;
+
+	return bpf_map_opts.nr_maps != 0;
+}
+#endif
+
static int inet_show_sock(struct nlmsghdr *nlh,
struct sockstat *s)
{
@@ -3402,8 +3733,9 @@ static int inet_show_sock(struct nlmsghdr *nlh,
struct inet_diag_msg *r = NLMSG_DATA(nlh);
unsigned char v6only = 0;
- parse_rtattr(tb, INET_DIAG_MAX, (struct rtattr *)(r+1),
- nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)));
+ parse_rtattr_flags(tb, INET_DIAG_MAX, (struct rtattr *)(r+1),
+ nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)),
+ NLA_F_NESTED);
if (tb[INET_DIAG_PROTOCOL])
s->type = rta_getattr_u8(tb[INET_DIAG_PROTOCOL]);
@@ -3500,6 +3832,11 @@ static int inet_show_sock(struct nlmsghdr *nlh,
}
sctp_ino = s->ino;
+#ifdef ENABLE_BPF_SKSTORAGE_SUPPORT
+ if (tb[INET_DIAG_SK_BPF_STORAGES])
+ show_sk_bpf_storages(tb[INET_DIAG_SK_BPF_STORAGES]);
+#endif
+
return 0;
}
@@ -3581,13 +3918,14 @@ static int sockdiag_send(int family, int fd, int protocol, struct filter *f)
{
struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
DIAG_REQUEST(req, struct inet_diag_req_v2 r);
+ struct rtattr *bpf_rta = NULL;
char *bc = NULL;
int bclen;
__u32 proto;
struct msghdr msg;
struct rtattr rta_bc;
struct rtattr rta_proto;
- struct iovec iov[5];
+ struct iovec iov[6];
int iovlen = 1;
if (family == PF_UNSPEC)
@@ -3640,6 +3978,20 @@ static int sockdiag_send(int family, int fd, int protocol, struct filter *f)
iovlen += 2;
}
+#ifdef ENABLE_BPF_SKSTORAGE_SUPPORT
+ if (bpf_map_opts_is_enabled()) {
+ bpf_rta = bpf_map_opts_alloc_rta();
+ if (!bpf_rta) {
+ fprintf(stderr,
+ "ss: cannot alloc request for --bpf-map\n");
+ return -1;
+ }
+
+ iov[iovlen++] = (struct iovec){ bpf_rta, bpf_rta->rta_len };
+ req.nlh.nlmsg_len += bpf_rta->rta_len;
+ }
+#endif
+
msg = (struct msghdr) {
.msg_name = (void *)&nladdr,
.msg_namelen = sizeof(nladdr),
@@ -3648,10 +4000,13 @@ static int sockdiag_send(int family, int fd, int protocol, struct filter *f)
};
if (sendmsg(fd, &msg, 0) < 0) {
+ free(bpf_rta);
close(fd);
return -1;
}
+ free(bpf_rta);
+
return 0;
}
@@ -5372,6 +5727,10 @@ static void _usage(FILE *dest)
" --tos show tos and priority information\n"
" --cgroup show cgroup information\n"
" -b, --bpf show bpf filter socket information\n"
+#ifdef ENABLE_BPF_SKSTORAGE_SUPPORT
+" --bpf-maps show all BPF socket-local storage maps\n"
+" --bpf-map-id=MAP-ID show a BPF socket-local storage map\n"
+#endif
" -E, --events continually display sockets as they are destroyed\n"
" -Z, --context display task SELinux security contexts\n"
" -z, --contexts display task and socket SELinux security contexts\n"
@@ -5395,6 +5754,7 @@ static void _usage(FILE *dest)
"\n"
" -K, --kill forcibly close sockets, display what was closed\n"
" -H, --no-header Suppress header line\n"
+" -Q, --no-queues Suppress sending and receiving queue columns\n"
" -O, --oneline socket's data printed on a single line\n"
" --inet-sockopt show various inet socket options\n"
"\n"
@@ -5497,6 +5857,9 @@ wrong_state:
#define OPT_INET_SOCKOPT 262
+#define OPT_BPF_MAPS 263
+#define OPT_BPF_MAP_ID 264
+
static const struct option long_opts[] = {
{ "numeric", 0, 0, 'n' },
{ "resolve", 0, 0, 'r' },
@@ -5538,10 +5901,15 @@ static const struct option long_opts[] = {
{ "cgroup", 0, 0, OPT_CGROUP },
{ "kill", 0, 0, 'K' },
{ "no-header", 0, 0, 'H' },
+ { "no-queues", 0, 0, 'Q' },
{ "xdp", 0, 0, OPT_XDPSOCK},
{ "mptcp", 0, 0, 'M' },
{ "oneline", 0, 0, 'O' },
{ "inet-sockopt", 0, 0, OPT_INET_SOCKOPT },
+#ifdef ENABLE_BPF_SKSTORAGE_SUPPORT
+ { "bpf-maps", 0, 0, OPT_BPF_MAPS},
+ { "bpf-map-id", 1, 0, OPT_BPF_MAP_ID},
+#endif
{ 0 }
};
@@ -5557,7 +5925,7 @@ int main(int argc, char *argv[])
int state_filter = 0;
while ((ch = getopt_long(argc, argv,
- "dhalBetuwxnro460spTbEf:mMiA:D:F:vVzZN:KHSO",
+ "dhalBetuwxnro460spTbEf:mMiA:D:F:vVzZN:KHQSO",
long_opts, NULL)) != EOF) {
switch (ch) {
case 'n':
@@ -5741,12 +6109,28 @@ int main(int argc, char *argv[])
case 'H':
show_header = 0;
break;
+ case 'Q':
+ show_queues = 0;
+ break;
case 'O':
oneline = 1;
break;
case OPT_INET_SOCKOPT:
show_inet_sockopt = 1;
break;
+#ifdef ENABLE_BPF_SKSTORAGE_SUPPORT
+ case OPT_BPF_MAPS:
+ if (bpf_map_opts.nr_maps) {
+ bpf_map_opts_mixed_error();
+ return -1;
+ }
+ bpf_map_opts.show_all = true;
+ break;
+ case OPT_BPF_MAP_ID:
+ if (bpf_map_opts_add_id(optarg))
+ exit(1);
+ break;
+#endif
case 'h':
help();
case '?':
@@ -5839,6 +6223,11 @@ int main(int argc, char *argv[])
if (!show_processes)
columns[COL_PROC].disabled = 1;
+ if (!show_queues) {
+ columns[COL_SENDQ].disabled = 1;
+ columns[COL_RECVQ].disabled = 1;
+ }
+
if (!(current_filter.dbs & (current_filter.dbs - 1)))
columns[COL_NETID].disabled = 1;
@@ -5881,6 +6270,10 @@ int main(int argc, char *argv[])
if (show_processes || show_threads || show_proc_ctx || show_sock_ctx)
user_ent_destroy();
+#ifdef ENABLE_BPF_SKSTORAGE_SUPPORT
+ bpf_map_opts_destroy();
+#endif
+
render();
return 0;
diff --git a/tc/m_action.c b/tc/m_action.c
index d26a1c522..e0b9ebeed 100644
--- a/tc/m_action.c
+++ b/tc/m_action.c
@@ -688,7 +688,16 @@ static int tc_action_gd(int cmd, unsigned int flags,
req.n.nlmsg_seq = rth.dump = ++rth.seq;
- if (rtnl_talk(&rth, &req.n, cmd == RTM_DELACTION ? NULL : &ans) < 0) {
+ if (cmd == RTM_DELACTION) {
+ if (echo_request)
+ ret = rtnl_echo_talk(&rth, &req.n, json, print_action);
+ else
+ ret = rtnl_talk(&rth, &req.n, NULL);
+ } else {
+ ret = rtnl_talk(&rth, &req.n, &ans);
+ }
+
+ if (ret < 0) {
fprintf(stderr, "We have an error talking to the kernel\n");
return 1;
}
@@ -738,7 +747,12 @@ static int tc_action_modify(int cmd, unsigned int flags,
}
tail->rta_len = (void *) NLMSG_TAIL(&req.n) - (void *) tail;
- if (rtnl_talk(&rth, &req.n, NULL) < 0) {
+ if (echo_request)
+ ret = rtnl_echo_talk(&rth, &req.n, json, print_action);
+ else
+ ret = rtnl_talk(&rth, &req.n, NULL);
+
+ if (ret < 0) {
fprintf(stderr, "We have an error talking to the kernel\n");
ret = -1;
}
@@ -836,7 +850,12 @@ static int tc_act_list_or_flush(int *argc_p, char ***argv_p, int event)
req.n.nlmsg_type = RTM_DELACTION;
req.n.nlmsg_flags |= NLM_F_ROOT;
req.n.nlmsg_flags |= NLM_F_REQUEST;
- if (rtnl_talk(&rth, &req.n, NULL) < 0) {
+
+ if (echo_request)
+ ret = rtnl_echo_talk(&rth, &req.n, json, print_action);
+ else
+ ret = rtnl_talk(&rth, &req.n, NULL);
+ if (ret < 0) {
fprintf(stderr, "We have an error flushing\n");
return 1;
}
diff --git a/tc/m_mirred.c b/tc/m_mirred.c
index e5653e67f..60bd90452 100644
--- a/tc/m_mirred.c
+++ b/tc/m_mirred.c
@@ -24,12 +24,16 @@ static void
explain(void)
{
fprintf(stderr,
- "Usage: mirred <DIRECTION> <ACTION> [index INDEX] <dev DEVICENAME>\n"
+ "Usage: mirred <DIRECTION> <ACTION> [index INDEX] <TARGET>\n"
"where:\n"
"\tDIRECTION := <ingress | egress>\n"
"\tACTION := <mirror | redirect>\n"
"\tINDEX is the specific policy instance id\n"
- "\tDEVICENAME is the devicename\n");
+ "\tTARGET := <BLOCK | DEVICE>\n"
+ "\tDEVICE := dev DEVICENAME\n"
+ "\tDEVICENAME is the devicename\n"
+ "\tBLOCK := blockid BLOCKID\n"
+ "\tBLOCKID := 32-bit unsigned block ID\n");
}
static void
@@ -94,6 +98,7 @@ parse_direction(struct action_util *a, int *argc_p, char ***argv_p,
struct tc_mirred p = {};
struct rtattr *tail;
char d[IFNAMSIZ] = {};
+ __u32 blockid = 0;
while (argc > 0) {
@@ -162,15 +167,37 @@ parse_direction(struct action_util *a, int *argc_p, char ***argv_p,
TCA_INGRESS_REDIR;
p.action = TC_ACT_STOLEN;
ok++;
- } else if ((redir || mirror) &&
- matches(*argv, "dev") == 0) {
- NEXT_ARG();
- if (strlen(d))
- duparg("dev", *argv);
-
- strncpy(d, *argv, sizeof(d)-1);
- argc--;
- argv++;
+ } else if ((redir || mirror)) {
+ if (strcmp(*argv, "blockid") == 0) {
+ if (strlen(d)) {
+ fprintf(stderr,
+ "blockid and device are mutually exclusive.\n");
+ return -1;
+ }
+ NEXT_ARG();
+ if (get_u32(&blockid, *argv, 0) ||
+ !blockid) {
+ fprintf(stderr,
+ "invalid block ID");
+ return -1;
+ }
+ argc--;
+ argv++;
+ }
+ if (argc && matches(*argv, "dev") == 0) {
+ if (blockid) {
+ fprintf(stderr,
+ "blockid and device are mutually exclusive.\n");
+ return -1;
+ }
+ NEXT_ARG();
+ if (strlen(d))
+ duparg("dev", *argv);
+
+ strncpy(d, *argv, sizeof(d)-1);
+ argc--;
+ argv++;
+ }
break;
@@ -220,6 +247,8 @@ parse_direction(struct action_util *a, int *argc_p, char ***argv_p,
tail = addattr_nest(n, MAX_MSG, tca_id);
addattr_l(n, MAX_MSG, TCA_MIRRED_PARMS, &p, sizeof(p));
+ if (blockid)
+ addattr32(n, MAX_MSG, TCA_MIRRED_BLOCKID, blockid);
addattr_nest_end(n, tail);
*argc_p = argc;
@@ -299,7 +328,15 @@ print_mirred(struct action_util *au, FILE *f, struct rtattr *arg)
mirred_action(p->eaction));
print_string(PRINT_JSON, "direction", NULL,
mirred_direction(p->eaction));
- print_string(PRINT_ANY, "to_dev", " to device %s)", dev);
+ if (tb[TCA_MIRRED_BLOCKID]) {
+ const __u32 *blockid = RTA_DATA(tb[TCA_MIRRED_BLOCKID]);
+
+ print_uint(PRINT_ANY, "to_blockid", " to blockid %u)",
+ *blockid);
+ } else {
+ print_string(PRINT_ANY, "to_dev", " to device %s)", dev);
+ }
+
print_action_control(f, " ", p->action, "");
print_nl();
diff --git a/tc/tc.c b/tc/tc.c
index 575157a86..7a746cf51 100644
--- a/tc/tc.c
+++ b/tc/tc.c
@@ -38,6 +38,8 @@ int json;
int oneline;
int brief;
+int echo_request;
+
static char *conf_file;
struct rtnl_handle rth;
@@ -196,7 +198,7 @@ static void usage(void)
" -o[neline] | -j[son] | -p[retty] | -c[olor]\n"
" -b[atch] [filename] | -n[etns] name | -N[umeric] |\n"
" -nm | -nam[es] | { -cf | -conf } path\n"
- " -br[ief] }\n");
+ " -br[ief] | -echo }\n");
}
static int do_cmd(int argc, char **argv)
@@ -314,6 +316,8 @@ int main(int argc, char **argv)
++oneline;
} else if (matches(argv[1], "-brief") == 0) {
++brief;
+ } else if (strcmp(argv[1], "-echo") == 0) {
+ ++echo_request;
} else {
fprintf(stderr,
"Option \"%s\" is unknown, try \"tc -help\".\n",
diff --git a/tc/tc_filter.c b/tc/tc_filter.c
index eb45c5887..54790ddc6 100644
--- a/tc/tc_filter.c
+++ b/tc/tc_filter.c
@@ -76,6 +76,7 @@ static int tc_filter_modify(int cmd, unsigned int flags, int argc, char **argv)
char d[IFNAMSIZ] = {};
char k[FILTER_NAMESZ] = {};
struct tc_estimator est = {};
+ int ret;
if (cmd == RTM_NEWTFILTER && flags & NLM_F_CREATE)
protocol = htons(ETH_P_ALL);
@@ -221,7 +222,12 @@ static int tc_filter_modify(int cmd, unsigned int flags, int argc, char **argv)
if (est.ewma_log)
addattr_l(&req.n, sizeof(req), TCA_RATE, &est, sizeof(est));
- if (rtnl_talk(&rth, &req.n, NULL) < 0) {
+ if (echo_request)
+ ret = rtnl_echo_talk(&rth, &req.n, json, print_filter);
+ else
+ ret = rtnl_talk(&rth, &req.n, NULL);
+
+ if (ret < 0) {
fprintf(stderr, "We have an error talking to the kernel\n");
return 2;
}