aboutsummaryrefslogtreecommitdiffstats
path: root/net/sched
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2023-06-15 22:18:58 -0700
committerJakub Kicinski <kuba@kernel.org>2023-06-15 22:19:41 -0700
commit173780ff18a93298ca84224cc79df69f9cc198ce (patch)
tree5584d6e60e0bfb581dbfaf922ef3e28809ad68f8 /net/sched
parent97c5209b3d374a25ebdb4c2ea9e9c1b121768da0 (diff)
parent40f71e7cd3c6ac04293556ab0504a372393838ff (diff)
downloadlinux-173780ff18a93298ca84224cc79df69f9cc198ce.tar.gz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR. Conflicts: include/linux/mlx5/driver.h 617f5db1a626 ("RDMA/mlx5: Fix affinity assignment") dc13180824b7 ("net/mlx5: Enable devlink port for embedded cpu VF vports") https://lore.kernel.org/all/20230613125939.595e50b8@canb.auug.org.au/ tools/testing/selftests/net/mptcp/mptcp_join.sh 47867f0a7e83 ("selftests: mptcp: join: skip check if MIB counter not supported") 425ba803124b ("selftests: mptcp: join: support RM_ADDR for used endpoints or not") 45b1a1227a7a ("mptcp: introduces more address related mibs") 0639fa230a21 ("selftests: mptcp: add explicit check for new mibs") https://lore.kernel.org/netdev/20230609-upstream-net-20230610-mptcp-selftests-support-old-kernels-part-3-v1-0-2896fe2ee8a3@tessares.net/ No adjacent changes. Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/act_ct.c9
-rw-r--r--net/sched/act_pedit.c48
-rw-r--r--net/sched/cls_api.c12
-rw-r--r--net/sched/cls_u32.c18
-rw-r--r--net/sched/sch_api.c44
-rw-r--r--net/sched/sch_generic.c14
-rw-r--r--net/sched/sch_taprio.c3
7 files changed, 113 insertions, 35 deletions
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 9cc0bc7c71ed7..abc71a06d634a 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -610,6 +610,7 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
struct flow_offload_tuple tuple = {};
enum ip_conntrack_info ctinfo;
struct tcphdr *tcph = NULL;
+ bool force_refresh = false;
struct flow_offload *flow;
struct nf_conn *ct;
u8 dir;
@@ -647,6 +648,7 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
* established state, then don't refresh.
*/
return false;
+ force_refresh = true;
}
if (tcph && (unlikely(tcph->fin || tcph->rst))) {
@@ -660,7 +662,12 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
else
ctinfo = IP_CT_ESTABLISHED_REPLY;
- flow_offload_refresh(nf_ft, flow);
+ flow_offload_refresh(nf_ft, flow, force_refresh);
+ if (!test_bit(IPS_ASSURED_BIT, &ct->status)) {
+ /* Process this flow in SW to allow promoting to ASSURED */
+ return false;
+ }
+
nf_conntrack_get(&ct->ct_general);
nf_ct_set(skb, ct, ctinfo);
if (nf_ft->flags & NF_FLOWTABLE_COUNTER)
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 8c4e7fddddbf9..b562fc2bb5b19 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -13,7 +13,10 @@
#include <linux/rtnetlink.h>
#include <linux/module.h>
#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
#include <linux/slab.h>
+#include <net/ipv6.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <linux/tc_act/tc_pedit.h>
@@ -325,28 +328,58 @@ static bool offset_valid(struct sk_buff *skb, int offset)
return true;
}
-static void pedit_skb_hdr_offset(struct sk_buff *skb,
+static int pedit_l4_skb_offset(struct sk_buff *skb, int *hoffset, const int header_type)
+{
+ const int noff = skb_network_offset(skb);
+ int ret = -EINVAL;
+ struct iphdr _iph;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP): {
+ const struct iphdr *iph = skb_header_pointer(skb, noff, sizeof(_iph), &_iph);
+
+ if (!iph)
+ goto out;
+ *hoffset = noff + iph->ihl * 4;
+ ret = 0;
+ break;
+ }
+ case htons(ETH_P_IPV6):
+ ret = ipv6_find_hdr(skb, hoffset, header_type, NULL, NULL) == header_type ? 0 : -EINVAL;
+ break;
+ }
+out:
+ return ret;
+}
+
+static int pedit_skb_hdr_offset(struct sk_buff *skb,
enum pedit_header_type htype, int *hoffset)
{
+ int ret = -EINVAL;
/* 'htype' is validated in the netlink parsing */
switch (htype) {
case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH:
- if (skb_mac_header_was_set(skb))
+ if (skb_mac_header_was_set(skb)) {
*hoffset = skb_mac_offset(skb);
+ ret = 0;
+ }
break;
case TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK:
case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4:
case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6:
*hoffset = skb_network_offset(skb);
+ ret = 0;
break;
case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP:
+ ret = pedit_l4_skb_offset(skb, hoffset, IPPROTO_TCP);
+ break;
case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP:
- if (skb_transport_header_was_set(skb))
- *hoffset = skb_transport_offset(skb);
+ ret = pedit_l4_skb_offset(skb, hoffset, IPPROTO_UDP);
break;
default:
break;
}
+ return ret;
}
TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
@@ -382,6 +415,7 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
int hoffset = 0;
u32 *ptr, hdata;
u32 val;
+ int rc;
if (tkey_ex) {
htype = tkey_ex->htype;
@@ -390,7 +424,11 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
tkey_ex++;
}
- pedit_skb_hdr_offset(skb, htype, &hoffset);
+ rc = pedit_skb_hdr_offset(skb, htype, &hoffset);
+ if (rc) {
+ pr_info_ratelimited("tc action pedit unable to extract header offset for header type (0x%x)\n", htype);
+ goto bad;
+ }
if (tkey->offmask) {
u8 *d, _d;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index c877a6343fd47..a193cc7b32418 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -657,8 +657,8 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
{
struct tcf_block *block = chain->block;
const struct tcf_proto_ops *tmplt_ops;
+ unsigned int refcnt, non_act_refcnt;
bool free_block = false;
- unsigned int refcnt;
void *tmplt_priv;
mutex_lock(&block->lock);
@@ -678,13 +678,15 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
* save these to temporary variables.
*/
refcnt = --chain->refcnt;
+ non_act_refcnt = refcnt - chain->action_refcnt;
tmplt_ops = chain->tmplt_ops;
tmplt_priv = chain->tmplt_priv;
- /* The last dropped non-action reference will trigger notification. */
- if (refcnt - chain->action_refcnt == 0 && !by_act) {
- tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain->index,
- block, NULL, 0, 0, false);
+ if (non_act_refcnt == chain->explicitly_created && !by_act) {
+ if (non_act_refcnt == 0)
+ tc_chain_notify_delete(tmplt_ops, tmplt_priv,
+ chain->index, block, NULL, 0, 0,
+ false);
/* Last reference to chain, no need to lock. */
chain->flushing = false;
}
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 4e2e269f121f8..d15d50de79802 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -718,13 +718,19 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
struct nlattr *est, u32 flags, u32 fl_flags,
struct netlink_ext_ack *extack)
{
- int err;
+ int err, ifindex = -1;
err = tcf_exts_validate_ex(net, tp, tb, est, &n->exts, flags,
fl_flags, extack);
if (err < 0)
return err;
+ if (tb[TCA_U32_INDEV]) {
+ ifindex = tcf_change_indev(net, tb[TCA_U32_INDEV], extack);
+ if (ifindex < 0)
+ return -EINVAL;
+ }
+
if (tb[TCA_U32_LINK]) {
u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
struct tc_u_hnode *ht_down = NULL, *ht_old;
@@ -759,13 +765,9 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
tcf_bind_filter(tp, &n->res, base);
}
- if (tb[TCA_U32_INDEV]) {
- int ret;
- ret = tcf_change_indev(net, tb[TCA_U32_INDEV], extack);
- if (ret < 0)
- return -EINVAL;
- n->ifindex = ret;
- }
+ if (ifindex >= 0)
+ n->ifindex = ifindex;
+
return 0;
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index e4b6452318c0a..aa6b1fe651519 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1079,17 +1079,29 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
if (parent == NULL) {
unsigned int i, num_q, ingress;
+ struct netdev_queue *dev_queue;
ingress = 0;
num_q = dev->num_tx_queues;
if ((q && q->flags & TCQ_F_INGRESS) ||
(new && new->flags & TCQ_F_INGRESS)) {
- num_q = 1;
ingress = 1;
- if (!dev_ingress_queue(dev)) {
+ dev_queue = dev_ingress_queue(dev);
+ if (!dev_queue) {
NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
return -ENOENT;
}
+
+ q = rtnl_dereference(dev_queue->qdisc_sleeping);
+
+ /* This is the counterpart of that qdisc_refcount_inc_nz() call in
+ * __tcf_qdisc_find() for filter requests.
+ */
+ if (!qdisc_refcount_dec_if_one(q)) {
+ NL_SET_ERR_MSG(extack,
+ "Current ingress or clsact Qdisc has ongoing filter requests");
+ return -EBUSY;
+ }
}
if (dev->flags & IFF_UP)
@@ -1100,18 +1112,26 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
if (new && new->ops->attach && !ingress)
goto skip;
- for (i = 0; i < num_q; i++) {
- struct netdev_queue *dev_queue = dev_ingress_queue(dev);
-
- if (!ingress)
+ if (!ingress) {
+ for (i = 0; i < num_q; i++) {
dev_queue = netdev_get_tx_queue(dev, i);
+ old = dev_graft_qdisc(dev_queue, new);
- old = dev_graft_qdisc(dev_queue, new);
- if (new && i > 0)
- qdisc_refcount_inc(new);
-
- if (!ingress)
+ if (new && i > 0)
+ qdisc_refcount_inc(new);
qdisc_put(old);
+ }
+ } else {
+ old = dev_graft_qdisc(dev_queue, NULL);
+
+ /* {ingress,clsact}_destroy() @old before grafting @new to avoid
+ * unprotected concurrent accesses to net_device::miniq_{in,e}gress
+ * pointer(s) in mini_qdisc_pair_swap().
+ */
+ qdisc_notify(net, skb, n, classid, old, new, extack);
+ qdisc_destroy(old);
+
+ dev_graft_qdisc(dev_queue, new);
}
skip:
@@ -1125,8 +1145,6 @@ skip:
if (new && new->ops->attach)
new->ops->attach(new);
- } else {
- notify_and_destroy(net, skb, n, classid, old, new, extack);
}
if (dev->flags & IFF_UP)
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 3248259eba32a..5d7e23f4cc0ee 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -1046,7 +1046,7 @@ static void qdisc_free_cb(struct rcu_head *head)
qdisc_free(q);
}
-static void qdisc_destroy(struct Qdisc *qdisc)
+static void __qdisc_destroy(struct Qdisc *qdisc)
{
const struct Qdisc_ops *ops = qdisc->ops;
@@ -1070,6 +1070,14 @@ static void qdisc_destroy(struct Qdisc *qdisc)
call_rcu(&qdisc->rcu, qdisc_free_cb);
}
+void qdisc_destroy(struct Qdisc *qdisc)
+{
+ if (qdisc->flags & TCQ_F_BUILTIN)
+ return;
+
+ __qdisc_destroy(qdisc);
+}
+
void qdisc_put(struct Qdisc *qdisc)
{
if (!qdisc)
@@ -1079,7 +1087,7 @@ void qdisc_put(struct Qdisc *qdisc)
!refcount_dec_and_test(&qdisc->refcnt))
return;
- qdisc_destroy(qdisc);
+ __qdisc_destroy(qdisc);
}
EXPORT_SYMBOL(qdisc_put);
@@ -1094,7 +1102,7 @@ void qdisc_put_unlocked(struct Qdisc *qdisc)
!refcount_dec_and_rtnl_lock(&qdisc->refcnt))
return;
- qdisc_destroy(qdisc);
+ __qdisc_destroy(qdisc);
rtnl_unlock();
}
EXPORT_SYMBOL(qdisc_put_unlocked);
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index c6627f5abdfa2..717ae51d94a0a 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -800,6 +800,9 @@ static struct sk_buff *taprio_dequeue_tc_priority(struct Qdisc *sch,
taprio_next_tc_txq(dev, tc, &q->cur_txq[tc]);
+ if (q->cur_txq[tc] >= dev->num_tx_queues)
+ q->cur_txq[tc] = first_txq;
+
if (skb)
return skb;
} while (q->cur_txq[tc] != first_txq);