aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-07-21 11:08:35 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-07-21 11:08:35 -0700
commit7ca433dc6dedb2ec98dfc943f6db0c9b8996ed11 (patch)
tree9d7b96d12c1e52063210bbddfe8bdcca8068bf89
parentb67fbebd4cf980aecbcc750e1462128bffe8ae15 (diff)
parent44484fa8eedf1c6e8f23ba2675b266abdd170a6e (diff)
downloadlinux-gpio-intel-7ca433dc6dedb2ec98dfc943f6db0c9b8996ed11.tar.gz
Merge tag 'net-5.19-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Paolo Abeni: "Including fixes from can. Still no major regressions, most of the changes are still due to data races fixes, plus the usual bunch of drivers fixes. Previous releases - regressions: - tcp/udp: make early_demux back namespacified. - dsa: fix issues with vlan_filtering_is_global Previous releases - always broken: - ip: fix data-races around ipv4_net_table (round 2, 3 & 4) - amt: fix validation and synchronization bugs - can: fix detection of mcp251863 - eth: iavf: fix handling of dummy receive descriptors - eth: lan966x: fix issues with MAC table - eth: stmmac: dwmac-mediatek: fix clock issue Misc: - dsa: update documentation" * tag 'net-5.19-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (107 commits) mlxsw: spectrum_router: Fix IPv4 nexthop gateway indication net/sched: cls_api: Fix flow action initialization tcp: Fix data-races around sysctl_tcp_max_reordering. tcp: Fix a data-race around sysctl_tcp_abort_on_overflow. tcp: Fix a data-race around sysctl_tcp_rfc1337. tcp: Fix a data-race around sysctl_tcp_stdurg. tcp: Fix a data-race around sysctl_tcp_retrans_collapse. tcp: Fix data-races around sysctl_tcp_slow_start_after_idle. tcp: Fix a data-race around sysctl_tcp_thin_linear_timeouts. tcp: Fix data-races around sysctl_tcp_recovery. tcp: Fix a data-race around sysctl_tcp_early_retrans. tcp: Fix data-races around sysctl knobs related to SYN option. udp: Fix a data-race around sysctl_udp_l3mdev_accept. ip: Fix data-races around sysctl_ip_prot_sock. ipv4: Fix data-races around sysctl_fib_multipath_hash_fields. ipv4: Fix data-races around sysctl_fib_multipath_hash_policy. ipv4: Fix a data-race around sysctl_fib_multipath_use_neigh. can: rcar_canfd: Add missing of_node_put() in rcar_canfd_probe() can: mcp251xfd: fix detection of mcp251863 Documentation: fix udp_wmem_min in ip-sysctl.rst ...
-rw-r--r--Documentation/networking/dsa/dsa.rst363
-rw-r--r--Documentation/networking/ip-sysctl.rst6
-rw-r--r--drivers/net/amt.c243
-rw-r--r--drivers/net/can/rcar/rcar_canfd.c1
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c18
-rw-r--r--drivers/net/dsa/microchip/ksz_common.c5
-rw-r--r--drivers/net/dsa/sja1105/sja1105_main.c16
-rw-r--r--drivers/net/dsa/vitesse-vsc73xx-spi.c10
-rw-r--r--drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c6
-rw-r--r--drivers/net/ethernet/emulex/benet/be_cmds.c10
-rw-r--r--drivers/net/ethernet/emulex/benet/be_cmds.h2
-rw-r--r--drivers/net/ethernet/emulex/benet/be_ethtool.c31
-rw-r--r--drivers/net/ethernet/intel/e1000e/hw.h1
-rw-r--r--drivers/net/ethernet/intel/e1000e/ich8lan.c4
-rw-r--r--drivers/net/ethernet/intel/e1000e/ich8lan.h1
-rw-r--r--drivers/net/ethernet/intel/e1000e/netdev.c30
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c13
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf.h14
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_ethtool.c10
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_main.c11
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_txrx.c7
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_virtchnl.c65
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c3
-rw-r--r--drivers/net/ethernet/intel/igc/igc_regs.h5
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe.h1
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c3
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c6
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_flower.c6
-rw-r--r--drivers/net/ethernet/mediatek/mtk_ppe_offload.c3
-rw-r--r--drivers/net/ethernet/mediatek/mtk_wed.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c9
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_mac.c112
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/action.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c25
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c49
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4.h3
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c7
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac.h1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c8
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c5
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c22
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c8
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c12
-rw-r--r--drivers/net/usb/r8152.c16
-rw-r--r--include/linux/stmmac.h1
-rw-r--r--include/net/amt.h20
-rw-r--r--include/net/inet_hashtables.h2
-rw-r--r--include/net/inet_sock.h9
-rw-r--r--include/net/ip.h6
-rw-r--r--include/net/protocol.h4
-rw-r--r--include/net/route.h2
-rw-r--r--include/net/tcp.h20
-rw-r--r--include/net/udp.h4
-rw-r--r--net/core/filter.c4
-rw-r--r--net/core/secure_seq.c4
-rw-r--r--net/core/sock_reuseport.c4
-rw-r--r--net/dsa/port.c7
-rw-r--r--net/ipv4/af_inet.c18
-rw-r--r--net/ipv4/ah4.c2
-rw-r--r--net/ipv4/esp4.c2
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/icmp.c2
-rw-r--r--net/ipv4/igmp.c49
-rw-r--r--net/ipv4/inet_connection_sock.c5
-rw-r--r--net/ipv4/ip_forward.c2
-rw-r--r--net/ipv4/ip_input.c37
-rw-r--r--net/ipv4/ip_sockglue.c8
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c4
-rw-r--r--net/ipv4/proc.c2
-rw-r--r--net/ipv4/route.c10
-rw-r--r--net/ipv4/syncookies.c9
-rw-r--r--net/ipv4/sysctl_net_ipv4.c65
-rw-r--r--net/ipv4/tcp.c13
-rw-r--r--net/ipv4/tcp_fastopen.c9
-rw-r--r--net/ipv4/tcp_input.c51
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv4/tcp_metrics.c3
-rw-r--r--net/ipv4/tcp_minisocks.c4
-rw-r--r--net/ipv4/tcp_output.c29
-rw-r--r--net/ipv4/tcp_recovery.c6
-rw-r--r--net/ipv4/tcp_timer.c30
-rw-r--r--net/ipv6/af_inet6.c2
-rw-r--r--net/ipv6/ip6_input.c23
-rw-r--r--net/ipv6/syncookies.c3
-rw-r--r--net/ipv6/tcp_ipv6.c9
-rw-r--r--net/ipv6/udp.c9
-rw-r--r--net/netfilter/nf_synproxy_core.c2
-rw-r--r--net/sched/cls_api.c16
-rw-r--r--net/sctp/protocol.c2
-rw-r--r--net/smc/smc_llc.c2
-rw-r--r--net/tls/tls_device.c8
-rw-r--r--net/xfrm/xfrm_policy.c5
-rw-r--r--net/xfrm/xfrm_state.c2
93 files changed, 1117 insertions, 592 deletions
diff --git a/Documentation/networking/dsa/dsa.rst b/Documentation/networking/dsa/dsa.rst
index ed7fa76e7a404..d742ba6bd2119 100644
--- a/Documentation/networking/dsa/dsa.rst
+++ b/Documentation/networking/dsa/dsa.rst
@@ -503,26 +503,108 @@ per-port PHY specific details: interface connection, MDIO bus location, etc.
Driver development
==================
-DSA switch drivers need to implement a dsa_switch_ops structure which will
+DSA switch drivers need to implement a ``dsa_switch_ops`` structure which will
contain the various members described below.
-``register_switch_driver()`` registers this dsa_switch_ops in its internal list
-of drivers to probe for. ``unregister_switch_driver()`` does the exact opposite.
+Probing, registration and device lifetime
+-----------------------------------------
-Unless requested differently by setting the priv_size member accordingly, DSA
-does not allocate any driver private context space.
+DSA switches are regular ``device`` structures on buses (be they platform, SPI,
+I2C, MDIO or otherwise). The DSA framework is not involved in their probing
+with the device core.
+
+Switch registration from the perspective of a driver means passing a valid
+``struct dsa_switch`` pointer to ``dsa_register_switch()``, usually from the
+switch driver's probing function. The following members must be valid in the
+provided structure:
+
+- ``ds->dev``: will be used to parse the switch's OF node or platform data.
+
+- ``ds->num_ports``: will be used to create the port list for this switch, and
+ to validate the port indices provided in the OF node.
+
+- ``ds->ops``: a pointer to the ``dsa_switch_ops`` structure holding the DSA
+ method implementations.
+
+- ``ds->priv``: backpointer to a driver-private data structure which can be
+ retrieved in all further DSA method callbacks.
+
+In addition, the following flags in the ``dsa_switch`` structure may optionally
+be configured to obtain driver-specific behavior from the DSA core. Their
+behavior when set is documented through comments in ``include/net/dsa.h``.
+
+- ``ds->vlan_filtering_is_global``
+
+- ``ds->needs_standalone_vlan_filtering``
+
+- ``ds->configure_vlan_while_not_filtering``
+
+- ``ds->untag_bridge_pvid``
+
+- ``ds->assisted_learning_on_cpu_port``
+
+- ``ds->mtu_enforcement_ingress``
+
+- ``ds->fdb_isolation``
+
+Internally, DSA keeps an array of switch trees (group of switches) global to
+the kernel, and attaches a ``dsa_switch`` structure to a tree on registration.
+The tree ID to which the switch is attached is determined by the first u32
+number of the ``dsa,member`` property of the switch's OF node (0 if missing).
+The switch ID within the tree is determined by the second u32 number of the
+same OF property (0 if missing). Registering multiple switches with the same
+switch ID and tree ID is illegal and will cause an error. Using platform data,
+a single switch and a single switch tree is permitted.
+
+In case of a tree with multiple switches, probing takes place asymmetrically.
+The first N-1 callers of ``dsa_register_switch()`` only add their ports to the
+port list of the tree (``dst->ports``), each port having a backpointer to its
+associated switch (``dp->ds``). Then, these switches exit their
+``dsa_register_switch()`` call early, because ``dsa_tree_setup_routing_table()``
+has determined that the tree is not yet complete (not all ports referenced by
+DSA links are present in the tree's port list). The tree becomes complete when
+the last switch calls ``dsa_register_switch()``, and this triggers the effective
+continuation of initialization (including the call to ``ds->ops->setup()``) for
+all switches within that tree, all as part of the calling context of the last
+switch's probe function.
+
+The opposite of registration takes place when calling ``dsa_unregister_switch()``,
+which removes a switch's ports from the port list of the tree. The entire tree
+is torn down when the first switch unregisters.
+
+It is mandatory for DSA switch drivers to implement the ``shutdown()`` callback
+of their respective bus, and call ``dsa_switch_shutdown()`` from it (a minimal
+version of the full teardown performed by ``dsa_unregister_switch()``).
+The reason is that DSA keeps a reference on the master net device, and if the
+driver for the master device decides to unbind on shutdown, DSA's reference
+will block that operation from finalizing.
+
+Either ``dsa_switch_shutdown()`` or ``dsa_unregister_switch()`` must be called,
+but not both, and the device driver model permits the bus' ``remove()`` method
+to be called even if ``shutdown()`` was already called. Therefore, drivers are
+expected to implement a mutual exclusion method between ``remove()`` and
+``shutdown()`` by setting their drvdata to NULL after any of these has run, and
+checking whether the drvdata is NULL before proceeding to take any action.
+
+After ``dsa_switch_shutdown()`` or ``dsa_unregister_switch()`` was called, no
+further callbacks via the provided ``dsa_switch_ops`` may take place, and the
+driver may free the data structures associated with the ``dsa_switch``.
Switch configuration
--------------------
-- ``tag_protocol``: this is to indicate what kind of tagging protocol is supported,
- should be a valid value from the ``dsa_tag_protocol`` enum
+- ``get_tag_protocol``: this is to indicate what kind of tagging protocol is
+ supported, should be a valid value from the ``dsa_tag_protocol`` enum.
+ The returned information does not have to be static; the driver is passed the
+ CPU port number, as well as the tagging protocol of a possibly stacked
+ upstream switch, in case there are hardware limitations in terms of supported
+ tag formats.
-- ``probe``: probe routine which will be invoked by the DSA platform device upon
- registration to test for the presence/absence of a switch device. For MDIO
- devices, it is recommended to issue a read towards internal registers using
- the switch pseudo-PHY and return whether this is a supported device. For other
- buses, return a non-NULL string
+- ``change_tag_protocol``: when the default tagging protocol has compatibility
+ problems with the master or other issues, the driver may support changing it
+ at runtime, either through a device tree property or through sysfs. In that
+ case, further calls to ``get_tag_protocol`` should report the protocol in
+ current use.
- ``setup``: setup function for the switch, this function is responsible for setting
up the ``dsa_switch_ops`` private structure with all it needs: register maps,
@@ -535,7 +617,17 @@ Switch configuration
fully configured and ready to serve any kind of request. It is recommended
to issue a software reset of the switch during this setup function in order to
avoid relying on what a previous software agent such as a bootloader/firmware
- may have previously configured.
+ may have previously configured. The method responsible for undoing any
+ applicable allocations or operations done here is ``teardown``.
+
+- ``port_setup`` and ``port_teardown``: methods for initialization and
+ destruction of per-port data structures. It is mandatory for some operations
+ such as registering and unregistering devlink port regions to be done from
+ these methods, otherwise they are optional. A port will be torn down only if
+ it has been previously set up. It is possible for a port to be set up during
+ probing only to be torn down immediately afterwards, for example in case its
+ PHY cannot be found. In this case, probing of the DSA switch continues
+ without that particular port.
PHY devices and link management
-------------------------------
@@ -635,26 +727,198 @@ Power management
``BR_STATE_DISABLED`` and propagating changes to the hardware if this port is
disabled while being a bridge member
+Address databases
+-----------------
+
+Switching hardware is expected to have a table for FDB entries, however not all
+of them are active at the same time. An address database is the subset (partition)
+of FDB entries that is active (can be matched by address learning on RX, or FDB
+lookup on TX) depending on the state of the port. An address database may
+occasionally be called "FID" (Filtering ID) in this document, although the
+underlying implementation may choose whatever is available to the hardware.
+
+For example, all ports that belong to a VLAN-unaware bridge (which is
+*currently* VLAN-unaware) are expected to learn source addresses in the
+database associated by the driver with that bridge (and not with other
+VLAN-unaware bridges). During forwarding and FDB lookup, a packet received on a
+VLAN-unaware bridge port should be able to find a VLAN-unaware FDB entry having
+the same MAC DA as the packet, which is present on another port member of the
+same bridge. At the same time, the FDB lookup process must be able to not find
+an FDB entry having the same MAC DA as the packet, if that entry points towards
+a port which is a member of a different VLAN-unaware bridge (and is therefore
+associated with a different address database).
+
+Similarly, each VLAN of each offloaded VLAN-aware bridge should have an
+associated address database, which is shared by all ports which are members of
+that VLAN, but not shared by ports belonging to different bridges that are
+members of the same VID.
+
+In this context, a VLAN-unaware database means that all packets are expected to
+match on it irrespective of VLAN ID (only MAC address lookup), whereas a
+VLAN-aware database means that packets are supposed to match based on the VLAN
+ID from the classified 802.1Q header (or the pvid if untagged).
+
+At the bridge layer, VLAN-unaware FDB entries have the special VID value of 0,
+whereas VLAN-aware FDB entries have non-zero VID values. Note that a
+VLAN-unaware bridge may have VLAN-aware (non-zero VID) FDB entries, and a
+VLAN-aware bridge may have VLAN-unaware FDB entries. As in hardware, the
+software bridge keeps separate address databases, and offloads to hardware the
+FDB entries belonging to these databases, through switchdev, asynchronously
+relative to the moment when the databases become active or inactive.
+
+When a user port operates in standalone mode, its driver should configure it to
+use a separate database called a port private database. This is different from
+the databases described above, and should impede operation as standalone port
+(packet in, packet out to the CPU port) as little as possible. For example,
+on ingress, it should not attempt to learn the MAC SA of ingress traffic, since
+learning is a bridging layer service and this is a standalone port, therefore
+it would consume useless space. With no address learning, the port private
+database should be empty in a naive implementation, and in this case, all
+received packets should be trivially flooded to the CPU port.
+
+DSA (cascade) and CPU ports are also called "shared" ports because they service
+multiple address databases, and the database that a packet should be associated
+to is usually embedded in the DSA tag. This means that the CPU port may
+simultaneously transport packets coming from a standalone port (which were
+classified by hardware in one address database), and from a bridge port (which
+were classified to a different address database).
+
+Switch drivers which satisfy certain criteria are able to optimize the naive
+configuration by removing the CPU port from the flooding domain of the switch,
+and just program the hardware with FDB entries pointing towards the CPU port
+for which it is known that software is interested in those MAC addresses.
+Packets which do not match a known FDB entry will not be delivered to the CPU,
+which will save CPU cycles required for creating an skb just to drop it.
+
+DSA is able to perform host address filtering for the following kinds of
+addresses:
+
+- Primary unicast MAC addresses of ports (``dev->dev_addr``). These are
+ associated with the port private database of the respective user port,
+ and the driver is notified to install them through ``port_fdb_add`` towards
+ the CPU port.
+
+- Secondary unicast and multicast MAC addresses of ports (addresses added
+ through ``dev_uc_add()`` and ``dev_mc_add()``). These are also associated
+ with the port private database of the respective user port.
+
+- Local/permanent bridge FDB entries (``BR_FDB_LOCAL``). These are the MAC
+ addresses of the bridge ports, for which packets must be terminated locally
+ and not forwarded. They are associated with the address database for that
+ bridge.
+
+- Static bridge FDB entries installed towards foreign (non-DSA) interfaces
+ present in the same bridge as some DSA switch ports. These are also
+ associated with the address database for that bridge.
+
+- Dynamically learned FDB entries on foreign interfaces present in the same
+ bridge as some DSA switch ports, only if ``ds->assisted_learning_on_cpu_port``
+ is set to true by the driver. These are associated with the address database
+ for that bridge.
+
+For various operations detailed below, DSA provides a ``dsa_db`` structure
+which can be of the following types:
+
+- ``DSA_DB_PORT``: the FDB (or MDB) entry to be installed or deleted belongs to
+ the port private database of user port ``db->dp``.
+- ``DSA_DB_BRIDGE``: the entry belongs to one of the address databases of bridge
+ ``db->bridge``. Separation between the VLAN-unaware database and the per-VID
+ databases of this bridge is expected to be done by the driver.
+- ``DSA_DB_LAG``: the entry belongs to the address database of LAG ``db->lag``.
+ Note: ``DSA_DB_LAG`` is currently unused and may be removed in the future.
+
+The drivers which act upon the ``dsa_db`` argument in ``port_fdb_add``,
+``port_mdb_add`` etc should declare ``ds->fdb_isolation`` as true.
+
+DSA associates each offloaded bridge and each offloaded LAG with a one-based ID
+(``struct dsa_bridge :: num``, ``struct dsa_lag :: id``) for the purposes of
+refcounting addresses on shared ports. Drivers may piggyback on DSA's numbering
+scheme (the ID is readable through ``db->bridge.num`` and ``db->lag.id`` or may
+implement their own.
+
+Only the drivers which declare support for FDB isolation are notified of FDB
+entries on the CPU port belonging to ``DSA_DB_PORT`` databases.
+For compatibility/legacy reasons, ``DSA_DB_BRIDGE`` addresses are notified to
+drivers even if they do not support FDB isolation. However, ``db->bridge.num``
+and ``db->lag.id`` are always set to 0 in that case (to denote the lack of
+isolation, for refcounting purposes).
+
+Note that it is not mandatory for a switch driver to implement physically
+separate address databases for each standalone user port. Since FDB entries in
+the port private databases will always point to the CPU port, there is no risk
+for incorrect forwarding decisions. In this case, all standalone ports may
+share the same database, but the reference counting of host-filtered addresses
+(not deleting the FDB entry for a port's MAC address if it's still in use by
+another port) becomes the responsibility of the driver, because DSA is unaware
+that the port databases are in fact shared. This can be achieved by calling
+``dsa_fdb_present_in_other_db()`` and ``dsa_mdb_present_in_other_db()``.
+The down side is that the RX filtering lists of each user port are in fact
+shared, which means that user port A may accept a packet with a MAC DA it
+shouldn't have, only because that MAC address was in the RX filtering list of
+user port B. These packets will still be dropped in software, however.
+
Bridge layer
------------
+Offloading the bridge forwarding plane is optional and handled by the methods
+below. They may be absent, return -EOPNOTSUPP, or ``ds->max_num_bridges`` may
+be non-zero and exceeded, and in this case, joining a bridge port is still
+possible, but the packet forwarding will take place in software, and the ports
+under a software bridge must remain configured in the same way as for
+standalone operation, i.e. have all bridging service functions (address
+learning etc) disabled, and send all received packets to the CPU port only.
+
+Concretely, a port starts offloading the forwarding plane of a bridge once it
+returns success to the ``port_bridge_join`` method, and stops doing so after
+``port_bridge_leave`` has been called. Offloading the bridge means autonomously
+learning FDB entries in accordance with the software bridge port's state, and
+autonomously forwarding (or flooding) received packets without CPU intervention.
+This is optional even when offloading a bridge port. Tagging protocol drivers
+are expected to call ``dsa_default_offload_fwd_mark(skb)`` for packets which
+have already been autonomously forwarded in the forwarding domain of the
+ingress switch port. DSA, through ``dsa_port_devlink_setup()``, considers all
+switch ports part of the same tree ID to be part of the same bridge forwarding
+domain (capable of autonomous forwarding to each other).
+
+Offloading the TX forwarding process of a bridge is a distinct concept from
+simply offloading its forwarding plane, and refers to the ability of certain
+driver and tag protocol combinations to transmit a single skb coming from the
+bridge device's transmit function to potentially multiple egress ports (and
+thereby avoid its cloning in software).
+
+Packets for which the bridge requests this behavior are called data plane
+packets and have ``skb->offload_fwd_mark`` set to true in the tag protocol
+driver's ``xmit`` function. Data plane packets are subject to FDB lookup,
+hardware learning on the CPU port, and do not override the port STP state.
+Additionally, replication of data plane packets (multicast, flooding) is
+handled in hardware and the bridge driver will transmit a single skb for each
+packet that may or may not need replication.
+
+When the TX forwarding offload is enabled, the tag protocol driver is
+responsible to inject packets into the data plane of the hardware towards the
+correct bridging domain (FID) that the port is a part of. The port may be
+VLAN-unaware, and in this case the FID must be equal to the FID used by the
+driver for its VLAN-unaware address database associated with that bridge.
+Alternatively, the bridge may be VLAN-aware, and in that case, it is guaranteed
+that the packet is also VLAN-tagged with the VLAN ID that the bridge processed
+this packet in. It is the responsibility of the hardware to untag the VID on
+the egress-untagged ports, or keep the tag on the egress-tagged ones.
+
- ``port_bridge_join``: bridge layer function invoked when a given switch port is
added to a bridge, this function should do what's necessary at the switch
level to permit the joining port to be added to the relevant logical
domain for it to ingress/egress traffic with other members of the bridge.
+ By setting the ``tx_fwd_offload`` argument to true, the TX forwarding process
+ of this bridge is also offloaded.
- ``port_bridge_leave``: bridge layer function invoked when a given switch port is
removed from a bridge, this function should do what's necessary at the
switch level to deny the leaving port from ingress/egress traffic from the
- remaining bridge members. When the port leaves the bridge, it should be aged
- out at the switch hardware for the switch to (re) learn MAC addresses behind
- this port.
+ remaining bridge members.
- ``port_stp_state_set``: bridge layer function invoked when a given switch port STP
state is computed by the bridge layer and should be propagated to switch
- hardware to forward/block/learn traffic. The switch driver is responsible for
- computing a STP state change based on current and asked parameters and perform
- the relevant ageing based on the intersection results
+ hardware to forward/block/learn traffic.
- ``port_bridge_flags``: bridge layer function invoked when a port must
configure its settings for e.g. flooding of unknown traffic or source address
@@ -667,21 +931,11 @@ Bridge layer
CPU port, and flooding towards the CPU port should also be enabled, due to a
lack of an explicit address filtering mechanism in the DSA core.
-- ``port_bridge_tx_fwd_offload``: bridge layer function invoked after
- ``port_bridge_join`` when a driver sets ``ds->num_fwd_offloading_bridges`` to
- a non-zero value. Returning success in this function activates the TX
- forwarding offload bridge feature for this port, which enables the tagging
- protocol driver to inject data plane packets towards the bridging domain that
- the port is a part of. Data plane packets are subject to FDB lookup, hardware
- learning on the CPU port, and do not override the port STP state.
- Additionally, replication of data plane packets (multicast, flooding) is
- handled in hardware and the bridge driver will transmit a single skb for each
- packet that needs replication. The method is provided as a configuration
- point for drivers that need to configure the hardware for enabling this
- feature.
-
-- ``port_bridge_tx_fwd_unoffload``: bridge layer function invoked when a driver
- leaves a bridge port which had the TX forwarding offload feature enabled.
+- ``port_fast_age``: bridge layer function invoked when flushing the
+ dynamically learned FDB entries on the port is necessary. This is called when
+ transitioning from an STP state where learning should take place to an STP
+ state where it shouldn't, or when leaving a bridge, or when address learning
+ is turned off via ``port_bridge_flags``.
Bridge VLAN filtering
---------------------
@@ -697,55 +951,44 @@ Bridge VLAN filtering
allowed.
- ``port_vlan_add``: bridge layer function invoked when a VLAN is configured
- (tagged or untagged) for the given switch port. If the operation is not
- supported by the hardware, this function should return ``-EOPNOTSUPP`` to
- inform the bridge code to fallback to a software implementation.
+ (tagged or untagged) for the given switch port. The CPU port becomes a member
+ of a VLAN only if a foreign bridge port is also a member of it (and
+ forwarding needs to take place in software), or the VLAN is installed to the
+ VLAN group of the bridge device itself, for termination purposes
+ (``bridge vlan add dev br0 vid 100 self``). VLANs on shared ports are
+ reference counted and removed when there is no user left. Drivers do not need
+ to manually install a VLAN on the CPU port.
- ``port_vlan_del``: bridge layer function invoked when a VLAN is removed from the
given switch port
-- ``port_vlan_dump``: bridge layer function invoked with a switchdev callback
- function that the driver has to call for each VLAN the given port is a member
- of. A switchdev object is used to carry the VID and bridge flags.
-
- ``port_fdb_add``: bridge layer function invoked when the bridge wants to install a
Forwarding Database entry, the switch hardware should be programmed with the
specified address in the specified VLAN Id in the forwarding database
- associated with this VLAN ID. If the operation is not supported, this
- function should return ``-EOPNOTSUPP`` to inform the bridge code to fallback to
- a software implementation.
-
-.. note:: VLAN ID 0 corresponds to the port private database, which, in the context
- of DSA, would be its port-based VLAN, used by the associated bridge device.
+ associated with this VLAN ID.
- ``port_fdb_del``: bridge layer function invoked when the bridge wants to remove a
Forwarding Database entry, the switch hardware should be programmed to delete
the specified MAC address from the specified VLAN ID if it was mapped into
this port forwarding database
-- ``port_fdb_dump``: bridge layer function invoked with a switchdev callback
- function that the driver has to call for each MAC address known to be behind
- the given port. A switchdev object is used to carry the VID and FDB info.
+- ``port_fdb_dump``: bridge bypass function invoked by ``ndo_fdb_dump`` on the
+ physical DSA port interfaces. Since DSA does not attempt to keep in sync its
+ hardware FDB entries with the software bridge, this method is implemented as
+ a means to view the entries visible on user ports in the hardware database.
+ The entries reported by this function have the ``self`` flag in the output of
+ the ``bridge fdb show`` command.
- ``port_mdb_add``: bridge layer function invoked when the bridge wants to install
- a multicast database entry. If the operation is not supported, this function
- should return ``-EOPNOTSUPP`` to inform the bridge code to fallback to a
- software implementation. The switch hardware should be programmed with the
+ a multicast database entry. The switch hardware should be programmed with the
specified address in the specified VLAN ID in the forwarding database
associated with this VLAN ID.
-.. note:: VLAN ID 0 corresponds to the port private database, which, in the context
- of DSA, would be its port-based VLAN, used by the associated bridge device.
-
- ``port_mdb_del``: bridge layer function invoked when the bridge wants to remove a
multicast database entry, the switch hardware should be programmed to delete
the specified MAC address from the specified VLAN ID if it was mapped into
this port forwarding database.
-- ``port_mdb_dump``: bridge layer function invoked with a switchdev callback
- function that the driver has to call for each MAC address known to be behind
- the given port. A switchdev object is used to carry the VID and MDB info.
-
Link aggregation
----------------
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index b3a534ed0e7c5..66c72230eaade 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -1052,11 +1052,7 @@ udp_rmem_min - INTEGER
Default: 4K
udp_wmem_min - INTEGER
- Minimal size of send buffer used by UDP sockets in moderation.
- Each UDP socket is able to use the size for sending data, even if
- total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
-
- Default: 4K
+ UDP does not have tx memory accounting and this tunable has no effect.
RAW variables
=============
diff --git a/drivers/net/amt.c b/drivers/net/amt.c
index be2719a3ba702..e019526e1df67 100644
--- a/drivers/net/amt.c
+++ b/drivers/net/amt.c
@@ -563,7 +563,7 @@ static struct sk_buff *amt_build_igmp_gq(struct amt_dev *amt)
ihv3->nsrcs = 0;
ihv3->resv = 0;
ihv3->suppress = false;
- ihv3->qrv = amt->net->ipv4.sysctl_igmp_qrv;
+ ihv3->qrv = READ_ONCE(amt->net->ipv4.sysctl_igmp_qrv);
ihv3->csum = 0;
csum = &ihv3->csum;
csum_start = (void *)ihv3;
@@ -577,14 +577,14 @@ static struct sk_buff *amt_build_igmp_gq(struct amt_dev *amt)
return skb;
}
-static void __amt_update_gw_status(struct amt_dev *amt, enum amt_status status,
- bool validate)
+static void amt_update_gw_status(struct amt_dev *amt, enum amt_status status,
+ bool validate)
{
if (validate && amt->status >= status)
return;
netdev_dbg(amt->dev, "Update GW status %s -> %s",
status_str[amt->status], status_str[status]);
- amt->status = status;
+ WRITE_ONCE(amt->status, status);
}
static void __amt_update_relay_status(struct amt_tunnel_list *tunnel,
@@ -600,14 +600,6 @@ static void __amt_update_relay_status(struct amt_tunnel_list *tunnel,
tunnel->status = status;
}
-static void amt_update_gw_status(struct amt_dev *amt, enum amt_status status,
- bool validate)
-{
- spin_lock_bh(&amt->lock);
- __amt_update_gw_status(amt, status, validate);
- spin_unlock_bh(&amt->lock);
-}
-
static void amt_update_relay_status(struct amt_tunnel_list *tunnel,
enum amt_status status, bool validate)
{
@@ -700,9 +692,7 @@ static void amt_send_discovery(struct amt_dev *amt)
if (unlikely(net_xmit_eval(err)))
amt->dev->stats.tx_errors++;
- spin_lock_bh(&amt->lock);
- __amt_update_gw_status(amt, AMT_STATUS_SENT_DISCOVERY, true);
- spin_unlock_bh(&amt->lock);
+ amt_update_gw_status(amt, AMT_STATUS_SENT_DISCOVERY, true);
out:
rcu_read_unlock();
}
@@ -900,6 +890,28 @@ static void amt_send_mld_gq(struct amt_dev *amt, struct amt_tunnel_list *tunnel)
}
#endif
+static bool amt_queue_event(struct amt_dev *amt, enum amt_event event,
+ struct sk_buff *skb)
+{
+ int index;
+
+ spin_lock_bh(&amt->lock);
+ if (amt->nr_events >= AMT_MAX_EVENTS) {
+ spin_unlock_bh(&amt->lock);
+ return 1;
+ }
+
+ index = (amt->event_idx + amt->nr_events) % AMT_MAX_EVENTS;
+ amt->events[index].event = event;
+ amt->events[index].skb = skb;
+ amt->nr_events++;
+ amt->event_idx %= AMT_MAX_EVENTS;
+ queue_work(amt_wq, &amt->event_wq);
+ spin_unlock_bh(&amt->lock);
+
+ return 0;
+}
+
static void amt_secret_work(struct work_struct *work)
{
struct amt_dev *amt = container_of(to_delayed_work(work),
@@ -913,58 +925,72 @@ static void amt_secret_work(struct work_struct *work)
msecs_to_jiffies(AMT_SECRET_TIMEOUT));
}
-static void amt_discovery_work(struct work_struct *work)
+static void amt_event_send_discovery(struct amt_dev *amt)
{
- struct amt_dev *amt = container_of(to_delayed_work(work),
- struct amt_dev,
- discovery_wq);
-
- spin_lock_bh(&amt->lock);
if (amt->status > AMT_STATUS_SENT_DISCOVERY)
goto out;
get_random_bytes(&amt->nonce, sizeof(__be32));
- spin_unlock_bh(&amt->lock);
amt_send_discovery(amt);
- spin_lock_bh(&amt->lock);
out:
mod_delayed_work(amt_wq, &amt->discovery_wq,
msecs_to_jiffies(AMT_DISCOVERY_TIMEOUT));
- spin_unlock_bh(&amt->lock);
}
-static void amt_req_work(struct work_struct *work)
+static void amt_discovery_work(struct work_struct *work)
{
struct amt_dev *amt = container_of(to_delayed_work(work),
struct amt_dev,
- req_wq);
+ discovery_wq);
+
+ if (amt_queue_event(amt, AMT_EVENT_SEND_DISCOVERY, NULL))
+ mod_delayed_work(amt_wq, &amt->discovery_wq,
+ msecs_to_jiffies(AMT_DISCOVERY_TIMEOUT));
+}
+
+static void amt_event_send_request(struct amt_dev *amt)
+{
u32 exp;
- spin_lock_bh(&amt->lock);
if (amt->status < AMT_STATUS_RECEIVED_ADVERTISEMENT)
goto out;
if (amt->req_cnt > AMT_MAX_REQ_COUNT) {
netdev_dbg(amt->dev, "Gateway is not ready");
amt->qi = AMT_INIT_REQ_TIMEOUT;
- amt->ready4 = false;
- amt->ready6 = false;
+ WRITE_ONCE(amt->ready4, false);
+ WRITE_ONCE(amt->ready6, false);
amt->remote_ip = 0;
- __amt_update_gw_status(amt, AMT_STATUS_INIT, false);
+ amt_update_gw_status(amt, AMT_STATUS_INIT, false);
amt->req_cnt = 0;
+ amt->nonce = 0;
goto out;
}
- spin_unlock_bh(&amt->lock);
+
+ if (!amt->req_cnt) {
+ WRITE_ONCE(amt->ready4, false);
+ WRITE_ONCE(amt->ready6, false);
+ get_random_bytes(&amt->nonce, sizeof(__be32));
+ }
amt_send_request(amt, false);
amt_send_request(amt, true);
- spin_lock_bh(&amt->lock);
- __amt_update_gw_status(amt, AMT_STATUS_SENT_REQUEST, true);
+ amt_update_gw_status(amt, AMT_STATUS_SENT_REQUEST, true);
amt->req_cnt++;
out:
exp = min_t(u32, (1 * (1 << amt->req_cnt)), AMT_MAX_REQ_TIMEOUT);
mod_delayed_work(amt_wq, &amt->req_wq, msecs_to_jiffies(exp * 1000));
- spin_unlock_bh(&amt->lock);
+}
+
+static void amt_req_work(struct work_struct *work)
+{
+ struct amt_dev *amt = container_of(to_delayed_work(work),
+ struct amt_dev,
+ req_wq);
+
+ if (amt_queue_event(amt, AMT_EVENT_SEND_REQUEST, NULL))
+ mod_delayed_work(amt_wq, &amt->req_wq,
+ msecs_to_jiffies(100));
}
static bool amt_send_membership_update(struct amt_dev *amt,
@@ -1220,7 +1246,8 @@ static netdev_tx_t amt_dev_xmit(struct sk_buff *skb, struct net_device *dev)
/* Gateway only passes IGMP/MLD packets */
if (!report)
goto free;
- if ((!v6 && !amt->ready4) || (v6 && !amt->ready6))
+ if ((!v6 && !READ_ONCE(amt->ready4)) ||
+ (v6 && !READ_ONCE(amt->ready6)))
goto free;
if (amt_send_membership_update(amt, skb, v6))
goto free;
@@ -2236,6 +2263,10 @@ static bool amt_advertisement_handler(struct amt_dev *amt, struct sk_buff *skb)
ipv4_is_zeronet(amta->ip4))
return true;
+ if (amt->status != AMT_STATUS_SENT_DISCOVERY ||
+ amt->nonce != amta->nonce)
+ return true;
+
amt->remote_ip = amta->ip4;
netdev_dbg(amt->dev, "advertised remote ip = %pI4\n", &amt->remote_ip);
mod_delayed_work(amt_wq, &amt->req_wq, 0);
@@ -2251,6 +2282,9 @@ static bool amt_multicast_data_handler(struct amt_dev *amt, struct sk_buff *skb)
struct ethhdr *eth;
struct iphdr *iph;
+ if (READ_ONCE(amt->status) != AMT_STATUS_SENT_UPDATE)
+ return true;
+
hdr_size = sizeof(*amtmd) + sizeof(struct udphdr);
if (!pskb_may_pull(skb, hdr_size))
return true;
@@ -2325,6 +2359,9 @@ static bool amt_membership_query_handler(struct amt_dev *amt,
if (amtmq->reserved || amtmq->version)
return true;
+ if (amtmq->nonce != amt->nonce)
+ return true;
+
hdr_size -= sizeof(*eth);
if (iptunnel_pull_header(skb, hdr_size, htons(ETH_P_TEB), false))
return true;
@@ -2339,6 +2376,9 @@ static bool amt_membership_query_handler(struct amt_dev *amt,
iph = ip_hdr(skb);
if (iph->version == 4) {
+ if (READ_ONCE(amt->ready4))
+ return true;
+
if (!pskb_may_pull(skb, sizeof(*iph) + AMT_IPHDR_OPTS +
sizeof(*ihv3)))
return true;
@@ -2349,12 +2389,10 @@ static bool amt_membership_query_handler(struct amt_dev *amt,
ihv3 = skb_pull(skb, sizeof(*iph) + AMT_IPHDR_OPTS);
skb_reset_transport_header(skb);
skb_push(skb, sizeof(*iph) + AMT_IPHDR_OPTS);
- spin_lock_bh(&amt->lock);
- amt->ready4 = true;
+ WRITE_ONCE(amt->ready4, true);
amt->mac = amtmq->response_mac;
amt->req_cnt = 0;
amt->qi = ihv3->qqic;
- spin_unlock_bh(&amt->lock);
skb->protocol = htons(ETH_P_IP);
eth->h_proto = htons(ETH_P_IP);
ip_eth_mc_map(iph->daddr, eth->h_dest);
@@ -2363,6 +2401,9 @@ static bool amt_membership_query_handler(struct amt_dev *amt,
struct mld2_query *mld2q;
struct ipv6hdr *ip6h;
+ if (READ_ONCE(amt->ready6))
+ return true;
+
if (!pskb_may_pull(skb, sizeof(*ip6h) + AMT_IP6HDR_OPTS +
sizeof(*mld2q)))
return true;
@@ -2374,12 +2415,10 @@ static bool amt_membership_query_handler(struct amt_dev *amt,
mld2q = skb_pull(skb, sizeof(*ip6h) + AMT_IP6HDR_OPTS);
skb_reset_transport_header(skb);
skb_push(skb, sizeof(*ip6h) + AMT_IP6HDR_OPTS);
- spin_lock_bh(&amt->lock);
- amt->ready6 = true;
+ WRITE_ONCE(amt->ready6, true);
amt->mac = amtmq->response_mac;
amt->req_cnt = 0;
amt->qi = mld2q->mld2q_qqic;
- spin_unlock_bh(&amt->lock);
skb->protocol = htons(ETH_P_IPV6);
eth->h_proto = htons(ETH_P_IPV6);
ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
@@ -2392,12 +2431,14 @@ static bool amt_membership_query_handler(struct amt_dev *amt,
skb->pkt_type = PACKET_MULTICAST;
skb->ip_summed = CHECKSUM_NONE;
len = skb->len;
+ local_bh_disable();
if (__netif_rx(skb) == NET_RX_SUCCESS) {
amt_update_gw_status(amt, AMT_STATUS_RECEIVED_QUERY, true);
dev_sw_netstats_rx_add(amt->dev, len);
} else {
amt->dev->stats.rx_dropped++;
}
+ local_bh_enable();
return false;
}
@@ -2638,7 +2679,9 @@ static bool amt_request_handler(struct amt_dev *amt, struct sk_buff *skb)
if (tunnel->ip4 == iph->saddr)
goto send;
+ spin_lock_bh(&amt->lock);
if (amt->nr_tunnels >= amt->max_tunnels) {
+ spin_unlock_bh(&amt->lock);
icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
return true;
}
@@ -2646,8 +2689,10 @@ static bool amt_request_handler(struct amt_dev *amt, struct sk_buff *skb)
tunnel = kzalloc(sizeof(*tunnel) +
(sizeof(struct hlist_head) * amt->hash_buckets),
GFP_ATOMIC);
- if (!tunnel)
+ if (!tunnel) {
+ spin_unlock_bh(&amt->lock);
return true;
+ }
tunnel->source_port = udph->source;
tunnel->ip4 = iph->saddr;
@@ -2660,10 +2705,9 @@ static bool amt_request_handler(struct amt_dev *amt, struct sk_buff *skb)
INIT_DELAYED_WORK(&tunnel->gc_wq, amt_tunnel_expire);
- spin_lock_bh(&amt->lock);
list_add_tail_rcu(&tunnel->list, &amt->tunnel_list);
tunnel->key = amt->key;
- amt_update_relay_status(tunnel, AMT_STATUS_RECEIVED_REQUEST, true);
+ __amt_update_relay_status(tunnel, AMT_STATUS_RECEIVED_REQUEST, true);
amt->nr_tunnels++;
mod_delayed_work(amt_wq, &tunnel->gc_wq,
msecs_to_jiffies(amt_gmi(amt)));
@@ -2688,6 +2732,38 @@ send:
return false;
}
+static void amt_gw_rcv(struct amt_dev *amt, struct sk_buff *skb)
+{
+ int type = amt_parse_type(skb);
+ int err = 1;
+
+ if (type == -1)
+ goto drop;
+
+ if (amt->mode == AMT_MODE_GATEWAY) {
+ switch (type) {
+ case AMT_MSG_ADVERTISEMENT:
+ err = amt_advertisement_handler(amt, skb);
+ break;
+ case AMT_MSG_MEMBERSHIP_QUERY:
+ err = amt_membership_query_handler(amt, skb);
+ if (!err)
+ return;
+ break;
+ default:
+ netdev_dbg(amt->dev, "Invalid type of Gateway\n");
+ break;
+ }
+ }
+drop:
+ if (err) {
+ amt->dev->stats.rx_dropped++;
+ kfree_skb(skb);
+ } else {
+ consume_skb(skb);
+ }
+}
+
static int amt_rcv(struct sock *sk, struct sk_buff *skb)
{
struct amt_dev *amt;
@@ -2719,8 +2795,12 @@ static int amt_rcv(struct sock *sk, struct sk_buff *skb)
err = true;
goto drop;
}
- err = amt_advertisement_handler(amt, skb);
- break;
+ if (amt_queue_event(amt, AMT_EVENT_RECEIVE, skb)) {
+ netdev_dbg(amt->dev, "AMT Event queue full\n");
+ err = true;
+ goto drop;
+ }
+ goto out;
case AMT_MSG_MULTICAST_DATA:
if (iph->saddr != amt->remote_ip) {
netdev_dbg(amt->dev, "Invalid Relay IP\n");
@@ -2738,11 +2818,12 @@ static int amt_rcv(struct sock *sk, struct sk_buff *skb)
err = true;
goto drop;
}
- err = amt_membership_query_handler(amt, skb);
- if (err)
+ if (amt_queue_event(amt, AMT_EVENT_RECEIVE, skb)) {
+ netdev_dbg(amt->dev, "AMT Event queue full\n");
+ err = true;
goto drop;
- else
- goto out;
+ }
+ goto out;
default:
err = true;
netdev_dbg(amt->dev, "Invalid type of Gateway\n");
@@ -2780,6 +2861,46 @@ out:
return 0;
}
+static void amt_event_work(struct work_struct *work)
+{
+ struct amt_dev *amt = container_of(work, struct amt_dev, event_wq);
+ struct sk_buff *skb;
+ u8 event;
+ int i;
+
+ for (i = 0; i < AMT_MAX_EVENTS; i++) {
+ spin_lock_bh(&amt->lock);
+ if (amt->nr_events == 0) {
+ spin_unlock_bh(&amt->lock);
+ return;
+ }
+ event = amt->events[amt->event_idx].event;
+ skb = amt->events[amt->event_idx].skb;
+ amt->events[amt->event_idx].event = AMT_EVENT_NONE;
+ amt->events[amt->event_idx].skb = NULL;
+ amt->nr_events--;
+ amt->event_idx++;
+ amt->event_idx %= AMT_MAX_EVENTS;
+ spin_unlock_bh(&amt->lock);
+
+ switch (event) {
+ case AMT_EVENT_RECEIVE:
+ amt_gw_rcv(amt, skb);
+ break;
+ case AMT_EVENT_SEND_DISCOVERY:
+ amt_event_send_discovery(amt);
+ break;
+ case AMT_EVENT_SEND_REQUEST:
+ amt_event_send_request(amt);
+ break;
+ default:
+ if (skb)
+ kfree_skb(skb);
+ break;
+ }
+ }
+}
+
static int amt_err_lookup(struct sock *sk, struct sk_buff *skb)
{
struct amt_dev *amt;
@@ -2804,7 +2925,7 @@ static int amt_err_lookup(struct sock *sk, struct sk_buff *skb)
break;
case AMT_MSG_REQUEST:
case AMT_MSG_MEMBERSHIP_UPDATE:
- if (amt->status >= AMT_STATUS_RECEIVED_ADVERTISEMENT)
+ if (READ_ONCE(amt->status) >= AMT_STATUS_RECEIVED_ADVERTISEMENT)
mod_delayed_work(amt_wq, &amt->req_wq, 0);
break;
default:
@@ -2867,6 +2988,8 @@ static int amt_dev_open(struct net_device *dev)
amt->ready4 = false;
amt->ready6 = false;
+ amt->event_idx = 0;
+ amt->nr_events = 0;
err = amt_socket_create(amt);
if (err)
@@ -2874,6 +2997,7 @@ static int amt_dev_open(struct net_device *dev)
amt->req_cnt = 0;
amt->remote_ip = 0;
+ amt->nonce = 0;
get_random_bytes(&amt->key, sizeof(siphash_key_t));
amt->status = AMT_STATUS_INIT;
@@ -2892,6 +3016,8 @@ static int amt_dev_stop(struct net_device *dev)
struct amt_dev *amt = netdev_priv(dev);
struct amt_tunnel_list *tunnel, *tmp;
struct socket *sock;
+ struct sk_buff *skb;
+ int i;
cancel_delayed_work_sync(&amt->req_wq);
cancel_delayed_work_sync(&amt->discovery_wq);
@@ -2904,6 +3030,15 @@ static int amt_dev_stop(struct net_device *dev)
if (sock)
udp_tunnel_sock_release(sock);
+ cancel_work_sync(&amt->event_wq);
+ for (i = 0; i < AMT_MAX_EVENTS; i++) {
+ skb = amt->events[i].skb;
+ if (skb)
+ kfree_skb(skb);
+ amt->events[i].event = AMT_EVENT_NONE;
+ amt->events[i].skb = NULL;
+ }
+
amt->ready4 = false;
amt->ready6 = false;
amt->req_cnt = 0;
@@ -3095,7 +3230,7 @@ static int amt_newlink(struct net *net, struct net_device *dev,
goto err;
}
if (amt->mode == AMT_MODE_RELAY) {
- amt->qrv = amt->net->ipv4.sysctl_igmp_qrv;
+ amt->qrv = READ_ONCE(amt->net->ipv4.sysctl_igmp_qrv);
amt->qri = 10;
dev->needed_headroom = amt->stream_dev->needed_headroom +
AMT_RELAY_HLEN;
@@ -3146,8 +3281,8 @@ static int amt_newlink(struct net *net, struct net_device *dev,
INIT_DELAYED_WORK(&amt->discovery_wq, amt_discovery_work);
INIT_DELAYED_WORK(&amt->req_wq, amt_req_work);
INIT_DELAYED_WORK(&amt->secret_wq, amt_secret_work);
+ INIT_WORK(&amt->event_wq, amt_event_work);
INIT_LIST_HEAD(&amt->tunnel_list);
-
return 0;
err:
dev_put(amt->stream_dev);
@@ -3280,7 +3415,7 @@ static int __init amt_init(void)
if (err < 0)
goto unregister_notifier;
- amt_wq = alloc_workqueue("amt", WQ_UNBOUND, 1);
+ amt_wq = alloc_workqueue("amt", WQ_UNBOUND, 0);
if (!amt_wq) {
err = -ENOMEM;
goto rtnl_unregister;
diff --git a/drivers/net/can/rcar/rcar_canfd.c b/drivers/net/can/rcar/rcar_canfd.c
index ba42cef10a532..cb0321ea853c1 100644
--- a/drivers/net/can/rcar/rcar_canfd.c
+++ b/drivers/net/can/rcar/rcar_canfd.c
@@ -1843,6 +1843,7 @@ static int rcar_canfd_probe(struct platform_device *pdev)
of_child = of_get_child_by_name(pdev->dev.of_node, name);
if (of_child && of_device_is_available(of_child))
channels_mask |= BIT(i);
+ of_node_put(of_child);
}
if (chip_id != RENESAS_RZG2L) {
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index 9b47b07162fe5..bc6518504fd46 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -1690,8 +1690,8 @@ static int mcp251xfd_register_chip_detect(struct mcp251xfd_priv *priv)
u32 osc;
int err;
- /* The OSC_LPMEN is only supported on MCP2518FD, so use it to
- * autodetect the model.
+ /* The OSC_LPMEN is only supported on MCP2518FD and MCP251863,
+ * so use it to autodetect the model.
*/
err = regmap_update_bits(priv->map_reg, MCP251XFD_REG_OSC,
MCP251XFD_REG_OSC_LPMEN,
@@ -1703,10 +1703,18 @@ static int mcp251xfd_register_chip_detect(struct mcp251xfd_priv *priv)
if (err)
return err;
- if (osc & MCP251XFD_REG_OSC_LPMEN)
- devtype_data = &mcp251xfd_devtype_data_mcp2518fd;
- else
+ if (osc & MCP251XFD_REG_OSC_LPMEN) {
+ /* We cannot distinguish between MCP2518FD and
+ * MCP251863. If firmware specifies MCP251863, keep
+ * it, otherwise set to MCP2518FD.
+ */
+ if (mcp251xfd_is_251863(priv))
+ devtype_data = &mcp251xfd_devtype_data_mcp251863;
+ else
+ devtype_data = &mcp251xfd_devtype_data_mcp2518fd;
+ } else {
devtype_data = &mcp251xfd_devtype_data_mcp2517fd;
+ }
if (!mcp251xfd_is_251XFD(priv) &&
priv->devtype_data.model != devtype_data->model) {
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 9ca8c8d7740f4..92a500e1ccd21 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -1038,18 +1038,21 @@ int ksz_switch_register(struct ksz_device *dev,
ports = of_get_child_by_name(dev->dev->of_node, "ethernet-ports");
if (!ports)
ports = of_get_child_by_name(dev->dev->of_node, "ports");
- if (ports)
+ if (ports) {
for_each_available_child_of_node(ports, port) {
if (of_property_read_u32(port, "reg",
&port_num))
continue;
if (!(dev->port_mask & BIT(port_num))) {
of_node_put(port);
+ of_node_put(ports);
return -EINVAL;
}
of_get_phy_mode(port,
&dev->ports[port_num].interface);
}
+ of_node_put(ports);
+ }
dev->synclko_125 = of_property_read_bool(dev->dev->of_node,
"microchip,synclko-125");
dev->synclko_disable = of_property_read_bool(dev->dev->of_node,
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 72b6fc1932b54..698c7d1fb45cc 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -3382,12 +3382,28 @@ static const struct of_device_id sja1105_dt_ids[] = {
};
MODULE_DEVICE_TABLE(of, sja1105_dt_ids);
+static const struct spi_device_id sja1105_spi_ids[] = {
+ { "sja1105e" },
+ { "sja1105t" },
+ { "sja1105p" },
+ { "sja1105q" },
+ { "sja1105r" },
+ { "sja1105s" },
+ { "sja1110a" },
+ { "sja1110b" },
+ { "sja1110c" },
+ { "sja1110d" },
+ { },
+};
+MODULE_DEVICE_TABLE(spi, sja1105_spi_ids);
+
static struct spi_driver sja1105_driver = {
.driver = {
.name = "sja1105",
.owner = THIS_MODULE,
.of_match_table = of_match_ptr(sja1105_dt_ids),
},
+ .id_table = sja1105_spi_ids,
.probe = sja1105_probe,
.remove = sja1105_remove,
.shutdown = sja1105_shutdown,
diff --git a/drivers/net/dsa/vitesse-vsc73xx-spi.c b/drivers/net/dsa/vitesse-vsc73xx-spi.c
index 3110895358d8d..97a92e6da60d8 100644
--- a/drivers/net/dsa/vitesse-vsc73xx-spi.c
+++ b/drivers/net/dsa/vitesse-vsc73xx-spi.c
@@ -205,10 +205,20 @@ static const struct of_device_id vsc73xx_of_match[] = {
};
MODULE_DEVICE_TABLE(of, vsc73xx_of_match);
+static const struct spi_device_id vsc73xx_spi_ids[] = {
+ { "vsc7385" },
+ { "vsc7388" },
+ { "vsc7395" },
+ { "vsc7398" },
+ { },
+};
+MODULE_DEVICE_TABLE(spi, vsc73xx_spi_ids);
+
static struct spi_driver vsc73xx_spi_driver = {
.probe = vsc73xx_spi_probe,
.remove = vsc73xx_spi_remove,
.shutdown = vsc73xx_spi_shutdown,
+ .id_table = vsc73xx_spi_ids,
.driver = {
.name = "vsc73xx-spi",
.of_match_table = vsc73xx_of_match,
diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
index 7c760aa655404..ddfe9208529a5 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
@@ -1236,8 +1236,8 @@ static struct sock *chtls_recv_sock(struct sock *lsk,
csk->sndbuf = newsk->sk_sndbuf;
csk->smac_idx = ((struct port_info *)netdev_priv(ndev))->smt_idx;
RCV_WSCALE(tp) = select_rcv_wscale(tcp_full_space(newsk),
- sock_net(newsk)->
- ipv4.sysctl_tcp_window_scaling,
+ READ_ONCE(sock_net(newsk)->
+ ipv4.sysctl_tcp_window_scaling),
tp->window_clamp);
neigh_release(n);
inet_inherit_port(&tcp_hashinfo, lsk, newsk);
@@ -1384,7 +1384,7 @@ static void chtls_pass_accept_request(struct sock *sk,
#endif
}
if (req->tcpopt.wsf <= 14 &&
- sock_net(sk)->ipv4.sysctl_tcp_window_scaling) {
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) {
inet_rsk(oreq)->wscale_ok = 1;
inet_rsk(oreq)->snd_wscale = req->tcpopt.wsf;
}
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 528eb0f223b17..b4f5e57d0285c 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -2287,7 +2287,7 @@ err:
/* Uses sync mcc */
int be_cmd_read_port_transceiver_data(struct be_adapter *adapter,
- u8 page_num, u8 *data)
+ u8 page_num, u32 off, u32 len, u8 *data)
{
struct be_dma_mem cmd;
struct be_mcc_wrb *wrb;
@@ -2321,10 +2321,10 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter,
req->port = cpu_to_le32(adapter->hba_port_num);
req->page_num = cpu_to_le32(page_num);
status = be_mcc_notify_wait(adapter);
- if (!status) {
+ if (!status && len > 0) {
struct be_cmd_resp_port_type *resp = cmd.va;
- memcpy(data, resp->page_data, PAGE_DATA_LEN);
+ memcpy(data, resp->page_data + off, len);
}
err:
mutex_unlock(&adapter->mcc_lock);
@@ -2415,7 +2415,7 @@ int be_cmd_query_cable_type(struct be_adapter *adapter)
int status;
status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0,
- page_data);
+ 0, PAGE_DATA_LEN, page_data);
if (!status) {
switch (adapter->phy.interface_type) {
case PHY_TYPE_QSFP:
@@ -2440,7 +2440,7 @@ int be_cmd_query_sfp_info(struct be_adapter *adapter)
int status;
status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0,
- page_data);
+ 0, PAGE_DATA_LEN, page_data);
if (!status) {
strlcpy(adapter->phy.vendor_name, page_data +
SFP_VENDOR_NAME_OFFSET, SFP_VENDOR_NAME_LEN - 1);
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index db1f3b908582e..e2085c68c0ee7 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -2427,7 +2427,7 @@ int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num, u8 beacon,
int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num,
u32 *state);
int be_cmd_read_port_transceiver_data(struct be_adapter *adapter,
- u8 page_num, u8 *data);
+ u8 page_num, u32 off, u32 len, u8 *data);
int be_cmd_query_cable_type(struct be_adapter *adapter);
int be_cmd_query_sfp_info(struct be_adapter *adapter);
int lancer_cmd_read_object(struct be_adapter *adapter, struct be_dma_mem *cmd,
diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c
index dfa784339781d..bd0df189d8719 100644
--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c
+++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c
@@ -1344,7 +1344,7 @@ static int be_get_module_info(struct net_device *netdev,
return -EOPNOTSUPP;
status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0,
- page_data);
+ 0, PAGE_DATA_LEN, page_data);
if (!status) {
if (!page_data[SFP_PLUS_SFF_8472_COMP]) {
modinfo->type = ETH_MODULE_SFF_8079;
@@ -1362,25 +1362,32 @@ static int be_get_module_eeprom(struct net_device *netdev,
{
struct be_adapter *adapter = netdev_priv(netdev);
int status;
+ u32 begin, end;
if (!check_privilege(adapter, MAX_PRIVILEGES))
return -EOPNOTSUPP;
- status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0,
- data);
- if (status)
- goto err;
+ begin = eeprom->offset;
+ end = eeprom->offset + eeprom->len;
+
+ if (begin < PAGE_DATA_LEN) {
+ status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, begin,
+ min_t(u32, end, PAGE_DATA_LEN) - begin,
+ data);
+ if (status)
+ goto err;
+
+ data += PAGE_DATA_LEN - begin;
+ begin = PAGE_DATA_LEN;
+ }
- if (eeprom->offset + eeprom->len > PAGE_DATA_LEN) {
- status = be_cmd_read_port_transceiver_data(adapter,
- TR_PAGE_A2,
- data +
- PAGE_DATA_LEN);
+ if (end > PAGE_DATA_LEN) {
+ status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A2,
+ begin - PAGE_DATA_LEN,
+ end - begin, data);
if (status)
goto err;
}
- if (eeprom->offset)
- memcpy(data, data + eeprom->offset, eeprom->len);
err:
return be_cmd_status(status);
}
diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h
index 13382df2f2eff..bcf680e838113 100644
--- a/drivers/net/ethernet/intel/e1000e/hw.h
+++ b/drivers/net/ethernet/intel/e1000e/hw.h
@@ -630,7 +630,6 @@ struct e1000_phy_info {
bool disable_polarity_correction;
bool is_mdix;
bool polarity_correction;
- bool reset_disable;
bool speed_downgraded;
bool autoneg_wait_to_complete;
};
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index e6c8e6d5234f8..9466f65a6da77 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -2050,10 +2050,6 @@ static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw)
bool blocked = false;
int i = 0;
- /* Check the PHY (LCD) reset flag */
- if (hw->phy.reset_disable)
- return true;
-
while ((blocked = !(er32(FWSM) & E1000_ICH_FWSM_RSPCIPHY)) &&
(i++ < 30))
usleep_range(10000, 11000);
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h
index 638a3ddd7ada8..2504b11c3169f 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.h
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h
@@ -271,7 +271,6 @@
#define I217_CGFREG_ENABLE_MTA_RESET 0x0002
#define I217_MEMPWR PHY_REG(772, 26)
#define I217_MEMPWR_DISABLE_SMB_RELEASE 0x0010
-#define I217_MEMPWR_MOEM 0x1000
/* Receive Address Initial CRC Calculation */
#define E1000_PCH_RAICC(_n) (0x05F50 + ((_n) * 4))
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index fa06f68c8c803..f1729940e46ce 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -6494,6 +6494,10 @@ static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter)
if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID &&
hw->mac.type >= e1000_pch_adp) {
+ /* Keep the GPT clock enabled for CSME */
+ mac_data = er32(FEXTNVM);
+ mac_data |= BIT(3);
+ ew32(FEXTNVM, mac_data);
/* Request ME unconfigure the device from S0ix */
mac_data = er32(H2ME);
mac_data &= ~E1000_H2ME_START_DPG;
@@ -6987,21 +6991,8 @@ static __maybe_unused int e1000e_pm_suspend(struct device *dev)
struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev));
struct e1000_adapter *adapter = netdev_priv(netdev);
struct pci_dev *pdev = to_pci_dev(dev);
- struct e1000_hw *hw = &adapter->hw;
- u16 phy_data;
int rc;
- if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID &&
- hw->mac.type >= e1000_pch_adp) {
- /* Mask OEM Bits / Gig Disable / Restart AN (772_26[12] = 1) */
- e1e_rphy(hw, I217_MEMPWR, &phy_data);
- phy_data |= I217_MEMPWR_MOEM;
- e1e_wphy(hw, I217_MEMPWR, phy_data);
-
- /* Disable LCD reset */
- hw->phy.reset_disable = true;
- }
-
e1000e_flush_lpic(pdev);
e1000e_pm_freeze(dev);
@@ -7023,8 +7014,6 @@ static __maybe_unused int e1000e_pm_resume(struct device *dev)
struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev));
struct e1000_adapter *adapter = netdev_priv(netdev);
struct pci_dev *pdev = to_pci_dev(dev);
- struct e1000_hw *hw = &adapter->hw;
- u16 phy_data;
int rc;
/* Introduce S0ix implementation */
@@ -7035,17 +7024,6 @@ static __maybe_unused int e1000e_pm_resume(struct device *dev)
if (rc)
return rc;
- if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID &&
- hw->mac.type >= e1000_pch_adp) {
- /* Unmask OEM Bits / Gig Disable / Restart AN 772_26[12] = 0 */
- e1e_rphy(hw, I217_MEMPWR, &phy_data);
- phy_data &= ~I217_MEMPWR_MOEM;
- e1e_wphy(hw, I217_MEMPWR, phy_data);
-
- /* Enable LCD reset */
- hw->phy.reset_disable = false;
- }
-
return e1000e_pm_thaw(dev);
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index aa786fd559511..7f1a0d90dc51e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -10650,7 +10650,7 @@ static int i40e_reset(struct i40e_pf *pf)
**/
static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
{
- int old_recovery_mode_bit = test_bit(__I40E_RECOVERY_MODE, pf->state);
+ const bool is_recovery_mode_reported = i40e_check_recovery_mode(pf);
struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
struct i40e_hw *hw = &pf->hw;
i40e_status ret;
@@ -10658,13 +10658,11 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
int v;
if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) &&
- i40e_check_recovery_mode(pf)) {
+ is_recovery_mode_reported)
i40e_set_ethtool_ops(pf->vsi[pf->lan_vsi]->netdev);
- }
if (test_bit(__I40E_DOWN, pf->state) &&
- !test_bit(__I40E_RECOVERY_MODE, pf->state) &&
- !old_recovery_mode_bit)
+ !test_bit(__I40E_RECOVERY_MODE, pf->state))
goto clear_recovery;
dev_dbg(&pf->pdev->dev, "Rebuilding internal switch\n");
@@ -10691,13 +10689,12 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
* accordingly with regard to resources initialization
* and deinitialization
*/
- if (test_bit(__I40E_RECOVERY_MODE, pf->state) ||
- old_recovery_mode_bit) {
+ if (test_bit(__I40E_RECOVERY_MODE, pf->state)) {
if (i40e_get_capabilities(pf,
i40e_aqc_opc_list_func_capabilities))
goto end_unlock;
- if (test_bit(__I40E_RECOVERY_MODE, pf->state)) {
+ if (is_recovery_mode_reported) {
/* we're staying in recovery mode so we'll reinitialize
* misc vector here
*/
diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
index 49aed3e506a66..0ea0361cd86b1 100644
--- a/drivers/net/ethernet/intel/iavf/iavf.h
+++ b/drivers/net/ethernet/intel/iavf/iavf.h
@@ -64,7 +64,6 @@ struct iavf_vsi {
u16 id;
DECLARE_BITMAP(state, __IAVF_VSI_STATE_SIZE__);
int base_vector;
- u16 work_limit;
u16 qs_handle;
void *priv; /* client driver data reference. */
};
@@ -159,8 +158,12 @@ struct iavf_vlan {
struct iavf_vlan_filter {
struct list_head list;
struct iavf_vlan vlan;
- bool remove; /* filter needs to be removed */
- bool add; /* filter needs to be added */
+ struct {
+ u8 is_new_vlan:1; /* filter is new, wait for PF answer */
+ u8 remove:1; /* filter needs to be removed */
+ u8 add:1; /* filter needs to be added */
+ u8 padding:5;
+ };
};
#define IAVF_MAX_TRAFFIC_CLASS 4
@@ -461,6 +464,10 @@ static inline const char *iavf_state_str(enum iavf_state_t state)
return "__IAVF_INIT_VERSION_CHECK";
case __IAVF_INIT_GET_RESOURCES:
return "__IAVF_INIT_GET_RESOURCES";
+ case __IAVF_INIT_EXTENDED_CAPS:
+ return "__IAVF_INIT_EXTENDED_CAPS";
+ case __IAVF_INIT_CONFIG_ADAPTER:
+ return "__IAVF_INIT_CONFIG_ADAPTER";
case __IAVF_INIT_SW:
return "__IAVF_INIT_SW";
case __IAVF_INIT_FAILED:
@@ -520,6 +527,7 @@ int iavf_get_vf_config(struct iavf_adapter *adapter);
int iavf_get_vf_vlan_v2_caps(struct iavf_adapter *adapter);
int iavf_send_vf_offload_vlan_v2_msg(struct iavf_adapter *adapter);
void iavf_set_queue_vlan_tag_loc(struct iavf_adapter *adapter);
+u16 iavf_get_num_vlans_added(struct iavf_adapter *adapter);
void iavf_irq_enable(struct iavf_adapter *adapter, bool flush);
void iavf_configure_queues(struct iavf_adapter *adapter);
void iavf_deconfigure_queues(struct iavf_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
index 3bb56714beb03..e535d4c3da49d 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
@@ -692,12 +692,8 @@ static int __iavf_get_coalesce(struct net_device *netdev,
struct ethtool_coalesce *ec, int queue)
{
struct iavf_adapter *adapter = netdev_priv(netdev);
- struct iavf_vsi *vsi = &adapter->vsi;
struct iavf_ring *rx_ring, *tx_ring;
- ec->tx_max_coalesced_frames = vsi->work_limit;
- ec->rx_max_coalesced_frames = vsi->work_limit;
-
/* Rx and Tx usecs per queue value. If user doesn't specify the
* queue, return queue 0's value to represent.
*/
@@ -825,12 +821,8 @@ static int __iavf_set_coalesce(struct net_device *netdev,
struct ethtool_coalesce *ec, int queue)
{
struct iavf_adapter *adapter = netdev_priv(netdev);
- struct iavf_vsi *vsi = &adapter->vsi;
int i;
- if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq)
- vsi->work_limit = ec->tx_max_coalesced_frames_irq;
-
if (ec->rx_coalesce_usecs == 0) {
if (ec->use_adaptive_rx_coalesce)
netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n");
@@ -1969,8 +1961,6 @@ static int iavf_set_rxfh(struct net_device *netdev, const u32 *indir,
static const struct ethtool_ops iavf_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
- ETHTOOL_COALESCE_MAX_FRAMES |
- ETHTOOL_COALESCE_MAX_FRAMES_IRQ |
ETHTOOL_COALESCE_USE_ADAPTIVE,
.get_drvinfo = iavf_get_drvinfo,
.get_link = ethtool_op_get_link,
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index f3ecb3bca33dd..2e2c153ce46a3 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -843,7 +843,7 @@ static void iavf_restore_filters(struct iavf_adapter *adapter)
* iavf_get_num_vlans_added - get number of VLANs added
* @adapter: board private structure
*/
-static u16 iavf_get_num_vlans_added(struct iavf_adapter *adapter)
+u16 iavf_get_num_vlans_added(struct iavf_adapter *adapter)
{
return bitmap_weight(adapter->vsi.active_cvlans, VLAN_N_VID) +
bitmap_weight(adapter->vsi.active_svlans, VLAN_N_VID);
@@ -906,11 +906,6 @@ static int iavf_vlan_rx_add_vid(struct net_device *netdev,
if (!iavf_add_vlan(adapter, IAVF_VLAN(vid, be16_to_cpu(proto))))
return -ENOMEM;
- if (proto == cpu_to_be16(ETH_P_8021Q))
- set_bit(vid, adapter->vsi.active_cvlans);
- else
- set_bit(vid, adapter->vsi.active_svlans);
-
return 0;
}
@@ -2245,7 +2240,6 @@ int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter)
adapter->vsi.back = adapter;
adapter->vsi.base_vector = 1;
- adapter->vsi.work_limit = IAVF_DEFAULT_IRQ_WORK;
vsi->netdev = adapter->netdev;
vsi->qs_handle = adapter->vsi_res->qset_handle;
if (adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
@@ -2956,6 +2950,9 @@ continue_reset:
adapter->aq_required |= IAVF_FLAG_AQ_ADD_CLOUD_FILTER;
iavf_misc_irq_enable(adapter);
+ bitmap_clear(adapter->vsi.active_cvlans, 0, VLAN_N_VID);
+ bitmap_clear(adapter->vsi.active_svlans, 0, VLAN_N_VID);
+
mod_delayed_work(iavf_wq, &adapter->watchdog_task, 2);
/* We were running when the reset started, so we need to restore some
diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
index 978f651c6b093..06d18797d25a2 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
@@ -194,7 +194,7 @@ static bool iavf_clean_tx_irq(struct iavf_vsi *vsi,
struct iavf_tx_buffer *tx_buf;
struct iavf_tx_desc *tx_desc;
unsigned int total_bytes = 0, total_packets = 0;
- unsigned int budget = vsi->work_limit;
+ unsigned int budget = IAVF_DEFAULT_IRQ_WORK;
tx_buf = &tx_ring->tx_bi[i];
tx_desc = IAVF_TX_DESC(tx_ring, i);
@@ -1285,11 +1285,10 @@ static struct iavf_rx_buffer *iavf_get_rx_buffer(struct iavf_ring *rx_ring,
{
struct iavf_rx_buffer *rx_buffer;
- if (!size)
- return NULL;
-
rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean];
prefetchw(rx_buffer->page);
+ if (!size)
+ return rx_buffer;
/* we are reusing so sync this buffer for CPU use */
dma_sync_single_range_for_cpu(rx_ring->dev,
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
index 782450d5c12fc..1603e99bae4af 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -627,6 +627,33 @@ static void iavf_mac_add_reject(struct iavf_adapter *adapter)
}
/**
+ * iavf_vlan_add_reject
+ * @adapter: adapter structure
+ *
+ * Remove VLAN filters from list based on PF response.
+ **/
+static void iavf_vlan_add_reject(struct iavf_adapter *adapter)
+{
+ struct iavf_vlan_filter *f, *ftmp;
+
+ spin_lock_bh(&adapter->mac_vlan_list_lock);
+ list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) {
+ if (f->is_new_vlan) {
+ if (f->vlan.tpid == ETH_P_8021Q)
+ clear_bit(f->vlan.vid,
+ adapter->vsi.active_cvlans);
+ else
+ clear_bit(f->vlan.vid,
+ adapter->vsi.active_svlans);
+
+ list_del(&f->list);
+ kfree(f);
+ }
+ }
+ spin_unlock_bh(&adapter->mac_vlan_list_lock);
+}
+
+/**
* iavf_add_vlans
* @adapter: adapter structure
*
@@ -683,6 +710,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter)
vvfl->vlan_id[i] = f->vlan.vid;
i++;
f->add = false;
+ f->is_new_vlan = true;
if (i == count)
break;
}
@@ -695,10 +723,18 @@ void iavf_add_vlans(struct iavf_adapter *adapter)
iavf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_VLAN, (u8 *)vvfl, len);
kfree(vvfl);
} else {
+ u16 max_vlans = adapter->vlan_v2_caps.filtering.max_filters;
+ u16 current_vlans = iavf_get_num_vlans_added(adapter);
struct virtchnl_vlan_filter_list_v2 *vvfl_v2;
adapter->current_op = VIRTCHNL_OP_ADD_VLAN_V2;
+ if ((count + current_vlans) > max_vlans &&
+ current_vlans < max_vlans) {
+ count = max_vlans - iavf_get_num_vlans_added(adapter);
+ more = true;
+ }
+
len = sizeof(*vvfl_v2) + ((count - 1) *
sizeof(struct virtchnl_vlan_filter));
if (len > IAVF_MAX_AQ_BUF_SIZE) {
@@ -725,6 +761,9 @@ void iavf_add_vlans(struct iavf_adapter *adapter)
&adapter->vlan_v2_caps.filtering.filtering_support;
struct virtchnl_vlan *vlan;
+ if (i == count)
+ break;
+
/* give priority over outer if it's enabled */
if (filtering_support->outer)
vlan = &vvfl_v2->filters[i].outer;
@@ -736,8 +775,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter)
i++;
f->add = false;
- if (i == count)
- break;
+ f->is_new_vlan = true;
}
}
@@ -2080,6 +2118,11 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
*/
iavf_netdev_features_vlan_strip_set(netdev, true);
break;
+ case VIRTCHNL_OP_ADD_VLAN_V2:
+ iavf_vlan_add_reject(adapter);
+ dev_warn(&adapter->pdev->dev, "Failed to add VLAN filter, error %s\n",
+ iavf_stat_str(&adapter->hw, v_retval));
+ break;
default:
dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n",
v_retval, iavf_stat_str(&adapter->hw, v_retval),
@@ -2332,6 +2375,24 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
spin_unlock_bh(&adapter->adv_rss_lock);
}
break;
+ case VIRTCHNL_OP_ADD_VLAN_V2: {
+ struct iavf_vlan_filter *f;
+
+ spin_lock_bh(&adapter->mac_vlan_list_lock);
+ list_for_each_entry(f, &adapter->vlan_filter_list, list) {
+ if (f->is_new_vlan) {
+ f->is_new_vlan = false;
+ if (f->vlan.tpid == ETH_P_8021Q)
+ set_bit(f->vlan.vid,
+ adapter->vsi.active_cvlans);
+ else
+ set_bit(f->vlan.vid,
+ adapter->vsi.active_svlans);
+ }
+ }
+ spin_unlock_bh(&adapter->mac_vlan_list_lock);
+ }
+ break;
case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING:
/* PF enabled vlan strip on this VF.
* Update netdev->features if needed to be in sync with ethtool.
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index ae17af44fe025..a5ebee7df4a85 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -6171,6 +6171,9 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg)
u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr);
u32 value = 0;
+ if (IGC_REMOVED(hw_addr))
+ return ~value;
+
value = readl(&hw_addr[reg]);
/* reads should not return all F's */
diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
index e197a33d93a03..026c3b65fc37a 100644
--- a/drivers/net/ethernet/intel/igc/igc_regs.h
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -306,7 +306,8 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg);
#define wr32(reg, val) \
do { \
u8 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \
- writel((val), &hw_addr[(reg)]); \
+ if (!IGC_REMOVED(hw_addr)) \
+ writel((val), &hw_addr[(reg)]); \
} while (0)
#define rd32(reg) (igc_rd32(hw, reg))
@@ -318,4 +319,6 @@ do { \
#define array_rd32(reg, offset) (igc_rd32(hw, (reg) + ((offset) << 2)))
+#define IGC_REMOVED(h) unlikely(!(h))
+
#endif
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 921a4d977d651..8813b4dd6872f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -779,6 +779,7 @@ struct ixgbe_adapter {
#ifdef CONFIG_IXGBE_IPSEC
struct ixgbe_ipsec *ipsec;
#endif /* CONFIG_IXGBE_IPSEC */
+ spinlock_t vfs_lock;
};
static inline int ixgbe_determine_xdp_q_idx(int cpu)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 77c2e70b0860d..55f91c9ff0478 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -6403,6 +6403,9 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter,
/* n-tuple support exists, always init our spinlock */
spin_lock_init(&adapter->fdir_perfect_lock);
+ /* init spinlock to avoid concurrency of VF resources */
+ spin_lock_init(&adapter->vfs_lock);
+
#ifdef CONFIG_IXGBE_DCB
ixgbe_init_dcb(adapter);
#endif
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index d4e63f0644c36..a1e69c7348632 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -205,10 +205,13 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, unsigned int max_vfs)
int ixgbe_disable_sriov(struct ixgbe_adapter *adapter)
{
unsigned int num_vfs = adapter->num_vfs, vf;
+ unsigned long flags;
int rss;
+ spin_lock_irqsave(&adapter->vfs_lock, flags);
/* set num VFs to 0 to prevent access to vfinfo */
adapter->num_vfs = 0;
+ spin_unlock_irqrestore(&adapter->vfs_lock, flags);
/* put the reference to all of the vf devices */
for (vf = 0; vf < num_vfs; ++vf) {
@@ -1355,8 +1358,10 @@ static void ixgbe_rcv_ack_from_vf(struct ixgbe_adapter *adapter, u32 vf)
void ixgbe_msg_task(struct ixgbe_adapter *adapter)
{
struct ixgbe_hw *hw = &adapter->hw;
+ unsigned long flags;
u32 vf;
+ spin_lock_irqsave(&adapter->vfs_lock, flags);
for (vf = 0; vf < adapter->num_vfs; vf++) {
/* process any reset requests */
if (!ixgbe_check_for_rst(hw, vf))
@@ -1370,6 +1375,7 @@ void ixgbe_msg_task(struct ixgbe_adapter *adapter)
if (!ixgbe_check_for_ack(hw, vf))
ixgbe_rcv_ack_from_vf(adapter, vf);
}
+ spin_unlock_irqrestore(&adapter->vfs_lock, flags);
}
static inline void ixgbe_ping_vf(struct ixgbe_adapter *adapter, int vf)
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_flower.c b/drivers/net/ethernet/marvell/prestera/prestera_flower.c
index d43e503c644f8..4d93ad6a284c0 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_flower.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_flower.c
@@ -167,12 +167,12 @@ static int prestera_flower_parse_meta(struct prestera_acl_rule *rule,
}
port = netdev_priv(ingress_dev);
- mask = htons(0x1FFF);
- key = htons(port->hw_id);
+ mask = htons(0x1FFF << 3);
+ key = htons(port->hw_id << 3);
rule_match_set(r_match->key, SYS_PORT, key);
rule_match_set(r_match->mask, SYS_PORT, mask);
- mask = htons(0x1FF);
+ mask = htons(0x3FF);
key = htons(port->dev_id);
rule_match_set(r_match->key, SYS_DEV, key);
rule_match_set(r_match->mask, SYS_DEV, mask);
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
index 90e7dfd011c91..5d457bc9acc1c 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
@@ -93,6 +93,9 @@ mtk_flow_get_wdma_info(struct net_device *dev, const u8 *addr, struct mtk_wdma_i
};
struct net_device_path path = {};
+ if (!ctx.dev)
+ return -ENODEV;
+
memcpy(ctx.daddr, addr, sizeof(ctx.daddr));
if (!IS_ENABLED(CONFIG_NET_MEDIATEK_SOC_WED))
diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c
index 8f0cd3196aacf..29be2fcafea3b 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed.c
+++ b/drivers/net/ethernet/mediatek/mtk_wed.c
@@ -651,7 +651,7 @@ mtk_wed_tx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs)
* WDMA RX.
*/
- BUG_ON(idx > ARRAY_SIZE(dev->tx_ring));
+ BUG_ON(idx >= ARRAY_SIZE(dev->tx_ring));
if (mtk_wed_ring_alloc(dev, ring, MTK_WED_TX_RING_SIZE))
return -ENOMEM;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 0d8a0068e4ca1..ce33dbde124d9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -5384,7 +5384,7 @@ static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
{
const struct fib_nh *nh = fib_info_nh(fi, 0);
- return nh->fib_nh_scope == RT_SCOPE_LINK ||
+ return nh->fib_nh_gw_family ||
mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL);
}
@@ -10324,7 +10324,7 @@ static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp,
unsigned long *fields = config->fields;
u32 hash_fields;
- switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
+ switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) {
case 0:
mlxsw_sp_mp4_hash_outer_addr(config);
break;
@@ -10342,7 +10342,7 @@ static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_mp_hash_inner_l3(config);
break;
case 3:
- hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+ hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
/* Outer */
MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP);
MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP);
@@ -10523,13 +10523,14 @@ static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
struct net *net = mlxsw_sp_net(mlxsw_sp);
- bool usp = net->ipv4.sysctl_ip_fwd_update_priority;
char rgcr_pl[MLXSW_REG_RGCR_LEN];
u64 max_rifs;
+ bool usp;
if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
return -EIO;
max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
+ usp = READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority);
mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c
index 005e56ea5da12..5893770bfd946 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c
@@ -75,6 +75,9 @@ static int __lan966x_mac_learn(struct lan966x *lan966x, int pgid,
unsigned int vid,
enum macaccess_entry_type type)
{
+ int ret;
+
+ spin_lock(&lan966x->mac_lock);
lan966x_mac_select(lan966x, mac, vid);
/* Issue a write command */
@@ -86,7 +89,10 @@ static int __lan966x_mac_learn(struct lan966x *lan966x, int pgid,
ANA_MACACCESS_MAC_TABLE_CMD_SET(MACACCESS_CMD_LEARN),
lan966x, ANA_MACACCESS);
- return lan966x_mac_wait_for_completion(lan966x);
+ ret = lan966x_mac_wait_for_completion(lan966x);
+ spin_unlock(&lan966x->mac_lock);
+
+ return ret;
}
/* The mask of the front ports is encoded inside the mac parameter via a call
@@ -113,11 +119,13 @@ int lan966x_mac_learn(struct lan966x *lan966x, int port,
return __lan966x_mac_learn(lan966x, port, false, mac, vid, type);
}
-int lan966x_mac_forget(struct lan966x *lan966x,
- const unsigned char mac[ETH_ALEN],
- unsigned int vid,
- enum macaccess_entry_type type)
+static int lan966x_mac_forget_locked(struct lan966x *lan966x,
+ const unsigned char mac[ETH_ALEN],
+ unsigned int vid,
+ enum macaccess_entry_type type)
{
+ lockdep_assert_held(&lan966x->mac_lock);
+
lan966x_mac_select(lan966x, mac, vid);
/* Issue a forget command */
@@ -128,6 +136,20 @@ int lan966x_mac_forget(struct lan966x *lan966x,
return lan966x_mac_wait_for_completion(lan966x);
}
+int lan966x_mac_forget(struct lan966x *lan966x,
+ const unsigned char mac[ETH_ALEN],
+ unsigned int vid,
+ enum macaccess_entry_type type)
+{
+ int ret;
+
+ spin_lock(&lan966x->mac_lock);
+ ret = lan966x_mac_forget_locked(lan966x, mac, vid, type);
+ spin_unlock(&lan966x->mac_lock);
+
+ return ret;
+}
+
int lan966x_mac_cpu_learn(struct lan966x *lan966x, const char *addr, u16 vid)
{
return lan966x_mac_learn(lan966x, PGID_CPU, addr, vid, ENTRYTYPE_LOCKED);
@@ -161,7 +183,7 @@ static struct lan966x_mac_entry *lan966x_mac_alloc_entry(const unsigned char *ma
{
struct lan966x_mac_entry *mac_entry;
- mac_entry = kzalloc(sizeof(*mac_entry), GFP_KERNEL);
+ mac_entry = kzalloc(sizeof(*mac_entry), GFP_ATOMIC);
if (!mac_entry)
return NULL;
@@ -179,7 +201,6 @@ static struct lan966x_mac_entry *lan966x_mac_find_entry(struct lan966x *lan966x,
struct lan966x_mac_entry *res = NULL;
struct lan966x_mac_entry *mac_entry;
- spin_lock(&lan966x->mac_lock);
list_for_each_entry(mac_entry, &lan966x->mac_entries, list) {
if (mac_entry->vid == vid &&
ether_addr_equal(mac, mac_entry->mac) &&
@@ -188,7 +209,6 @@ static struct lan966x_mac_entry *lan966x_mac_find_entry(struct lan966x *lan966x,
break;
}
}
- spin_unlock(&lan966x->mac_lock);
return res;
}
@@ -231,8 +251,11 @@ int lan966x_mac_add_entry(struct lan966x *lan966x, struct lan966x_port *port,
{
struct lan966x_mac_entry *mac_entry;
- if (lan966x_mac_lookup(lan966x, addr, vid, ENTRYTYPE_NORMAL))
+ spin_lock(&lan966x->mac_lock);
+ if (lan966x_mac_lookup(lan966x, addr, vid, ENTRYTYPE_NORMAL)) {
+ spin_unlock(&lan966x->mac_lock);
return 0;
+ }
/* In case the entry already exists, don't add it again to SW,
* just update HW, but we need to look in the actual HW because
@@ -241,21 +264,25 @@ int lan966x_mac_add_entry(struct lan966x *lan966x, struct lan966x_port *port,
* add the entry but without the extern_learn flag.
*/
mac_entry = lan966x_mac_find_entry(lan966x, addr, vid, port->chip_port);
- if (mac_entry)
- return lan966x_mac_learn(lan966x, port->chip_port,
- addr, vid, ENTRYTYPE_LOCKED);
+ if (mac_entry) {
+ spin_unlock(&lan966x->mac_lock);
+ goto mac_learn;
+ }
mac_entry = lan966x_mac_alloc_entry(addr, vid, port->chip_port);
- if (!mac_entry)
+ if (!mac_entry) {
+ spin_unlock(&lan966x->mac_lock);
return -ENOMEM;
+ }
- spin_lock(&lan966x->mac_lock);
list_add_tail(&mac_entry->list, &lan966x->mac_entries);
spin_unlock(&lan966x->mac_lock);
- lan966x_mac_learn(lan966x, port->chip_port, addr, vid, ENTRYTYPE_LOCKED);
lan966x_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, addr, vid, port->dev);
+mac_learn:
+ lan966x_mac_learn(lan966x, port->chip_port, addr, vid, ENTRYTYPE_LOCKED);
+
return 0;
}
@@ -269,8 +296,9 @@ int lan966x_mac_del_entry(struct lan966x *lan966x, const unsigned char *addr,
list) {
if (mac_entry->vid == vid &&
ether_addr_equal(addr, mac_entry->mac)) {
- lan966x_mac_forget(lan966x, mac_entry->mac, mac_entry->vid,
- ENTRYTYPE_LOCKED);
+ lan966x_mac_forget_locked(lan966x, mac_entry->mac,
+ mac_entry->vid,
+ ENTRYTYPE_LOCKED);
list_del(&mac_entry->list);
kfree(mac_entry);
@@ -288,8 +316,8 @@ void lan966x_mac_purge_entries(struct lan966x *lan966x)
spin_lock(&lan966x->mac_lock);
list_for_each_entry_safe(mac_entry, tmp, &lan966x->mac_entries,
list) {
- lan966x_mac_forget(lan966x, mac_entry->mac, mac_entry->vid,
- ENTRYTYPE_LOCKED);
+ lan966x_mac_forget_locked(lan966x, mac_entry->mac,
+ mac_entry->vid, ENTRYTYPE_LOCKED);
list_del(&mac_entry->list);
kfree(mac_entry);
@@ -325,10 +353,13 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row,
{
struct lan966x_mac_entry *mac_entry, *tmp;
unsigned char mac[ETH_ALEN] __aligned(2);
+ struct list_head mac_deleted_entries;
u32 dest_idx;
u32 column;
u16 vid;
+ INIT_LIST_HEAD(&mac_deleted_entries);
+
spin_lock(&lan966x->mac_lock);
list_for_each_entry_safe(mac_entry, tmp, &lan966x->mac_entries, list) {
bool found = false;
@@ -362,20 +393,26 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row,
}
if (!found) {
- /* Notify the bridge that the entry doesn't exist
- * anymore in the HW and remove the entry from the SW
- * list
- */
- lan966x_mac_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE,
- mac_entry->mac, mac_entry->vid,
- lan966x->ports[mac_entry->port_index]->dev);
-
list_del(&mac_entry->list);
- kfree(mac_entry);
+ /* Move the entry from SW list to a tmp list such that
+ * it would be deleted later
+ */
+ list_add_tail(&mac_entry->list, &mac_deleted_entries);
}
}
spin_unlock(&lan966x->mac_lock);
+ list_for_each_entry_safe(mac_entry, tmp, &mac_deleted_entries, list) {
+ /* Notify the bridge that the entry doesn't exist
+ * anymore in the HW
+ */
+ lan966x_mac_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE,
+ mac_entry->mac, mac_entry->vid,
+ lan966x->ports[mac_entry->port_index]->dev);
+ list_del(&mac_entry->list);
+ kfree(mac_entry);
+ }
+
/* Now go to the list of columns and see if any entry was not in the SW
* list, then that means that the entry is new so it needs to notify the
* bridge.
@@ -396,13 +433,20 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row,
if (WARN_ON(dest_idx >= lan966x->num_phys_ports))
continue;
+ spin_lock(&lan966x->mac_lock);
+ mac_entry = lan966x_mac_find_entry(lan966x, mac, vid, dest_idx);
+ if (mac_entry) {
+ spin_unlock(&lan966x->mac_lock);
+ continue;
+ }
+
mac_entry = lan966x_mac_alloc_entry(mac, vid, dest_idx);
- if (!mac_entry)
+ if (!mac_entry) {
+ spin_unlock(&lan966x->mac_lock);
return;
+ }
mac_entry->row = row;
-
- spin_lock(&lan966x->mac_lock);
list_add_tail(&mac_entry->list, &lan966x->mac_entries);
spin_unlock(&lan966x->mac_lock);
@@ -424,6 +468,7 @@ irqreturn_t lan966x_mac_irq_handler(struct lan966x *lan966x)
lan966x, ANA_MACTINDX);
while (1) {
+ spin_lock(&lan966x->mac_lock);
lan_rmw(ANA_MACACCESS_MAC_TABLE_CMD_SET(MACACCESS_CMD_SYNC_GET_NEXT),
ANA_MACACCESS_MAC_TABLE_CMD,
lan966x, ANA_MACACCESS);
@@ -447,12 +492,15 @@ irqreturn_t lan966x_mac_irq_handler(struct lan966x *lan966x)
stop = false;
if (column == LAN966X_MAC_COLUMNS - 1 &&
- index == 0 && stop)
+ index == 0 && stop) {
+ spin_unlock(&lan966x->mac_lock);
break;
+ }
entry[column].mach = lan_rd(lan966x, ANA_MACHDATA);
entry[column].macl = lan_rd(lan966x, ANA_MACLDATA);
entry[column].maca = lan_rd(lan966x, ANA_MACACCESS);
+ spin_unlock(&lan966x->mac_lock);
/* Once all the columns are read process them */
if (column == LAN966X_MAC_COLUMNS - 1) {
diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
index 0147de4053653..ffb6f6d05a07b 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -474,7 +474,7 @@ nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun,
set_tun->ttl = ip4_dst_hoplimit(&rt->dst);
ip_rt_put(rt);
} else {
- set_tun->ttl = net->ipv4.sysctl_ip_default_ttl;
+ set_tun->ttl = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
}
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 38fe77d1035e3..3fe720c5dc9fc 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -298,6 +298,11 @@ static void get_arttime(struct mii_bus *mii, int intel_adhoc_addr,
*art_time = ns;
}
+static int stmmac_cross_ts_isr(struct stmmac_priv *priv)
+{
+ return (readl(priv->ioaddr + GMAC_INT_STATUS) & GMAC_INT_TSIE);
+}
+
static int intel_crosststamp(ktime_t *device,
struct system_counterval_t *system,
void *ctx)
@@ -313,8 +318,6 @@ static int intel_crosststamp(ktime_t *device,
u32 num_snapshot;
u32 gpio_value;
u32 acr_value;
- int ret;
- u32 v;
int i;
if (!boot_cpu_has(X86_FEATURE_ART))
@@ -328,6 +331,8 @@ static int intel_crosststamp(ktime_t *device,
if (priv->plat->ext_snapshot_en)
return -EBUSY;
+ priv->plat->int_snapshot_en = 1;
+
mutex_lock(&priv->aux_ts_lock);
/* Enable Internal snapshot trigger */
acr_value = readl(ptpaddr + PTP_ACR);
@@ -347,6 +352,7 @@ static int intel_crosststamp(ktime_t *device,
break;
default:
mutex_unlock(&priv->aux_ts_lock);
+ priv->plat->int_snapshot_en = 0;
return -EINVAL;
}
writel(acr_value, ptpaddr + PTP_ACR);
@@ -368,13 +374,12 @@ static int intel_crosststamp(ktime_t *device,
gpio_value |= GMAC_GPO1;
writel(gpio_value, ioaddr + GMAC_GPIO_STATUS);
- /* Poll for time sync operation done */
- ret = readl_poll_timeout(priv->ioaddr + GMAC_INT_STATUS, v,
- (v & GMAC_INT_TSIE), 100, 10000);
-
- if (ret == -ETIMEDOUT) {
- pr_err("%s: Wait for time sync operation timeout\n", __func__);
- return ret;
+ /* Time sync done Indication - Interrupt method */
+ if (!wait_event_interruptible_timeout(priv->tstamp_busy_wait,
+ stmmac_cross_ts_isr(priv),
+ HZ / 100)) {
+ priv->plat->int_snapshot_en = 0;
+ return -ETIMEDOUT;
}
num_snapshot = (readl(ioaddr + GMAC_TIMESTAMP_STATUS) &
@@ -392,6 +397,7 @@ static int intel_crosststamp(ktime_t *device,
}
system->cycles *= intel_priv->crossts_adj;
+ priv->plat->int_snapshot_en = 0;
return 0;
}
@@ -576,6 +582,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
plat->has_crossts = true;
plat->crosststamp = intel_crosststamp;
+ plat->int_snapshot_en = 0;
/* Setup MSI vector offset specific to Intel mGbE controller */
plat->msi_mac_vec = 29;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
index 6ff88df587673..ca8ab290013ce 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
@@ -576,32 +576,7 @@ static int mediatek_dwmac_init(struct platform_device *pdev, void *priv)
}
}
- ret = clk_bulk_prepare_enable(variant->num_clks, plat->clks);
- if (ret) {
- dev_err(plat->dev, "failed to enable clks, err = %d\n", ret);
- return ret;
- }
-
- ret = clk_prepare_enable(plat->rmii_internal_clk);
- if (ret) {
- dev_err(plat->dev, "failed to enable rmii internal clk, err = %d\n", ret);
- goto err_clk;
- }
-
return 0;
-
-err_clk:
- clk_bulk_disable_unprepare(variant->num_clks, plat->clks);
- return ret;
-}
-
-static void mediatek_dwmac_exit(struct platform_device *pdev, void *priv)
-{
- struct mediatek_dwmac_plat_data *plat = priv;
- const struct mediatek_dwmac_variant *variant = plat->variant;
-
- clk_disable_unprepare(plat->rmii_internal_clk);
- clk_bulk_disable_unprepare(variant->num_clks, plat->clks);
}
static int mediatek_dwmac_clks_config(void *priv, bool enabled)
@@ -643,7 +618,6 @@ static int mediatek_dwmac_common_data(struct platform_device *pdev,
plat->addr64 = priv_plat->variant->dma_bit_mask;
plat->bsp_priv = priv_plat;
plat->init = mediatek_dwmac_init;
- plat->exit = mediatek_dwmac_exit;
plat->clks_config = mediatek_dwmac_clks_config;
if (priv_plat->variant->dwmac_fix_mac_speed)
plat->fix_mac_speed = priv_plat->variant->dwmac_fix_mac_speed;
@@ -712,13 +686,32 @@ static int mediatek_dwmac_probe(struct platform_device *pdev)
mediatek_dwmac_common_data(pdev, plat_dat, priv_plat);
mediatek_dwmac_init(pdev, priv_plat);
+ ret = mediatek_dwmac_clks_config(priv_plat, true);
+ if (ret)
+ return ret;
+
ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
if (ret) {
stmmac_remove_config_dt(pdev, plat_dat);
- return ret;
+ goto err_drv_probe;
}
return 0;
+
+err_drv_probe:
+ mediatek_dwmac_clks_config(priv_plat, false);
+ return ret;
+}
+
+static int mediatek_dwmac_remove(struct platform_device *pdev)
+{
+ struct mediatek_dwmac_plat_data *priv_plat = get_stmmac_bsp_priv(&pdev->dev);
+ int ret;
+
+ ret = stmmac_pltfr_remove(pdev);
+ mediatek_dwmac_clks_config(priv_plat, false);
+
+ return ret;
}
static const struct of_device_id mediatek_dwmac_match[] = {
@@ -733,7 +726,7 @@ MODULE_DEVICE_TABLE(of, mediatek_dwmac_match);
static struct platform_driver mediatek_dwmac_driver = {
.probe = mediatek_dwmac_probe,
- .remove = stmmac_pltfr_remove,
+ .remove = mediatek_dwmac_remove,
.driver = {
.name = "dwmac-mediatek",
.pm = &stmmac_pltfr_pm_ops,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
index 462ca7ed095a2..71dad409f78b0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
@@ -150,7 +150,8 @@
#define GMAC_PCS_IRQ_DEFAULT (GMAC_INT_RGSMIIS | GMAC_INT_PCS_LINK | \
GMAC_INT_PCS_ANE)
-#define GMAC_INT_DEFAULT_ENABLE (GMAC_INT_PMT_EN | GMAC_INT_LPI_EN)
+#define GMAC_INT_DEFAULT_ENABLE (GMAC_INT_PMT_EN | GMAC_INT_LPI_EN | \
+ GMAC_INT_TSIE)
enum dwmac4_irq_status {
time_stamp_irq = 0x00001000,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index fd41db65fe1df..d8f1fbc25bdd3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -23,6 +23,7 @@
static void dwmac4_core_init(struct mac_device_info *hw,
struct net_device *dev)
{
+ struct stmmac_priv *priv = netdev_priv(dev);
void __iomem *ioaddr = hw->pcsr;
u32 value = readl(ioaddr + GMAC_CONFIG);
@@ -58,6 +59,9 @@ static void dwmac4_core_init(struct mac_device_info *hw,
value |= GMAC_INT_FPE_EN;
writel(value, ioaddr + GMAC_INT_EN);
+
+ if (GMAC_INT_DEFAULT_ENABLE & GMAC_INT_TSIE)
+ init_waitqueue_head(&priv->tstamp_busy_wait);
}
static void dwmac4_rx_queue_enable(struct mac_device_info *hw,
@@ -219,6 +223,9 @@ static void dwmac4_map_mtl_dma(struct mac_device_info *hw, u32 queue, u32 chan)
if (queue == 0 || queue == 4) {
value &= ~MTL_RXQ_DMA_Q04MDMACH_MASK;
value |= MTL_RXQ_DMA_Q04MDMACH(chan);
+ } else if (queue > 4) {
+ value &= ~MTL_RXQ_DMA_QXMDMACH_MASK(queue - 4);
+ value |= MTL_RXQ_DMA_QXMDMACH(chan, queue - 4);
} else {
value &= ~MTL_RXQ_DMA_QXMDMACH_MASK(queue);
value |= MTL_RXQ_DMA_QXMDMACH(chan, queue);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 57970ae2178da..f9e83964aa7ea 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -266,6 +266,7 @@ struct stmmac_priv {
rwlock_t ptp_lock;
/* Protects auxiliary snapshot registers from concurrent access. */
struct mutex aux_ts_lock;
+ wait_queue_head_t tstamp_busy_wait;
void __iomem *mmcaddr;
void __iomem *ptpaddr;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index abfb3cd5958df..9c3055ee26085 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -803,14 +803,6 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev,
netdev_warn(priv->dev,
"Setting EEE tx-lpi is not supported\n");
- if (priv->hw->xpcs) {
- ret = xpcs_config_eee(priv->hw->xpcs,
- priv->plat->mult_fact_100ns,
- edata->eee_enabled);
- if (ret)
- return ret;
- }
-
if (!edata->eee_enabled)
stmmac_disable_eee_mode(priv);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
index 92d32940aff00..764832f4dae1a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
@@ -179,6 +179,11 @@ static void timestamp_interrupt(struct stmmac_priv *priv)
u64 ptp_time;
int i;
+ if (priv->plat->int_snapshot_en) {
+ wake_up(&priv->tstamp_busy_wait);
+ return;
+ }
+
tsync_int = readl(priv->ioaddr + GMAC_INT_STATUS) & GMAC_INT_TSIE;
if (!tsync_int)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index d1a7cf4567bc2..c5f33630e7718 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -834,19 +834,10 @@ int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags)
struct timespec64 now;
u32 sec_inc = 0;
u64 temp = 0;
- int ret;
if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp))
return -EOPNOTSUPP;
- ret = clk_prepare_enable(priv->plat->clk_ptp_ref);
- if (ret < 0) {
- netdev_warn(priv->dev,
- "failed to enable PTP reference clock: %pe\n",
- ERR_PTR(ret));
- return ret;
- }
-
stmmac_config_hw_tstamping(priv, priv->ptpaddr, systime_flags);
priv->systime_flags = systime_flags;
@@ -3270,6 +3261,14 @@ static int stmmac_hw_setup(struct net_device *dev, bool ptp_register)
stmmac_mmc_setup(priv);
+ if (ptp_register) {
+ ret = clk_prepare_enable(priv->plat->clk_ptp_ref);
+ if (ret < 0)
+ netdev_warn(priv->dev,
+ "failed to enable PTP reference clock: %pe\n",
+ ERR_PTR(ret));
+ }
+
ret = stmmac_init_ptp(priv);
if (ret == -EOPNOTSUPP)
netdev_info(priv->dev, "PTP not supported by HW\n");
@@ -7213,8 +7212,6 @@ int stmmac_dvr_remove(struct device *dev)
netdev_info(priv->dev, "%s: removing driver", __func__);
pm_runtime_get_sync(dev);
- pm_runtime_disable(dev);
- pm_runtime_put_noidle(dev);
stmmac_stop_all_dma(priv);
stmmac_mac_set(priv, priv->ioaddr, false);
@@ -7241,6 +7238,9 @@ int stmmac_dvr_remove(struct device *dev)
mutex_destroy(&priv->lock);
bitmap_free(priv->af_xdp_zc_qps);
+ pm_runtime_disable(dev);
+ pm_runtime_put_noidle(dev);
+
return 0;
}
EXPORT_SYMBOL_GPL(stmmac_dvr_remove);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 11e1055e8260f..9f5cac4000da6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -815,7 +815,13 @@ static int __maybe_unused stmmac_pltfr_noirq_resume(struct device *dev)
if (ret)
return ret;
- stmmac_init_tstamp_counter(priv, priv->systime_flags);
+ ret = clk_prepare_enable(priv->plat->clk_ptp_ref);
+ if (ret < 0) {
+ netdev_warn(priv->dev,
+ "failed to enable PTP reference clock: %pe\n",
+ ERR_PTR(ret));
+ return ret;
+ }
}
return 0;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
index e45fb191d8e6e..4d11980dcd64d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
@@ -175,11 +175,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp,
struct stmmac_priv *priv =
container_of(ptp, struct stmmac_priv, ptp_clock_ops);
void __iomem *ptpaddr = priv->ptpaddr;
- void __iomem *ioaddr = priv->hw->pcsr;
struct stmmac_pps_cfg *cfg;
- u32 intr_value, acr_value;
int ret = -EOPNOTSUPP;
unsigned long flags;
+ u32 acr_value;
switch (rq->type) {
case PTP_CLK_REQ_PEROUT:
@@ -213,19 +212,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp,
netdev_dbg(priv->dev, "Auxiliary Snapshot %d enabled.\n",
priv->plat->ext_snapshot_num >>
PTP_ACR_ATSEN_SHIFT);
- /* Enable Timestamp Interrupt */
- intr_value = readl(ioaddr + GMAC_INT_EN);
- intr_value |= GMAC_INT_TSIE;
- writel(intr_value, ioaddr + GMAC_INT_EN);
-
} else {
netdev_dbg(priv->dev, "Auxiliary Snapshot %d disabled.\n",
priv->plat->ext_snapshot_num >>
PTP_ACR_ATSEN_SHIFT);
- /* Disable Timestamp Interrupt */
- intr_value = readl(ioaddr + GMAC_INT_EN);
- intr_value &= ~GMAC_INT_TSIE;
- writel(intr_value, ioaddr + GMAC_INT_EN);
}
writel(acr_value, ptpaddr + PTP_ACR);
mutex_unlock(&priv->aux_ts_lock);
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index b082819509e10..0f6efaabaa32b 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -32,7 +32,7 @@
#define NETNEXT_VERSION "12"
/* Information for net */
-#define NET_VERSION "12"
+#define NET_VERSION "13"
#define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION
#define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>"
@@ -5917,7 +5917,8 @@ static void r8153_enter_oob(struct r8152 *tp)
wait_oob_link_list_ready(tp);
- ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, mtu_to_size(tp->netdev->mtu));
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, 1522);
+ ocp_write_byte(tp, MCU_TYPE_PLA, PLA_MTPS, MTPS_DEFAULT);
switch (tp->version) {
case RTL_VER_03:
@@ -5953,6 +5954,10 @@ static void r8153_enter_oob(struct r8152 *tp)
ocp_data |= NOW_IS_OOB | DIS_MCU_CLROOB;
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
+ ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7);
+ ocp_data |= MCU_BORW_EN;
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data);
+
rxdy_gated_en(tp, false);
ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR);
@@ -6555,6 +6560,9 @@ static void rtl8156_down(struct r8152 *tp)
rtl_disable(tp);
rtl_reset_bmu(tp);
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, 1522);
+ ocp_write_byte(tp, MCU_TYPE_PLA, PLA_MTPS, MTPS_DEFAULT);
+
/* Clear teredo wake event. bit[15:8] is the teredo wakeup
* type. Set it to zero. bits[7:0] are the W1C bits about
* the events. Set them to all 1 to clear them.
@@ -6565,6 +6573,10 @@ static void rtl8156_down(struct r8152 *tp)
ocp_data |= NOW_IS_OOB;
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
+ ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7);
+ ocp_data |= MCU_BORW_EN;
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data);
+
rtl_rx_vlan_en(tp, true);
rxdy_gated_en(tp, false);
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 29917850f0794..8df475db88c04 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -260,6 +260,7 @@ struct plat_stmmacenet_data {
bool has_crossts;
int int_snapshot_num;
int ext_snapshot_num;
+ bool int_snapshot_en;
bool ext_snapshot_en;
bool multi_msi_en;
int msi_mac_vec;
diff --git a/include/net/amt.h b/include/net/amt.h
index 0e40c3d64fcf4..08fc30cf2f34c 100644
--- a/include/net/amt.h
+++ b/include/net/amt.h
@@ -78,6 +78,15 @@ enum amt_status {
#define AMT_STATUS_MAX (__AMT_STATUS_MAX - 1)
+/* Gateway events only */
+enum amt_event {
+ AMT_EVENT_NONE,
+ AMT_EVENT_RECEIVE,
+ AMT_EVENT_SEND_DISCOVERY,
+ AMT_EVENT_SEND_REQUEST,
+ __AMT_EVENT_MAX,
+};
+
struct amt_header {
#if defined(__LITTLE_ENDIAN_BITFIELD)
u8 type:4,
@@ -292,6 +301,12 @@ struct amt_group_node {
struct hlist_head sources[];
};
+#define AMT_MAX_EVENTS 16
+struct amt_events {
+ enum amt_event event;
+ struct sk_buff *skb;
+};
+
struct amt_dev {
struct net_device *dev;
struct net_device *stream_dev;
@@ -308,6 +323,7 @@ struct amt_dev {
struct delayed_work req_wq;
/* Protected by RTNL */
struct delayed_work secret_wq;
+ struct work_struct event_wq;
/* AMT status */
enum amt_status status;
/* Generated key */
@@ -345,6 +361,10 @@ struct amt_dev {
/* Used only in gateway mode */
u64 mac:48,
reserved:16;
+ /* AMT gateway side message handler queue */
+ struct amt_events events[AMT_MAX_EVENTS];
+ u8 event_idx;
+ u8 nr_events;
};
#define AMT_TOS 0xc0
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index ebfa3df6f8dc3..fd6b510d114bc 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -179,7 +179,7 @@ static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if,
int dif, int sdif)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
- return inet_bound_dev_eq(!!net->ipv4.sysctl_tcp_l3mdev_accept,
+ return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept),
bound_dev_if, dif, sdif);
#else
return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index daead5fb389ae..6395f6b9a5d29 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -107,7 +107,8 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb)
{
- if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)
+ if (!sk->sk_mark &&
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept))
return skb->mark;
return sk->sk_mark;
@@ -120,7 +121,7 @@ static inline int inet_request_bound_dev_if(const struct sock *sk,
#ifdef CONFIG_NET_L3_MASTER_DEV
struct net *net = sock_net(sk);
- if (!bound_dev_if && net->ipv4.sysctl_tcp_l3mdev_accept)
+ if (!bound_dev_if && READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept))
return l3mdev_master_ifindex_by_index(net, skb->skb_iif);
#endif
@@ -132,7 +133,7 @@ static inline int inet_sk_bound_l3mdev(const struct sock *sk)
#ifdef CONFIG_NET_L3_MASTER_DEV
struct net *net = sock_net(sk);
- if (!net->ipv4.sysctl_tcp_l3mdev_accept)
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept))
return l3mdev_master_ifindex_by_index(net,
sk->sk_bound_dev_if);
#endif
@@ -374,7 +375,7 @@ static inline bool inet_get_convert_csum(struct sock *sk)
static inline bool inet_can_nonlocal_bind(struct net *net,
struct inet_sock *inet)
{
- return net->ipv4.sysctl_ip_nonlocal_bind ||
+ return READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind) ||
inet->freebind || inet->transparent;
}
diff --git a/include/net/ip.h b/include/net/ip.h
index 26fffda78cca4..1c979fd1904ce 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -357,7 +357,7 @@ static inline bool sysctl_dev_name_is_allowed(const char *name)
static inline bool inet_port_requires_bind_service(struct net *net, unsigned short port)
{
- return port < net->ipv4.sysctl_ip_prot_sock;
+ return port < READ_ONCE(net->ipv4.sysctl_ip_prot_sock);
}
#else
@@ -384,7 +384,7 @@ void ipfrag_init(void);
void ip_static_sysctl_init(void);
#define IP4_REPLY_MARK(net, mark) \
- ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0)
+ (READ_ONCE((net)->ipv4.sysctl_fwmark_reflect) ? (mark) : 0)
static inline bool ip_is_fragment(const struct iphdr *iph)
{
@@ -446,7 +446,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
struct net *net = dev_net(dst->dev);
unsigned int mtu;
- if (net->ipv4.sysctl_ip_fwd_use_pmtu ||
+ if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) ||
ip_mtu_locked(dst) ||
!forwarding) {
mtu = rt->rt_pmtu;
diff --git a/include/net/protocol.h b/include/net/protocol.h
index f51c06ae365f5..6aef8cb11cc8c 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -35,8 +35,6 @@
/* This is used to register protocols. */
struct net_protocol {
- int (*early_demux)(struct sk_buff *skb);
- int (*early_demux_handler)(struct sk_buff *skb);
int (*handler)(struct sk_buff *skb);
/* This returns an error if we weren't able to handle the error. */
@@ -52,8 +50,6 @@ struct net_protocol {
#if IS_ENABLED(CONFIG_IPV6)
struct inet6_protocol {
- void (*early_demux)(struct sk_buff *skb);
- void (*early_demux_handler)(struct sk_buff *skb);
int (*handler)(struct sk_buff *skb);
/* This returns an error if we weren't able to handle the error. */
diff --git a/include/net/route.h b/include/net/route.h
index 991a3985712dc..bbcf2aba149f9 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -373,7 +373,7 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst)
struct net *net = dev_net(dst->dev);
if (hoplimit == 0)
- hoplimit = net->ipv4.sysctl_ip_default_ttl;
+ hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
return hoplimit;
}
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 1e99f5c61f849..071735e10872c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -932,7 +932,7 @@ extern const struct inet_connection_sock_af_ops ipv6_specific;
INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb));
INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb));
-INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *skb));
+void tcp_v6_early_demux(struct sk_buff *skb);
#endif
@@ -1403,8 +1403,8 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
s32 delta;
- if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out ||
- ca_ops->cong_control)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) ||
+ tp->packets_out || ca_ops->cong_control)
return;
delta = tcp_jiffies32 - tp->lsndtime;
if (delta > inet_csk(sk)->icsk_rto)
@@ -1493,21 +1493,24 @@ static inline int keepalive_intvl_when(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
- return tp->keepalive_intvl ? : net->ipv4.sysctl_tcp_keepalive_intvl;
+ return tp->keepalive_intvl ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl);
}
static inline int keepalive_time_when(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
- return tp->keepalive_time ? : net->ipv4.sysctl_tcp_keepalive_time;
+ return tp->keepalive_time ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
}
static inline int keepalive_probes(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
- return tp->keepalive_probes ? : net->ipv4.sysctl_tcp_keepalive_probes;
+ return tp->keepalive_probes ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes);
}
static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
@@ -1520,7 +1523,8 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
static inline int tcp_fin_time(const struct sock *sk)
{
- int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout;
+ int fin_timeout = tcp_sk(sk)->linger2 ? :
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fin_timeout);
const int rto = inet_csk(sk)->icsk_rto;
if (fin_timeout < (rto << 2) - (rto >> 1))
@@ -2023,7 +2027,7 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
- return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat;
+ return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
}
bool tcp_stream_memory_free(const struct sock *sk, int wake);
diff --git a/include/net/udp.h b/include/net/udp.h
index b83a003305667..8dd4aa1485a69 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -167,7 +167,7 @@ static inline void udp_csum_pull_header(struct sk_buff *skb)
typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport,
__be16 dport);
-INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *));
+void udp_v6_early_demux(struct sk_buff *skb);
INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *));
struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
@@ -238,7 +238,7 @@ static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if,
int dif, int sdif)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
- return inet_bound_dev_eq(!!net->ipv4.sysctl_udp_l3mdev_accept,
+ return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_udp_l3mdev_accept),
bound_dev_if, dif, sdif);
#else
return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
diff --git a/net/core/filter.c b/net/core/filter.c
index 2a6a0b0ce43e4..7950f75207658 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -7041,7 +7041,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len
if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
return -EINVAL;
- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies))
return -EINVAL;
if (!th->ack || th->rst || th->syn)
@@ -7116,7 +7116,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
return -EINVAL;
- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies))
return -ENOENT;
if (!th->syn || th->ack || th->fin || th->rst)
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 5f85e01d4093b..b0ff6153be623 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -64,7 +64,7 @@ u32 secure_tcpv6_ts_off(const struct net *net,
.daddr = *(struct in6_addr *)daddr,
};
- if (net->ipv4.sysctl_tcp_timestamps != 1)
+ if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
return 0;
ts_secret_init();
@@ -120,7 +120,7 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
#ifdef CONFIG_INET
u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr)
{
- if (net->ipv4.sysctl_tcp_timestamps != 1)
+ if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
return 0;
ts_secret_init();
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 3f00a28fe762a..5daa1fa542490 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -387,7 +387,7 @@ void reuseport_stop_listen_sock(struct sock *sk)
prog = rcu_dereference_protected(reuse->prog,
lockdep_is_held(&reuseport_lock));
- if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req ||
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_migrate_req) ||
(prog && prog->expected_attach_type == BPF_SK_REUSEPORT_SELECT_OR_MIGRATE)) {
/* Migration capable, move sk from the listening section
* to the closed section.
@@ -545,7 +545,7 @@ struct sock *reuseport_migrate_sock(struct sock *sk,
hash = migrating_sk->sk_hash;
prog = rcu_dereference(reuse->prog);
if (!prog || prog->expected_attach_type != BPF_SK_REUSEPORT_SELECT_OR_MIGRATE) {
- if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req)
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_migrate_req))
goto select_by_hash;
goto failure;
}
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 3738f2d40a0ba..2dd76eb1621c7 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -248,6 +248,7 @@ static void dsa_port_reset_vlan_filtering(struct dsa_port *dp,
struct netlink_ext_ack extack = {0};
bool change_vlan_filtering = false;
struct dsa_switch *ds = dp->ds;
+ struct dsa_port *other_dp;
bool vlan_filtering;
int err;
@@ -270,8 +271,8 @@ static void dsa_port_reset_vlan_filtering(struct dsa_port *dp,
* VLAN-aware bridge.
*/
if (change_vlan_filtering && ds->vlan_filtering_is_global) {
- dsa_switch_for_each_port(dp, ds) {
- struct net_device *br = dsa_port_bridge_dev_get(dp);
+ dsa_switch_for_each_port(other_dp, ds) {
+ struct net_device *br = dsa_port_bridge_dev_get(other_dp);
if (br && br_vlan_enabled(br)) {
change_vlan_filtering = false;
@@ -799,7 +800,7 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
ds->vlan_filtering = vlan_filtering;
dsa_switch_for_each_user_port(other_dp, ds) {
- struct net_device *slave = dp->slave;
+ struct net_device *slave = other_dp->slave;
/* We might be called in the unbind path, so not
* all slave devices might still be registered.
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index ac67f6b4ec700..252c8bceaba42 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -217,7 +217,7 @@ int inet_listen(struct socket *sock, int backlog)
* because the socket was in TCP_LISTEN state previously but
* was shutdown() rather than close().
*/
- tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+ tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen);
if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
(tcp_fastopen & TFO_SERVER_ENABLE) &&
!inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
@@ -335,7 +335,7 @@ lookup_protocol:
inet->hdrincl = 1;
}
- if (net->ipv4.sysctl_ip_no_pmtu_disc)
+ if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
inet->pmtudisc = IP_PMTUDISC_DONT;
else
inet->pmtudisc = IP_PMTUDISC_WANT;
@@ -1710,24 +1710,14 @@ static const struct net_protocol igmp_protocol = {
};
#endif
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct net_protocol tcp_protocol = {
- .early_demux = tcp_v4_early_demux,
- .early_demux_handler = tcp_v4_early_demux,
+static const struct net_protocol tcp_protocol = {
.handler = tcp_v4_rcv,
.err_handler = tcp_v4_err,
.no_policy = 1,
.icmp_strict_tag_validation = 1,
};
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct net_protocol udp_protocol = {
- .early_demux = udp_v4_early_demux,
- .early_demux_handler = udp_v4_early_demux,
+static const struct net_protocol udp_protocol = {
.handler = udp_rcv,
.err_handler = udp_err,
.no_policy = 1,
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 6eea1e9e998d5..f8ad04470d3ac 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -507,7 +507,7 @@ static int ah_init_state(struct xfrm_state *x)
if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
crypto_ahash_digestsize(ahash)) {
- pr_info("%s: %s digestsize %u != %hu\n",
+ pr_info("%s: %s digestsize %u != %u\n",
__func__, x->aalg->alg_name,
crypto_ahash_digestsize(ahash),
aalg_desc->uinfo.auth.icv_fullbits / 8);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index b21238df33014..b694f352ce7a8 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -1108,7 +1108,7 @@ static int esp_init_authenc(struct xfrm_state *x)
err = -EINVAL;
if (aalg_desc->uinfo.auth.icv_fullbits / 8 !=
crypto_aead_authsize(aead)) {
- pr_info("ESP: %s digestsize %u != %hu\n",
+ pr_info("ESP: %s digestsize %u != %u\n",
x->aalg->alg_name,
crypto_aead_authsize(aead),
aalg_desc->uinfo.auth.icv_fullbits / 8);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index d9fdcbae16ee3..db7b2503f0682 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -2216,7 +2216,7 @@ void fib_select_multipath(struct fib_result *res, int hash)
}
change_nexthops(fi) {
- if (net->ipv4.sysctl_fib_multipath_use_neigh) {
+ if (READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh)) {
if (!fib_good_nh(nexthop_nh))
continue;
if (!first) {
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 57c4f0d87a7a2..d5d745c3e345b 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -881,7 +881,7 @@ static enum skb_drop_reason icmp_unreach(struct sk_buff *skb)
* values please see
* Documentation/networking/ip-sysctl.rst
*/
- switch (net->ipv4.sysctl_ip_no_pmtu_disc) {
+ switch (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) {
default:
net_dbg_ratelimited("%pI4: fragmentation needed and DF set\n",
&iph->daddr);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index b65d074d9620a..e3ab0cb616246 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -467,7 +467,8 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
if (pmc->multiaddr == IGMP_ALL_HOSTS)
return skb;
- if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(pmc->multiaddr) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return skb;
mtu = READ_ONCE(dev->mtu);
@@ -593,7 +594,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc)
if (pmc->multiaddr == IGMP_ALL_HOSTS)
continue;
if (ipv4_is_local_multicast(pmc->multiaddr) &&
- !net->ipv4.sysctl_igmp_llm_reports)
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
continue;
spin_lock_bh(&pmc->lock);
if (pmc->sfcount[MCAST_EXCLUDE])
@@ -736,7 +737,8 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
if (type == IGMPV3_HOST_MEMBERSHIP_REPORT)
return igmpv3_send_report(in_dev, pmc);
- if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(group) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return 0;
if (type == IGMP_HOST_LEAVE_MESSAGE)
@@ -825,7 +827,7 @@ static void igmp_ifc_event(struct in_device *in_dev)
struct net *net = dev_net(in_dev->dev);
if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
return;
- WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv);
+ WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv));
igmp_ifc_start_timer(in_dev, 1);
}
@@ -920,7 +922,8 @@ static bool igmp_heard_report(struct in_device *in_dev, __be32 group)
if (group == IGMP_ALL_HOSTS)
return false;
- if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(group) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return false;
rcu_read_lock();
@@ -1006,7 +1009,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
* received value was zero, use the default or statically
* configured value.
*/
- in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv;
+ in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL;
/* RFC3376, 8.3. Query Response Interval:
@@ -1045,7 +1048,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
if (im->multiaddr == IGMP_ALL_HOSTS)
continue;
if (ipv4_is_local_multicast(im->multiaddr) &&
- !net->ipv4.sysctl_igmp_llm_reports)
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
continue;
spin_lock_bh(&im->lock);
if (im->tm_running)
@@ -1186,7 +1189,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im,
pmc->interface = im->interface;
in_dev_hold(in_dev);
pmc->multiaddr = im->multiaddr;
- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
pmc->sfmode = im->sfmode;
if (pmc->sfmode == MCAST_INCLUDE) {
struct ip_sf_list *psf;
@@ -1237,9 +1240,11 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im)
swap(im->tomb, pmc->tomb);
swap(im->sources, pmc->sources);
for (psf = im->sources; psf; psf = psf->sf_next)
- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ psf->sf_crcount = in_dev->mr_qrv ?:
+ READ_ONCE(net->ipv4.sysctl_igmp_qrv);
} else {
- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ im->crcount = in_dev->mr_qrv ?:
+ READ_ONCE(net->ipv4.sysctl_igmp_qrv);
}
in_dev_put(pmc->interface);
kfree_pmc(pmc);
@@ -1296,7 +1301,8 @@ static void __igmp_group_dropped(struct ip_mc_list *im, gfp_t gfp)
#ifdef CONFIG_IP_MULTICAST
if (im->multiaddr == IGMP_ALL_HOSTS)
return;
- if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(im->multiaddr) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return;
reporter = im->reporter;
@@ -1338,13 +1344,14 @@ static void igmp_group_added(struct ip_mc_list *im)
#ifdef CONFIG_IP_MULTICAST
if (im->multiaddr == IGMP_ALL_HOSTS)
return;
- if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(im->multiaddr) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return;
if (in_dev->dead)
return;
- im->unsolicit_count = net->ipv4.sysctl_igmp_qrv;
+ im->unsolicit_count = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) {
spin_lock_bh(&im->lock);
igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY);
@@ -1358,7 +1365,7 @@ static void igmp_group_added(struct ip_mc_list *im)
* IN() to IN(A).
*/
if (im->sfmode == MCAST_EXCLUDE)
- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ im->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
igmp_ifc_event(in_dev);
#endif
@@ -1642,7 +1649,7 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev)
if (im->multiaddr == IGMP_ALL_HOSTS)
continue;
if (ipv4_is_local_multicast(im->multiaddr) &&
- !net->ipv4.sysctl_igmp_llm_reports)
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
continue;
/* a failover is happening and switches
@@ -1749,7 +1756,7 @@ static void ip_mc_reset(struct in_device *in_dev)
in_dev->mr_qi = IGMP_QUERY_INTERVAL;
in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL;
- in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
+ in_dev->mr_qrv = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
}
#else
static void ip_mc_reset(struct in_device *in_dev)
@@ -1883,7 +1890,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
#ifdef CONFIG_IP_MULTICAST
if (psf->sf_oldin &&
!IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) {
- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ psf->sf_crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
psf->sf_next = pmc->tomb;
pmc->tomb = psf;
rv = 1;
@@ -1947,7 +1954,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
/* filter mode change */
pmc->sfmode = MCAST_INCLUDE;
#ifdef CONFIG_IP_MULTICAST
- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
for (psf = pmc->sources; psf; psf = psf->sf_next)
psf->sf_crcount = 0;
@@ -2126,7 +2133,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
#ifdef CONFIG_IP_MULTICAST
/* else no filters; keep old mode for reports */
- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
for (psf = pmc->sources; psf; psf = psf->sf_next)
psf->sf_crcount = 0;
@@ -2192,7 +2199,7 @@ static int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr,
count++;
}
err = -ENOBUFS;
- if (count >= net->ipv4.sysctl_igmp_max_memberships)
+ if (count >= READ_ONCE(net->ipv4.sysctl_igmp_max_memberships))
goto done;
iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL);
if (!iml)
@@ -2379,7 +2386,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
}
/* else, add a new source to the filter */
- if (psl && psl->sl_count >= net->ipv4.sysctl_igmp_max_msf) {
+ if (psl && psl->sl_count >= READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) {
err = -ENOBUFS;
goto done;
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 53f5f956d9485..eb31c7158b39c 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -263,7 +263,7 @@ next_port:
goto other_half_scan;
}
- if (net->ipv4.sysctl_ip_autobind_reuse && !relax) {
+ if (READ_ONCE(net->ipv4.sysctl_ip_autobind_reuse) && !relax) {
/* We still have a chance to connect to different destinations */
relax = true;
goto ports_exhausted;
@@ -833,7 +833,8 @@ static void reqsk_timer_handler(struct timer_list *t)
icsk = inet_csk(sk_listener);
net = sock_net(sk_listener);
- max_syn_ack_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
+ max_syn_ack_retries = icsk->icsk_syn_retries ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_synack_retries);
/* Normally all the openreqs are young and become mature
* (i.e. converted to established socket) for first timeout.
* If synack was not acknowledged for 1 second, it means
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index e3aa436a1bdf8..e18931a6d1534 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -157,7 +157,7 @@ int ip_forward(struct sk_buff *skb)
!skb_sec_path(skb))
ip_rt_send_redirect(skb);
- if (net->ipv4.sysctl_ip_fwd_update_priority)
+ if (READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority))
skb->priority = rt_tos2priority(iph->tos);
return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index b1165f717cd1f..1b512390b3cf3 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -312,14 +312,13 @@ static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph,
ip_hdr(hint)->tos == iph->tos;
}
-INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *));
-INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *));
+int tcp_v4_early_demux(struct sk_buff *skb);
+int udp_v4_early_demux(struct sk_buff *skb);
static int ip_rcv_finish_core(struct net *net, struct sock *sk,
struct sk_buff *skb, struct net_device *dev,
const struct sk_buff *hint)
{
const struct iphdr *iph = ip_hdr(skb);
- int (*edemux)(struct sk_buff *skb);
int err, drop_reason;
struct rtable *rt;
@@ -332,21 +331,29 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
goto drop_error;
}
- if (net->ipv4.sysctl_ip_early_demux &&
+ if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
!skb_dst(skb) &&
!skb->sk &&
!ip_is_fragment(iph)) {
- const struct net_protocol *ipprot;
- int protocol = iph->protocol;
-
- ipprot = rcu_dereference(inet_protos[protocol]);
- if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
- err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux,
- udp_v4_early_demux, skb);
- if (unlikely(err))
- goto drop_error;
- /* must reload iph, skb->head might have changed */
- iph = ip_hdr(skb);
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) {
+ tcp_v4_early_demux(skb);
+
+ /* must reload iph, skb->head might have changed */
+ iph = ip_hdr(skb);
+ }
+ break;
+ case IPPROTO_UDP:
+ if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) {
+ err = udp_v4_early_demux(skb);
+ if (unlikely(err))
+ goto drop_error;
+
+ /* must reload iph, skb->head might have changed */
+ iph = ip_hdr(skb);
+ }
+ break;
}
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 445a9ecaefa19..a8a323ecbb54b 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -782,7 +782,7 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
/* numsrc >= (4G-140)/128 overflow in 32 bits */
err = -ENOBUFS;
if (gsf->gf_numsrc >= 0x1ffffff ||
- gsf->gf_numsrc > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
+ gsf->gf_numsrc > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf))
goto out_free_gsf;
err = -EINVAL;
@@ -832,7 +832,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
/* numsrc >= (4G-140)/128 overflow in 32 bits */
err = -ENOBUFS;
- if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
+ if (n > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf))
goto out_free_gsf;
err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode,
&gf32->gf_group, gf32->gf_slist_flex);
@@ -1244,7 +1244,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname,
}
/* numsrc >= (1G-4) overflow in 32 bits */
if (msf->imsf_numsrc >= 0x3ffffffcU ||
- msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
+ msf->imsf_numsrc > READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) {
kfree(msf);
err = -ENOBUFS;
break;
@@ -1606,7 +1606,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
{
struct net *net = sock_net(sk);
val = (inet->uc_ttl == -1 ?
- net->ipv4.sysctl_ip_default_ttl :
+ READ_ONCE(net->ipv4.sysctl_ip_default_ttl) :
inet->uc_ttl);
break;
}
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index 918c61fda0f30..d640adcaf1b12 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -62,7 +62,7 @@ struct sk_buff *nf_reject_skb_v4_tcp_reset(struct net *net,
skb_reserve(nskb, LL_MAX_HEADER);
niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP,
- net->ipv4.sysctl_ip_default_ttl);
+ READ_ONCE(net->ipv4.sysctl_ip_default_ttl));
nf_reject_ip_tcphdr_put(nskb, oldskb, oth);
niph->tot_len = htons(nskb->len);
ip_send_check(niph);
@@ -117,7 +117,7 @@ struct sk_buff *nf_reject_skb_v4_unreach(struct net *net,
skb_reserve(nskb, LL_MAX_HEADER);
niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_ICMP,
- net->ipv4.sysctl_ip_default_ttl);
+ READ_ONCE(net->ipv4.sysctl_ip_default_ttl));
skb_reset_transport_header(nskb);
icmph = skb_put_zero(nskb, sizeof(struct icmphdr));
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 28836071f0a69..0088a4c64d77e 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -387,7 +387,7 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v)
seq_printf(seq, "\nIp: %d %d",
IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2,
- net->ipv4.sysctl_ip_default_ttl);
+ READ_ONCE(net->ipv4.sysctl_ip_default_ttl));
BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 356f535f3443b..4702c61207a87 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1398,7 +1398,7 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
struct fib_info *fi = res->fi;
u32 mtu = 0;
- if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu ||
+ if (READ_ONCE(dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu) ||
fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU))
mtu = fi->fib_mtu;
@@ -1929,7 +1929,7 @@ static u32 fib_multipath_custom_hash_outer(const struct net *net,
const struct sk_buff *skb,
bool *p_has_inner)
{
- u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+ u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
struct flow_keys keys, hash_keys;
if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
@@ -1958,7 +1958,7 @@ static u32 fib_multipath_custom_hash_inner(const struct net *net,
const struct sk_buff *skb,
bool has_inner)
{
- u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+ u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
struct flow_keys keys, hash_keys;
/* We assume the packet carries an encapsulation, but if none was
@@ -2018,7 +2018,7 @@ static u32 fib_multipath_custom_hash_skb(const struct net *net,
static u32 fib_multipath_custom_hash_fl4(const struct net *net,
const struct flowi4 *fl4)
{
- u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+ u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
struct flow_keys hash_keys;
if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
@@ -2048,7 +2048,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
struct flow_keys hash_keys;
u32 mhash = 0;
- switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
+ switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) {
case 0:
memset(&hash_keys, 0, sizeof(hash_keys));
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index b387c48351559..942d2dfa11151 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -247,12 +247,12 @@ bool cookie_timestamp_decode(const struct net *net,
return true;
}
- if (!net->ipv4.sysctl_tcp_timestamps)
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_timestamps))
return false;
tcp_opt->sack_ok = (options & TS_OPT_SACK) ? TCP_SACK_SEEN : 0;
- if (tcp_opt->sack_ok && !net->ipv4.sysctl_tcp_sack)
+ if (tcp_opt->sack_ok && !READ_ONCE(net->ipv4.sysctl_tcp_sack))
return false;
if ((options & TS_OPT_WSCALE_MASK) == TS_OPT_WSCALE_MASK)
@@ -261,7 +261,7 @@ bool cookie_timestamp_decode(const struct net *net,
tcp_opt->wscale_ok = 1;
tcp_opt->snd_wscale = options & TS_OPT_WSCALE_MASK;
- return net->ipv4.sysctl_tcp_window_scaling != 0;
+ return READ_ONCE(net->ipv4.sysctl_tcp_window_scaling) != 0;
}
EXPORT_SYMBOL(cookie_timestamp_decode);
@@ -340,7 +340,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
struct flowi4 fl4;
u32 tsoff = 0;
- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) ||
+ !th->ack || th->rst)
goto out;
if (tcp_synq_no_recent_overflow(sk))
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 108fd86f2718d..5490c285668b9 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -84,7 +84,7 @@ static int ipv4_local_port_range(struct ctl_table *table, int write,
* port limit.
*/
if ((range[1] < range[0]) ||
- (range[0] < net->ipv4.sysctl_ip_prot_sock))
+ (range[0] < READ_ONCE(net->ipv4.sysctl_ip_prot_sock)))
ret = -EINVAL;
else
set_local_port_range(net, range);
@@ -110,7 +110,7 @@ static int ipv4_privileged_ports(struct ctl_table *table, int write,
.extra2 = &ip_privileged_port_max,
};
- pports = net->ipv4.sysctl_ip_prot_sock;
+ pports = READ_ONCE(net->ipv4.sysctl_ip_prot_sock);
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
@@ -122,7 +122,7 @@ static int ipv4_privileged_ports(struct ctl_table *table, int write,
if (range[0] < pports)
ret = -EINVAL;
else
- net->ipv4.sysctl_ip_prot_sock = pports;
+ WRITE_ONCE(net->ipv4.sysctl_ip_prot_sock, pports);
}
return ret;
@@ -350,61 +350,6 @@ bad_key:
return ret;
}
-static void proc_configure_early_demux(int enabled, int protocol)
-{
- struct net_protocol *ipprot;
-#if IS_ENABLED(CONFIG_IPV6)
- struct inet6_protocol *ip6prot;
-#endif
-
- rcu_read_lock();
-
- ipprot = rcu_dereference(inet_protos[protocol]);
- if (ipprot)
- ipprot->early_demux = enabled ? ipprot->early_demux_handler :
- NULL;
-
-#if IS_ENABLED(CONFIG_IPV6)
- ip6prot = rcu_dereference(inet6_protos[protocol]);
- if (ip6prot)
- ip6prot->early_demux = enabled ? ip6prot->early_demux_handler :
- NULL;
-#endif
- rcu_read_unlock();
-}
-
-static int proc_tcp_early_demux(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- int ret = 0;
-
- ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
-
- if (write && !ret) {
- int enabled = init_net.ipv4.sysctl_tcp_early_demux;
-
- proc_configure_early_demux(enabled, IPPROTO_TCP);
- }
-
- return ret;
-}
-
-static int proc_udp_early_demux(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- int ret = 0;
-
- ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
-
- if (write && !ret) {
- int enabled = init_net.ipv4.sysctl_udp_early_demux;
-
- proc_configure_early_demux(enabled, IPPROTO_UDP);
- }
-
- return ret;
-}
-
static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
int write, void *buffer,
size_t *lenp, loff_t *ppos)
@@ -707,14 +652,14 @@ static struct ctl_table ipv4_net_table[] = {
.data = &init_net.ipv4.sysctl_udp_early_demux,
.maxlen = sizeof(u8),
.mode = 0644,
- .proc_handler = proc_udp_early_demux
+ .proc_handler = proc_dou8vec_minmax,
},
{
.procname = "tcp_early_demux",
.data = &init_net.ipv4.sysctl_tcp_early_demux,
.maxlen = sizeof(u8),
.mode = 0644,
- .proc_handler = proc_tcp_early_demux
+ .proc_handler = proc_dou8vec_minmax,
},
{
.procname = "nexthop_compat_mode",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2222dfdde3168..2faaaaf540ac1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -441,7 +441,7 @@ void tcp_init_sock(struct sock *sk)
tp->snd_cwnd_clamp = ~0;
tp->mss_cache = TCP_MSS_DEFAULT;
- tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
+ tp->reordering = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering);
tcp_assign_congestion_control(sk);
tp->tsoffset = 0;
@@ -1150,7 +1150,8 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
struct sockaddr *uaddr = msg->msg_name;
int err, flags;
- if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
+ if (!(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) &
+ TFO_CLIENT_ENABLE) ||
(uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
uaddr->sa_family == AF_UNSPEC))
return -EOPNOTSUPP;
@@ -3617,7 +3618,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
case TCP_FASTOPEN_CONNECT:
if (val > 1 || val < 0) {
err = -EINVAL;
- } else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
+ } else if (READ_ONCE(net->ipv4.sysctl_tcp_fastopen) &
+ TFO_CLIENT_ENABLE) {
if (sk->sk_state == TCP_CLOSE)
tp->fastopen_connect = val;
else
@@ -3967,12 +3969,13 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
val = keepalive_probes(tp);
break;
case TCP_SYNCNT:
- val = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
+ val = icsk->icsk_syn_retries ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
break;
case TCP_LINGER2:
val = tp->linger2;
if (val >= 0)
- val = (val ? : net->ipv4.sysctl_tcp_fin_timeout) / HZ;
+ val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ;
break;
case TCP_DEFER_ACCEPT:
val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index fdbcf2a6d08ef..825b216d11f52 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -332,7 +332,7 @@ static bool tcp_fastopen_no_cookie(const struct sock *sk,
const struct dst_entry *dst,
int flag)
{
- return (sock_net(sk)->ipv4.sysctl_tcp_fastopen & flag) ||
+ return (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) & flag) ||
tcp_sk(sk)->fastopen_no_cookie ||
(dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE));
}
@@ -347,7 +347,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
const struct dst_entry *dst)
{
bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
- int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+ int tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen);
struct tcp_fastopen_cookie valid_foc = { .len = -1 };
struct sock *child;
int ret = 0;
@@ -489,7 +489,7 @@ void tcp_fastopen_active_disable(struct sock *sk)
{
struct net *net = sock_net(sk);
- if (!sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout))
return;
/* Paired with READ_ONCE() in tcp_fastopen_active_should_disable() */
@@ -510,7 +510,8 @@ void tcp_fastopen_active_disable(struct sock *sk)
*/
bool tcp_fastopen_active_should_disable(struct sock *sk)
{
- unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout;
+ unsigned int tfo_bh_timeout =
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout);
unsigned long timeout;
int tfo_da_times;
int multiplier;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3ec4edc37313b..07dbcbae77828 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1051,7 +1051,7 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
tp->undo_marker ? tp->undo_retrans : 0);
#endif
tp->reordering = min_t(u32, (metric + mss - 1) / mss,
- sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
}
/* This exciting event is worth to be remembered. 8) */
@@ -2030,7 +2030,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
return;
tp->reordering = min_t(u32, tp->packets_out + addend,
- sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
tp->reord_seen++;
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
}
@@ -2095,7 +2095,8 @@ static inline void tcp_init_undo(struct tcp_sock *tp)
static bool tcp_is_rack(const struct sock *sk)
{
- return sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION;
+ return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+ TCP_RACK_LOSS_DETECTION;
}
/* If we detect SACK reneging, forget all SACK information
@@ -2139,6 +2140,7 @@ void tcp_enter_loss(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
+ u8 reordering;
tcp_timeout_mark_lost(sk);
@@ -2159,10 +2161,12 @@ void tcp_enter_loss(struct sock *sk)
/* Timeout in disordered state after receiving substantial DUPACKs
* suggests that the degree of reordering is over-estimated.
*/
+ reordering = READ_ONCE(net->ipv4.sysctl_tcp_reordering);
if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
- tp->sacked_out >= net->ipv4.sysctl_tcp_reordering)
+ tp->sacked_out >= reordering)
tp->reordering = min_t(unsigned int, tp->reordering,
- net->ipv4.sysctl_tcp_reordering);
+ reordering);
+
tcp_set_ca_state(sk, TCP_CA_Loss);
tp->high_seq = tp->snd_nxt;
tcp_ecn_queue_cwr(tp);
@@ -3464,7 +3468,8 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
* new SACK or ECE mark may first advance cwnd here and later reduce
* cwnd in tcp_fastretrans_alert() based on more states.
*/
- if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering)
+ if (tcp_sk(sk)->reordering >
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering))
return flag & FLAG_FORWARD_PROGRESS;
return flag & FLAG_DATA_ACKED;
@@ -4056,7 +4061,7 @@ void tcp_parse_options(const struct net *net,
break;
case TCPOPT_WINDOW:
if (opsize == TCPOLEN_WINDOW && th->syn &&
- !estab && net->ipv4.sysctl_tcp_window_scaling) {
+ !estab && READ_ONCE(net->ipv4.sysctl_tcp_window_scaling)) {
__u8 snd_wscale = *(__u8 *)ptr;
opt_rx->wscale_ok = 1;
if (snd_wscale > TCP_MAX_WSCALE) {
@@ -4072,7 +4077,7 @@ void tcp_parse_options(const struct net *net,
case TCPOPT_TIMESTAMP:
if ((opsize == TCPOLEN_TIMESTAMP) &&
((estab && opt_rx->tstamp_ok) ||
- (!estab && net->ipv4.sysctl_tcp_timestamps))) {
+ (!estab && READ_ONCE(net->ipv4.sysctl_tcp_timestamps)))) {
opt_rx->saw_tstamp = 1;
opt_rx->rcv_tsval = get_unaligned_be32(ptr);
opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
@@ -4080,7 +4085,7 @@ void tcp_parse_options(const struct net *net,
break;
case TCPOPT_SACK_PERM:
if (opsize == TCPOLEN_SACK_PERM && th->syn &&
- !estab && net->ipv4.sysctl_tcp_sack) {
+ !estab && READ_ONCE(net->ipv4.sysctl_tcp_sack)) {
opt_rx->sack_ok = TCP_SACK_SEEN;
tcp_sack_reset(opt_rx);
}
@@ -5567,7 +5572,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
struct tcp_sock *tp = tcp_sk(sk);
u32 ptr = ntohs(th->urg_ptr);
- if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
+ if (ptr && !READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_stdurg))
ptr--;
ptr += ntohl(th->seq);
@@ -6797,11 +6802,14 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
{
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
const char *msg = "Dropping request";
- bool want_cookie = false;
struct net *net = sock_net(sk);
+ bool want_cookie = false;
+ u8 syncookies;
+
+ syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
#ifdef CONFIG_SYN_COOKIES
- if (net->ipv4.sysctl_tcp_syncookies) {
+ if (syncookies) {
msg = "Sending cookies";
want_cookie = true;
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
@@ -6809,8 +6817,7 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
#endif
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
- if (!queue->synflood_warned &&
- net->ipv4.sysctl_tcp_syncookies != 2 &&
+ if (!queue->synflood_warned && syncookies != 2 &&
xchg(&queue->synflood_warned, 1) == 0)
net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
proto, sk->sk_num, msg);
@@ -6859,7 +6866,7 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
struct tcp_sock *tp = tcp_sk(sk);
u16 mss;
- if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 &&
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) != 2 &&
!inet_csk_reqsk_queue_is_full(sk))
return 0;
@@ -6893,13 +6900,15 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
bool want_cookie = false;
struct dst_entry *dst;
struct flowi fl;
+ u8 syncookies;
+
+ syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
/* TW buckets are converted to open requests without
* limitations, they conserve resources and peer is
* evidently real one.
*/
- if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
- inet_csk_reqsk_queue_is_full(sk)) && !isn) {
+ if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) {
want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
if (!want_cookie)
goto drop;
@@ -6948,10 +6957,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
if (!want_cookie && !isn) {
+ int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog);
+
/* Kill the following clause, if you dislike this way. */
- if (!net->ipv4.sysctl_tcp_syncookies &&
- (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
- (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
+ if (!syncookies &&
+ (max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
+ (max_syn_backlog >> 2)) &&
!tcp_peer_is_proven(req, dst)) {
/* Without syncookies last quarter of
* backlog is filled with destinations,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index da5a3c44c4fb7..d16e6e40f47ba 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -108,10 +108,10 @@ static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
+ int reuse = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tw_reuse);
const struct inet_timewait_sock *tw = inet_twsk(sktw);
const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
struct tcp_sock *tp = tcp_sk(sk);
- int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse;
if (reuse == 2) {
/* Still does not detect *everything* that goes through
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 7029b0e98edb2..a501150deaa3b 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -428,7 +428,8 @@ void tcp_update_metrics(struct sock *sk)
if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) {
val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
if (val < tp->reordering &&
- tp->reordering != net->ipv4.sysctl_tcp_reordering)
+ tp->reordering !=
+ READ_ONCE(net->ipv4.sysctl_tcp_reordering))
tcp_metric_set(tm, TCP_METRIC_REORDERING,
tp->reordering);
}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 6854bb1fb32b2..cb95d88497aea 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -173,7 +173,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
* Oh well... nobody has a sufficient solution to this
* protocol bug yet.
*/
- if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) {
+ if (!READ_ONCE(twsk_net(tw)->ipv4.sysctl_tcp_rfc1337)) {
kill:
inet_twsk_deschedule_put(tw);
return TCP_TW_SUCCESS;
@@ -781,7 +781,7 @@ listen_overflow:
if (sk != req->rsk_listener)
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
- if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) {
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow)) {
inet_rsk(req)->acked = 1;
return NULL;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 11aa0ab10bbad..c38e07b50639c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -791,18 +791,18 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
opts->mss = tcp_advertise_mss(sk);
remaining -= TCPOLEN_MSS_ALIGNED;
- if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !*md5)) {
+ if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps) && !*md5)) {
opts->options |= OPTION_TS;
opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
opts->tsecr = tp->rx_opt.ts_recent;
remaining -= TCPOLEN_TSTAMP_ALIGNED;
}
- if (likely(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) {
+ if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling))) {
opts->ws = tp->rx_opt.rcv_wscale;
opts->options |= OPTION_WSCALE;
remaining -= TCPOLEN_WSCALE_ALIGNED;
}
- if (likely(sock_net(sk)->ipv4.sysctl_tcp_sack)) {
+ if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_sack))) {
opts->options |= OPTION_SACK_ADVERTISE;
if (unlikely(!(OPTION_TS & opts->options)))
remaining -= TCPOLEN_SACKPERM_ALIGNED;
@@ -1719,7 +1719,8 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu)
mss_now -= icsk->icsk_ext_hdr_len;
/* Then reserve room for full set of TCP options and 8 bytes of data */
- mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss);
+ mss_now = max(mss_now,
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss));
return mss_now;
}
@@ -1762,10 +1763,10 @@ void tcp_mtup_init(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
struct net *net = sock_net(sk);
- icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1;
+ icsk->icsk_mtup.enabled = READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing) > 1;
icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
icsk->icsk_af_ops->net_header_len;
- icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
+ icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, READ_ONCE(net->ipv4.sysctl_tcp_base_mss));
icsk->icsk_mtup.probe_size = 0;
if (icsk->icsk_mtup.enabled)
icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
@@ -1897,7 +1898,7 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
if (tp->packets_out > tp->snd_cwnd_used)
tp->snd_cwnd_used = tp->packets_out;
- if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle &&
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) &&
(s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
!ca_ops->cong_control)
tcp_cwnd_application_limited(sk);
@@ -2282,7 +2283,7 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk)
u32 interval;
s32 delta;
- interval = net->ipv4.sysctl_tcp_probe_interval;
+ interval = READ_ONCE(net->ipv4.sysctl_tcp_probe_interval);
delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp;
if (unlikely(delta >= interval * HZ)) {
int mss = tcp_current_mss(sk);
@@ -2366,7 +2367,7 @@ static int tcp_mtu_probe(struct sock *sk)
* probing process by not resetting search range to its orignal.
*/
if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) ||
- interval < net->ipv4.sysctl_tcp_probe_threshold) {
+ interval < READ_ONCE(net->ipv4.sysctl_tcp_probe_threshold)) {
/* Check whether enough time has elaplased for
* another round of probing.
*/
@@ -2740,7 +2741,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
if (rcu_access_pointer(tp->fastopen_rsk))
return false;
- early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
+ early_retrans = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_early_retrans);
/* Schedule a loss probe in 2*RTT for SACK capable connections
* not in loss recovery, that are either limited by cwnd or application.
*/
@@ -3104,7 +3105,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
struct sk_buff *skb = to, *tmp;
bool first = true;
- if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse))
return;
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
return;
@@ -3646,7 +3647,7 @@ static void tcp_connect_init(struct sock *sk)
* See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
*/
tp->tcp_header_len = sizeof(struct tcphdr);
- if (sock_net(sk)->ipv4.sysctl_tcp_timestamps)
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps))
tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
@@ -3682,7 +3683,7 @@ static void tcp_connect_init(struct sock *sk)
tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
&tp->rcv_wnd,
&tp->window_clamp,
- sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling),
&rcv_wscale,
rcv_wnd);
@@ -4089,7 +4090,7 @@ void tcp_send_probe0(struct sock *sk)
icsk->icsk_probes_out++;
if (err <= 0) {
- if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2)
+ if (icsk->icsk_backoff < READ_ONCE(net->ipv4.sysctl_tcp_retries2))
icsk->icsk_backoff++;
timeout = tcp_probe0_when(sk, TCP_RTO_MAX);
} else {
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index 48f30e7209f29..50abaa941387d 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -14,7 +14,8 @@ static u32 tcp_rack_reo_wnd(const struct sock *sk)
return 0;
if (tp->sacked_out >= tp->reordering &&
- !(sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_NO_DUPTHRESH))
+ !(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+ TCP_RACK_NO_DUPTHRESH))
return 0;
}
@@ -187,7 +188,8 @@ void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND ||
+ if ((READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+ TCP_RACK_STATIC_REO_WND) ||
!rs->prior_delivered)
return;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 20cf4a98c69d8..50bba370486e8 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -143,7 +143,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
*/
static int tcp_orphan_retries(struct sock *sk, bool alive)
{
- int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */
+ int retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_orphan_retries); /* May be zero. */
/* We know from an ICMP that something is wrong. */
if (sk->sk_err_soft && !alive)
@@ -163,7 +163,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
int mss;
/* Black hole detection */
- if (!net->ipv4.sysctl_tcp_mtu_probing)
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing))
return;
if (!icsk->icsk_mtup.enabled) {
@@ -171,9 +171,9 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
} else {
mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
- mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
- mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor);
- mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss);
+ mss = min(READ_ONCE(net->ipv4.sysctl_tcp_base_mss), mss);
+ mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_mtu_probe_floor));
+ mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_min_snd_mss));
icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
}
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
@@ -239,17 +239,18 @@ static int tcp_write_timeout(struct sock *sk)
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
if (icsk->icsk_retransmits)
__dst_negative_advice(sk);
- retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
+ retry_until = icsk->icsk_syn_retries ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
expired = icsk->icsk_retransmits >= retry_until;
} else {
- if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) {
+ if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) {
/* Black hole detection */
tcp_mtu_probing(icsk, sk);
__dst_negative_advice(sk);
}
- retry_until = net->ipv4.sysctl_tcp_retries2;
+ retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2);
if (sock_flag(sk, SOCK_DEAD)) {
const bool alive = icsk->icsk_rto < TCP_RTO_MAX;
@@ -380,7 +381,7 @@ static void tcp_probe_timer(struct sock *sk)
msecs_to_jiffies(icsk->icsk_user_timeout))
goto abort;
- max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
+ max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
if (sock_flag(sk, SOCK_DEAD)) {
const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
@@ -406,12 +407,15 @@ abort: tcp_write_err(sk);
static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- int max_retries = icsk->icsk_syn_retries ? :
- sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
struct tcp_sock *tp = tcp_sk(sk);
+ int max_retries;
req->rsk_ops->syn_ack_timeout(req);
+ /* add one more retry for fastopen */
+ max_retries = icsk->icsk_syn_retries ? :
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_synack_retries) + 1;
+
if (req->num_timeout >= max_retries) {
tcp_write_err(sk);
return;
@@ -574,7 +578,7 @@ out_reset_timer:
* linear-timeout retransmissions into a black hole
*/
if (sk->sk_state == TCP_ESTABLISHED &&
- (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) &&
+ (tp->thin_lto || READ_ONCE(net->ipv4.sysctl_tcp_thin_linear_timeouts)) &&
tcp_stream_is_thin(tp) &&
icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
icsk->icsk_backoff = 0;
@@ -585,7 +589,7 @@ out_reset_timer:
}
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
tcp_clamp_rto_to_user_timeout(sk), TCP_RTO_MAX);
- if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0))
+ if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1) + 1, 0))
__sk_dst_reset(sk);
out:;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 70564ddccc467..6f354f8be2c57 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -226,7 +226,7 @@ lookup_protocol:
RCU_INIT_POINTER(inet->mc_list, NULL);
inet->rcv_tos = 0;
- if (net->ipv4.sysctl_ip_no_pmtu_disc)
+ if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
inet->pmtudisc = IP_PMTUDISC_DONT;
else
inet->pmtudisc = IP_PMTUDISC_WANT;
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 0322cc86b84ea..e1ebf5e42ebe9 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -45,20 +45,23 @@
#include <net/inet_ecn.h>
#include <net/dst_metadata.h>
-INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *));
static void ip6_rcv_finish_core(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
- void (*edemux)(struct sk_buff *skb);
-
- if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
- const struct inet6_protocol *ipprot;
-
- ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
- if (ipprot && (edemux = READ_ONCE(ipprot->early_demux)))
- INDIRECT_CALL_2(edemux, tcp_v6_early_demux,
- udp_v6_early_demux, skb);
+ if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
+ !skb_dst(skb) && !skb->sk) {
+ switch (ipv6_hdr(skb)->nexthdr) {
+ case IPPROTO_TCP:
+ if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux))
+ tcp_v6_early_demux(skb);
+ break;
+ case IPPROTO_UDP:
+ if (READ_ONCE(net->ipv4.sysctl_udp_early_demux))
+ udp_v6_early_demux(skb);
+ break;
+ }
}
+
if (!skb_valid_dst(skb))
ip6_route_input(skb);
}
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 9cc123f000fbc..5014aa6634527 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -141,7 +141,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
__u8 rcv_wscale;
u32 tsoff = 0;
- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) ||
+ !th->ack || th->rst)
goto out;
if (tcp_synq_no_recent_overflow(sk))
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f37dd4aa91c6b..9d3ede2932582 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1822,7 +1822,7 @@ do_time_wait:
goto discard_it;
}
-INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
+void tcp_v6_early_demux(struct sk_buff *skb)
{
const struct ipv6hdr *hdr;
const struct tcphdr *th;
@@ -2176,12 +2176,7 @@ struct proto tcpv6_prot = {
};
EXPORT_SYMBOL_GPL(tcpv6_prot);
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct inet6_protocol tcpv6_protocol = {
- .early_demux = tcp_v6_early_demux,
- .early_demux_handler = tcp_v6_early_demux,
+static const struct inet6_protocol tcpv6_protocol = {
.handler = tcp_v6_rcv,
.err_handler = tcp_v6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 55afd7f39c045..e2f2e087a7531 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1052,7 +1052,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
return NULL;
}
-INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb)
+void udp_v6_early_demux(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
const struct udphdr *uh;
@@ -1660,12 +1660,7 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname,
return ipv6_getsockopt(sk, level, optname, optval, optlen);
}
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct inet6_protocol udpv6_protocol = {
- .early_demux = udp_v6_early_demux,
- .early_demux_handler = udp_v6_early_demux,
+static const struct inet6_protocol udpv6_protocol = {
.handler = udpv6_rcv,
.err_handler = udpv6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index e479dd0561c54..16915f8eef2b1 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -405,7 +405,7 @@ synproxy_build_ip(struct net *net, struct sk_buff *skb, __be32 saddr,
iph->tos = 0;
iph->id = 0;
iph->frag_off = htons(IP_DF);
- iph->ttl = net->ipv4.sysctl_ip_default_ttl;
+ iph->ttl = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
iph->protocol = IPPROTO_TCP;
iph->check = 0;
iph->saddr = saddr;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 9bb4d3dcc994f..ac366c99086fd 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3533,7 +3533,7 @@ int tc_setup_action(struct flow_action *flow_action,
struct tc_action *actions[],
struct netlink_ext_ack *extack)
{
- int i, j, index, err = 0;
+ int i, j, k, index, err = 0;
struct tc_action *act;
BUILD_BUG_ON(TCA_ACT_HW_STATS_ANY != FLOW_ACTION_HW_STATS_ANY);
@@ -3553,14 +3553,18 @@ int tc_setup_action(struct flow_action *flow_action,
if (err)
goto err_out_locked;
- entry->hw_stats = tc_act_hw_stats(act->hw_stats);
- entry->hw_index = act->tcfa_index;
index = 0;
err = tc_setup_offload_act(act, entry, &index, extack);
- if (!err)
- j += index;
- else
+ if (err)
goto err_out_locked;
+
+ for (k = 0; k < index ; k++) {
+ entry[k].hw_stats = tc_act_hw_stats(act->hw_stats);
+ entry[k].hw_index = act->tcfa_index;
+ }
+
+ j += index;
+
spin_unlock_bh(&act->tcfa_lock);
}
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 35928fefae332..1a094b087d88b 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -358,7 +358,7 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) &&
ret != RTN_LOCAL &&
!sp->inet.freebind &&
- !net->ipv4.sysctl_ip_nonlocal_bind)
+ !READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind))
return 0;
if (ipv6_only_sock(sctp_opt2sk(sp)))
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index c4d057b2941d5..0bde36b564727 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -2122,7 +2122,7 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
init_waitqueue_head(&lgr->llc_flow_waiter);
init_waitqueue_head(&lgr->llc_msg_waiter);
mutex_init(&lgr->llc_conf_mutex);
- lgr->llc_testlink_time = net->ipv4.sysctl_tcp_keepalive_time;
+ lgr->llc_testlink_time = READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
}
/* called after lgr was removed from lgr_list */
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index ce827e79c66a4..879b9024678ed 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -97,13 +97,16 @@ static void tls_device_queue_ctx_destruction(struct tls_context *ctx)
unsigned long flags;
spin_lock_irqsave(&tls_device_lock, flags);
+ if (unlikely(!refcount_dec_and_test(&ctx->refcount)))
+ goto unlock;
+
list_move_tail(&ctx->list, &tls_device_gc_list);
/* schedule_work inside the spinlock
* to make sure tls_device_down waits for that work.
*/
schedule_work(&tls_device_gc_work);
-
+unlock:
spin_unlock_irqrestore(&tls_device_lock, flags);
}
@@ -194,8 +197,7 @@ void tls_device_sk_destruct(struct sock *sk)
clean_acked_data_disable(inet_csk(sk));
}
- if (refcount_dec_and_test(&tls_ctx->refcount))
- tls_device_queue_ctx_destruction(tls_ctx);
+ tls_device_queue_ctx_destruction(tls_ctx);
}
EXPORT_SYMBOL_GPL(tls_device_sk_destruct);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f1876ea61fdce..f1a0bab920a55 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2678,8 +2678,10 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
*num_xfrms = 0;
return 0;
}
- if (IS_ERR(pols[0]))
+ if (IS_ERR(pols[0])) {
+ *num_pols = 0;
return PTR_ERR(pols[0]);
+ }
*num_xfrms = pols[0]->xfrm_nr;
@@ -2694,6 +2696,7 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
if (pols[1]) {
if (IS_ERR(pols[1])) {
xfrm_pols_put(pols, *num_pols);
+ *num_pols = 0;
return PTR_ERR(pols[1]);
}
(*num_pols)++;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 08564e0eef203..ccfb172eb5b8d 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -2620,7 +2620,7 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
int err;
if (family == AF_INET &&
- xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc)
+ READ_ONCE(xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc))
x->props.flags |= XFRM_STATE_NOPMTUDISC;
err = -EPROTONOSUPPORT;