diff options
author | John W. Linville <linville@tuxdriver.com> | 2015-12-10 19:58:53 -0500 |
---|---|---|
committer | John W. Linville <linville@tuxdriver.com> | 2015-12-10 19:58:53 -0500 |
commit | 661ad895ae4f51bd48f5d966f78ac9a61d977bf1 (patch) | |
tree | d5b61131dc32b41376d713973679556f5c89fb33 | |
parent | 02187c809fefe8692a4c3dab53e0048827067ef5 (diff) | |
parent | 264a4aca531bca789d45b9d68158a423e9d54eaf (diff) | |
download | wireless-testing-661ad895ae4f51bd48f5d966f78ac9a61d977bf1.tar.gz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers-nextmaster-2015-12-10
267 files changed, 16510 insertions, 5813 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-net-cdc_ncm b/Documentation/ABI/testing/sysfs-class-net-cdc_ncm index 5cedf72df3584c..f7be0e88b139d0 100644 --- a/Documentation/ABI/testing/sysfs-class-net-cdc_ncm +++ b/Documentation/ABI/testing/sysfs-class-net-cdc_ncm @@ -19,6 +19,25 @@ Description: Set to 0 to pad all frames. Set greater than tx_max to disable all padding. +What: /sys/class/net/<iface>/cdc_ncm/ndp_to_end +Date: Dec 2015 +KernelVersion: 4.5 +Contact: Bjørn Mork <bjorn@mork.no> +Description: + Boolean attribute showing the status of the "NDP to + end" quirk. Defaults to 'N', except for devices + already known to need it enabled. + + The "NDP to end" quirk makes the driver place the NDP + (the packet index table) after the payload. The NCM + specification does not mandate this, but some devices + are known to be more restrictive. Write 'Y' to this + attribute for temporary testing of a suspect device + failing to work with the default driver settings. + + A device entry should be added to the driver if this + quirk is found to be required. + What: /sys/class/net/<iface>/cdc_ncm/rx_max Date: May 2014 KernelVersion: 3.16 diff --git a/Documentation/ABI/testing/sysfs-class-net-qmi b/Documentation/ABI/testing/sysfs-class-net-qmi new file mode 100644 index 00000000000000..fa5a00bb114372 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-net-qmi @@ -0,0 +1,23 @@ +What: /sys/class/net/<iface>/qmi/raw_ip +Date: Dec 2015 +KernelVersion: 4.4 +Contact: Bjørn Mork <bjorn@mork.no> +Description: + Boolean. Default: 'N' + + Set this to 'Y' to change the network device link + framing from '802.3' to 'raw-ip'. + + The netdev will change to reflect the link framing + mode. The netdev is an ordinary ethernet device in + '802.3' mode, and the driver expects to exchange + frames with an ethernet header over the USB link. The + netdev is a headerless p-t-p device in 'raw-ip' mode, + and the driver expects to echange IPv4 or IPv6 packets + without any L2 header over the USB link. + + Userspace is in full control of firmware configuration + through the delegation of the QMI protocol. Userspace + is responsible for coordination of driver and firmware + link framing mode, changing this setting to 'Y' if the + firmware is configured for 'raw-ip' mode. diff --git a/Documentation/devicetree/bindings/net/renesas,ravb.txt b/Documentation/devicetree/bindings/net/renesas,ravb.txt index b486f3f5f6a326..81a9f9e6b45ff8 100644 --- a/Documentation/devicetree/bindings/net/renesas,ravb.txt +++ b/Documentation/devicetree/bindings/net/renesas,ravb.txt @@ -5,8 +5,18 @@ interface contains. Required properties: - compatible: "renesas,etheravb-r8a7790" if the device is a part of R8A7790 SoC. + "renesas,etheravb-r8a7791" if the device is a part of R8A7791 SoC. + "renesas,etheravb-r8a7792" if the device is a part of R8A7792 SoC. + "renesas,etheravb-r8a7793" if the device is a part of R8A7793 SoC. "renesas,etheravb-r8a7794" if the device is a part of R8A7794 SoC. "renesas,etheravb-r8a7795" if the device is a part of R8A7795 SoC. + "renesas,etheravb-rcar-gen2" for generic R-Car Gen 2 compatible interface. + "renesas,etheravb-rcar-gen3" for generic R-Car Gen 3 compatible interface. + + When compatible with the generic version, nodes must list the + SoC-specific version corresponding to the platform first + followed by the generic version. + - reg: offset and length of (1) the register block and (2) the stream buffer. - interrupts: A list of interrupt-specifiers, one for each entry in interrupt-names. @@ -37,7 +47,7 @@ Optional properties: Example: ethernet@e6800000 { - compatible = "renesas,etheravb-r8a7795"; + compatible = "renesas,etheravb-r8a7795", "renesas,etheravb-rcar-gen3"; reg = <0 0xe6800000 0 0x800>, <0 0xe6a00000 0 0x10000>; interrupt-parent = <&gic>; interrupts = <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>, diff --git a/Documentation/fault-injection/notifier-error-inject.txt b/Documentation/fault-injection/notifier-error-inject.txt index 09adabef513ff8..83d3f4e43e9172 100644 --- a/Documentation/fault-injection/notifier-error-inject.txt +++ b/Documentation/fault-injection/notifier-error-inject.txt @@ -10,6 +10,7 @@ modules that can be used to test the following notifiers. * PM notifier * Memory hotplug notifier * powerpc pSeries reconfig notifier + * Netdevice notifier CPU notifier error injection module ----------------------------------- @@ -87,6 +88,30 @@ Possible pSeries reconfig notifier events to be failed are: * PSERIES_DRCONF_MEM_ADD * PSERIES_DRCONF_MEM_REMOVE +Netdevice notifier error injection module +---------------------------------------------- +This feature is controlled through debugfs interface +/sys/kernel/debug/notifier-error-inject/netdev/actions/<notifier event>/error + +Netdevice notifier events which can be failed are: + + * NETDEV_REGISTER + * NETDEV_CHANGEMTU + * NETDEV_CHANGENAME + * NETDEV_PRE_UP + * NETDEV_PRE_TYPE_CHANGE + * NETDEV_POST_INIT + * NETDEV_PRECHANGEMTU + * NETDEV_PRECHANGEUPPER + * NETDEV_CHANGEUPPER + +Example: Inject netdevice mtu change error (-22 == -EINVAL) + + # cd /sys/kernel/debug/notifier-error-inject/netdev + # echo -22 > actions/NETDEV_CHANGEMTU/error + # ip link set eth0 mtu 1024 + RTNETLINK answers: Invalid argument + For more usage examples ----------------------- There are tools/testing/selftests using the notifier error injection features diff --git a/MAINTAINERS b/MAINTAINERS index 7753e54ba2f272..a1d9d5196f3d1a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7368,6 +7368,13 @@ F: include/net/netrom.h F: include/uapi/linux/netrom.h F: net/netrom/ +NETRONOME ETHERNET DRIVERS +M: Jakub Kicinski <jakub.kicinski@netronome.com> +M: Rolf Neugebauer <rolf.neugebauer@netronome.com> +L: oss-drivers@netronome.com +S: Maintained +F: drivers/net/ethernet/netronome/ + NETWORK BLOCK DEVICE (NBD) M: Markus Pargmann <mpa@pengutronix.de> S: Maintained @@ -11176,6 +11183,13 @@ L: linux-usb@vger.kernel.org S: Supported F: drivers/usb/class/usblp.c +USB QMI WWAN NETWORK DRIVER +M: Bjørn Mork <bjorn@mork.no> +L: netdev@vger.kernel.org +S: Maintained +F: Documentation/ABI/testing/sysfs-class-net-qmi +F: drivers/net/usb/qmi_wwan.c + USB RTL8150 DRIVER M: Petko Manolov <petkan@nucleusys.com> L: linux-usb@vger.kernel.org diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index 6d4d4569447ee0..23525c271274c7 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -106,7 +106,7 @@ static void aead_wmem_wakeup(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLRDNORM | POLLRDBAND); @@ -157,7 +157,7 @@ static void aead_data_wakeup(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | POLLRDNORM | POLLRDBAND); diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index ca9efe17db1ac4..caf58b4e46be24 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -238,7 +238,7 @@ static void skcipher_wmem_wakeup(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLRDNORM | POLLRDBAND); @@ -288,7 +288,7 @@ static void skcipher_data_wakeup(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | POLLRDNORM | POLLRDBAND); diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c index 3d7fb6516f74f8..0c2b4ba0681395 100644 --- a/drivers/atm/solos-pci.c +++ b/drivers/atm/solos-pci.c @@ -347,8 +347,8 @@ static char *next_string(struct sk_buff *skb) */ static int process_status(struct solos_card *card, int port, struct sk_buff *skb) { - char *str, *end, *state_str, *snr, *attn; - int ver, rate_up, rate_down; + char *str, *state_str, *snr, *attn; + int ver, rate_up, rate_down, err; if (!card->atmdev[port]) return -ENODEV; @@ -357,7 +357,11 @@ static int process_status(struct solos_card *card, int port, struct sk_buff *skb if (!str) return -EIO; - ver = simple_strtol(str, NULL, 10); + err = kstrtoint(str, 10, &ver); + if (err) { + dev_warn(&card->dev->dev, "Unexpected status interrupt version\n"); + return err; + } if (ver < 1) { dev_warn(&card->dev->dev, "Unexpected status interrupt version %d\n", ver); @@ -373,16 +377,16 @@ static int process_status(struct solos_card *card, int port, struct sk_buff *skb return 0; } - rate_down = simple_strtol(str, &end, 10); - if (*end) - return -EIO; + err = kstrtoint(str, 10, &rate_down); + if (err) + return err; str = next_string(skb); if (!str) return -EIO; - rate_up = simple_strtol(str, &end, 10); - if (*end) - return -EIO; + err = kstrtoint(str, 10, &rate_up); + if (err) + return err; state_str = next_string(skb); if (!state_str) @@ -417,7 +421,7 @@ static int process_command(struct solos_card *card, int port, struct sk_buff *sk struct solos_param *prm; unsigned long flags; int cmdpid; - int found = 0; + int found = 0, err; if (skb->len < 7) return 0; @@ -428,7 +432,9 @@ static int process_command(struct solos_card *card, int port, struct sk_buff *sk skb->data[6] != '\n') return 0; - cmdpid = simple_strtol(&skb->data[1], NULL, 10); + err = kstrtoint(&skb->data[1], 10, &cmdpid); + if (err) + return err; spin_lock_irqsave(&card->param_queue_lock, flags); list_for_each_entry(prm, &card->param_queue, list) { diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 870e56b6b25f5c..26833bfa639bb6 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -40,6 +40,7 @@ #include <linux/gfp.h> #include <rdma/ib_pma.h> +#include <linux/mlx4/driver.h> #include "mlx4_ib.h" enum { @@ -606,8 +607,8 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, struct ib_mad *mad) { struct mlx4_ib_dev *dev = to_mdev(ibdev); - int err; - int slave; + int err, other_port; + int slave = -1; u8 *slave_id; int is_eth = 0; @@ -625,7 +626,17 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n"); return -EINVAL; } - if (mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave)) { + err = mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave); + if (err && mlx4_is_mf_bonded(dev->dev)) { + other_port = (port == 1) ? 2 : 1; + err = mlx4_get_slave_from_roce_gid(dev->dev, other_port, grh->dgid.raw, &slave); + if (!err) { + port = other_port; + pr_debug("resolved slave %d from gid %pI6 wire port %d other %d\n", + slave, grh->dgid.raw, port, other_port); + } + } + if (err) { mlx4_ib_warn(ibdev, "failed matching grh\n"); return -ENOENT; } diff --git a/drivers/isdn/Makefile b/drivers/isdn/Makefile index f1f777570e8e66..91c81965e7ca27 100644 --- a/drivers/isdn/Makefile +++ b/drivers/isdn/Makefile @@ -10,7 +10,6 @@ obj-$(CONFIG_ISDN_DIVERSION) += divert/ obj-$(CONFIG_ISDN_DRV_HISAX) += hisax/ obj-$(CONFIG_ISDN_DRV_ICN) += icn/ obj-$(CONFIG_ISDN_DRV_PCBIT) += pcbit/ -obj-$(CONFIG_ISDN_DRV_SC) += sc/ obj-$(CONFIG_ISDN_DRV_LOOP) += isdnloop/ obj-$(CONFIG_ISDN_DRV_ACT2000) += act2000/ obj-$(CONFIG_HYSDN) += hysdn/ diff --git a/drivers/isdn/i4l/Kconfig b/drivers/isdn/i4l/Kconfig index 9c6650ea848ece..f5b714cd761891 100644 --- a/drivers/isdn/i4l/Kconfig +++ b/drivers/isdn/i4l/Kconfig @@ -130,8 +130,6 @@ source "drivers/isdn/icn/Kconfig" source "drivers/isdn/pcbit/Kconfig" -source "drivers/isdn/sc/Kconfig" - source "drivers/isdn/act2000/Kconfig" endmenu diff --git a/drivers/isdn/sc/Kconfig b/drivers/isdn/sc/Kconfig deleted file mode 100644 index 7469863a792539..00000000000000 --- a/drivers/isdn/sc/Kconfig +++ /dev/null @@ -1,8 +0,0 @@ -config ISDN_DRV_SC - tristate "Spellcaster support" - depends on ISA - help - This enables support for the Spellcaster BRI ISDN boards. This - driver currently builds only in a modularized version. - To build it, choose M here: the module will be called sc. - See <file:Documentation/isdn/README.sc> for more information. diff --git a/drivers/isdn/sc/Makefile b/drivers/isdn/sc/Makefile deleted file mode 100644 index 0f2b7d602ac031..00000000000000 --- a/drivers/isdn/sc/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# Makefile for the sc ISDN device driver - -# Each configuration option enables a list of files. - -obj-$(CONFIG_ISDN_DRV_SC) += sc.o - -# Multipart objects. - -sc-y := shmem.o init.o packet.o command.o event.o \ - ioctl.o interrupt.o message.o timer.o diff --git a/drivers/isdn/sc/card.h b/drivers/isdn/sc/card.h deleted file mode 100644 index 3da69ee43da734..00000000000000 --- a/drivers/isdn/sc/card.h +++ /dev/null @@ -1,131 +0,0 @@ -/* $Id: card.h,v 1.1.10.1 2001/09/23 22:24:59 kai Exp $ - * - * Driver parameters for SpellCaster ISA ISDN adapters - * - * Copyright (C) 1996 SpellCaster Telecommunications Inc. - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - * For more information, please contact gpl-info@spellcast.com or write: - * - * SpellCaster Telecommunications Inc. - * 5621 Finch Avenue East, Unit #3 - * Scarborough, Ontario Canada - * M1B 2T9 - * +1 (416) 297-8565 - * +1 (416) 297-6433 Facsimile - */ - -#ifndef CARD_H -#define CARD_H - -/* - * We need these if they're not already included - */ -#include <linux/timer.h> -#include <linux/time.h> -#include <linux/isdnif.h> -#include <linux/irqreturn.h> -#include "message.h" -#include "scioc.h" - -/* - * Amount of time to wait for a reset to complete - */ -#define CHECKRESET_TIME msecs_to_jiffies(4000) - -/* - * Amount of time between line status checks - */ -#define CHECKSTAT_TIME msecs_to_jiffies(8000) - -/* - * The maximum amount of time to wait for a message response - * to arrive. Use exclusively by send_and_receive - */ -#define SAR_TIMEOUT msecs_to_jiffies(10000) - -/* - * Macro to determine is a card id is valid - */ -#define IS_VALID_CARD(x) ((x >= 0) && (x <= cinst)) - -/* - * Per channel status and configuration - */ -typedef struct { - int l2_proto; - int l3_proto; - char dn[50]; - unsigned long first_sendbuf; /* Offset of first send buffer */ - unsigned int num_sendbufs; /* Number of send buffers */ - unsigned int free_sendbufs; /* Number of free sendbufs */ - unsigned int next_sendbuf; /* Next sequential buffer */ - char eazlist[50]; /* Set with SETEAZ */ - char sillist[50]; /* Set with SETSIL */ - int eazclear; /* Don't accept calls if TRUE */ -} bchan; - -/* - * Everything you want to know about the adapter ... - */ -typedef struct { - int model; - int driverId; /* LL Id */ - char devicename[20]; /* The device name */ - isdn_if *card; /* ISDN4Linux structure */ - bchan *channel; /* status of the B channels */ - char nChannels; /* Number of channels */ - unsigned int interrupt; /* Interrupt number */ - int iobase; /* I/O Base address */ - int ioport[MAX_IO_REGS]; /* Index to I/O ports */ - int shmem_pgport; /* port for the exp mem page reg. */ - int shmem_magic; /* adapter magic number */ - unsigned int rambase; /* Shared RAM base address */ - unsigned int ramsize; /* Size of shared memory */ - RspMessage async_msg; /* Async response message */ - int want_async_messages; /* Snoop the Q ? */ - unsigned char seq_no; /* Next send seq. number */ - struct timer_list reset_timer; /* Check reset timer */ - struct timer_list stat_timer; /* Check startproc timer */ - unsigned char nphystat; /* Latest PhyStat info */ - unsigned char phystat; /* Last PhyStat info */ - HWConfig_pl hwconfig; /* Hardware config info */ - char load_ver[11]; /* CommManage Version string */ - char proc_ver[11]; /* CommEngine Version */ - int StartOnReset; /* Indicates startproc after reset */ - int EngineUp; /* Indicates CommEngine Up */ - int trace_mode; /* Indicate if tracing is on */ - spinlock_t lock; /* local lock */ -} board; - - -extern board *sc_adapter[]; -extern int cinst; - -void memcpy_toshmem(int card, void *dest, const void *src, size_t n); -void memcpy_fromshmem(int card, void *dest, const void *src, size_t n); -int get_card_from_id(int driver); -int indicate_status(int card, int event, ulong Channel, char *Data); -irqreturn_t interrupt_handler(int interrupt, void *cardptr); -int sndpkt(int devId, int channel, int ack, struct sk_buff *data); -void rcvpkt(int card, RspMessage *rcvmsg); -int command(isdn_ctrl *cmd); -int reset(int card); -int startproc(int card); -int send_and_receive(int card, unsigned int procid, unsigned char type, - unsigned char class, unsigned char code, - unsigned char link, unsigned char data_len, - unsigned char *data, RspMessage *mesgdata, int timeout); -void flushreadfifo(int card); -int sendmessage(int card, unsigned int procid, unsigned int type, - unsigned int class, unsigned int code, unsigned int link, - unsigned int data_len, unsigned int *data); -int receivemessage(int card, RspMessage *rspmsg); -int sc_ioctl(int card, scs_ioctl *data); -int setup_buffers(int card, int c); -void sc_check_reset(unsigned long data); -void check_phystat(unsigned long data); - -#endif /* CARD_H */ diff --git a/drivers/isdn/sc/command.c b/drivers/isdn/sc/command.c deleted file mode 100644 index 4a4e66152ce77e..00000000000000 --- a/drivers/isdn/sc/command.c +++ /dev/null @@ -1,363 +0,0 @@ -/* $Id: command.c,v 1.4.10.1 2001/09/23 22:24:59 kai Exp $ - * - * Copyright (C) 1996 SpellCaster Telecommunications Inc. - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - * For more information, please contact gpl-info@spellcast.com or write: - * - * SpellCaster Telecommunications Inc. - * 5621 Finch Avenue East, Unit #3 - * Scarborough, Ontario Canada - * M1B 2T9 - * +1 (416) 297-8565 - * +1 (416) 297-6433 Facsimile - */ - -#include <linux/module.h> -#include "includes.h" /* This must be first */ -#include "hardware.h" -#include "message.h" -#include "card.h" -#include "scioc.h" - -static int dial(int card, unsigned long channel, setup_parm setup); -static int hangup(int card, unsigned long channel); -static int answer(int card, unsigned long channel); -static int clreaz(int card, unsigned long channel); -static int seteaz(int card, unsigned long channel, char *); -static int setl2(int card, unsigned long arg); -static int setl3(int card, unsigned long arg); -static int acceptb(int card, unsigned long channel); - -#ifdef DEBUG -/* - * Translate command codes to strings - */ -static char *commands[] = { "ISDN_CMD_IOCTL", - "ISDN_CMD_DIAL", - "ISDN_CMD_ACCEPTB", - "ISDN_CMD_ACCEPTB", - "ISDN_CMD_HANGUP", - "ISDN_CMD_CLREAZ", - "ISDN_CMD_SETEAZ", - NULL, - NULL, - NULL, - "ISDN_CMD_SETL2", - NULL, - "ISDN_CMD_SETL3", - NULL, - NULL, - NULL, - NULL, - NULL, }; - -/* - * Translates ISDN4Linux protocol codes to strings for debug messages - */ -static char *l3protos[] = { "ISDN_PROTO_L3_TRANS" }; -static char *l2protos[] = { "ISDN_PROTO_L2_X75I", - "ISDN_PROTO_L2_X75UI", - "ISDN_PROTO_L2_X75BUI", - "ISDN_PROTO_L2_HDLC", - "ISDN_PROTO_L2_TRANS" }; -#endif - -int get_card_from_id(int driver) -{ - int i; - - for (i = 0; i < cinst; i++) { - if (sc_adapter[i]->driverId == driver) - return i; - } - return -ENODEV; -} - -/* - * command - */ - -int command(isdn_ctrl *cmd) -{ - int card; - - card = get_card_from_id(cmd->driver); - if (!IS_VALID_CARD(card)) { - pr_debug("Invalid param: %d is not a valid card id\n", card); - return -ENODEV; - } - - /* - * Dispatch the command - */ - switch (cmd->command) { - case ISDN_CMD_IOCTL: - { - unsigned long cmdptr; - scs_ioctl ioc; - - memcpy(&cmdptr, cmd->parm.num, sizeof(unsigned long)); - if (copy_from_user(&ioc, (scs_ioctl __user *)cmdptr, - sizeof(scs_ioctl))) { - pr_debug("%s: Failed to verify user space 0x%lx\n", - sc_adapter[card]->devicename, cmdptr); - return -EFAULT; - } - return sc_ioctl(card, &ioc); - } - case ISDN_CMD_DIAL: - return dial(card, cmd->arg, cmd->parm.setup); - case ISDN_CMD_HANGUP: - return hangup(card, cmd->arg); - case ISDN_CMD_ACCEPTD: - return answer(card, cmd->arg); - case ISDN_CMD_ACCEPTB: - return acceptb(card, cmd->arg); - case ISDN_CMD_CLREAZ: - return clreaz(card, cmd->arg); - case ISDN_CMD_SETEAZ: - return seteaz(card, cmd->arg, cmd->parm.num); - case ISDN_CMD_SETL2: - return setl2(card, cmd->arg); - case ISDN_CMD_SETL3: - return setl3(card, cmd->arg); - default: - return -EINVAL; - } - return 0; -} - -/* - * start the onboard firmware - */ -int startproc(int card) -{ - int status; - - if (!IS_VALID_CARD(card)) { - pr_debug("Invalid param: %d is not a valid card id\n", card); - return -ENODEV; - } - - /* - * send start msg - */ - status = sendmessage(card, CMPID, cmReqType2, - cmReqClass0, - cmReqStartProc, - 0, 0, NULL); - pr_debug("%s: Sent startProc\n", sc_adapter[card]->devicename); - - return status; -} - - -/* - * Dials the number passed in - */ -static int dial(int card, unsigned long channel, setup_parm setup) -{ - int status; - char Phone[48]; - - if (!IS_VALID_CARD(card)) { - pr_debug("Invalid param: %d is not a valid card id\n", card); - return -ENODEV; - } - - /*extract ISDN number to dial from eaz/msn string*/ - strcpy(Phone, setup.phone); - - /*send the connection message*/ - status = sendmessage(card, CEPID, ceReqTypePhy, - ceReqClass1, - ceReqPhyConnect, - (unsigned char)channel + 1, - strlen(Phone), - (unsigned int *)Phone); - - pr_debug("%s: Dialing %s on channel %lu\n", - sc_adapter[card]->devicename, Phone, channel + 1); - - return status; -} - -/* - * Answer an incoming call - */ -static int answer(int card, unsigned long channel) -{ - if (!IS_VALID_CARD(card)) { - pr_debug("Invalid param: %d is not a valid card id\n", card); - return -ENODEV; - } - - if (setup_buffers(card, channel + 1)) { - hangup(card, channel + 1); - return -ENOBUFS; - } - - indicate_status(card, ISDN_STAT_BCONN, channel, NULL); - pr_debug("%s: Answered incoming call on channel %lu\n", - sc_adapter[card]->devicename, channel + 1); - return 0; -} - -/* - * Hangup up the call on specified channel - */ -static int hangup(int card, unsigned long channel) -{ - int status; - - if (!IS_VALID_CARD(card)) { - pr_debug("Invalid param: %d is not a valid card id\n", card); - return -ENODEV; - } - - status = sendmessage(card, CEPID, ceReqTypePhy, - ceReqClass1, - ceReqPhyDisconnect, - (unsigned char)channel + 1, - 0, - NULL); - pr_debug("%s: Sent HANGUP message to channel %lu\n", - sc_adapter[card]->devicename, channel + 1); - return status; -} - -/* - * Set the layer 2 protocol (X.25, HDLC, Raw) - */ -static int setl2(int card, unsigned long arg) -{ - int status = 0; - int protocol, channel; - - if (!IS_VALID_CARD(card)) { - pr_debug("Invalid param: %d is not a valid card id\n", card); - return -ENODEV; - } - protocol = arg >> 8; - channel = arg & 0xff; - sc_adapter[card]->channel[channel].l2_proto = protocol; - - /* - * check that the adapter is also set to the correct protocol - */ - pr_debug("%s: Sending GetFrameFormat for channel %d\n", - sc_adapter[card]->devicename, channel + 1); - status = sendmessage(card, CEPID, ceReqTypeCall, - ceReqClass0, - ceReqCallGetFrameFormat, - (unsigned char)channel + 1, - 1, - (unsigned int *)protocol); - if (status) - return status; - return 0; -} - -/* - * Set the layer 3 protocol - */ -static int setl3(int card, unsigned long channel) -{ - int protocol = channel >> 8; - - if (!IS_VALID_CARD(card)) { - pr_debug("Invalid param: %d is not a valid card id\n", card); - return -ENODEV; - } - - sc_adapter[card]->channel[channel].l3_proto = protocol; - return 0; -} - -static int acceptb(int card, unsigned long channel) -{ - if (!IS_VALID_CARD(card)) { - pr_debug("Invalid param: %d is not a valid card id\n", card); - return -ENODEV; - } - - if (setup_buffers(card, channel + 1)) - { - hangup(card, channel + 1); - return -ENOBUFS; - } - - pr_debug("%s: B-Channel connection accepted on channel %lu\n", - sc_adapter[card]->devicename, channel + 1); - indicate_status(card, ISDN_STAT_BCONN, channel, NULL); - return 0; -} - -static int clreaz(int card, unsigned long arg) -{ - if (!IS_VALID_CARD(card)) { - pr_debug("Invalid param: %d is not a valid card id\n", card); - return -ENODEV; - } - - strcpy(sc_adapter[card]->channel[arg].eazlist, ""); - sc_adapter[card]->channel[arg].eazclear = 1; - pr_debug("%s: EAZ List cleared for channel %lu\n", - sc_adapter[card]->devicename, arg + 1); - return 0; -} - -static int seteaz(int card, unsigned long arg, char *num) -{ - if (!IS_VALID_CARD(card)) { - pr_debug("Invalid param: %d is not a valid card id\n", card); - return -ENODEV; - } - - strcpy(sc_adapter[card]->channel[arg].eazlist, num); - sc_adapter[card]->channel[arg].eazclear = 0; - pr_debug("%s: EAZ list for channel %lu set to: %s\n", - sc_adapter[card]->devicename, arg + 1, - sc_adapter[card]->channel[arg].eazlist); - return 0; -} - -int reset(int card) -{ - unsigned long flags; - - if (!IS_VALID_CARD(card)) { - pr_debug("Invalid param: %d is not a valid card id\n", card); - return -ENODEV; - } - - indicate_status(card, ISDN_STAT_STOP, 0, NULL); - - if (sc_adapter[card]->EngineUp) { - del_timer(&sc_adapter[card]->stat_timer); - } - - sc_adapter[card]->EngineUp = 0; - - spin_lock_irqsave(&sc_adapter[card]->lock, flags); - init_timer(&sc_adapter[card]->reset_timer); - sc_adapter[card]->reset_timer.function = sc_check_reset; - sc_adapter[card]->reset_timer.data = card; - sc_adapter[card]->reset_timer.expires = jiffies + CHECKRESET_TIME; - add_timer(&sc_adapter[card]->reset_timer); - spin_unlock_irqrestore(&sc_adapter[card]->lock, flags); - - outb(0x1, sc_adapter[card]->ioport[SFT_RESET]); - - pr_debug("%s: Adapter Reset\n", sc_adapter[card]->devicename); - return 0; -} - -void flushreadfifo(int card) -{ - while (inb(sc_adapter[card]->ioport[FIFO_STATUS]) & RF_HAS_DATA) - inb(sc_adapter[card]->ioport[FIFO_READ]); -} diff --git a/drivers/isdn/sc/event.c b/drivers/isdn/sc/event.c deleted file mode 100644 index 833d96c2cf9257..00000000000000 --- a/drivers/isdn/sc/event.c +++ /dev/null @@ -1,68 +0,0 @@ -/* $Id: event.c,v 1.4.8.1 2001/09/23 22:24:59 kai Exp $ - * - * Copyright (C) 1996 SpellCaster Telecommunications Inc. - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - * For more information, please contact gpl-info@spellcast.com or write: - * - * SpellCaster Telecommunications Inc. - * 5621 Finch Avenue East, Unit #3 - * Scarborough, Ontario Canada - * M1B 2T9 - * +1 (416) 297-8565 - * +1 (416) 297-6433 Facsimile - */ - -#include "includes.h" -#include "hardware.h" -#include "message.h" -#include "card.h" - -#ifdef DEBUG -static char *events[] = { "ISDN_STAT_STAVAIL", - "ISDN_STAT_ICALL", - "ISDN_STAT_RUN", - "ISDN_STAT_STOP", - "ISDN_STAT_DCONN", - "ISDN_STAT_BCONN", - "ISDN_STAT_DHUP", - "ISDN_STAT_BHUP", - "ISDN_STAT_CINF", - "ISDN_STAT_LOAD", - "ISDN_STAT_UNLOAD", - "ISDN_STAT_BSENT", - "ISDN_STAT_NODCH", - "ISDN_STAT_ADDCH", - "ISDN_STAT_CAUSE" }; -#endif - -int indicate_status(int card, int event, ulong Channel, char *Data) -{ - isdn_ctrl cmd; - -#ifdef DEBUG - pr_debug("%s: Indicating event %s on Channel %d\n", - sc_adapter[card]->devicename, events[event - 256], Channel); -#endif - if (Data != NULL) { - pr_debug("%s: Event data: %s\n", sc_adapter[card]->devicename, - Data); - switch (event) { - case ISDN_STAT_BSENT: - memcpy(&cmd.parm.length, Data, sizeof(cmd.parm.length)); - break; - case ISDN_STAT_ICALL: - memcpy(&cmd.parm.setup, Data, sizeof(cmd.parm.setup)); - break; - default: - strlcpy(cmd.parm.num, Data, sizeof(cmd.parm.num)); - } - } - - cmd.command = event; - cmd.driver = sc_adapter[card]->driverId; - cmd.arg = Channel; - return sc_adapter[card]->card->statcallb(&cmd); -} diff --git a/drivers/isdn/sc/hardware.h b/drivers/isdn/sc/hardware.h deleted file mode 100644 index 81fbe78701f05d..00000000000000 --- a/drivers/isdn/sc/hardware.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Hardware specific macros, defines and structures - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - -#ifndef HARDWARE_H -#define HARDWARE_H - -#include <asm/param.h> /* For HZ */ - -/* - * General hardware parameters common to all ISA adapters - */ - -#define MAX_CARDS 4 /* The maximum number of cards to - control or probe for. */ - -#define SIGNATURE 0x87654321 /* Board reset signature */ -#define SIG_OFFSET 0x1004 /* Where to find signature in shared RAM */ -#define TRACE_OFFSET 0x1008 /* Trace enable word offset in shared RAM */ -#define BUFFER_OFFSET 0x1800 /* Beginning of buffers */ - -/* I/O Port parameters */ -#define IOBASE_MIN 0x180 /* Lowest I/O port address */ -#define IOBASE_MAX 0x3C0 /* Highest I/O port address */ -#define IOBASE_OFFSET 0x20 /* Inter-board I/O port gap used during - probing */ -#define FIFORD_OFFSET 0x0 -#define FIFOWR_OFFSET 0x400 -#define FIFOSTAT_OFFSET 0x1000 -#define RESET_OFFSET 0x2800 -#define PG0_OFFSET 0x3000 /* Offset from I/O Base for Page 0 register */ -#define PG1_OFFSET 0x3400 /* Offset from I/O Base for Page 1 register */ -#define PG2_OFFSET 0x3800 /* Offset from I/O Base for Page 2 register */ -#define PG3_OFFSET 0x3C00 /* Offset from I/O Base for Page 3 register */ - -#define FIFO_READ 0 /* FIFO Read register */ -#define FIFO_WRITE 1 /* FIFO Write rgister */ -#define LO_ADDR_PTR 2 /* Extended RAM Low Addr Pointer */ -#define HI_ADDR_PTR 3 /* Extended RAM High Addr Pointer */ -#define NOT_USED_1 4 -#define FIFO_STATUS 5 /* FIFO Status Register */ -#define NOT_USED_2 6 -#define MEM_OFFSET 7 -#define SFT_RESET 10 /* Reset Register */ -#define EXP_BASE 11 /* Shared RAM Base address */ -#define EXP_PAGE0 12 /* Shared RAM Page0 register */ -#define EXP_PAGE1 13 /* Shared RAM Page1 register */ -#define EXP_PAGE2 14 /* Shared RAM Page2 register */ -#define EXP_PAGE3 15 /* Shared RAM Page3 register */ -#define IRQ_SELECT 16 /* IRQ selection register */ -#define MAX_IO_REGS 17 /* Total number of I/O ports */ - -/* FIFO register values */ -#define RF_HAS_DATA 0x01 /* fifo has data */ -#define RF_QUART_FULL 0x02 /* fifo quarter full */ -#define RF_HALF_FULL 0x04 /* fifo half full */ -#define RF_NOT_FULL 0x08 /* fifo not full */ -#define WF_HAS_DATA 0x10 /* fifo has data */ -#define WF_QUART_FULL 0x20 /* fifo quarter full */ -#define WF_HALF_FULL 0x40 /* fifo half full */ -#define WF_NOT_FULL 0x80 /* fifo not full */ - -/* Shared RAM parameters */ -#define SRAM_MIN 0xC0000 /* Lowest host shared RAM address */ -#define SRAM_MAX 0xEFFFF /* Highest host shared RAM address */ -#define SRAM_PAGESIZE 0x4000 /* Size of one RAM page (16K) */ - -/* Shared RAM buffer parameters */ -#define BUFFER_SIZE 0x800 /* The size of a buffer in bytes */ -#define BUFFER_BASE BUFFER_OFFSET /* Offset from start of shared RAM - where buffer start */ -#define BUFFERS_MAX 16 /* Maximum number of send/receive - buffers per channel */ -#define HDLC_PROTO 0x01 /* Frame Format for Layer 2 */ - -#define BRI_BOARD 0 -#define POTS_BOARD 1 -#define PRI_BOARD 2 - -/* - * Specific hardware parameters for the DataCommute/BRI - */ -#define BRI_CHANNELS 2 /* Number of B channels */ -#define BRI_BASEPG_VAL 0x98 -#define BRI_MAGIC 0x60000 /* Magic Number */ -#define BRI_MEMSIZE 0x10000 /* Amount of RAM (64K) */ -#define BRI_PARTNO "72-029" -#define BRI_FEATURES ISDN_FEATURE_L2_HDLC | ISDN_FEATURE_L3_TRANS; -/* - * Specific hardware parameters for the DataCommute/PRI - */ -#define PRI_CHANNELS 23 /* Number of B channels */ -#define PRI_BASEPG_VAL 0x88 -#define PRI_MAGIC 0x20000 /* Magic Number */ -#define PRI_MEMSIZE 0x100000 /* Amount of RAM (1M) */ -#define PRI_PARTNO "72-030" -#define PRI_FEATURES ISDN_FEATURE_L2_HDLC | ISDN_FEATURE_L3_TRANS; - -/* - * Some handy macros - */ - -/* Determine if a channel number is valid for the adapter */ -#define IS_VALID_CHANNEL(y, x) ((x > 0) && (x <= sc_adapter[y]->channels)) - -#endif diff --git a/drivers/isdn/sc/includes.h b/drivers/isdn/sc/includes.h deleted file mode 100644 index 4766e5b7737836..00000000000000 --- a/drivers/isdn/sc/includes.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - -#include <linux/errno.h> -#include <asm/io.h> -#include <linux/delay.h> -#include <linux/kernel.h> -#include <linux/slab.h> -#include <linux/mm.h> -#include <linux/ioport.h> -#include <linux/timer.h> -#include <linux/wait.h> -#include <linux/isdnif.h> diff --git a/drivers/isdn/sc/init.c b/drivers/isdn/sc/init.c deleted file mode 100644 index 3597ef47b28a47..00000000000000 --- a/drivers/isdn/sc/init.c +++ /dev/null @@ -1,549 +0,0 @@ -/* - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - -#include <linux/module.h> -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/delay.h> -#include <linux/sched.h> -#include <linux/slab.h> -#include "includes.h" -#include "hardware.h" -#include "card.h" - -MODULE_DESCRIPTION("ISDN4Linux: Driver for Spellcaster card"); -MODULE_AUTHOR("Spellcaster Telecommunications Inc."); -MODULE_LICENSE("GPL"); - -board *sc_adapter[MAX_CARDS]; -int cinst; - -static char devname[] = "scX"; -static const char version[] = "2.0b1"; - -static const char *boardname[] = { "DataCommute/BRI", "DataCommute/PRI", "TeleCommute/BRI" }; - -/* insmod set parameters */ -static unsigned int io[] = {0, 0, 0, 0}; -static unsigned char irq[] = {0, 0, 0, 0}; -static unsigned long ram[] = {0, 0, 0, 0}; -static bool do_reset; - -module_param_array(io, int, NULL, 0); -module_param_array(irq, byte, NULL, 0); -module_param_array(ram, long, NULL, 0); -module_param(do_reset, bool, 0); - -static int identify_board(unsigned long, unsigned int); - -static int __init sc_init(void) -{ - int b = -1; - int i, j; - int status = -ENODEV; - - unsigned long memsize = 0; - unsigned long features = 0; - isdn_if *interface; - unsigned char channels; - unsigned char pgport; - unsigned long magic; - int model; - int last_base = IOBASE_MIN; - int probe_exhasted = 0; - -#ifdef MODULE - pr_info("SpellCaster ISA ISDN Adapter Driver rev. %s Loaded\n", version); -#else - pr_info("SpellCaster ISA ISDN Adapter Driver rev. %s\n", version); -#endif - pr_info("Copyright (C) 1996 SpellCaster Telecommunications Inc.\n"); - - while (b++ < MAX_CARDS - 1) { - pr_debug("Probing for adapter #%d\n", b); - /* - * Initialize reusable variables - */ - model = -1; - magic = 0; - channels = 0; - pgport = 0; - - /* - * See if we should probe for IO base - */ - pr_debug("I/O Base for board %d is 0x%x, %s probe\n", b, io[b], - io[b] == 0 ? "will" : "won't"); - if (io[b]) { - /* - * No, I/O Base has been provided - */ - for (i = 0; i < MAX_IO_REGS - 1; i++) { - if (!request_region(io[b] + i * 0x400, 1, "sc test")) { - pr_debug("request_region for 0x%x failed\n", io[b] + i * 0x400); - io[b] = 0; - break; - } else - release_region(io[b] + i * 0x400, 1); - } - - /* - * Confirm the I/O Address with a test - */ - if (io[b] == 0) { - pr_debug("I/O Address invalid.\n"); - continue; - } - - outb(0x18, io[b] + 0x400 * EXP_PAGE0); - if (inb(io[b] + 0x400 * EXP_PAGE0) != 0x18) { - pr_debug("I/O Base 0x%x fails test\n", - io[b] + 0x400 * EXP_PAGE0); - continue; - } - } else { - /* - * Yes, probe for I/O Base - */ - if (probe_exhasted) { - pr_debug("All probe addresses exhausted, skipping\n"); - continue; - } - pr_debug("Probing for I/O...\n"); - for (i = last_base; i <= IOBASE_MAX; i += IOBASE_OFFSET) { - int found_io = 1; - if (i == IOBASE_MAX) { - probe_exhasted = 1; /* No more addresses to probe */ - pr_debug("End of Probes\n"); - } - last_base = i + IOBASE_OFFSET; - pr_debug(" checking 0x%x...", i); - for (j = 0; j < MAX_IO_REGS - 1; j++) { - if (!request_region(i + j * 0x400, 1, "sc test")) { - pr_debug("Failed\n"); - found_io = 0; - break; - } else - release_region(i + j * 0x400, 1); - } - - if (found_io) { - io[b] = i; - outb(0x18, io[b] + 0x400 * EXP_PAGE0); - if (inb(io[b] + 0x400 * EXP_PAGE0) != 0x18) { - pr_debug("Failed by test\n"); - continue; - } - pr_debug("Passed\n"); - break; - } - } - if (probe_exhasted) { - continue; - } - } - - /* - * See if we should probe for shared RAM - */ - if (do_reset) { - pr_debug("Doing a SAFE probe reset\n"); - outb(0xFF, io[b] + RESET_OFFSET); - msleep_interruptible(10000); - } - pr_debug("RAM Base for board %d is 0x%lx, %s probe\n", b, - ram[b], ram[b] == 0 ? "will" : "won't"); - - if (ram[b]) { - /* - * No, the RAM base has been provided - * Just look for a signature and ID the - * board model - */ - if (request_region(ram[b], SRAM_PAGESIZE, "sc test")) { - pr_debug("request_region for RAM base 0x%lx succeeded\n", ram[b]); - model = identify_board(ram[b], io[b]); - release_region(ram[b], SRAM_PAGESIZE); - } - } else { - /* - * Yes, probe for free RAM and look for - * a signature and id the board model - */ - for (i = SRAM_MIN; i < SRAM_MAX; i += SRAM_PAGESIZE) { - pr_debug("Checking RAM address 0x%x...\n", i); - if (request_region(i, SRAM_PAGESIZE, "sc test")) { - pr_debug(" request_region succeeded\n"); - model = identify_board(i, io[b]); - release_region(i, SRAM_PAGESIZE); - if (model >= 0) { - pr_debug(" Identified a %s\n", - boardname[model]); - ram[b] = i; - break; - } - pr_debug(" Unidentified or inaccessible\n"); - continue; - } - pr_debug(" request failed\n"); - } - } - /* - * See if we found free RAM and the board model - */ - if (!ram[b] || model < 0) { - /* - * Nope, there was no place in RAM for the - * board, or it couldn't be identified - */ - pr_debug("Failed to find an adapter at 0x%lx\n", ram[b]); - continue; - } - - /* - * Set the board's magic number, memory size and page register - */ - switch (model) { - case PRI_BOARD: - channels = 23; - magic = 0x20000; - memsize = 0x100000; - features = PRI_FEATURES; - break; - - case BRI_BOARD: - case POTS_BOARD: - channels = 2; - magic = 0x60000; - memsize = 0x10000; - features = BRI_FEATURES; - break; - } - switch (ram[b] >> 12 & 0x0F) { - case 0x0: - pr_debug("RAM Page register set to EXP_PAGE0\n"); - pgport = EXP_PAGE0; - break; - - case 0x4: - pr_debug("RAM Page register set to EXP_PAGE1\n"); - pgport = EXP_PAGE1; - break; - - case 0x8: - pr_debug("RAM Page register set to EXP_PAGE2\n"); - pgport = EXP_PAGE2; - break; - - case 0xC: - pr_debug("RAM Page register set to EXP_PAGE3\n"); - pgport = EXP_PAGE3; - break; - - default: - pr_debug("RAM base address doesn't fall on 16K boundary\n"); - continue; - } - - pr_debug("current IRQ: %d b: %d\n", irq[b], b); - - /* - * Make sure we got an IRQ - */ - if (!irq[b]) { - /* - * No interrupt could be used - */ - pr_debug("Failed to acquire an IRQ line\n"); - continue; - } - - /* - * Horray! We found a board, Make sure we can register - * it with ISDN4Linux - */ - interface = kzalloc(sizeof(isdn_if), GFP_KERNEL); - if (interface == NULL) { - /* - * Oops, can't malloc isdn_if - */ - continue; - } - - interface->owner = THIS_MODULE; - interface->hl_hdrlen = 0; - interface->channels = channels; - interface->maxbufsize = BUFFER_SIZE; - interface->features = features; - interface->writebuf_skb = sndpkt; - interface->writecmd = NULL; - interface->command = command; - strcpy(interface->id, devname); - interface->id[2] = '0' + cinst; - - /* - * Allocate the board structure - */ - sc_adapter[cinst] = kzalloc(sizeof(board), GFP_KERNEL); - if (sc_adapter[cinst] == NULL) { - /* - * Oops, can't alloc memory for the board - */ - kfree(interface); - continue; - } - spin_lock_init(&sc_adapter[cinst]->lock); - - if (!register_isdn(interface)) { - /* - * Oops, couldn't register for some reason - */ - kfree(interface); - kfree(sc_adapter[cinst]); - continue; - } - - sc_adapter[cinst]->card = interface; - sc_adapter[cinst]->driverId = interface->channels; - strcpy(sc_adapter[cinst]->devicename, interface->id); - sc_adapter[cinst]->nChannels = channels; - sc_adapter[cinst]->ramsize = memsize; - sc_adapter[cinst]->shmem_magic = magic; - sc_adapter[cinst]->shmem_pgport = pgport; - sc_adapter[cinst]->StartOnReset = 1; - - /* - * Allocate channels status structures - */ - sc_adapter[cinst]->channel = kzalloc(sizeof(bchan) * channels, GFP_KERNEL); - if (sc_adapter[cinst]->channel == NULL) { - /* - * Oops, can't alloc memory for the channels - */ - indicate_status(cinst, ISDN_STAT_UNLOAD, 0, NULL); /* Fix me */ - kfree(interface); - kfree(sc_adapter[cinst]); - continue; - } - - /* - * Lock down the hardware resources - */ - sc_adapter[cinst]->interrupt = irq[b]; - if (request_irq(sc_adapter[cinst]->interrupt, interrupt_handler, - 0, interface->id, - (void *)(unsigned long) cinst)) { - kfree(sc_adapter[cinst]->channel); - indicate_status(cinst, ISDN_STAT_UNLOAD, 0, NULL); /* Fix me */ - kfree(interface); - kfree(sc_adapter[cinst]); - continue; - - } - sc_adapter[cinst]->iobase = io[b]; - for (i = 0; i < MAX_IO_REGS - 1; i++) { - sc_adapter[cinst]->ioport[i] = io[b] + i * 0x400; - request_region(sc_adapter[cinst]->ioport[i], 1, - interface->id); - pr_debug("Requesting I/O Port %#x\n", - sc_adapter[cinst]->ioport[i]); - } - sc_adapter[cinst]->ioport[IRQ_SELECT] = io[b] + 0x2; - request_region(sc_adapter[cinst]->ioport[IRQ_SELECT], 1, - interface->id); - pr_debug("Requesting I/O Port %#x\n", - sc_adapter[cinst]->ioport[IRQ_SELECT]); - sc_adapter[cinst]->rambase = ram[b]; - request_region(sc_adapter[cinst]->rambase, SRAM_PAGESIZE, - interface->id); - - pr_info(" %s (%d) - %s %d channels IRQ %d, I/O Base 0x%x, RAM Base 0x%lx\n", - sc_adapter[cinst]->devicename, - sc_adapter[cinst]->driverId, - boardname[model], channels, irq[b], io[b], ram[b]); - - /* - * reset the adapter to put things in motion - */ - reset(cinst); - - cinst++; - status = 0; - } - if (status) - pr_info("Failed to find any adapters, driver unloaded\n"); - return status; -} - -static void __exit sc_exit(void) -{ - int i, j; - - for (i = 0; i < cinst; i++) { - pr_debug("Cleaning up after adapter %d\n", i); - /* - * kill the timers - */ - del_timer_sync(&(sc_adapter[i]->reset_timer)); - del_timer_sync(&(sc_adapter[i]->stat_timer)); - - /* - * Tell I4L we're toast - */ - indicate_status(i, ISDN_STAT_STOP, 0, NULL); - indicate_status(i, ISDN_STAT_UNLOAD, 0, NULL); - - /* - * Release shared RAM - */ - release_region(sc_adapter[i]->rambase, SRAM_PAGESIZE); - - /* - * Release the IRQ - */ - free_irq(sc_adapter[i]->interrupt, NULL); - - /* - * Reset for a clean start - */ - outb(0xFF, sc_adapter[i]->ioport[SFT_RESET]); - - /* - * Release the I/O Port regions - */ - for (j = 0; j < MAX_IO_REGS - 1; j++) { - release_region(sc_adapter[i]->ioport[j], 1); - pr_debug("Releasing I/O Port %#x\n", - sc_adapter[i]->ioport[j]); - } - release_region(sc_adapter[i]->ioport[IRQ_SELECT], 1); - pr_debug("Releasing I/O Port %#x\n", - sc_adapter[i]->ioport[IRQ_SELECT]); - - /* - * Release any memory we alloced - */ - kfree(sc_adapter[i]->channel); - kfree(sc_adapter[i]->card); - kfree(sc_adapter[i]); - } - pr_info("SpellCaster ISA ISDN Adapter Driver Unloaded.\n"); -} - -static int identify_board(unsigned long rambase, unsigned int iobase) -{ - unsigned int pgport; - unsigned long sig; - DualPortMemory *dpm; - RspMessage rcvmsg; - ReqMessage sndmsg; - HWConfig_pl hwci; - int x; - - pr_debug("Attempting to identify adapter @ 0x%lx io 0x%x\n", - rambase, iobase); - - /* - * Enable the base pointer - */ - outb(rambase >> 12, iobase + 0x2c00); - - switch (rambase >> 12 & 0x0F) { - case 0x0: - pgport = iobase + PG0_OFFSET; - pr_debug("Page Register offset is 0x%x\n", PG0_OFFSET); - break; - - case 0x4: - pgport = iobase + PG1_OFFSET; - pr_debug("Page Register offset is 0x%x\n", PG1_OFFSET); - break; - - case 0x8: - pgport = iobase + PG2_OFFSET; - pr_debug("Page Register offset is 0x%x\n", PG2_OFFSET); - break; - - case 0xC: - pgport = iobase + PG3_OFFSET; - pr_debug("Page Register offset is 0x%x\n", PG3_OFFSET); - break; - default: - pr_debug("Invalid rambase 0x%lx\n", rambase); - return -1; - } - - /* - * Try to identify a PRI card - */ - outb(PRI_BASEPG_VAL, pgport); - msleep_interruptible(1000); - sig = readl(rambase + SIG_OFFSET); - pr_debug("Looking for a signature, got 0x%lx\n", sig); - if (sig == SIGNATURE) - return PRI_BOARD; - - /* - * Try to identify a PRI card - */ - outb(BRI_BASEPG_VAL, pgport); - msleep_interruptible(1000); - sig = readl(rambase + SIG_OFFSET); - pr_debug("Looking for a signature, got 0x%lx\n", sig); - if (sig == SIGNATURE) - return BRI_BOARD; - - return -1; - - /* - * Try to spot a card - */ - sig = readl(rambase + SIG_OFFSET); - pr_debug("Looking for a signature, got 0x%lx\n", sig); - if (sig != SIGNATURE) - return -1; - - dpm = (DualPortMemory *) rambase; - - memset(&sndmsg, 0, MSG_LEN); - sndmsg.msg_byte_cnt = 3; - sndmsg.type = cmReqType1; - sndmsg.class = cmReqClass0; - sndmsg.code = cmReqHWConfig; - memcpy_toio(&(dpm->req_queue[dpm->req_head++]), &sndmsg, MSG_LEN); - outb(0, iobase + 0x400); - pr_debug("Sent HWConfig message\n"); - /* - * Wait for the response - */ - x = 0; - while ((inb(iobase + FIFOSTAT_OFFSET) & RF_HAS_DATA) && x < 100) { - schedule_timeout_interruptible(1); - x++; - } - if (x == 100) { - pr_debug("Timeout waiting for response\n"); - return -1; - } - - memcpy_fromio(&rcvmsg, &(dpm->rsp_queue[dpm->rsp_tail]), MSG_LEN); - pr_debug("Got HWConfig response, status = 0x%x\n", rcvmsg.rsp_status); - memcpy(&hwci, &(rcvmsg.msg_data.HWCresponse), sizeof(HWConfig_pl)); - pr_debug("Hardware Config: Interface: %s, RAM Size: %ld, Serial: %s\n" - " Part: %s, Rev: %s\n", - hwci.st_u_sense ? "S/T" : "U", hwci.ram_size, - hwci.serial_no, hwci.part_no, hwci.rev_no); - - if (!strncmp(PRI_PARTNO, hwci.part_no, 6)) - return PRI_BOARD; - if (!strncmp(BRI_PARTNO, hwci.part_no, 6)) - return BRI_BOARD; - - return -1; -} - -module_init(sc_init); -module_exit(sc_exit); diff --git a/drivers/isdn/sc/interrupt.c b/drivers/isdn/sc/interrupt.c deleted file mode 100644 index e80cc76bc3142f..00000000000000 --- a/drivers/isdn/sc/interrupt.c +++ /dev/null @@ -1,247 +0,0 @@ -/* $Id: interrupt.c,v 1.4.8.3 2001/09/23 22:24:59 kai Exp $ - * - * Copyright (C) 1996 SpellCaster Telecommunications Inc. - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - * For more information, please contact gpl-info@spellcast.com or write: - * - * SpellCaster Telecommunications Inc. - * 5621 Finch Avenue East, Unit #3 - * Scarborough, Ontario Canada - * M1B 2T9 - * +1 (416) 297-8565 - * +1 (416) 297-6433 Facsimile - */ - -#include "includes.h" -#include "hardware.h" -#include "message.h" -#include "card.h" -#include <linux/interrupt.h> - -/* - * - */ -irqreturn_t interrupt_handler(int dummy, void *card_inst) -{ - - RspMessage rcvmsg; - int channel; - int card = (int)(unsigned long) card_inst; - - if (!IS_VALID_CARD(card)) { - pr_debug("Invalid param: %d is not a valid card id\n", card); - return IRQ_NONE; - } - - pr_debug("%s: Entered Interrupt handler\n", - sc_adapter[card]->devicename); - - /* - * Pull all of the waiting messages off the response queue - */ - while (!receivemessage(card, &rcvmsg)) { - /* - * Push the message to the adapter structure for - * send_and_receive to snoop - */ - if (sc_adapter[card]->want_async_messages) - memcpy(&(sc_adapter[card]->async_msg), - &rcvmsg, sizeof(RspMessage)); - - channel = (unsigned int) rcvmsg.phy_link_no; - - /* - * Trap Invalid request messages - */ - if (IS_CM_MESSAGE(rcvmsg, 0, 0, Invalid)) { - pr_debug("%s: Invalid request Message, rsp_status = %d\n", - sc_adapter[card]->devicename, - rcvmsg.rsp_status); - break; - } - - /* - * Check for a linkRead message - */ - if (IS_CE_MESSAGE(rcvmsg, Lnk, 1, Read)) - { - pr_debug("%s: Received packet 0x%x bytes long at 0x%lx\n", - sc_adapter[card]->devicename, - rcvmsg.msg_data.response.msg_len, - rcvmsg.msg_data.response.buff_offset); - rcvpkt(card, &rcvmsg); - continue; - - } - - /* - * Handle a write acknoledgement - */ - if (IS_CE_MESSAGE(rcvmsg, Lnk, 1, Write)) { - pr_debug("%s: Packet Send ACK on channel %d\n", - sc_adapter[card]->devicename, - rcvmsg.phy_link_no); - sc_adapter[card]->channel[rcvmsg.phy_link_no - 1].free_sendbufs++; - continue; - } - - /* - * Handle a connection message - */ - if (IS_CE_MESSAGE(rcvmsg, Phy, 1, Connect)) - { - unsigned int callid; - setup_parm setup; - pr_debug("%s: Connect message: line %d: status %d: cause 0x%x\n", - sc_adapter[card]->devicename, - rcvmsg.phy_link_no, - rcvmsg.rsp_status, - rcvmsg.msg_data.byte_array[2]); - - memcpy(&callid, rcvmsg.msg_data.byte_array, sizeof(int)); - if (callid >= 0x8000 && callid <= 0xFFFF) - { - pr_debug("%s: Got Dial-Out Rsp\n", - sc_adapter[card]->devicename); - indicate_status(card, ISDN_STAT_DCONN, - (unsigned long)rcvmsg.phy_link_no - 1, NULL); - - } - else if (callid >= 0x0000 && callid <= 0x7FFF) - { - int len; - - pr_debug("%s: Got Incoming Call\n", - sc_adapter[card]->devicename); - len = strlcpy(setup.phone, &(rcvmsg.msg_data.byte_array[4]), - sizeof(setup.phone)); - if (len >= sizeof(setup.phone)) - continue; - len = strlcpy(setup.eazmsn, - sc_adapter[card]->channel[rcvmsg.phy_link_no - 1].dn, - sizeof(setup.eazmsn)); - if (len >= sizeof(setup.eazmsn)) - continue; - setup.si1 = 7; - setup.si2 = 0; - setup.plan = 0; - setup.screen = 0; - - indicate_status(card, ISDN_STAT_ICALL, (unsigned long)rcvmsg.phy_link_no - 1, (char *)&setup); - indicate_status(card, ISDN_STAT_DCONN, (unsigned long)rcvmsg.phy_link_no - 1, NULL); - } - continue; - } - - /* - * Handle a disconnection message - */ - if (IS_CE_MESSAGE(rcvmsg, Phy, 1, Disconnect)) - { - pr_debug("%s: disconnect message: line %d: status %d: cause 0x%x\n", - sc_adapter[card]->devicename, - rcvmsg.phy_link_no, - rcvmsg.rsp_status, - rcvmsg.msg_data.byte_array[2]); - - indicate_status(card, ISDN_STAT_BHUP, (unsigned long)rcvmsg.phy_link_no - 1, NULL); - indicate_status(card, ISDN_STAT_DHUP, (unsigned long)rcvmsg.phy_link_no - 1, NULL); - continue; - - } - - /* - * Handle a startProc engine up message - */ - if (IS_CM_MESSAGE(rcvmsg, 5, 0, MiscEngineUp)) { - pr_debug("%s: Received EngineUp message\n", - sc_adapter[card]->devicename); - sc_adapter[card]->EngineUp = 1; - sendmessage(card, CEPID, ceReqTypeCall, ceReqClass0, ceReqCallGetMyNumber, 1, 0, NULL); - sendmessage(card, CEPID, ceReqTypeCall, ceReqClass0, ceReqCallGetMyNumber, 2, 0, NULL); - init_timer(&sc_adapter[card]->stat_timer); - sc_adapter[card]->stat_timer.function = check_phystat; - sc_adapter[card]->stat_timer.data = card; - sc_adapter[card]->stat_timer.expires = jiffies + CHECKSTAT_TIME; - add_timer(&sc_adapter[card]->stat_timer); - continue; - } - - /* - * Start proc response - */ - if (IS_CM_MESSAGE(rcvmsg, 2, 0, StartProc)) { - pr_debug("%s: StartProc Response Status %d\n", - sc_adapter[card]->devicename, - rcvmsg.rsp_status); - continue; - } - - /* - * Handle a GetMyNumber Rsp - */ - if (IS_CE_MESSAGE(rcvmsg, Call, 0, GetMyNumber)) { - strlcpy(sc_adapter[card]->channel[rcvmsg.phy_link_no - 1].dn, - rcvmsg.msg_data.byte_array, - sizeof(rcvmsg.msg_data.byte_array)); - continue; - } - - /* - * PhyStatus response - */ - if (IS_CE_MESSAGE(rcvmsg, Phy, 2, Status)) { - unsigned int b1stat, b2stat; - - /* - * Covert the message data to the adapter->phystat code - */ - b1stat = (unsigned int) rcvmsg.msg_data.byte_array[0]; - b2stat = (unsigned int) rcvmsg.msg_data.byte_array[1]; - - sc_adapter[card]->nphystat = (b2stat >> 8) | b1stat; /* endian?? */ - pr_debug("%s: PhyStat is 0x%2x\n", - sc_adapter[card]->devicename, - sc_adapter[card]->nphystat); - continue; - } - - - /* - * Handle a GetFramFormat - */ - if (IS_CE_MESSAGE(rcvmsg, Call, 0, GetFrameFormat)) { - if (rcvmsg.msg_data.byte_array[0] != HDLC_PROTO) { - unsigned int proto = HDLC_PROTO; - /* - * Set board format to HDLC if it wasn't already - */ - pr_debug("%s: current frame format: 0x%x, will change to HDLC\n", - sc_adapter[card]->devicename, - rcvmsg.msg_data.byte_array[0]); - sendmessage(card, CEPID, ceReqTypeCall, - ceReqClass0, - ceReqCallSetFrameFormat, - (unsigned char)channel + 1, - 1, &proto); - } - continue; - } - - /* - * Hmm... - */ - pr_debug("%s: Received unhandled message (%d,%d,%d) link %d\n", - sc_adapter[card]->devicename, - rcvmsg.type, rcvmsg.class, rcvmsg.code, - rcvmsg.phy_link_no); - - } /* while */ - - pr_debug("%s: Exiting Interrupt Handler\n", - sc_adapter[card]->devicename); - return IRQ_HANDLED; -} diff --git a/drivers/isdn/sc/ioctl.c b/drivers/isdn/sc/ioctl.c deleted file mode 100644 index e63983aa1d2769..00000000000000 --- a/drivers/isdn/sc/ioctl.c +++ /dev/null @@ -1,582 +0,0 @@ -/* - * Copyright (C) 1996 SpellCaster Telecommunications Inc. - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - */ - -#include "includes.h" -#include "hardware.h" -#include "message.h" -#include "card.h" -#include "scioc.h" - -static int GetStatus(int card, boardInfo *); - -/* - * Process private IOCTL messages (typically from scctrl) - */ -int sc_ioctl(int card, scs_ioctl *data) -{ - int status; - RspMessage *rcvmsg; - char *spid; - char *dn; - char switchtype; - char speed; - - rcvmsg = kmalloc(sizeof(RspMessage), GFP_KERNEL); - if (!rcvmsg) - return -ENOMEM; - - switch (data->command) { - case SCIOCRESET: /* Perform a hard reset of the adapter */ - { - pr_debug("%s: SCIOCRESET: ioctl received\n", - sc_adapter[card]->devicename); - sc_adapter[card]->StartOnReset = 0; - kfree(rcvmsg); - return reset(card); - } - - case SCIOCLOAD: - { - char *srec; - - srec = kmalloc(SCIOC_SRECSIZE, GFP_KERNEL); - if (!srec) { - kfree(rcvmsg); - return -ENOMEM; - } - pr_debug("%s: SCIOLOAD: ioctl received\n", - sc_adapter[card]->devicename); - if (sc_adapter[card]->EngineUp) { - pr_debug("%s: SCIOCLOAD: command failed, LoadProc while engine running.\n", - sc_adapter[card]->devicename); - kfree(rcvmsg); - kfree(srec); - return -1; - } - - /* - * Get the SRec from user space - */ - if (copy_from_user(srec, data->dataptr, SCIOC_SRECSIZE)) { - kfree(rcvmsg); - kfree(srec); - return -EFAULT; - } - - status = send_and_receive(card, CMPID, cmReqType2, cmReqClass0, cmReqLoadProc, - 0, SCIOC_SRECSIZE, srec, rcvmsg, SAR_TIMEOUT); - kfree(rcvmsg); - kfree(srec); - - if (status) { - pr_debug("%s: SCIOCLOAD: command failed, status = %d\n", - sc_adapter[card]->devicename, status); - return -1; - } - else { - pr_debug("%s: SCIOCLOAD: command successful\n", - sc_adapter[card]->devicename); - return 0; - } - } - - case SCIOCSTART: - { - kfree(rcvmsg); - pr_debug("%s: SCIOSTART: ioctl received\n", - sc_adapter[card]->devicename); - if (sc_adapter[card]->EngineUp) { - pr_debug("%s: SCIOCSTART: command failed, engine already running.\n", - sc_adapter[card]->devicename); - return -1; - } - - sc_adapter[card]->StartOnReset = 1; - startproc(card); - return 0; - } - - case SCIOCSETSWITCH: - { - pr_debug("%s: SCIOSETSWITCH: ioctl received\n", - sc_adapter[card]->devicename); - - /* - * Get the switch type from user space - */ - if (copy_from_user(&switchtype, data->dataptr, sizeof(char))) { - kfree(rcvmsg); - return -EFAULT; - } - - pr_debug("%s: SCIOCSETSWITCH: setting switch type to %d\n", - sc_adapter[card]->devicename, - switchtype); - status = send_and_receive(card, CEPID, ceReqTypeCall, ceReqClass0, ceReqCallSetSwitchType, - 0, sizeof(char), &switchtype, rcvmsg, SAR_TIMEOUT); - if (!status && !(rcvmsg->rsp_status)) { - pr_debug("%s: SCIOCSETSWITCH: command successful\n", - sc_adapter[card]->devicename); - kfree(rcvmsg); - return 0; - } - else { - pr_debug("%s: SCIOCSETSWITCH: command failed (status = %d)\n", - sc_adapter[card]->devicename, status); - kfree(rcvmsg); - return status; - } - } - - case SCIOCGETSWITCH: - { - pr_debug("%s: SCIOGETSWITCH: ioctl received\n", - sc_adapter[card]->devicename); - - /* - * Get the switch type from the board - */ - status = send_and_receive(card, CEPID, ceReqTypeCall, ceReqClass0, - ceReqCallGetSwitchType, 0, 0, NULL, rcvmsg, SAR_TIMEOUT); - if (!status && !(rcvmsg->rsp_status)) { - pr_debug("%s: SCIOCGETSWITCH: command successful\n", - sc_adapter[card]->devicename); - } - else { - pr_debug("%s: SCIOCGETSWITCH: command failed (status = %d)\n", - sc_adapter[card]->devicename, status); - kfree(rcvmsg); - return status; - } - - switchtype = rcvmsg->msg_data.byte_array[0]; - - /* - * Package the switch type and send to user space - */ - if (copy_to_user(data->dataptr, &switchtype, - sizeof(char))) { - kfree(rcvmsg); - return -EFAULT; - } - - kfree(rcvmsg); - return 0; - } - - case SCIOCGETSPID: - { - pr_debug("%s: SCIOGETSPID: ioctl received\n", - sc_adapter[card]->devicename); - - spid = kzalloc(SCIOC_SPIDSIZE, GFP_KERNEL); - if (!spid) { - kfree(rcvmsg); - return -ENOMEM; - } - /* - * Get the spid from the board - */ - status = send_and_receive(card, CEPID, ceReqTypeCall, ceReqClass0, ceReqCallGetSPID, - data->channel, 0, NULL, rcvmsg, SAR_TIMEOUT); - if (!status) { - pr_debug("%s: SCIOCGETSPID: command successful\n", - sc_adapter[card]->devicename); - } else { - pr_debug("%s: SCIOCGETSPID: command failed (status = %d)\n", - sc_adapter[card]->devicename, status); - kfree(spid); - kfree(rcvmsg); - return status; - } - strlcpy(spid, rcvmsg->msg_data.byte_array, SCIOC_SPIDSIZE); - - /* - * Package the switch type and send to user space - */ - if (copy_to_user(data->dataptr, spid, SCIOC_SPIDSIZE)) { - kfree(spid); - kfree(rcvmsg); - return -EFAULT; - } - - kfree(spid); - kfree(rcvmsg); - return 0; - } - - case SCIOCSETSPID: - { - pr_debug("%s: DCBIOSETSPID: ioctl received\n", - sc_adapter[card]->devicename); - - /* - * Get the spid from user space - */ - spid = memdup_user(data->dataptr, SCIOC_SPIDSIZE); - if (IS_ERR(spid)) { - kfree(rcvmsg); - return PTR_ERR(spid); - } - - pr_debug("%s: SCIOCSETSPID: setting channel %d spid to %s\n", - sc_adapter[card]->devicename, data->channel, spid); - status = send_and_receive(card, CEPID, ceReqTypeCall, - ceReqClass0, ceReqCallSetSPID, data->channel, - strlen(spid), spid, rcvmsg, SAR_TIMEOUT); - if (!status && !(rcvmsg->rsp_status)) { - pr_debug("%s: SCIOCSETSPID: command successful\n", - sc_adapter[card]->devicename); - kfree(rcvmsg); - kfree(spid); - return 0; - } - else { - pr_debug("%s: SCIOCSETSPID: command failed (status = %d)\n", - sc_adapter[card]->devicename, status); - kfree(rcvmsg); - kfree(spid); - return status; - } - } - - case SCIOCGETDN: - { - pr_debug("%s: SCIOGETDN: ioctl received\n", - sc_adapter[card]->devicename); - - /* - * Get the dn from the board - */ - status = send_and_receive(card, CEPID, ceReqTypeCall, ceReqClass0, ceReqCallGetMyNumber, - data->channel, 0, NULL, rcvmsg, SAR_TIMEOUT); - if (!status) { - pr_debug("%s: SCIOCGETDN: command successful\n", - sc_adapter[card]->devicename); - } - else { - pr_debug("%s: SCIOCGETDN: command failed (status = %d)\n", - sc_adapter[card]->devicename, status); - kfree(rcvmsg); - return status; - } - - dn = kzalloc(SCIOC_DNSIZE, GFP_KERNEL); - if (!dn) { - kfree(rcvmsg); - return -ENOMEM; - } - strlcpy(dn, rcvmsg->msg_data.byte_array, SCIOC_DNSIZE); - kfree(rcvmsg); - - /* - * Package the dn and send to user space - */ - if (copy_to_user(data->dataptr, dn, SCIOC_DNSIZE)) { - kfree(dn); - return -EFAULT; - } - kfree(dn); - return 0; - } - - case SCIOCSETDN: - { - pr_debug("%s: SCIOSETDN: ioctl received\n", - sc_adapter[card]->devicename); - - /* - * Get the spid from user space - */ - dn = memdup_user(data->dataptr, SCIOC_DNSIZE); - if (IS_ERR(dn)) { - kfree(rcvmsg); - return PTR_ERR(dn); - } - - pr_debug("%s: SCIOCSETDN: setting channel %d dn to %s\n", - sc_adapter[card]->devicename, data->channel, dn); - status = send_and_receive(card, CEPID, ceReqTypeCall, - ceReqClass0, ceReqCallSetMyNumber, data->channel, - strlen(dn), dn, rcvmsg, SAR_TIMEOUT); - if (!status && !(rcvmsg->rsp_status)) { - pr_debug("%s: SCIOCSETDN: command successful\n", - sc_adapter[card]->devicename); - kfree(rcvmsg); - kfree(dn); - return 0; - } - else { - pr_debug("%s: SCIOCSETDN: command failed (status = %d)\n", - sc_adapter[card]->devicename, status); - kfree(rcvmsg); - kfree(dn); - return status; - } - } - - case SCIOCTRACE: - - pr_debug("%s: SCIOTRACE: ioctl received\n", - sc_adapter[card]->devicename); -/* sc_adapter[card]->trace = !sc_adapter[card]->trace; - pr_debug("%s: SCIOCTRACE: tracing turned %s\n", - sc_adapter[card]->devicename, - sc_adapter[card]->trace ? "ON" : "OFF"); */ - break; - - case SCIOCSTAT: - { - boardInfo *bi; - - pr_debug("%s: SCIOSTAT: ioctl received\n", - sc_adapter[card]->devicename); - - bi = kzalloc(sizeof(boardInfo), GFP_KERNEL); - if (!bi) { - kfree(rcvmsg); - return -ENOMEM; - } - - kfree(rcvmsg); - GetStatus(card, bi); - - if (copy_to_user(data->dataptr, bi, sizeof(boardInfo))) { - kfree(bi); - return -EFAULT; - } - - kfree(bi); - return 0; - } - - case SCIOCGETSPEED: - { - pr_debug("%s: SCIOGETSPEED: ioctl received\n", - sc_adapter[card]->devicename); - - /* - * Get the speed from the board - */ - status = send_and_receive(card, CEPID, ceReqTypeCall, ceReqClass0, - ceReqCallGetCallType, data->channel, 0, NULL, rcvmsg, SAR_TIMEOUT); - if (!status && !(rcvmsg->rsp_status)) { - pr_debug("%s: SCIOCGETSPEED: command successful\n", - sc_adapter[card]->devicename); - } - else { - pr_debug("%s: SCIOCGETSPEED: command failed (status = %d)\n", - sc_adapter[card]->devicename, status); - kfree(rcvmsg); - return status; - } - - speed = rcvmsg->msg_data.byte_array[0]; - - kfree(rcvmsg); - - /* - * Package the switch type and send to user space - */ - - if (copy_to_user(data->dataptr, &speed, sizeof(char))) - return -EFAULT; - - return 0; - } - - case SCIOCSETSPEED: - pr_debug("%s: SCIOCSETSPEED: ioctl received\n", - sc_adapter[card]->devicename); - break; - - case SCIOCLOOPTST: - pr_debug("%s: SCIOCLOOPTST: ioctl received\n", - sc_adapter[card]->devicename); - break; - - default: - kfree(rcvmsg); - return -1; - } - - kfree(rcvmsg); - return 0; -} - -static int GetStatus(int card, boardInfo *bi) -{ - RspMessage rcvmsg; - int i, status; - - /* - * Fill in some of the basic info about the board - */ - bi->modelid = sc_adapter[card]->model; - strcpy(bi->serial_no, sc_adapter[card]->hwconfig.serial_no); - strcpy(bi->part_no, sc_adapter[card]->hwconfig.part_no); - bi->iobase = sc_adapter[card]->iobase; - bi->rambase = sc_adapter[card]->rambase; - bi->irq = sc_adapter[card]->interrupt; - bi->ramsize = sc_adapter[card]->hwconfig.ram_size; - bi->interface = sc_adapter[card]->hwconfig.st_u_sense; - strcpy(bi->load_ver, sc_adapter[card]->load_ver); - strcpy(bi->proc_ver, sc_adapter[card]->proc_ver); - - /* - * Get the current PhyStats and LnkStats - */ - status = send_and_receive(card, CEPID, ceReqTypePhy, ceReqClass2, - ceReqPhyStatus, 0, 0, NULL, &rcvmsg, SAR_TIMEOUT); - if (!status) { - if (sc_adapter[card]->model < PRI_BOARD) { - bi->l1_status = rcvmsg.msg_data.byte_array[2]; - for (i = 0; i < BRI_CHANNELS; i++) - bi->status.bristats[i].phy_stat = - rcvmsg.msg_data.byte_array[i]; - } - else { - bi->l1_status = rcvmsg.msg_data.byte_array[0]; - bi->l2_status = rcvmsg.msg_data.byte_array[1]; - for (i = 0; i < PRI_CHANNELS; i++) - bi->status.pristats[i].phy_stat = - rcvmsg.msg_data.byte_array[i + 2]; - } - } - - /* - * Get the call types for each channel - */ - for (i = 0; i < sc_adapter[card]->nChannels; i++) { - status = send_and_receive(card, CEPID, ceReqTypeCall, ceReqClass0, - ceReqCallGetCallType, 0, 0, NULL, &rcvmsg, SAR_TIMEOUT); - if (!status) { - if (sc_adapter[card]->model == PRI_BOARD) { - bi->status.pristats[i].call_type = - rcvmsg.msg_data.byte_array[0]; - } - else { - bi->status.bristats[i].call_type = - rcvmsg.msg_data.byte_array[0]; - } - } - } - - /* - * If PRI, get the call states and service states for each channel - */ - if (sc_adapter[card]->model == PRI_BOARD) { - /* - * Get the call states - */ - status = send_and_receive(card, CEPID, ceReqTypeStat, ceReqClass2, - ceReqPhyChCallState, 0, 0, NULL, &rcvmsg, SAR_TIMEOUT); - if (!status) { - for (i = 0; i < PRI_CHANNELS; i++) - bi->status.pristats[i].call_state = - rcvmsg.msg_data.byte_array[i]; - } - - /* - * Get the service states - */ - status = send_and_receive(card, CEPID, ceReqTypeStat, ceReqClass2, - ceReqPhyChServState, 0, 0, NULL, &rcvmsg, SAR_TIMEOUT); - if (!status) { - for (i = 0; i < PRI_CHANNELS; i++) - bi->status.pristats[i].serv_state = - rcvmsg.msg_data.byte_array[i]; - } - - /* - * Get the link stats for the channels - */ - for (i = 1; i <= PRI_CHANNELS; i++) { - status = send_and_receive(card, CEPID, ceReqTypeLnk, ceReqClass0, - ceReqLnkGetStats, i, 0, NULL, &rcvmsg, SAR_TIMEOUT); - if (!status) { - bi->status.pristats[i - 1].link_stats.tx_good = - (unsigned long)rcvmsg.msg_data.byte_array[0]; - bi->status.pristats[i - 1].link_stats.tx_bad = - (unsigned long)rcvmsg.msg_data.byte_array[4]; - bi->status.pristats[i - 1].link_stats.rx_good = - (unsigned long)rcvmsg.msg_data.byte_array[8]; - bi->status.pristats[i - 1].link_stats.rx_bad = - (unsigned long)rcvmsg.msg_data.byte_array[12]; - } - } - - /* - * Link stats for the D channel - */ - status = send_and_receive(card, CEPID, ceReqTypeLnk, ceReqClass0, - ceReqLnkGetStats, 0, 0, NULL, &rcvmsg, SAR_TIMEOUT); - if (!status) { - bi->dch_stats.tx_good = (unsigned long)rcvmsg.msg_data.byte_array[0]; - bi->dch_stats.tx_bad = (unsigned long)rcvmsg.msg_data.byte_array[4]; - bi->dch_stats.rx_good = (unsigned long)rcvmsg.msg_data.byte_array[8]; - bi->dch_stats.rx_bad = (unsigned long)rcvmsg.msg_data.byte_array[12]; - } - - return 0; - } - - /* - * If BRI or POTS, Get SPID, DN and call types for each channel - */ - - /* - * Get the link stats for the channels - */ - status = send_and_receive(card, CEPID, ceReqTypeLnk, ceReqClass0, - ceReqLnkGetStats, 0, 0, NULL, &rcvmsg, SAR_TIMEOUT); - if (!status) { - bi->dch_stats.tx_good = (unsigned long)rcvmsg.msg_data.byte_array[0]; - bi->dch_stats.tx_bad = (unsigned long)rcvmsg.msg_data.byte_array[4]; - bi->dch_stats.rx_good = (unsigned long)rcvmsg.msg_data.byte_array[8]; - bi->dch_stats.rx_bad = (unsigned long)rcvmsg.msg_data.byte_array[12]; - bi->status.bristats[0].link_stats.tx_good = - (unsigned long)rcvmsg.msg_data.byte_array[16]; - bi->status.bristats[0].link_stats.tx_bad = - (unsigned long)rcvmsg.msg_data.byte_array[20]; - bi->status.bristats[0].link_stats.rx_good = - (unsigned long)rcvmsg.msg_data.byte_array[24]; - bi->status.bristats[0].link_stats.rx_bad = - (unsigned long)rcvmsg.msg_data.byte_array[28]; - bi->status.bristats[1].link_stats.tx_good = - (unsigned long)rcvmsg.msg_data.byte_array[32]; - bi->status.bristats[1].link_stats.tx_bad = - (unsigned long)rcvmsg.msg_data.byte_array[36]; - bi->status.bristats[1].link_stats.rx_good = - (unsigned long)rcvmsg.msg_data.byte_array[40]; - bi->status.bristats[1].link_stats.rx_bad = - (unsigned long)rcvmsg.msg_data.byte_array[44]; - } - - /* - * Get the SPIDs - */ - for (i = 0; i < BRI_CHANNELS; i++) { - status = send_and_receive(card, CEPID, ceReqTypeCall, ceReqClass0, - ceReqCallGetSPID, i + 1, 0, NULL, &rcvmsg, SAR_TIMEOUT); - if (!status) - strcpy(bi->status.bristats[i].spid, rcvmsg.msg_data.byte_array); - } - - /* - * Get the DNs - */ - for (i = 0; i < BRI_CHANNELS; i++) { - status = send_and_receive(card, CEPID, ceReqTypeCall, ceReqClass0, - ceReqCallGetMyNumber, i + 1, 0, NULL, &rcvmsg, SAR_TIMEOUT); - if (!status) - strcpy(bi->status.bristats[i].dn, rcvmsg.msg_data.byte_array); - } - - return 0; -} diff --git a/drivers/isdn/sc/message.c b/drivers/isdn/sc/message.c deleted file mode 100644 index 9679a1902b325f..00000000000000 --- a/drivers/isdn/sc/message.c +++ /dev/null @@ -1,230 +0,0 @@ -/* $Id: message.c,v 1.5.8.2 2001/09/23 22:24:59 kai Exp $ - * - * functions for sending and receiving control messages - * - * Copyright (C) 1996 SpellCaster Telecommunications Inc. - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - * For more information, please contact gpl-info@spellcast.com or write: - * - * SpellCaster Telecommunications Inc. - * 5621 Finch Avenue East, Unit #3 - * Scarborough, Ontario Canada - * M1B 2T9 - * +1 (416) 297-8565 - * +1 (416) 297-6433 Facsimile - */ -#include <linux/sched.h> -#include "includes.h" -#include "hardware.h" -#include "message.h" -#include "card.h" - -/* - * receive a message from the board - */ -int receivemessage(int card, RspMessage *rspmsg) -{ - DualPortMemory *dpm; - unsigned long flags; - - if (!IS_VALID_CARD(card)) { - pr_debug("Invalid param: %d is not a valid card id\n", card); - return -EINVAL; - } - - pr_debug("%s: Entered receivemessage\n", - sc_adapter[card]->devicename); - - /* - * See if there are messages waiting - */ - if (inb(sc_adapter[card]->ioport[FIFO_STATUS]) & RF_HAS_DATA) { - /* - * Map in the DPM to the base page and copy the message - */ - spin_lock_irqsave(&sc_adapter[card]->lock, flags); - outb((sc_adapter[card]->shmem_magic >> 14) | 0x80, - sc_adapter[card]->ioport[sc_adapter[card]->shmem_pgport]); - dpm = (DualPortMemory *) sc_adapter[card]->rambase; - memcpy_fromio(rspmsg, &(dpm->rsp_queue[dpm->rsp_tail]), - MSG_LEN); - dpm->rsp_tail = (dpm->rsp_tail + 1) % MAX_MESSAGES; - inb(sc_adapter[card]->ioport[FIFO_READ]); - spin_unlock_irqrestore(&sc_adapter[card]->lock, flags); - /* - * Tell the board that the message is received - */ - pr_debug("%s: Received Message seq:%d pid:%d time:%d cmd:%d " - "cnt:%d (type,class,code):(%d,%d,%d) " - "link:%d stat:0x%x\n", - sc_adapter[card]->devicename, - rspmsg->sequence_no, - rspmsg->process_id, - rspmsg->time_stamp, - rspmsg->cmd_sequence_no, - rspmsg->msg_byte_cnt, - rspmsg->type, - rspmsg->class, - rspmsg->code, - rspmsg->phy_link_no, - rspmsg->rsp_status); - - return 0; - } - return -ENOMSG; -} - -/* - * send a message to the board - */ -int sendmessage(int card, - unsigned int procid, - unsigned int type, - unsigned int class, - unsigned int code, - unsigned int link, - unsigned int data_len, - unsigned int *data) -{ - DualPortMemory *dpm; - ReqMessage sndmsg; - unsigned long flags; - - if (!IS_VALID_CARD(card)) { - pr_debug("Invalid param: %d is not a valid card id\n", card); - return -EINVAL; - } - - /* - * Make sure we only send CEPID messages when the engine is up - * and CMPID messages when it is down - */ - if (sc_adapter[card]->EngineUp && procid == CMPID) { - pr_debug("%s: Attempt to send CM message with engine up\n", - sc_adapter[card]->devicename); - return -ESRCH; - } - - if (!sc_adapter[card]->EngineUp && procid == CEPID) { - pr_debug("%s: Attempt to send CE message with engine down\n", - sc_adapter[card]->devicename); - return -ESRCH; - } - - memset(&sndmsg, 0, MSG_LEN); - sndmsg.msg_byte_cnt = 4; - sndmsg.type = type; - sndmsg.class = class; - sndmsg.code = code; - sndmsg.phy_link_no = link; - - if (data_len > 0) { - if (data_len > MSG_DATA_LEN) - data_len = MSG_DATA_LEN; - memcpy(&(sndmsg.msg_data), data, data_len); - sndmsg.msg_byte_cnt = data_len + 8; - } - - sndmsg.process_id = procid; - sndmsg.sequence_no = sc_adapter[card]->seq_no++ % 256; - - /* - * wait for an empty slot in the queue - */ - while (!(inb(sc_adapter[card]->ioport[FIFO_STATUS]) & WF_NOT_FULL)) - udelay(1); - - /* - * Disable interrupts and map in shared memory - */ - spin_lock_irqsave(&sc_adapter[card]->lock, flags); - outb((sc_adapter[card]->shmem_magic >> 14) | 0x80, - sc_adapter[card]->ioport[sc_adapter[card]->shmem_pgport]); - dpm = (DualPortMemory *) sc_adapter[card]->rambase; /* Fix me */ - memcpy_toio(&(dpm->req_queue[dpm->req_head]), &sndmsg, MSG_LEN); - dpm->req_head = (dpm->req_head + 1) % MAX_MESSAGES; - outb(sndmsg.sequence_no, sc_adapter[card]->ioport[FIFO_WRITE]); - spin_unlock_irqrestore(&sc_adapter[card]->lock, flags); - - pr_debug("%s: Sent Message seq:%d pid:%d time:%d " - "cnt:%d (type,class,code):(%d,%d,%d) " - "link:%d\n ", - sc_adapter[card]->devicename, - sndmsg.sequence_no, - sndmsg.process_id, - sndmsg.time_stamp, - sndmsg.msg_byte_cnt, - sndmsg.type, - sndmsg.class, - sndmsg.code, - sndmsg.phy_link_no); - - return 0; -} - -int send_and_receive(int card, - unsigned int procid, - unsigned char type, - unsigned char class, - unsigned char code, - unsigned char link, - unsigned char data_len, - unsigned char *data, - RspMessage *mesgdata, - int timeout) -{ - int retval; - int tries; - - if (!IS_VALID_CARD(card)) { - pr_debug("Invalid param: %d is not a valid card id\n", card); - return -EINVAL; - } - - sc_adapter[card]->want_async_messages = 1; - retval = sendmessage(card, procid, type, class, code, link, - data_len, (unsigned int *) data); - - if (retval) { - pr_debug("%s: SendMessage failed in SAR\n", - sc_adapter[card]->devicename); - sc_adapter[card]->want_async_messages = 0; - return -EIO; - } - - tries = 0; - /* wait for the response */ - while (tries < timeout) { - schedule_timeout_interruptible(1); - - pr_debug("SAR waiting..\n"); - - /* - * See if we got our message back - */ - if ((sc_adapter[card]->async_msg.type == type) && - (sc_adapter[card]->async_msg.class == class) && - (sc_adapter[card]->async_msg.code == code) && - (sc_adapter[card]->async_msg.phy_link_no == link)) { - - /* - * Got it! - */ - pr_debug("%s: Got ASYNC message\n", - sc_adapter[card]->devicename); - memcpy(mesgdata, &(sc_adapter[card]->async_msg), - sizeof(RspMessage)); - sc_adapter[card]->want_async_messages = 0; - return 0; - } - - tries++; - } - - pr_debug("%s: SAR message timeout\n", sc_adapter[card]->devicename); - sc_adapter[card]->want_async_messages = 0; - return -ETIME; -} diff --git a/drivers/isdn/sc/message.h b/drivers/isdn/sc/message.h deleted file mode 100644 index 5e6f4a5c15f8d8..00000000000000 --- a/drivers/isdn/sc/message.h +++ /dev/null @@ -1,245 +0,0 @@ -/* $Id: message.h,v 1.1.10.1 2001/09/23 22:24:59 kai Exp $ - * - * Copyright (C) 1996 SpellCaster Telecommunications Inc. - * - * structures, macros and defines useful for sending - * messages to the adapter - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - * For more information, please contact gpl-info@spellcast.com or write: - * - * SpellCaster Telecommunications Inc. - * 5621 Finch Avenue East, Unit #3 - * Scarborough, Ontario Canada - * M1B 2T9 - * +1 (416) 297-8565 - * +1 (416) 297-6433 Facsimile - */ - -/* - * Board message macros, defines and structures - */ - -#ifndef MESSAGE_H -#define MESSAGE_H - -#define MAX_MESSAGES 32 /* Maximum messages that can be - queued */ -#define MSG_DATA_LEN 48 /* Maximum size of message payload */ -#define MSG_LEN 64 /* Size of a message */ -#define CMPID 0 /* Loader message process ID */ -#define CEPID 64 /* Firmware message process ID */ - -/* - * Macro to determine if a message is a loader message - */ -#define IS_CM_MESSAGE(mesg, tx, cx, dx) \ - ((mesg.type == cmRspType##tx) \ - && (mesg.class == cmRspClass##cx) \ - && (mesg.code == cmRsp##dx)) - -/* - * Macro to determine if a message is a firmware message - */ -#define IS_CE_MESSAGE(mesg, tx, cx, dx) \ - ((mesg.type == ceRspType##tx) \ - && (mesg.class == ceRspClass##cx) \ - && (mesg.code == ceRsp##tx##dx)) - -/* - * Loader Request and Response Messages - */ - -/* message types */ -#define cmReqType1 1 -#define cmReqType2 2 -#define cmRspType0 0 -#define cmRspType1 1 -#define cmRspType2 2 -#define cmRspType5 5 - -/* message classes */ -#define cmReqClass0 0 -#define cmRspClass0 0 - -/* message codes */ -#define cmReqHWConfig 1 /* 1,0,1 */ -#define cmReqMsgLpbk 2 /* 1,0,2 */ -#define cmReqVersion 3 /* 1,0,3 */ -#define cmReqLoadProc 1 /* 2,0,1 */ -#define cmReqStartProc 2 /* 2,0,2 */ -#define cmReqReadMem 6 /* 2,0,6 */ -#define cmRspHWConfig cmReqHWConfig -#define cmRspMsgLpbk cmReqMsgLpbk -#define cmRspVersion cmReqVersion -#define cmRspLoadProc cmReqLoadProc -#define cmRspStartProc cmReqStartProc -#define cmRspReadMem cmReqReadMem -#define cmRspMiscEngineUp 1 /* 5,0,1 */ -#define cmRspInvalid 0 /* 0,0,0 */ - - -/* - * Firmware Request and Response Messages - */ - -/* message types */ -#define ceReqTypePhy 1 -#define ceReqTypeLnk 2 -#define ceReqTypeCall 3 -#define ceReqTypeStat 1 -#define ceRspTypeErr 0 -#define ceRspTypePhy ceReqTypePhy -#define ceRspTypeLnk ceReqTypeLnk -#define ceRspTypeCall ceReqTypeCall -#define ceRspTypeStat ceReqTypeStat - -/* message classes */ -#define ceReqClass0 0 -#define ceReqClass1 1 -#define ceReqClass2 2 -#define ceReqClass3 3 -#define ceRspClass0 ceReqClass0 -#define ceRspClass1 ceReqClass1 -#define ceRspClass2 ceReqClass2 -#define ceRspClass3 ceReqClass3 - -/* message codes (B) = BRI only, (P) = PRI only, (V) = POTS only */ -#define ceReqPhyProcInfo 1 /* 1,0,1 */ -#define ceReqPhyConnect 1 /* 1,1,1 */ -#define ceReqPhyDisconnect 2 /* 1,1,2 */ -#define ceReqPhySetParams 3 /* 1,1,3 (P) */ -#define ceReqPhyGetParams 4 /* 1,1,4 (P) */ -#define ceReqPhyStatus 1 /* 1,2,1 */ -#define ceReqPhyAcfaStatus 2 /* 1,2,2 (P) */ -#define ceReqPhyChCallState 3 /* 1,2,3 (P) */ -#define ceReqPhyChServState 4 /* 1,2,4 (P) */ -#define ceReqPhyRLoopBack 1 /* 1,3,1 */ -#define ceRspPhyProcInfo ceReqPhyProcInfo -#define ceRspPhyConnect ceReqPhyConnect -#define ceRspPhyDisconnect ceReqPhyDisconnect -#define ceRspPhySetParams ceReqPhySetParams -#define ceRspPhyGetParams ceReqPhyGetParams -#define ceRspPhyStatus ceReqPhyStatus -#define ceRspPhyAcfaStatus ceReqPhyAcfaStatus -#define ceRspPhyChCallState ceReqPhyChCallState -#define ceRspPhyChServState ceReqPhyChServState -#define ceRspPhyRLoopBack ceReqphyRLoopBack -#define ceReqLnkSetParam 1 /* 2,0,1 */ -#define ceReqLnkGetParam 2 /* 2,0,2 */ -#define ceReqLnkGetStats 3 /* 2,0,3 */ -#define ceReqLnkWrite 1 /* 2,1,1 */ -#define ceReqLnkRead 2 /* 2,1,2 */ -#define ceReqLnkFlush 3 /* 2,1,3 */ -#define ceReqLnkWrBufTrc 4 /* 2,1,4 */ -#define ceReqLnkRdBufTrc 5 /* 2,1,5 */ -#define ceRspLnkSetParam ceReqLnkSetParam -#define ceRspLnkGetParam ceReqLnkGetParam -#define ceRspLnkGetStats ceReqLnkGetStats -#define ceRspLnkWrite ceReqLnkWrite -#define ceRspLnkRead ceReqLnkRead -#define ceRspLnkFlush ceReqLnkFlush -#define ceRspLnkWrBufTrc ceReqLnkWrBufTrc -#define ceRspLnkRdBufTrc ceReqLnkRdBufTrc -#define ceReqCallSetSwitchType 1 /* 3,0,1 */ -#define ceReqCallGetSwitchType 2 /* 3,0,2 */ -#define ceReqCallSetFrameFormat 3 /* 3,0,3 */ -#define ceReqCallGetFrameFormat 4 /* 3,0,4 */ -#define ceReqCallSetCallType 5 /* 3,0,5 */ -#define ceReqCallGetCallType 6 /* 3,0,6 */ -#define ceReqCallSetSPID 7 /* 3,0,7 (!P) */ -#define ceReqCallGetSPID 8 /* 3,0,8 (!P) */ -#define ceReqCallSetMyNumber 9 /* 3,0,9 (!P) */ -#define ceReqCallGetMyNumber 10 /* 3,0,10 (!P) */ -#define ceRspCallSetSwitchType ceReqCallSetSwitchType -#define ceRspCallGetSwitchType ceReqCallSetSwitchType -#define ceRspCallSetFrameFormat ceReqCallSetFrameFormat -#define ceRspCallGetFrameFormat ceReqCallGetFrameFormat -#define ceRspCallSetCallType ceReqCallSetCallType -#define ceRspCallGetCallType ceReqCallGetCallType -#define ceRspCallSetSPID ceReqCallSetSPID -#define ceRspCallGetSPID ceReqCallGetSPID -#define ceRspCallSetMyNumber ceReqCallSetMyNumber -#define ceRspCallGetMyNumber ceReqCallGetMyNumber -#define ceRspStatAcfaStatus 2 -#define ceRspStat -#define ceRspErrError 0 /* 0,0,0 */ - -/* - * Call Types - */ -#define CALLTYPE_64K 0 -#define CALLTYPE_56K 1 -#define CALLTYPE_SPEECH 2 -#define CALLTYPE_31KHZ 3 - -/* - * Link Level data contains a pointer to and the length of - * a buffer in shared RAM. Used by LnkRead and LnkWrite message - * types. Part of RspMsgStruct and ReqMsgStruct. - */ -typedef struct { - unsigned long buff_offset; - unsigned short msg_len; -} LLData; - - -/* - * Message payload template for an HWConfig message - */ -typedef struct { - char st_u_sense; - char powr_sense; - char sply_sense; - unsigned char asic_id; - long ram_size; - char serial_no[13]; - char part_no[13]; - char rev_no[2]; -} HWConfig_pl; - -/* - * A Message - */ -struct message { - unsigned char sequence_no; - unsigned char process_id; - unsigned char time_stamp; - unsigned char cmd_sequence_no; /* Rsp messages only */ - unsigned char reserved1[3]; - unsigned char msg_byte_cnt; - unsigned char type; - unsigned char class; - unsigned char code; - unsigned char phy_link_no; - unsigned char rsp_status; /* Rsp messages only */ - unsigned char reseved2[3]; - union { - unsigned char byte_array[MSG_DATA_LEN]; - LLData response; - HWConfig_pl HWCresponse; - } msg_data; -}; - -typedef struct message ReqMessage; /* Request message */ -typedef struct message RspMessage; /* Response message */ - -/* - * The first 5010 bytes of shared memory contain the message queues, - * indexes and other data. This structure is its template - */ -typedef struct { - volatile ReqMessage req_queue[MAX_MESSAGES]; - volatile RspMessage rsp_queue[MAX_MESSAGES]; - volatile unsigned char req_head; - volatile unsigned char req_tail; - volatile unsigned char rsp_head; - volatile unsigned char rsp_tail; - volatile unsigned long signature; - volatile unsigned long trace_enable; - volatile unsigned char reserved[4]; -} DualPortMemory; - -#endif diff --git a/drivers/isdn/sc/packet.c b/drivers/isdn/sc/packet.c deleted file mode 100644 index 2446957085e0cf..00000000000000 --- a/drivers/isdn/sc/packet.c +++ /dev/null @@ -1,204 +0,0 @@ -/* $Id: packet.c,v 1.5.8.1 2001/09/23 22:24:59 kai Exp $ - * - * Copyright (C) 1996 SpellCaster Telecommunications Inc. - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - * For more information, please contact gpl-info@spellcast.com or write: - * - * SpellCaster Telecommunications Inc. - * 5621 Finch Avenue East, Unit #3 - * Scarborough, Ontario Canada - * M1B 2T9 - * +1 (416) 297-8565 - * +1 (416) 297-6433 Facsimile - */ - -#include "includes.h" -#include "hardware.h" -#include "message.h" -#include "card.h" - -int sndpkt(int devId, int channel, int ack, struct sk_buff *data) -{ - LLData ReqLnkWrite; - int status; - int card; - unsigned long len; - - card = get_card_from_id(devId); - - if (!IS_VALID_CARD(card)) { - pr_debug("invalid param: %d is not a valid card id\n", card); - return -ENODEV; - } - - pr_debug("%s: sndpkt: frst = 0x%lx nxt = %d f = %d n = %d\n", - sc_adapter[card]->devicename, - sc_adapter[card]->channel[channel].first_sendbuf, - sc_adapter[card]->channel[channel].next_sendbuf, - sc_adapter[card]->channel[channel].free_sendbufs, - sc_adapter[card]->channel[channel].num_sendbufs); - - if (!sc_adapter[card]->channel[channel].free_sendbufs) { - pr_debug("%s: out of TX buffers\n", - sc_adapter[card]->devicename); - return -EINVAL; - } - - if (data->len > BUFFER_SIZE) { - pr_debug("%s: data overflows buffer size (data > buffer)\n", - sc_adapter[card]->devicename); - return -EINVAL; - } - - ReqLnkWrite.buff_offset = sc_adapter[card]->channel[channel].next_sendbuf * - BUFFER_SIZE + sc_adapter[card]->channel[channel].first_sendbuf; - ReqLnkWrite.msg_len = data->len; /* sk_buff size */ - pr_debug("%s: writing %d bytes to buffer offset 0x%lx\n", - sc_adapter[card]->devicename, - ReqLnkWrite.msg_len, ReqLnkWrite.buff_offset); - memcpy_toshmem(card, (char *)ReqLnkWrite.buff_offset, data->data, ReqLnkWrite.msg_len); - - /* - * sendmessage - */ - pr_debug("%s: sndpkt size=%d, buf_offset=0x%lx buf_indx=%d\n", - sc_adapter[card]->devicename, - ReqLnkWrite.msg_len, ReqLnkWrite.buff_offset, - sc_adapter[card]->channel[channel].next_sendbuf); - - status = sendmessage(card, CEPID, ceReqTypeLnk, ceReqClass1, ceReqLnkWrite, - channel + 1, sizeof(LLData), (unsigned int *)&ReqLnkWrite); - len = data->len; - if (status) { - pr_debug("%s: failed to send packet, status = %d\n", - sc_adapter[card]->devicename, status); - return -1; - } - else { - sc_adapter[card]->channel[channel].free_sendbufs--; - sc_adapter[card]->channel[channel].next_sendbuf = - ++sc_adapter[card]->channel[channel].next_sendbuf == - sc_adapter[card]->channel[channel].num_sendbufs ? 0 : - sc_adapter[card]->channel[channel].next_sendbuf; - pr_debug("%s: packet sent successfully\n", sc_adapter[card]->devicename); - dev_kfree_skb(data); - indicate_status(card, ISDN_STAT_BSENT, channel, (char *)&len); - } - return len; -} - -void rcvpkt(int card, RspMessage *rcvmsg) -{ - LLData newll; - struct sk_buff *skb; - - if (!IS_VALID_CARD(card)) { - pr_debug("invalid param: %d is not a valid card id\n", card); - return; - } - - switch (rcvmsg->rsp_status) { - case 0x01: - case 0x02: - case 0x70: - pr_debug("%s: error status code: 0x%x\n", - sc_adapter[card]->devicename, rcvmsg->rsp_status); - return; - case 0x00: - if (!(skb = dev_alloc_skb(rcvmsg->msg_data.response.msg_len))) { - printk(KERN_WARNING "%s: rcvpkt out of memory, dropping packet\n", - sc_adapter[card]->devicename); - return; - } - skb_put(skb, rcvmsg->msg_data.response.msg_len); - pr_debug("%s: getting data from offset: 0x%lx\n", - sc_adapter[card]->devicename, - rcvmsg->msg_data.response.buff_offset); - memcpy_fromshmem(card, - skb_put(skb, rcvmsg->msg_data.response.msg_len), - (char *)rcvmsg->msg_data.response.buff_offset, - rcvmsg->msg_data.response.msg_len); - sc_adapter[card]->card->rcvcallb_skb(sc_adapter[card]->driverId, - rcvmsg->phy_link_no - 1, skb); - - case 0x03: - /* - * Recycle the buffer - */ - pr_debug("%s: buffer size : %d\n", - sc_adapter[card]->devicename, BUFFER_SIZE); -/* memset_shmem(card, rcvmsg->msg_data.response.buff_offset, 0, BUFFER_SIZE); */ - newll.buff_offset = rcvmsg->msg_data.response.buff_offset; - newll.msg_len = BUFFER_SIZE; - pr_debug("%s: recycled buffer at offset 0x%lx size %d\n", - sc_adapter[card]->devicename, - newll.buff_offset, newll.msg_len); - sendmessage(card, CEPID, ceReqTypeLnk, ceReqClass1, ceReqLnkRead, - rcvmsg->phy_link_no, sizeof(LLData), (unsigned int *)&newll); - } - -} - -int setup_buffers(int card, int c) -{ - unsigned int nBuffers, i, cBase; - unsigned int buffer_size; - LLData RcvBuffOffset; - - if (!IS_VALID_CARD(card)) { - pr_debug("invalid param: %d is not a valid card id\n", card); - return -ENODEV; - } - - /* - * Calculate the buffer offsets (send/recv/send/recv) - */ - pr_debug("%s: setting up channel buffer space in shared RAM\n", - sc_adapter[card]->devicename); - buffer_size = BUFFER_SIZE; - nBuffers = ((sc_adapter[card]->ramsize - BUFFER_BASE) / buffer_size) / 2; - nBuffers = nBuffers > BUFFERS_MAX ? BUFFERS_MAX : nBuffers; - pr_debug("%s: calculating buffer space: %d buffers, %d big\n", - sc_adapter[card]->devicename, - nBuffers, buffer_size); - if (nBuffers < 2) { - pr_debug("%s: not enough buffer space\n", - sc_adapter[card]->devicename); - return -1; - } - cBase = (nBuffers * buffer_size) * (c - 1); - pr_debug("%s: channel buffer offset from shared RAM: 0x%x\n", - sc_adapter[card]->devicename, cBase); - sc_adapter[card]->channel[c - 1].first_sendbuf = BUFFER_BASE + cBase; - sc_adapter[card]->channel[c - 1].num_sendbufs = nBuffers / 2; - sc_adapter[card]->channel[c - 1].free_sendbufs = nBuffers / 2; - sc_adapter[card]->channel[c - 1].next_sendbuf = 0; - pr_debug("%s: send buffer setup complete: first=0x%lx n=%d f=%d, nxt=%d\n", - sc_adapter[card]->devicename, - sc_adapter[card]->channel[c - 1].first_sendbuf, - sc_adapter[card]->channel[c - 1].num_sendbufs, - sc_adapter[card]->channel[c - 1].free_sendbufs, - sc_adapter[card]->channel[c - 1].next_sendbuf); - - /* - * Prep the receive buffers - */ - pr_debug("%s: adding %d RecvBuffers:\n", - sc_adapter[card]->devicename, nBuffers / 2); - for (i = 0; i < nBuffers / 2; i++) { - RcvBuffOffset.buff_offset = - ((sc_adapter[card]->channel[c - 1].first_sendbuf + - (nBuffers / 2) * buffer_size) + (buffer_size * i)); - RcvBuffOffset.msg_len = buffer_size; - pr_debug("%s: adding RcvBuffer #%d offset=0x%lx sz=%d bufsz:%d\n", - sc_adapter[card]->devicename, - i + 1, RcvBuffOffset.buff_offset, - RcvBuffOffset.msg_len, buffer_size); - sendmessage(card, CEPID, ceReqTypeLnk, ceReqClass1, ceReqLnkRead, - c, sizeof(LLData), (unsigned int *)&RcvBuffOffset); - } - return 0; -} diff --git a/drivers/isdn/sc/scioc.h b/drivers/isdn/sc/scioc.h deleted file mode 100644 index a50e143779e732..00000000000000 --- a/drivers/isdn/sc/scioc.h +++ /dev/null @@ -1,110 +0,0 @@ -#ifndef __ISDN_SC_SCIOC_H__ -#define __ISDN_SC_SCIOC_H__ - -/* - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - */ - -/* - * IOCTL Command Codes - */ -#define SCIOCLOAD 0x01 /* Load a firmware record */ -#define SCIOCRESET 0x02 /* Perform hard reset */ -#define SCIOCDEBUG 0x03 /* Set debug level */ -#define SCIOCREV 0x04 /* Get driver revision(s) */ -#define SCIOCSTART 0x05 /* Start the firmware */ -#define SCIOCGETSWITCH 0x06 /* Get switch type */ -#define SCIOCSETSWITCH 0x07 /* Set switch type */ -#define SCIOCGETSPID 0x08 /* Get channel SPID */ -#define SCIOCSETSPID 0x09 /* Set channel SPID */ -#define SCIOCGETDN 0x0A /* Get channel DN */ -#define SCIOCSETDN 0x0B /* Set channel DN */ -#define SCIOCTRACE 0x0C /* Toggle trace mode */ -#define SCIOCSTAT 0x0D /* Get line status */ -#define SCIOCGETSPEED 0x0E /* Set channel speed */ -#define SCIOCSETSPEED 0x0F /* Set channel speed */ -#define SCIOCLOOPTST 0x10 /* Perform loopback test */ - -typedef struct { - int device; - int channel; - unsigned long command; - void __user *dataptr; -} scs_ioctl; - -/* Size of strings */ -#define SCIOC_SPIDSIZE 49 -#define SCIOC_DNSIZE SCIOC_SPIDSIZE -#define SCIOC_REVSIZE SCIOC_SPIDSIZE -#define SCIOC_SRECSIZE 49 - -typedef struct { - unsigned long tx_good; - unsigned long tx_bad; - unsigned long rx_good; - unsigned long rx_bad; -} ChLinkStats; - -typedef struct { - char spid[49]; - char dn[49]; - char call_type; - char phy_stat; - ChLinkStats link_stats; -} BRIStat; - -typedef BRIStat POTStat; - -typedef struct { - char call_type; - char call_state; - char serv_state; - char phy_stat; - ChLinkStats link_stats; -} PRIStat; - -typedef char PRIInfo; -typedef char BRIInfo; -typedef char POTInfo; - - -typedef struct { - char acfa_nos; - char acfa_ais; - char acfa_los; - char acfa_rra; - char acfa_slpp; - char acfa_slpn; - char acfa_fsrf; -} ACFAStat; - -typedef struct { - unsigned char modelid; - char serial_no[13]; - char part_no[13]; - char load_ver[11]; - char proc_ver[11]; - int iobase; - long rambase; - char irq; - long ramsize; - char interface; - char switch_type; - char l1_status; - char l2_status; - ChLinkStats dch_stats; - ACFAStat AcfaStats; - union { - PRIStat pristats[23]; - BRIStat bristats[2]; - POTStat potsstats[2]; - } status; - union { - PRIInfo priinfo; - BRIInfo briinfo; - POTInfo potsinfo; - } info; -} boardInfo; - -#endif /* __ISDN_SC_SCIOC_H__ */ diff --git a/drivers/isdn/sc/shmem.c b/drivers/isdn/sc/shmem.c deleted file mode 100644 index d24506ceb6e876..00000000000000 --- a/drivers/isdn/sc/shmem.c +++ /dev/null @@ -1,138 +0,0 @@ -/* $Id: shmem.c,v 1.2.10.1 2001/09/23 22:24:59 kai Exp $ - * - * Copyright (C) 1996 SpellCaster Telecommunications Inc. - * - * Card functions implementing ISDN4Linux functionality - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - * For more information, please contact gpl-info@spellcast.com or write: - * - * SpellCaster Telecommunications Inc. - * 5621 Finch Avenue East, Unit #3 - * Scarborough, Ontario Canada - * M1B 2T9 - * +1 (416) 297-8565 - * +1 (416) 297-6433 Facsimile - */ - -#include "includes.h" /* This must be first */ -#include "hardware.h" -#include "card.h" - -/* - * - */ -void memcpy_toshmem(int card, void *dest, const void *src, size_t n) -{ - unsigned long flags; - unsigned char ch; - unsigned long dest_rem = ((unsigned long) dest) % 0x4000; - - if (!IS_VALID_CARD(card)) { - pr_debug("Invalid param: %d is not a valid card id\n", card); - return; - } - - if (n > SRAM_PAGESIZE) - return; - - /* - * determine the page to load from the address - */ - ch = (unsigned long) dest / SRAM_PAGESIZE; - pr_debug("%s: loaded page %d\n", sc_adapter[card]->devicename, ch); - /* - * Block interrupts and load the page - */ - spin_lock_irqsave(&sc_adapter[card]->lock, flags); - - outb(((sc_adapter[card]->shmem_magic + ch * SRAM_PAGESIZE) >> 14) | 0x80, - sc_adapter[card]->ioport[sc_adapter[card]->shmem_pgport]); - memcpy_toio((void __iomem *)(sc_adapter[card]->rambase + dest_rem), src, n); - spin_unlock_irqrestore(&sc_adapter[card]->lock, flags); - pr_debug("%s: set page to %#x\n", sc_adapter[card]->devicename, - ((sc_adapter[card]->shmem_magic + ch * SRAM_PAGESIZE) >> 14) | 0x80); - pr_debug("%s: copying %zu bytes from %#lx to %#lx\n", - sc_adapter[card]->devicename, n, - (unsigned long) src, - sc_adapter[card]->rambase + ((unsigned long) dest % 0x4000)); -} - -/* - * Reverse of above - */ -void memcpy_fromshmem(int card, void *dest, const void *src, size_t n) -{ - unsigned long flags; - unsigned char ch; - - if (!IS_VALID_CARD(card)) { - pr_debug("Invalid param: %d is not a valid card id\n", card); - return; - } - - if (n > SRAM_PAGESIZE) { - return; - } - - /* - * determine the page to load from the address - */ - ch = (unsigned long) src / SRAM_PAGESIZE; - pr_debug("%s: loaded page %d\n", sc_adapter[card]->devicename, ch); - - - /* - * Block interrupts and load the page - */ - spin_lock_irqsave(&sc_adapter[card]->lock, flags); - - outb(((sc_adapter[card]->shmem_magic + ch * SRAM_PAGESIZE) >> 14) | 0x80, - sc_adapter[card]->ioport[sc_adapter[card]->shmem_pgport]); - memcpy_fromio(dest, (void *)(sc_adapter[card]->rambase + - ((unsigned long) src % 0x4000)), n); - spin_unlock_irqrestore(&sc_adapter[card]->lock, flags); - pr_debug("%s: set page to %#x\n", sc_adapter[card]->devicename, - ((sc_adapter[card]->shmem_magic + ch * SRAM_PAGESIZE) >> 14) | 0x80); -/* pr_debug("%s: copying %d bytes from %#x to %#x\n", - sc_adapter[card]->devicename, n, - sc_adapter[card]->rambase + ((unsigned long) src %0x4000), (unsigned long) dest); */ -} - -#if 0 -void memset_shmem(int card, void *dest, int c, size_t n) -{ - unsigned long flags; - unsigned char ch; - - if (!IS_VALID_CARD(card)) { - pr_debug("Invalid param: %d is not a valid card id\n", card); - return; - } - - if (n > SRAM_PAGESIZE) { - return; - } - - /* - * determine the page to load from the address - */ - ch = (unsigned long) dest / SRAM_PAGESIZE; - pr_debug("%s: loaded page %d\n", sc_adapter[card]->devicename, ch); - - /* - * Block interrupts and load the page - */ - spin_lock_irqsave(&sc_adapter[card]->lock, flags); - - outb(((sc_adapter[card]->shmem_magic + ch * SRAM_PAGESIZE) >> 14) | 0x80, - sc_adapter[card]->ioport[sc_adapter[card]->shmem_pgport]); - memset_io(sc_adapter[card]->rambase + - ((unsigned long) dest % 0x4000), c, n); - pr_debug("%s: set page to %#x\n", sc_adapter[card]->devicename, - ((sc_adapter[card]->shmem_magic + ch * SRAM_PAGESIZE) >> 14) | 0x80); - spin_unlock_irqrestore(&sc_adapter[card]->lock, flags); -} -#endif /* 0 */ diff --git a/drivers/isdn/sc/timer.c b/drivers/isdn/sc/timer.c deleted file mode 100644 index 6fbac2230d7e73..00000000000000 --- a/drivers/isdn/sc/timer.c +++ /dev/null @@ -1,122 +0,0 @@ -/* $Id: timer.c,v 1.3.6.1 2001/09/23 22:24:59 kai Exp $ - * - * Copyright (C) 1996 SpellCaster Telecommunications Inc. - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - * - * For more information, please contact gpl-info@spellcast.com or write: - * - * SpellCaster Telecommunications Inc. - * 5621 Finch Avenue East, Unit #3 - * Scarborough, Ontario Canada - * M1B 2T9 - * +1 (416) 297-8565 - * +1 (416) 297-6433 Facsimile - */ - -#include "includes.h" -#include "hardware.h" -#include "message.h" -#include "card.h" - - -/* - * Write the proper values into the I/O ports following a reset - */ -static void setup_ports(int card) -{ - - outb((sc_adapter[card]->rambase >> 12), sc_adapter[card]->ioport[EXP_BASE]); - - /* And the IRQ */ - outb((sc_adapter[card]->interrupt | 0x80), - sc_adapter[card]->ioport[IRQ_SELECT]); -} - -/* - * Timed function to check the status of a previous reset - * Must be very fast as this function runs in the context of - * an interrupt handler. - * - * Setup the ioports for the board that were cleared by the reset. - * Then, check to see if the signate has been set. Next, set the - * signature to a known value and issue a startproc if needed. - */ -void sc_check_reset(unsigned long data) -{ - unsigned long flags; - unsigned long sig; - int card = (unsigned int) data; - - pr_debug("%s: check_timer timer called\n", - sc_adapter[card]->devicename); - - /* Setup the io ports */ - setup_ports(card); - - spin_lock_irqsave(&sc_adapter[card]->lock, flags); - outb(sc_adapter[card]->ioport[sc_adapter[card]->shmem_pgport], - (sc_adapter[card]->shmem_magic >> 14) | 0x80); - sig = (unsigned long) *((unsigned long *)(sc_adapter[card]->rambase + SIG_OFFSET)); - - /* check the signature */ - if (sig == SIGNATURE) { - flushreadfifo(card); - spin_unlock_irqrestore(&sc_adapter[card]->lock, flags); - /* See if we need to do a startproc */ - if (sc_adapter[card]->StartOnReset) - startproc(card); - } else { - pr_debug("%s: No signature yet, waiting another %lu jiffies.\n", - sc_adapter[card]->devicename, CHECKRESET_TIME); - mod_timer(&sc_adapter[card]->reset_timer, jiffies + CHECKRESET_TIME); - spin_unlock_irqrestore(&sc_adapter[card]->lock, flags); - } -} - -/* - * Timed function to check the status of a previous reset - * Must be very fast as this function runs in the context of - * an interrupt handler. - * - * Send check sc_adapter->phystat to see if the channels are up - * If they are, tell ISDN4Linux that the board is up. If not, - * tell IADN4Linux that it is up. Always reset the timer to - * fire again (endless loop). - */ -void check_phystat(unsigned long data) -{ - unsigned long flags; - int card = (unsigned int) data; - - pr_debug("%s: Checking status...\n", sc_adapter[card]->devicename); - /* - * check the results of the last PhyStat and change only if - * has changed drastically - */ - if (sc_adapter[card]->nphystat && !sc_adapter[card]->phystat) { /* All is well */ - pr_debug("PhyStat transition to RUN\n"); - pr_info("%s: Switch contacted, transmitter enabled\n", - sc_adapter[card]->devicename); - indicate_status(card, ISDN_STAT_RUN, 0, NULL); - } - else if (!sc_adapter[card]->nphystat && sc_adapter[card]->phystat) { /* All is not well */ - pr_debug("PhyStat transition to STOP\n"); - pr_info("%s: Switch connection lost, transmitter disabled\n", - sc_adapter[card]->devicename); - - indicate_status(card, ISDN_STAT_STOP, 0, NULL); - } - - sc_adapter[card]->phystat = sc_adapter[card]->nphystat; - - /* Reinitialize the timer */ - spin_lock_irqsave(&sc_adapter[card]->lock, flags); - mod_timer(&sc_adapter[card]->stat_timer, jiffies + CHECKSTAT_TIME); - spin_unlock_irqrestore(&sc_adapter[card]->lock, flags); - - /* Send a new cePhyStatus message */ - sendmessage(card, CEPID, ceReqTypePhy, ceReqClass2, - ceReqPhyStatus, 0, 0, NULL); -} diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 940e2ebbdea81e..4cbb8b27a89123 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -93,7 +93,8 @@ enum ad_link_speed_type { AD_LINK_SPEED_10000MBPS, AD_LINK_SPEED_20000MBPS, AD_LINK_SPEED_40000MBPS, - AD_LINK_SPEED_56000MBPS + AD_LINK_SPEED_56000MBPS, + AD_LINK_SPEED_100000MBPS, }; /* compare MAC addresses */ @@ -258,6 +259,7 @@ static inline int __check_agg_selection_timer(struct port *port) * %AD_LINK_SPEED_20000MBPS * %AD_LINK_SPEED_40000MBPS * %AD_LINK_SPEED_56000MBPS + * %AD_LINK_SPEED_100000MBPS */ static u16 __get_link_speed(struct port *port) { @@ -305,6 +307,10 @@ static u16 __get_link_speed(struct port *port) speed = AD_LINK_SPEED_56000MBPS; break; + case SPEED_100000: + speed = AD_LINK_SPEED_100000MBPS; + break; + default: /* unknown speed value from ethtool. shouldn't happen */ speed = 0; @@ -681,6 +687,9 @@ static u32 __get_agg_bandwidth(struct aggregator *aggregator) case AD_LINK_SPEED_56000MBPS: bandwidth = aggregator->num_of_ports * 56000; break; + case AD_LINK_SPEED_100000MBPS: + bandwidth = aggregator->num_of_ports * 100000; + break; default: bandwidth = 0; /* to silence the compiler */ } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 9e0f8a7ef8b169..fe0e7a6f4d726d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -830,7 +830,8 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) } new_active->delay = 0; - bond_set_slave_link_state(new_active, BOND_LINK_UP); + bond_set_slave_link_state(new_active, BOND_LINK_UP, + BOND_SLAVE_NOTIFY_NOW); if (BOND_MODE(bond) == BOND_MODE_8023AD) bond_3ad_handle_link_change(new_active, BOND_LINK_UP); @@ -1198,26 +1199,43 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb) return ret; } -static int bond_master_upper_dev_link(struct net_device *bond_dev, - struct net_device *slave_dev, - struct slave *slave) +static enum netdev_lag_tx_type bond_lag_tx_type(struct bonding *bond) { + switch (BOND_MODE(bond)) { + case BOND_MODE_ROUNDROBIN: + return NETDEV_LAG_TX_TYPE_ROUNDROBIN; + case BOND_MODE_ACTIVEBACKUP: + return NETDEV_LAG_TX_TYPE_ACTIVEBACKUP; + case BOND_MODE_BROADCAST: + return NETDEV_LAG_TX_TYPE_BROADCAST; + case BOND_MODE_XOR: + case BOND_MODE_8023AD: + return NETDEV_LAG_TX_TYPE_HASH; + default: + return NETDEV_LAG_TX_TYPE_UNKNOWN; + } +} + +static int bond_master_upper_dev_link(struct bonding *bond, struct slave *slave) +{ + struct netdev_lag_upper_info lag_upper_info; int err; - err = netdev_master_upper_dev_link_private(slave_dev, bond_dev, slave); + lag_upper_info.tx_type = bond_lag_tx_type(bond); + err = netdev_master_upper_dev_link(slave->dev, bond->dev, slave, + &lag_upper_info); if (err) return err; - slave_dev->flags |= IFF_SLAVE; - rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL); + slave->dev->flags |= IFF_SLAVE; + rtmsg_ifinfo(RTM_NEWLINK, slave->dev, IFF_SLAVE, GFP_KERNEL); return 0; } -static void bond_upper_dev_unlink(struct net_device *bond_dev, - struct net_device *slave_dev) +static void bond_upper_dev_unlink(struct bonding *bond, struct slave *slave) { - netdev_upper_dev_unlink(slave_dev, bond_dev); - slave_dev->flags &= ~IFF_SLAVE; - rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL); + netdev_upper_dev_unlink(slave->dev, bond->dev); + slave->dev->flags &= ~IFF_SLAVE; + rtmsg_ifinfo(RTM_NEWLINK, slave->dev, IFF_SLAVE, GFP_KERNEL); } static struct slave *bond_alloc_slave(struct bonding *bond) @@ -1299,6 +1317,16 @@ void bond_queue_slave_event(struct slave *slave) queue_delayed_work(slave->bond->wq, &nnw->work, 0); } +void bond_lower_state_changed(struct slave *slave) +{ + struct netdev_lag_lower_state_info info; + + info.link_up = slave->link == BOND_LINK_UP || + slave->link == BOND_LINK_FAIL; + info.tx_enabled = bond_is_active_slave(slave); + netdev_lower_state_changed(slave->dev, &info); +} + /* enslave device <slave> to bond device <master> */ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) { @@ -1351,7 +1379,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) * the current ifenslave will set the interface down prior to * enslaving it; the old ifenslave will not. */ - if ((slave_dev->flags & IFF_UP)) { + if (slave_dev->flags & IFF_UP) { netdev_err(bond_dev, "%s is up - this may be due to an out of date ifenslave\n", slave_dev->name); res = -EPERM; @@ -1563,21 +1591,26 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) if (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS) { if (bond->params.updelay) { bond_set_slave_link_state(new_slave, - BOND_LINK_BACK); + BOND_LINK_BACK, + BOND_SLAVE_NOTIFY_NOW); new_slave->delay = bond->params.updelay; } else { bond_set_slave_link_state(new_slave, - BOND_LINK_UP); + BOND_LINK_UP, + BOND_SLAVE_NOTIFY_NOW); } } else { - bond_set_slave_link_state(new_slave, BOND_LINK_DOWN); + bond_set_slave_link_state(new_slave, BOND_LINK_DOWN, + BOND_SLAVE_NOTIFY_NOW); } } else if (bond->params.arp_interval) { bond_set_slave_link_state(new_slave, (netif_carrier_ok(slave_dev) ? - BOND_LINK_UP : BOND_LINK_DOWN)); + BOND_LINK_UP : BOND_LINK_DOWN), + BOND_SLAVE_NOTIFY_NOW); } else { - bond_set_slave_link_state(new_slave, BOND_LINK_UP); + bond_set_slave_link_state(new_slave, BOND_LINK_UP, + BOND_SLAVE_NOTIFY_NOW); } if (new_slave->link != BOND_LINK_DOWN) @@ -1662,7 +1695,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) goto err_detach; } - res = bond_master_upper_dev_link(bond_dev, slave_dev, new_slave); + res = bond_master_upper_dev_link(bond, new_slave); if (res) { netdev_dbg(bond_dev, "Error %d calling bond_master_upper_dev_link\n", res); goto err_unregister; @@ -1698,7 +1731,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) /* Undo stages on error */ err_upper_unlink: - bond_upper_dev_unlink(bond_dev, slave_dev); + bond_upper_dev_unlink(bond, new_slave); err_unregister: netdev_rx_handler_unregister(slave_dev); @@ -1799,12 +1832,14 @@ static int __bond_release_one(struct net_device *bond_dev, return -EINVAL; } + bond_set_slave_inactive_flags(slave, BOND_SLAVE_NOTIFY_NOW); + bond_sysfs_slave_del(slave); /* recompute stats just before removing the slave */ bond_get_stats(bond->dev, &bond->bond_stats); - bond_upper_dev_unlink(bond_dev, slave_dev); + bond_upper_dev_unlink(bond, slave); /* unregister rx_handler early so bond_handle_frame wouldn't be called * for this slave anymore. */ @@ -1996,7 +2031,8 @@ static int bond_miimon_inspect(struct bonding *bond) if (link_state) continue; - bond_set_slave_link_state(slave, BOND_LINK_FAIL); + bond_set_slave_link_state(slave, BOND_LINK_FAIL, + BOND_SLAVE_NOTIFY_LATER); slave->delay = bond->params.downdelay; if (slave->delay) { netdev_info(bond->dev, "link status down for %sinterface %s, disabling it in %d ms\n", @@ -2011,7 +2047,8 @@ static int bond_miimon_inspect(struct bonding *bond) case BOND_LINK_FAIL: if (link_state) { /* recovered before downdelay expired */ - bond_set_slave_link_state(slave, BOND_LINK_UP); + bond_set_slave_link_state(slave, BOND_LINK_UP, + BOND_SLAVE_NOTIFY_LATER); slave->last_link_up = jiffies; netdev_info(bond->dev, "link status up again after %d ms for interface %s\n", (bond->params.downdelay - slave->delay) * @@ -2033,7 +2070,8 @@ static int bond_miimon_inspect(struct bonding *bond) if (!link_state) continue; - bond_set_slave_link_state(slave, BOND_LINK_BACK); + bond_set_slave_link_state(slave, BOND_LINK_BACK, + BOND_SLAVE_NOTIFY_LATER); slave->delay = bond->params.updelay; if (slave->delay) { @@ -2047,7 +2085,8 @@ static int bond_miimon_inspect(struct bonding *bond) case BOND_LINK_BACK: if (!link_state) { bond_set_slave_link_state(slave, - BOND_LINK_DOWN); + BOND_LINK_DOWN, + BOND_SLAVE_NOTIFY_LATER); netdev_info(bond->dev, "link status down again after %d ms for interface %s\n", (bond->params.updelay - slave->delay) * bond->params.miimon, @@ -2085,7 +2124,8 @@ static void bond_miimon_commit(struct bonding *bond) continue; case BOND_LINK_UP: - bond_set_slave_link_state(slave, BOND_LINK_UP); + bond_set_slave_link_state(slave, BOND_LINK_UP, + BOND_SLAVE_NOTIFY_NOW); slave->last_link_up = jiffies; primary = rtnl_dereference(bond->primary_slave); @@ -2125,7 +2165,8 @@ static void bond_miimon_commit(struct bonding *bond) if (slave->link_failure_count < UINT_MAX) slave->link_failure_count++; - bond_set_slave_link_state(slave, BOND_LINK_DOWN); + bond_set_slave_link_state(slave, BOND_LINK_DOWN, + BOND_SLAVE_NOTIFY_NOW); if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP || BOND_MODE(bond) == BOND_MODE_8023AD) @@ -2708,7 +2749,8 @@ static void bond_ab_arp_commit(struct bonding *bond) struct slave *current_arp_slave; current_arp_slave = rtnl_dereference(bond->current_arp_slave); - bond_set_slave_link_state(slave, BOND_LINK_UP); + bond_set_slave_link_state(slave, BOND_LINK_UP, + BOND_SLAVE_NOTIFY_NOW); if (current_arp_slave) { bond_set_slave_inactive_flags( current_arp_slave, @@ -2731,7 +2773,8 @@ static void bond_ab_arp_commit(struct bonding *bond) if (slave->link_failure_count < UINT_MAX) slave->link_failure_count++; - bond_set_slave_link_state(slave, BOND_LINK_DOWN); + bond_set_slave_link_state(slave, BOND_LINK_DOWN, + BOND_SLAVE_NOTIFY_NOW); bond_set_slave_inactive_flags(slave, BOND_SLAVE_NOTIFY_NOW); @@ -2810,7 +2853,8 @@ static bool bond_ab_arp_probe(struct bonding *bond) * up when it is actually down */ if (!bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) { - bond_set_slave_link_state(slave, BOND_LINK_DOWN); + bond_set_slave_link_state(slave, BOND_LINK_DOWN, + BOND_SLAVE_NOTIFY_LATER); if (slave->link_failure_count < UINT_MAX) slave->link_failure_count++; @@ -2830,7 +2874,8 @@ static bool bond_ab_arp_probe(struct bonding *bond) if (!new_slave) goto check_state; - bond_set_slave_link_state(new_slave, BOND_LINK_BACK); + bond_set_slave_link_state(new_slave, BOND_LINK_BACK, + BOND_SLAVE_NOTIFY_LATER); bond_set_slave_active_flags(new_slave, BOND_SLAVE_NOTIFY_LATER); bond_arp_send_all(bond, new_slave); new_slave->last_link_up = jiffies; @@ -2838,7 +2883,7 @@ static bool bond_ab_arp_probe(struct bonding *bond) check_state: bond_for_each_slave_rcu(bond, slave, iter) { - if (slave->should_notify) { + if (slave->should_notify || slave->should_notify_link) { should_notify_rtnl = BOND_SLAVE_NOTIFY_NOW; break; } @@ -2893,8 +2938,10 @@ re_arm: if (should_notify_peers) call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); - if (should_notify_rtnl) + if (should_notify_rtnl) { bond_slave_state_notify(bond); + bond_slave_link_notify(bond); + } rtnl_unlock(); } diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index 31c5e476fd648f..0b13af8e407017 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -122,6 +122,7 @@ config FEALNX cards. <http://www.myson.com.tw/> source "drivers/net/ethernet/natsemi/Kconfig" +source "drivers/net/ethernet/netronome/Kconfig" source "drivers/net/ethernet/8390/Kconfig" config NET_NETX diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index 071f84eb6f3f8c..38dc1a776a2bd6 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -53,6 +53,7 @@ obj-$(CONFIG_NET_VENDOR_MOXART) += moxa/ obj-$(CONFIG_NET_VENDOR_MYRI) += myricom/ obj-$(CONFIG_FEALNX) += fealnx.o obj-$(CONFIG_NET_VENDOR_NATSEMI) += natsemi/ +obj-$(CONFIG_NET_VENDOR_NETRONOME) += netronome/ obj-$(CONFIG_NET_NETX) += netx-eth.o obj-$(CONFIG_NET_VENDOR_NUVOTON) += nuvoton/ obj-$(CONFIG_NET_VENDOR_NVIDIA) += nvidia/ diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c index 096531a7312495..e0e95a15cab08b 100644 --- a/drivers/net/ethernet/adi/bfin_mac.c +++ b/drivers/net/ethernet/adi/bfin_mac.c @@ -1912,21 +1912,21 @@ static struct platform_driver bfin_mac_driver = { }, }; +static struct platform_driver * const drivers[] = { + &bfin_mii_bus_driver, + &bfin_mac_driver, +}; + static int __init bfin_mac_init(void) { - int ret; - ret = platform_driver_register(&bfin_mii_bus_driver); - if (!ret) - return platform_driver_register(&bfin_mac_driver); - return -ENODEV; + return platform_register_drivers(drivers, ARRAY_SIZE(drivers)); } module_init(bfin_mac_init); static void __exit bfin_mac_cleanup(void) { - platform_driver_unregister(&bfin_mac_driver); - platform_driver_unregister(&bfin_mii_bus_driver); + platform_unregister_drivers(drivers, ARRAY_SIZE(drivers)); } module_exit(bfin_mac_cleanup); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c index c31e691d11fc2e..db55c9f6e8e130 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c @@ -869,7 +869,7 @@ void xgene_enet_mdio_remove(struct xgene_enet_pdata *pdata) pdata->mdio_bus = NULL; } -struct xgene_mac_ops xgene_gmac_ops = { +const struct xgene_mac_ops xgene_gmac_ops = { .init = xgene_gmac_init, .reset = xgene_gmac_reset, .rx_enable = xgene_gmac_rx_enable, @@ -879,7 +879,7 @@ struct xgene_mac_ops xgene_gmac_ops = { .set_mac_addr = xgene_gmac_set_mac_addr, }; -struct xgene_port_ops xgene_gport_ops = { +const struct xgene_port_ops xgene_gport_ops = { .reset = xgene_enet_reset, .cle_bypass = xgene_enet_cle_bypass, .shutdown = xgene_gport_shutdown, diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h index c153a1dc5ff781..8a9091039ab449 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h @@ -340,8 +340,8 @@ int xgene_enet_mdio_config(struct xgene_enet_pdata *pdata); void xgene_enet_mdio_remove(struct xgene_enet_pdata *pdata); bool xgene_ring_mgr_init(struct xgene_enet_pdata *p); -extern struct xgene_mac_ops xgene_gmac_ops; -extern struct xgene_port_ops xgene_gport_ops; +extern const struct xgene_mac_ops xgene_gmac_ops; +extern const struct xgene_port_ops xgene_gport_ops; extern struct xgene_ring_ops xgene_ring1_ops; #endif /* __XGENE_ENET_HW_H__ */ diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index d21ee8767c2d8e..2394191ad28e5e 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -682,7 +682,7 @@ static void xgene_enet_napi_disable(struct xgene_enet_pdata *pdata) static int xgene_enet_open(struct net_device *ndev) { struct xgene_enet_pdata *pdata = netdev_priv(ndev); - struct xgene_mac_ops *mac_ops = pdata->mac_ops; + const struct xgene_mac_ops *mac_ops = pdata->mac_ops; int ret; mac_ops->tx_enable(pdata); @@ -706,7 +706,7 @@ static int xgene_enet_open(struct net_device *ndev) static int xgene_enet_close(struct net_device *ndev) { struct xgene_enet_pdata *pdata = netdev_priv(ndev); - struct xgene_mac_ops *mac_ops = pdata->mac_ops; + const struct xgene_mac_ops *mac_ops = pdata->mac_ops; netif_stop_queue(ndev); @@ -1416,7 +1416,7 @@ static int xgene_enet_probe(struct platform_device *pdev) struct net_device *ndev; struct xgene_enet_pdata *pdata; struct device *dev = &pdev->dev; - struct xgene_mac_ops *mac_ops; + const struct xgene_mac_ops *mac_ops; const struct of_device_id *of_id; int ret; @@ -1503,7 +1503,7 @@ err: static int xgene_enet_remove(struct platform_device *pdev) { struct xgene_enet_pdata *pdata; - struct xgene_mac_ops *mac_ops; + const struct xgene_mac_ops *mac_ops; struct net_device *ndev; pdata = platform_get_drvdata(pdev); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h index a6e56b88c0a07a..054caf055f0a06 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h @@ -174,8 +174,8 @@ struct xgene_enet_pdata { int phy_mode; enum xgene_enet_rm rm; struct rtnl_link_stats64 stats; - struct xgene_mac_ops *mac_ops; - struct xgene_port_ops *port_ops; + const struct xgene_mac_ops *mac_ops; + const struct xgene_port_ops *port_ops; struct xgene_ring_ops *ring_ops; struct delayed_work link_work; u32 port_id; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c index 05b817e56fdeb2..78475512b6839c 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c @@ -405,7 +405,7 @@ static void xgene_enet_link_state(struct work_struct *work) schedule_delayed_work(&p->link_work, poll_interval); } -struct xgene_mac_ops xgene_sgmac_ops = { +const struct xgene_mac_ops xgene_sgmac_ops = { .init = xgene_sgmac_init, .reset = xgene_sgmac_reset, .rx_enable = xgene_sgmac_rx_enable, @@ -416,7 +416,7 @@ struct xgene_mac_ops xgene_sgmac_ops = { .link_state = xgene_enet_link_state }; -struct xgene_port_ops xgene_sgport_ops = { +const struct xgene_port_ops xgene_sgport_ops = { .reset = xgene_enet_reset, .cle_bypass = xgene_enet_cle_bypass, .shutdown = xgene_enet_shutdown diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.h b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.h index de432465009c1f..29a71b4dcc44f3 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.h @@ -35,7 +35,7 @@ #define MPA_IDLE_WITH_QMI_EMPTY BIT(12) #define SG_RX_DV_GATE_REG_0_ADDR 0x0dfc -extern struct xgene_mac_ops xgene_sgmac_ops; -extern struct xgene_port_ops xgene_sgport_ops; +extern const struct xgene_mac_ops xgene_sgmac_ops; +extern const struct xgene_port_ops xgene_sgport_ops; #endif /* __XGENE_ENET_SGMAC_H__ */ diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c index 7a28a48cb2c779..ba030dc1940b51 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c @@ -326,7 +326,7 @@ static void xgene_enet_link_state(struct work_struct *work) schedule_delayed_work(&pdata->link_work, poll_interval); } -struct xgene_mac_ops xgene_xgmac_ops = { +const struct xgene_mac_ops xgene_xgmac_ops = { .init = xgene_xgmac_init, .reset = xgene_xgmac_reset, .rx_enable = xgene_xgmac_rx_enable, @@ -338,7 +338,7 @@ struct xgene_mac_ops xgene_xgmac_ops = { .link_state = xgene_enet_link_state }; -struct xgene_port_ops xgene_xgport_ops = { +const struct xgene_port_ops xgene_xgport_ops = { .reset = xgene_enet_reset, .cle_bypass = xgene_enet_xgcle_bypass, .shutdown = xgene_enet_shutdown, diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h index f8f908dbf51c1b..0a2dca8a1725ab 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h @@ -69,7 +69,7 @@ #define XG_ENET_SPARE_CFG_REG_1_ADDR 0x0410 #define XGENET_RX_DV_GATE_REG_0_ADDR 0x0804 -extern struct xgene_mac_ops xgene_xgmac_ops; -extern struct xgene_port_ops xgene_xgport_ops; +extern const struct xgene_mac_ops xgene_xgmac_ops; +extern const struct xgene_port_ops xgene_xgport_ops; #endif /* __XGENE_ENET_XGMAC_H__ */ diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index bd377a6b067d4e..d3763bc2c561c2 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -577,7 +577,6 @@ static int alx_alloc_rings(struct alx_priv *alx) alx->int_mask &= ~ALX_ISR_ALL_QUEUES; alx->int_mask |= ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0; - alx->tx_ringsz = alx->tx_ringsz; netif_napi_add(alx->dev, &alx->napi, alx_poll, 64); diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 8b1929e9f698c4..a54bafad35385c 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -2884,33 +2884,21 @@ struct platform_driver bcm63xx_enet_shared_driver = { }, }; +static struct platform_driver * const drivers[] = { + &bcm63xx_enet_shared_driver, + &bcm63xx_enet_driver, + &bcm63xx_enetsw_driver, +}; + /* entry point */ static int __init bcm_enet_init(void) { - int ret; - - ret = platform_driver_register(&bcm63xx_enet_shared_driver); - if (ret) - return ret; - - ret = platform_driver_register(&bcm63xx_enet_driver); - if (ret) - platform_driver_unregister(&bcm63xx_enet_shared_driver); - - ret = platform_driver_register(&bcm63xx_enetsw_driver); - if (ret) { - platform_driver_unregister(&bcm63xx_enet_driver); - platform_driver_unregister(&bcm63xx_enet_shared_driver); - } - - return ret; + return platform_register_drivers(drivers, ARRAY_SIZE(drivers)); } static void __exit bcm_enet_exit(void) { - platform_driver_unregister(&bcm63xx_enet_driver); - platform_driver_unregister(&bcm63xx_enetsw_driver); - platform_driver_unregister(&bcm63xx_enet_shared_driver); + platform_unregister_drivers(drivers, ARRAY_SIZE(drivers)); } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 0b214b5d944a20..cae0956186ce98 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -590,8 +590,6 @@ struct bnx2x_fastpath { /* The last maximal completed SGE */ u16 last_max_sge; __le16 *rx_cons_sb; - unsigned long rx_pkt, - rx_calls; /* TPA related */ struct bnx2x_agg_info *tpa_info; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index d9add7c02e42ca..b3552dd749c4fd 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -558,10 +558,8 @@ static int bnx2x_alloc_rx_sge(struct bnx2x *bp, struct bnx2x_fastpath *fp, put_page(pool->page); pool->page = alloc_pages(gfp_mask, PAGES_PER_SGE_SHIFT); - if (unlikely(!pool->page)) { - BNX2X_ERR("Can't alloc sge\n"); + if (unlikely(!pool->page)) return -ENOMEM; - } pool->offset = 0; } @@ -745,7 +743,7 @@ static void bnx2x_gro_receive(struct bnx2x *bp, struct bnx2x_fastpath *fp, bnx2x_gro_csum(bp, skb, bnx2x_gro_ipv6_csum); break; default: - BNX2X_ERR("Error: FW GRO supports only IPv4/IPv6, not 0x%04x\n", + WARN_ONCE(1, "Error: FW GRO supports only IPv4/IPv6, not 0x%04x\n", be16_to_cpu(skb->protocol)); } } @@ -1124,9 +1122,6 @@ next_cqe: bnx2x_update_rx_prod(bp, fp, bd_prod_fw, sw_comp_prod, fp->rx_sge_prod); - fp->rx_pkt += rx_pkt; - fp->rx_calls++; - return rx_pkt; } @@ -3206,42 +3201,32 @@ int bnx2x_set_power_state(struct bnx2x *bp, pci_power_t state) */ static int bnx2x_poll(struct napi_struct *napi, int budget) { - int work_done = 0; - u8 cos; struct bnx2x_fastpath *fp = container_of(napi, struct bnx2x_fastpath, napi); struct bnx2x *bp = fp->bp; + int rx_work_done; + u8 cos; - while (1) { #ifdef BNX2X_STOP_ON_ERROR - if (unlikely(bp->panic)) { - napi_complete(napi); - return 0; - } + if (unlikely(bp->panic)) { + napi_complete(napi); + return 0; + } #endif - for_each_cos_in_tx_queue(fp, cos) - if (bnx2x_tx_queue_has_work(fp->txdata_ptr[cos])) - bnx2x_tx_int(bp, fp->txdata_ptr[cos]); - - if (bnx2x_has_rx_work(fp)) { - work_done += bnx2x_rx_int(fp, budget - work_done); - - /* must not complete if we consumed full budget */ - if (work_done >= budget) - break; - } + for_each_cos_in_tx_queue(fp, cos) + if (bnx2x_tx_queue_has_work(fp->txdata_ptr[cos])) + bnx2x_tx_int(bp, fp->txdata_ptr[cos]); - /* Fall out from the NAPI loop if needed */ - if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) { + rx_work_done = (bnx2x_has_rx_work(fp)) ? bnx2x_rx_int(fp, budget) : 0; - /* No need to update SB for FCoE L2 ring as long as - * it's connected to the default SB and the SB - * has been updated when NAPI was scheduled. - */ - if (IS_FCOE_FP(fp)) { - napi_complete(napi); - break; - } + if (rx_work_done < budget) { + /* No need to update SB for FCoE L2 ring as long as + * it's connected to the default SB and the SB + * has been updated when NAPI was scheduled. + */ + if (IS_FCOE_FP(fp)) { + napi_complete(napi); + } else { bnx2x_update_fpsb_idx(fp); /* bnx2x_has_rx_work() reads the status block, * thus we need to ensure that status block indices @@ -3266,12 +3251,13 @@ static int bnx2x_poll(struct napi_struct *napi, int budget) bnx2x_ack_sb(bp, fp->igu_sb_id, USTORM_ID, le16_to_cpu(fp->fp_hc_idx), IGU_INT_ENABLE, 1); - break; + } else { + rx_work_done = budget; } } } - return work_done; + return rx_work_done; } /* we split the first BD into headers and data BDs @@ -4444,7 +4430,6 @@ static int bnx2x_alloc_rx_bds(struct bnx2x_fastpath *fp, /* Limit the CQE producer by the CQE ring size */ fp->rx_comp_prod = min_t(u16, NUM_RCQ_RINGS*RCQ_DESC_CNT, cqe_ring_prod); - fp->rx_pkt = fp->rx_calls = 0; bnx2x_fp_stats(bp, fp)->eth_q_stats.rx_skb_alloc_failed += failure_cnt; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index a3ce9f2a233597..820b7e04bb5f3c 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -74,118 +74,115 @@ static const struct { static const struct { long offset; int size; - u32 flags; -#define STATS_FLAGS_PORT 1 -#define STATS_FLAGS_FUNC 2 -#define STATS_FLAGS_BOTH (STATS_FLAGS_FUNC | STATS_FLAGS_PORT) + bool is_port_stat; char string[ETH_GSTRING_LEN]; } bnx2x_stats_arr[] = { /* 1 */ { STATS_OFFSET32(total_bytes_received_hi), - 8, STATS_FLAGS_BOTH, "rx_bytes" }, + 8, false, "rx_bytes" }, { STATS_OFFSET32(error_bytes_received_hi), - 8, STATS_FLAGS_BOTH, "rx_error_bytes" }, + 8, false, "rx_error_bytes" }, { STATS_OFFSET32(total_unicast_packets_received_hi), - 8, STATS_FLAGS_BOTH, "rx_ucast_packets" }, + 8, false, "rx_ucast_packets" }, { STATS_OFFSET32(total_multicast_packets_received_hi), - 8, STATS_FLAGS_BOTH, "rx_mcast_packets" }, + 8, false, "rx_mcast_packets" }, { STATS_OFFSET32(total_broadcast_packets_received_hi), - 8, STATS_FLAGS_BOTH, "rx_bcast_packets" }, + 8, false, "rx_bcast_packets" }, { STATS_OFFSET32(rx_stat_dot3statsfcserrors_hi), - 8, STATS_FLAGS_PORT, "rx_crc_errors" }, + 8, true, "rx_crc_errors" }, { STATS_OFFSET32(rx_stat_dot3statsalignmenterrors_hi), - 8, STATS_FLAGS_PORT, "rx_align_errors" }, + 8, true, "rx_align_errors" }, { STATS_OFFSET32(rx_stat_etherstatsundersizepkts_hi), - 8, STATS_FLAGS_PORT, "rx_undersize_packets" }, + 8, true, "rx_undersize_packets" }, { STATS_OFFSET32(etherstatsoverrsizepkts_hi), - 8, STATS_FLAGS_PORT, "rx_oversize_packets" }, + 8, true, "rx_oversize_packets" }, /* 10 */{ STATS_OFFSET32(rx_stat_etherstatsfragments_hi), - 8, STATS_FLAGS_PORT, "rx_fragments" }, + 8, true, "rx_fragments" }, { STATS_OFFSET32(rx_stat_etherstatsjabbers_hi), - 8, STATS_FLAGS_PORT, "rx_jabbers" }, + 8, true, "rx_jabbers" }, { STATS_OFFSET32(no_buff_discard_hi), - 8, STATS_FLAGS_BOTH, "rx_discards" }, + 8, false, "rx_discards" }, { STATS_OFFSET32(mac_filter_discard), - 4, STATS_FLAGS_PORT, "rx_filtered_packets" }, + 4, true, "rx_filtered_packets" }, { STATS_OFFSET32(mf_tag_discard), - 4, STATS_FLAGS_PORT, "rx_mf_tag_discard" }, + 4, true, "rx_mf_tag_discard" }, { STATS_OFFSET32(pfc_frames_received_hi), - 8, STATS_FLAGS_PORT, "pfc_frames_received" }, + 8, true, "pfc_frames_received" }, { STATS_OFFSET32(pfc_frames_sent_hi), - 8, STATS_FLAGS_PORT, "pfc_frames_sent" }, + 8, true, "pfc_frames_sent" }, { STATS_OFFSET32(brb_drop_hi), - 8, STATS_FLAGS_PORT, "rx_brb_discard" }, + 8, true, "rx_brb_discard" }, { STATS_OFFSET32(brb_truncate_hi), - 8, STATS_FLAGS_PORT, "rx_brb_truncate" }, + 8, true, "rx_brb_truncate" }, { STATS_OFFSET32(pause_frames_received_hi), - 8, STATS_FLAGS_PORT, "rx_pause_frames" }, + 8, true, "rx_pause_frames" }, { STATS_OFFSET32(rx_stat_maccontrolframesreceived_hi), - 8, STATS_FLAGS_PORT, "rx_mac_ctrl_frames" }, + 8, true, "rx_mac_ctrl_frames" }, { STATS_OFFSET32(nig_timer_max), - 4, STATS_FLAGS_PORT, "rx_constant_pause_events" }, + 4, true, "rx_constant_pause_events" }, /* 20 */{ STATS_OFFSET32(rx_err_discard_pkt), - 4, STATS_FLAGS_BOTH, "rx_phy_ip_err_discards"}, + 4, false, "rx_phy_ip_err_discards"}, { STATS_OFFSET32(rx_skb_alloc_failed), - 4, STATS_FLAGS_BOTH, "rx_skb_alloc_discard" }, + 4, false, "rx_skb_alloc_discard" }, { STATS_OFFSET32(hw_csum_err), - 4, STATS_FLAGS_BOTH, "rx_csum_offload_errors" }, + 4, false, "rx_csum_offload_errors" }, { STATS_OFFSET32(driver_xoff), - 4, STATS_FLAGS_BOTH, "tx_exhaustion_events" }, + 4, false, "tx_exhaustion_events" }, { STATS_OFFSET32(total_bytes_transmitted_hi), - 8, STATS_FLAGS_BOTH, "tx_bytes" }, + 8, false, "tx_bytes" }, { STATS_OFFSET32(tx_stat_ifhcoutbadoctets_hi), - 8, STATS_FLAGS_PORT, "tx_error_bytes" }, + 8, true, "tx_error_bytes" }, { STATS_OFFSET32(total_unicast_packets_transmitted_hi), - 8, STATS_FLAGS_BOTH, "tx_ucast_packets" }, + 8, false, "tx_ucast_packets" }, { STATS_OFFSET32(total_multicast_packets_transmitted_hi), - 8, STATS_FLAGS_BOTH, "tx_mcast_packets" }, + 8, false, "tx_mcast_packets" }, { STATS_OFFSET32(total_broadcast_packets_transmitted_hi), - 8, STATS_FLAGS_BOTH, "tx_bcast_packets" }, + 8, false, "tx_bcast_packets" }, { STATS_OFFSET32(tx_stat_dot3statsinternalmactransmiterrors_hi), - 8, STATS_FLAGS_PORT, "tx_mac_errors" }, + 8, true, "tx_mac_errors" }, { STATS_OFFSET32(rx_stat_dot3statscarriersenseerrors_hi), - 8, STATS_FLAGS_PORT, "tx_carrier_errors" }, + 8, true, "tx_carrier_errors" }, /* 30 */{ STATS_OFFSET32(tx_stat_dot3statssinglecollisionframes_hi), - 8, STATS_FLAGS_PORT, "tx_single_collisions" }, + 8, true, "tx_single_collisions" }, { STATS_OFFSET32(tx_stat_dot3statsmultiplecollisionframes_hi), - 8, STATS_FLAGS_PORT, "tx_multi_collisions" }, + 8, true, "tx_multi_collisions" }, { STATS_OFFSET32(tx_stat_dot3statsdeferredtransmissions_hi), - 8, STATS_FLAGS_PORT, "tx_deferred" }, + 8, true, "tx_deferred" }, { STATS_OFFSET32(tx_stat_dot3statsexcessivecollisions_hi), - 8, STATS_FLAGS_PORT, "tx_excess_collisions" }, + 8, true, "tx_excess_collisions" }, { STATS_OFFSET32(tx_stat_dot3statslatecollisions_hi), - 8, STATS_FLAGS_PORT, "tx_late_collisions" }, + 8, true, "tx_late_collisions" }, { STATS_OFFSET32(tx_stat_etherstatscollisions_hi), - 8, STATS_FLAGS_PORT, "tx_total_collisions" }, + 8, true, "tx_total_collisions" }, { STATS_OFFSET32(tx_stat_etherstatspkts64octets_hi), - 8, STATS_FLAGS_PORT, "tx_64_byte_packets" }, + 8, true, "tx_64_byte_packets" }, { STATS_OFFSET32(tx_stat_etherstatspkts65octetsto127octets_hi), - 8, STATS_FLAGS_PORT, "tx_65_to_127_byte_packets" }, + 8, true, "tx_65_to_127_byte_packets" }, { STATS_OFFSET32(tx_stat_etherstatspkts128octetsto255octets_hi), - 8, STATS_FLAGS_PORT, "tx_128_to_255_byte_packets" }, + 8, true, "tx_128_to_255_byte_packets" }, { STATS_OFFSET32(tx_stat_etherstatspkts256octetsto511octets_hi), - 8, STATS_FLAGS_PORT, "tx_256_to_511_byte_packets" }, + 8, true, "tx_256_to_511_byte_packets" }, /* 40 */{ STATS_OFFSET32(tx_stat_etherstatspkts512octetsto1023octets_hi), - 8, STATS_FLAGS_PORT, "tx_512_to_1023_byte_packets" }, + 8, true, "tx_512_to_1023_byte_packets" }, { STATS_OFFSET32(etherstatspkts1024octetsto1522octets_hi), - 8, STATS_FLAGS_PORT, "tx_1024_to_1522_byte_packets" }, + 8, true, "tx_1024_to_1522_byte_packets" }, { STATS_OFFSET32(etherstatspktsover1522octets_hi), - 8, STATS_FLAGS_PORT, "tx_1523_to_9022_byte_packets" }, + 8, true, "tx_1523_to_9022_byte_packets" }, { STATS_OFFSET32(pause_frames_sent_hi), - 8, STATS_FLAGS_PORT, "tx_pause_frames" }, + 8, true, "tx_pause_frames" }, { STATS_OFFSET32(total_tpa_aggregations_hi), - 8, STATS_FLAGS_FUNC, "tpa_aggregations" }, + 8, false, "tpa_aggregations" }, { STATS_OFFSET32(total_tpa_aggregated_frames_hi), - 8, STATS_FLAGS_FUNC, "tpa_aggregated_frames"}, + 8, false, "tpa_aggregated_frames"}, { STATS_OFFSET32(total_tpa_bytes_hi), - 8, STATS_FLAGS_FUNC, "tpa_bytes"}, + 8, false, "tpa_bytes"}, { STATS_OFFSET32(recoverable_error), - 4, STATS_FLAGS_FUNC, "recoverable_errors" }, + 4, false, "recoverable_errors" }, { STATS_OFFSET32(unrecoverable_error), - 4, STATS_FLAGS_FUNC, "unrecoverable_errors" }, + 4, false, "unrecoverable_errors" }, { STATS_OFFSET32(driver_filtered_tx_pkt), - 4, STATS_FLAGS_FUNC, "driver_filtered_tx_pkt" }, + 4, false, "driver_filtered_tx_pkt" }, { STATS_OFFSET32(eee_tx_lpi), - 4, STATS_FLAGS_PORT, "Tx LPI entry count"} + 4, true, "Tx LPI entry count"} }; #define BNX2X_NUM_STATS ARRAY_SIZE(bnx2x_stats_arr) @@ -3066,9 +3063,7 @@ static void bnx2x_self_test(struct net_device *dev, } } -#define IS_PORT_STAT(i) \ - ((bnx2x_stats_arr[i].flags & STATS_FLAGS_BOTH) == STATS_FLAGS_PORT) -#define IS_FUNC_STAT(i) (bnx2x_stats_arr[i].flags & STATS_FLAGS_FUNC) +#define IS_PORT_STAT(i) (bnx2x_stats_arr[i].is_port_stat) #define HIDE_PORT_STAT(bp) IS_VF(bp) /* ethtool statistics are displayed for all regular ethernet queues and the @@ -3093,7 +3088,7 @@ static int bnx2x_get_sset_count(struct net_device *dev, int stringset) num_strings = 0; if (HIDE_PORT_STAT(bp)) { for (i = 0; i < BNX2X_NUM_STATS; i++) - if (IS_FUNC_STAT(i)) + if (!IS_PORT_STAT(i)) num_strings++; } else num_strings += BNX2X_NUM_STATS; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index 206b6a71a545af..1fbd9084333e89 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -18,14 +18,6 @@ #include "q_struct.h" #include "nicvf_queues.h" -struct rbuf_info { - struct page *page; - void *data; - u64 offset; -}; - -#define GET_RBUF_INFO(x) ((struct rbuf_info *)(x - NICVF_RCV_BUF_ALIGN_BYTES)) - /* Poll a register for a specific value */ static int nicvf_poll_reg(struct nicvf *nic, int qidx, u64 reg, int bit_pos, int bits, int val) @@ -86,8 +78,6 @@ static void nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem) static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp, u32 buf_len, u64 **rbuf) { - u64 data; - struct rbuf_info *rinfo; int order = get_order(buf_len); /* Check if request can be accomodated in previous allocated page */ @@ -113,46 +103,28 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp, nic->rb_page_offset = 0; } - data = (u64)page_address(nic->rb_page) + nic->rb_page_offset; - - /* Align buffer addr to cache line i.e 128 bytes */ - rinfo = (struct rbuf_info *)(data + NICVF_RCV_BUF_ALIGN_LEN(data)); - /* Save page address for reference updation */ - rinfo->page = nic->rb_page; - /* Store start address for later retrieval */ - rinfo->data = (void *)data; - /* Store alignment offset */ - rinfo->offset = NICVF_RCV_BUF_ALIGN_LEN(data); + *rbuf = (u64 *)((u64)page_address(nic->rb_page) + nic->rb_page_offset); - data += rinfo->offset; - - /* Give next aligned address to hw for DMA */ - *rbuf = (u64 *)(data + NICVF_RCV_BUF_ALIGN_BYTES); return 0; } -/* Retrieve actual buffer start address and build skb for received packet */ +/* Build skb around receive buffer */ static struct sk_buff *nicvf_rb_ptr_to_skb(struct nicvf *nic, u64 rb_ptr, int len) { + void *data; struct sk_buff *skb; - struct rbuf_info *rinfo; - rb_ptr = (u64)phys_to_virt(rb_ptr); - /* Get buffer start address and alignment offset */ - rinfo = GET_RBUF_INFO(rb_ptr); + data = phys_to_virt(rb_ptr); /* Now build an skb to give to stack */ - skb = build_skb(rinfo->data, RCV_FRAG_LEN); + skb = build_skb(data, RCV_FRAG_LEN); if (!skb) { - put_page(rinfo->page); + put_page(virt_to_page(data)); return NULL; } - /* Set correct skb->data */ - skb_reserve(skb, rinfo->offset + NICVF_RCV_BUF_ALIGN_BYTES); - - prefetch((void *)rb_ptr); + prefetch(skb->data); return skb; } @@ -196,7 +168,6 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr) int head, tail; u64 buf_addr; struct rbdr_entry_t *desc; - struct rbuf_info *rinfo; if (!rbdr) return; @@ -212,16 +183,14 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr) while (head != tail) { desc = GET_RBDR_DESC(rbdr, head); buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN; - rinfo = GET_RBUF_INFO((u64)phys_to_virt(buf_addr)); - put_page(rinfo->page); + put_page(virt_to_page(phys_to_virt(buf_addr))); head++; head &= (rbdr->dmem.q_len - 1); } /* Free SKB of tail desc */ desc = GET_RBDR_DESC(rbdr, tail); buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN; - rinfo = GET_RBUF_INFO((u64)phys_to_virt(buf_addr)); - put_page(rinfo->page); + put_page(virt_to_page(phys_to_virt(buf_addr))); /* Free RBDR ring */ nicvf_free_q_desc_mem(nic, &rbdr->dmem); @@ -1234,153 +1203,93 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx) return skb; } -/* Enable interrupt */ -void nicvf_enable_intr(struct nicvf *nic, int int_type, int q_idx) +static u64 nicvf_int_type_to_mask(int int_type, int q_idx) { u64 reg_val; - reg_val = nicvf_reg_read(nic, NIC_VF_ENA_W1S); - switch (int_type) { case NICVF_INTR_CQ: - reg_val |= ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT); + reg_val = ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT); break; case NICVF_INTR_SQ: - reg_val |= ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT); + reg_val = ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT); break; case NICVF_INTR_RBDR: - reg_val |= ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT); + reg_val = ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT); break; case NICVF_INTR_PKT_DROP: - reg_val |= (1ULL << NICVF_INTR_PKT_DROP_SHIFT); + reg_val = (1ULL << NICVF_INTR_PKT_DROP_SHIFT); break; case NICVF_INTR_TCP_TIMER: - reg_val |= (1ULL << NICVF_INTR_TCP_TIMER_SHIFT); + reg_val = (1ULL << NICVF_INTR_TCP_TIMER_SHIFT); break; case NICVF_INTR_MBOX: - reg_val |= (1ULL << NICVF_INTR_MBOX_SHIFT); + reg_val = (1ULL << NICVF_INTR_MBOX_SHIFT); break; case NICVF_INTR_QS_ERR: - reg_val |= (1ULL << NICVF_INTR_QS_ERR_SHIFT); + reg_val = (1ULL << NICVF_INTR_QS_ERR_SHIFT); break; default: - netdev_err(nic->netdev, - "Failed to enable interrupt: unknown type\n"); - break; + reg_val = 0; } - nicvf_reg_write(nic, NIC_VF_ENA_W1S, reg_val); + return reg_val; +} + +/* Enable interrupt */ +void nicvf_enable_intr(struct nicvf *nic, int int_type, int q_idx) +{ + u64 mask = nicvf_int_type_to_mask(int_type, q_idx); + + if (!mask) { + netdev_dbg(nic->netdev, + "Failed to enable interrupt: unknown type\n"); + return; + } + nicvf_reg_write(nic, NIC_VF_ENA_W1S, + nicvf_reg_read(nic, NIC_VF_ENA_W1S) | mask); } /* Disable interrupt */ void nicvf_disable_intr(struct nicvf *nic, int int_type, int q_idx) { - u64 reg_val = 0; + u64 mask = nicvf_int_type_to_mask(int_type, q_idx); - switch (int_type) { - case NICVF_INTR_CQ: - reg_val |= ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT); - break; - case NICVF_INTR_SQ: - reg_val |= ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT); - break; - case NICVF_INTR_RBDR: - reg_val |= ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT); - break; - case NICVF_INTR_PKT_DROP: - reg_val |= (1ULL << NICVF_INTR_PKT_DROP_SHIFT); - break; - case NICVF_INTR_TCP_TIMER: - reg_val |= (1ULL << NICVF_INTR_TCP_TIMER_SHIFT); - break; - case NICVF_INTR_MBOX: - reg_val |= (1ULL << NICVF_INTR_MBOX_SHIFT); - break; - case NICVF_INTR_QS_ERR: - reg_val |= (1ULL << NICVF_INTR_QS_ERR_SHIFT); - break; - default: - netdev_err(nic->netdev, + if (!mask) { + netdev_dbg(nic->netdev, "Failed to disable interrupt: unknown type\n"); - break; + return; } - nicvf_reg_write(nic, NIC_VF_ENA_W1C, reg_val); + nicvf_reg_write(nic, NIC_VF_ENA_W1C, mask); } /* Clear interrupt */ void nicvf_clear_intr(struct nicvf *nic, int int_type, int q_idx) { - u64 reg_val = 0; + u64 mask = nicvf_int_type_to_mask(int_type, q_idx); - switch (int_type) { - case NICVF_INTR_CQ: - reg_val = ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT); - break; - case NICVF_INTR_SQ: - reg_val = ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT); - break; - case NICVF_INTR_RBDR: - reg_val = ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT); - break; - case NICVF_INTR_PKT_DROP: - reg_val = (1ULL << NICVF_INTR_PKT_DROP_SHIFT); - break; - case NICVF_INTR_TCP_TIMER: - reg_val = (1ULL << NICVF_INTR_TCP_TIMER_SHIFT); - break; - case NICVF_INTR_MBOX: - reg_val = (1ULL << NICVF_INTR_MBOX_SHIFT); - break; - case NICVF_INTR_QS_ERR: - reg_val |= (1ULL << NICVF_INTR_QS_ERR_SHIFT); - break; - default: - netdev_err(nic->netdev, + if (!mask) { + netdev_dbg(nic->netdev, "Failed to clear interrupt: unknown type\n"); - break; + return; } - nicvf_reg_write(nic, NIC_VF_INT, reg_val); + nicvf_reg_write(nic, NIC_VF_INT, mask); } /* Check if interrupt is enabled */ int nicvf_is_intr_enabled(struct nicvf *nic, int int_type, int q_idx) { - u64 reg_val; - u64 mask = 0xff; - - reg_val = nicvf_reg_read(nic, NIC_VF_ENA_W1S); - - switch (int_type) { - case NICVF_INTR_CQ: - mask = ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT); - break; - case NICVF_INTR_SQ: - mask = ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT); - break; - case NICVF_INTR_RBDR: - mask = ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT); - break; - case NICVF_INTR_PKT_DROP: - mask = NICVF_INTR_PKT_DROP_MASK; - break; - case NICVF_INTR_TCP_TIMER: - mask = NICVF_INTR_TCP_TIMER_MASK; - break; - case NICVF_INTR_MBOX: - mask = NICVF_INTR_MBOX_MASK; - break; - case NICVF_INTR_QS_ERR: - mask = NICVF_INTR_QS_ERR_MASK; - break; - default: - netdev_err(nic->netdev, + u64 mask = nicvf_int_type_to_mask(int_type, q_idx); + /* If interrupt type is unknown, we treat it disabled. */ + if (!mask) { + netdev_dbg(nic->netdev, "Failed to check interrupt enable: unknown type\n"); - break; + return 0; } - return (reg_val & mask); + return mask & nicvf_reg_read(nic, NIC_VF_ENA_W1S); } void nicvf_update_rq_stats(struct nicvf *nic, int rq_idx) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h index 033e8306e91c69..a4f6667fdbd44a 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h @@ -83,10 +83,8 @@ #define MAX_RCV_BUF_COUNT (1ULL << (RBDR_SIZE6 + 13)) #define RBDR_THRESH (RCV_BUF_COUNT / 2) #define DMA_BUFFER_LEN 2048 /* In multiples of 128bytes */ -#define RCV_FRAG_LEN (SKB_DATA_ALIGN(DMA_BUFFER_LEN + NET_SKB_PAD) + \ - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + \ - (NICVF_RCV_BUF_ALIGN_BYTES * 2)) -#define RCV_DATA_OFFSET NICVF_RCV_BUF_ALIGN_BYTES +#define RCV_FRAG_LEN (SKB_DATA_ALIGN(DMA_BUFFER_LEN + NET_SKB_PAD) + \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) #define MAX_CQES_FOR_TX ((SND_QUEUE_LEN / MIN_SQ_DESC_PER_PKT_XMIT) * \ MAX_CQE_PER_PKT_XMIT) @@ -108,10 +106,6 @@ #define NICVF_SQ_BASE_ALIGN_BYTES 128 /* 7 bits */ #define NICVF_ALIGNED_ADDR(ADDR, ALIGN_BYTES) ALIGN(ADDR, ALIGN_BYTES) -#define NICVF_ADDR_ALIGN_LEN(ADDR, BYTES)\ - (NICVF_ALIGNED_ADDR(ADDR, BYTES) - BYTES) -#define NICVF_RCV_BUF_ALIGN_LEN(X)\ - (NICVF_ALIGNED_ADDR(X, NICVF_RCV_BUF_ALIGN_BYTES) - X) /* Queue enable/disable */ #define NICVF_SQ_EN BIT_ULL(19) diff --git a/drivers/net/ethernet/chelsio/Kconfig b/drivers/net/ethernet/chelsio/Kconfig index a79813a17b6e58..4d187f22c48be7 100644 --- a/drivers/net/ethernet/chelsio/Kconfig +++ b/drivers/net/ethernet/chelsio/Kconfig @@ -65,13 +65,14 @@ config CHELSIO_T3 will be called cxgb3. config CHELSIO_T4 - tristate "Chelsio Communications T4/T5 Ethernet support" + tristate "Chelsio Communications T4/T5/T6 Ethernet support" depends on PCI && (IPV6 || IPV6=n) select FW_LOADER select MDIO ---help--- - This driver supports Chelsio T4 and T5 based gigabit, 10Gb Ethernet - adapter and T5 based 40Gb Ethernet adapter. + This driver supports Chelsio T4, T5 & T6 based gigabit, 10Gb Ethernet + adapter and T5/T6 based 40Gb and T6 based 25Gb, 50Gb and 100Gb + Ethernet adapters. For general information about Chelsio and our products, visit our website at <http://www.chelsio.com>. @@ -85,7 +86,7 @@ config CHELSIO_T4 will be called cxgb4. config CHELSIO_T4_DCB - bool "Data Center Bridging (DCB) Support for Chelsio T4/T5 cards" + bool "Data Center Bridging (DCB) Support for Chelsio T4/T5/T6 cards" default n depends on CHELSIO_T4 && DCB ---help--- @@ -107,12 +108,12 @@ config CHELSIO_T4_FCOE If unsure, say N. config CHELSIO_T4VF - tristate "Chelsio Communications T4/T5 Virtual Function Ethernet support" + tristate "Chelsio Communications T4/T5/T6 Virtual Function Ethernet support" depends on PCI ---help--- - This driver supports Chelsio T4 and T5 based gigabit, 10Gb Ethernet - adapters and T5 based 40Gb Ethernet adapters with PCI-E SR-IOV Virtual - Functions. + This driver supports Chelsio T4, T5 & T6 based gigabit, 10Gb Ethernet + adapters and T5/T6 based 40Gb and T6 based 25Gb, 50Gb and 100Gb + Ethernet adapters with PCI-E SR-IOV Virtual Functions. For general information about Chelsio and our products, visit our website at <http://www.chelsio.com>. diff --git a/drivers/net/ethernet/chelsio/cxgb/pm3393.c b/drivers/net/ethernet/chelsio/cxgb/pm3393.c index ec5e05052d99ea..eb462d7db4276c 100644 --- a/drivers/net/ethernet/chelsio/cxgb/pm3393.c +++ b/drivers/net/ethernet/chelsio/cxgb/pm3393.c @@ -570,7 +570,7 @@ static void pm3393_destroy(struct cmac *cmac) kfree(cmac); } -static struct cmac_ops pm3393_ops = { +static const struct cmac_ops pm3393_ops = { .destroy = pm3393_destroy, .reset = pm3393_reset, .interrupt_enable = pm3393_interrupt_enable, diff --git a/drivers/net/ethernet/chelsio/cxgb/vsc7326.c b/drivers/net/ethernet/chelsio/cxgb/vsc7326.c index b0cb388f5e1239..6f30b6f785539c 100644 --- a/drivers/net/ethernet/chelsio/cxgb/vsc7326.c +++ b/drivers/net/ethernet/chelsio/cxgb/vsc7326.c @@ -666,7 +666,7 @@ static void mac_destroy(struct cmac *mac) kfree(mac); } -static struct cmac_ops vsc7326_ops = { +static const struct cmac_ops vsc7326_ops = { .destroy = mac_destroy, .reset = mac_reset, .interrupt_handler = mac_intr_handler, diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 8f7aa53a4c4bec..60908eab3b3adf 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -701,15 +701,16 @@ static ssize_t attr_store(struct device *d, ssize_t(*set) (struct net_device *, unsigned int), unsigned int min_val, unsigned int max_val) { - char *endp; ssize_t ret; unsigned int val; if (!capable(CAP_NET_ADMIN)) return -EPERM; - val = simple_strtoul(buf, &endp, 0); - if (endp == buf || val < min_val || val > max_val) + ret = kstrtouint(buf, 0, &val); + if (ret) + return ret; + if (val < min_val || val > max_val) return -EINVAL; rtnl_lock(); @@ -829,14 +830,15 @@ static ssize_t tm_attr_store(struct device *d, struct port_info *pi = netdev_priv(to_net_dev(d)); struct adapter *adap = pi->adapter; unsigned int val; - char *endp; ssize_t ret; if (!capable(CAP_NET_ADMIN)) return -EPERM; - val = simple_strtoul(buf, &endp, 0); - if (endp == buf || val > 10000000) + ret = kstrtouint(buf, 0, &val); + if (ret) + return ret; + if (val > 10000000) return -EINVAL; rtnl_lock(); diff --git a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c index a22768c94200ef..ee04caa6c4d852 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c @@ -709,11 +709,21 @@ static int get_vpd_params(struct adapter *adapter, struct vpd_params *p) return ret; } - p->cclk = simple_strtoul(vpd.cclk_data, NULL, 10); - p->mclk = simple_strtoul(vpd.mclk_data, NULL, 10); - p->uclk = simple_strtoul(vpd.uclk_data, NULL, 10); - p->mdc = simple_strtoul(vpd.mdc_data, NULL, 10); - p->mem_timing = simple_strtoul(vpd.mt_data, NULL, 10); + ret = kstrtouint(vpd.cclk_data, 10, &p->cclk); + if (ret) + return ret; + ret = kstrtouint(vpd.mclk_data, 10, &p->mclk); + if (ret) + return ret; + ret = kstrtouint(vpd.uclk_data, 10, &p->uclk); + if (ret) + return ret; + ret = kstrtouint(vpd.mdc_data, 10, &p->mdc); + if (ret) + return ret; + ret = kstrtouint(vpd.mt_data, 10, &p->mem_timing); + if (ret) + return ret; memcpy(p->sn, vpd.sn_data, SERNUM_LEN); /* Old eeproms didn't have port information */ @@ -723,8 +733,12 @@ static int get_vpd_params(struct adapter *adapter, struct vpd_params *p) } else { p->port_type[0] = hex_to_bin(vpd.port0_data[0]); p->port_type[1] = hex_to_bin(vpd.port1_data[0]); - p->xauicfg[0] = simple_strtoul(vpd.xaui0cfg_data, NULL, 16); - p->xauicfg[1] = simple_strtoul(vpd.xaui1cfg_data, NULL, 16); + ret = kstrtou16(vpd.xaui0cfg_data, 16, &p->xauicfg[0]); + if (ret) + return ret; + ret = kstrtou16(vpd.xaui1cfg_data, 16, &p->xauicfg[1]); + if (ret) + return ret; } ret = hex2bin(p->eth_base, vpd.na_data, 6); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 55a47de544ea29..e01e7228f607db 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -483,6 +483,8 @@ struct sge_fl { /* SGE free-buffer queue state */ unsigned int pidx; /* producer index */ unsigned long alloc_failed; /* # of times buffer allocation failed */ unsigned long large_alloc_failed; + unsigned long mapping_err; /* # of RX Buffer DMA Mapping failures */ + unsigned long low; /* # of times momentarily starving */ unsigned long starving; /* RO fields */ unsigned int cntxt_id; /* SGE context id for the free list */ @@ -618,6 +620,7 @@ struct sge_ofld_txq { /* state for an SGE offload Tx queue */ struct adapter *adap; struct sk_buff_head sendq; /* list of backpressured packets */ struct tasklet_struct qresume_tsk; /* restarts the queue */ + bool service_ofldq_running; /* service_ofldq() is processing sendq */ u8 full; /* the Tx ring is full */ unsigned long mapping_err; /* # of I/O MMU packet mapping errors */ } ____cacheline_aligned_in_smp; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 4269944c5db53b..0d579b192350e9 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -2325,6 +2325,8 @@ do { \ TL("TxMapErr:", mapping_err); RL("FLAllocErr:", fl.alloc_failed); RL("FLLrgAlcErr:", fl.large_alloc_failed); + RL("FLMapErr:", fl.mapping_err); + RL("FLLow:", fl.low); RL("FLStarving:", fl.starving); } else if (iscsi_idx < iscsi_entries) { @@ -2359,6 +2361,8 @@ do { \ RL("RxNoMem:", stats.nomem); RL("FLAllocErr:", fl.alloc_failed); RL("FLLrgAlcErr:", fl.large_alloc_failed); + RL("FLMapErr:", fl.mapping_err); + RL("FLLow:", fl.low); RL("FLStarving:", fl.starving); } else if (rdma_idx < rdma_entries) { @@ -2388,6 +2392,8 @@ do { \ RL("RxNoMem:", stats.nomem); RL("FLAllocErr:", fl.alloc_failed); RL("FLLrgAlcErr:", fl.large_alloc_failed); + RL("FLMapErr:", fl.mapping_err); + RL("FLLow:", fl.low); RL("FLStarving:", fl.starving); } else if (ciq_idx < ciq_entries) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index a077f9476daf35..2a61a42ab033d9 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -35,79 +35,79 @@ static void set_msglevel(struct net_device *dev, u32 val) } static const char stats_strings[][ETH_GSTRING_LEN] = { - "tx_octets_ok ", - "tx_frames_ok ", - "tx_broadcast_frames ", - "tx_multicast_frames ", - "tx_unicast_frames ", - "tx_error_frames ", - - "tx_frames_64 ", - "tx_frames_65_to_127 ", - "tx_frames_128_to_255 ", - "tx_frames_256_to_511 ", - "tx_frames_512_to_1023 ", - "tx_frames_1024_to_1518 ", - "tx_frames_1519_to_max ", - - "tx_frames_dropped ", - "tx_pause_frames ", - "tx_ppp0_frames ", - "tx_ppp1_frames ", - "tx_ppp2_frames ", - "tx_ppp3_frames ", - "tx_ppp4_frames ", - "tx_ppp5_frames ", - "tx_ppp6_frames ", - "tx_ppp7_frames ", - - "rx_octets_ok ", - "rx_frames_ok ", - "rx_broadcast_frames ", - "rx_multicast_frames ", - "rx_unicast_frames ", - - "rx_frames_too_long ", - "rx_jabber_errors ", - "rx_fcs_errors ", - "rx_length_errors ", - "rx_symbol_errors ", - "rx_runt_frames ", - - "rx_frames_64 ", - "rx_frames_65_to_127 ", - "rx_frames_128_to_255 ", - "rx_frames_256_to_511 ", - "rx_frames_512_to_1023 ", - "rx_frames_1024_to_1518 ", - "rx_frames_1519_to_max ", - - "rx_pause_frames ", - "rx_ppp0_frames ", - "rx_ppp1_frames ", - "rx_ppp2_frames ", - "rx_ppp3_frames ", - "rx_ppp4_frames ", - "rx_ppp5_frames ", - "rx_ppp6_frames ", - "rx_ppp7_frames ", - - "rx_bg0_frames_dropped ", - "rx_bg1_frames_dropped ", - "rx_bg2_frames_dropped ", - "rx_bg3_frames_dropped ", - "rx_bg0_frames_trunc ", - "rx_bg1_frames_trunc ", - "rx_bg2_frames_trunc ", - "rx_bg3_frames_trunc ", - - "tso ", - "tx_csum_offload ", - "rx_csum_good ", - "vlan_extractions ", - "vlan_insertions ", - "gro_packets ", - "gro_merged ", + "tx_octets_ok ", + "tx_frames_ok ", + "tx_broadcast_frames ", + "tx_multicast_frames ", + "tx_unicast_frames ", + "tx_error_frames ", + + "tx_frames_64 ", + "tx_frames_65_to_127 ", + "tx_frames_128_to_255 ", + "tx_frames_256_to_511 ", + "tx_frames_512_to_1023 ", + "tx_frames_1024_to_1518 ", + "tx_frames_1519_to_max ", + + "tx_frames_dropped ", + "tx_pause_frames ", + "tx_ppp0_frames ", + "tx_ppp1_frames ", + "tx_ppp2_frames ", + "tx_ppp3_frames ", + "tx_ppp4_frames ", + "tx_ppp5_frames ", + "tx_ppp6_frames ", + "tx_ppp7_frames ", + + "rx_octets_ok ", + "rx_frames_ok ", + "rx_broadcast_frames ", + "rx_multicast_frames ", + "rx_unicast_frames ", + + "rx_frames_too_long ", + "rx_jabber_errors ", + "rx_fcs_errors ", + "rx_length_errors ", + "rx_symbol_errors ", + "rx_runt_frames ", + + "rx_frames_64 ", + "rx_frames_65_to_127 ", + "rx_frames_128_to_255 ", + "rx_frames_256_to_511 ", + "rx_frames_512_to_1023 ", + "rx_frames_1024_to_1518 ", + "rx_frames_1519_to_max ", + + "rx_pause_frames ", + "rx_ppp0_frames ", + "rx_ppp1_frames ", + "rx_ppp2_frames ", + "rx_ppp3_frames ", + "rx_ppp4_frames ", + "rx_ppp5_frames ", + "rx_ppp6_frames ", + "rx_ppp7_frames ", + + "rx_bg0_frames_dropped ", + "rx_bg1_frames_dropped ", + "rx_bg2_frames_dropped ", + "rx_bg3_frames_dropped ", + "rx_bg0_frames_trunc ", + "rx_bg1_frames_trunc ", + "rx_bg2_frames_trunc ", + "rx_bg3_frames_trunc ", + + "tso ", + "tx_csum_offload ", + "rx_csum_good ", + "vlan_extractions ", + "vlan_insertions ", + "gro_packets ", + "gro_merged ", }; static char adapter_stats_strings[][ETH_GSTRING_LEN] = { diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 48d8fbb1c2200f..8d35ce317f679c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -406,7 +406,7 @@ static void free_tx_desc(struct adapter *adap, struct sge_txq *q, */ static inline int reclaimable(const struct sge_txq *q) { - int hw_cidx = ntohs(q->stat->cidx); + int hw_cidx = ntohs(ACCESS_ONCE(q->stat->cidx)); hw_cidx -= q->cidx; return hw_cidx < 0 ? hw_cidx + q->size : hw_cidx; } @@ -613,6 +613,7 @@ static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n, PCI_DMA_FROMDEVICE); if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) { __free_pages(pg, s->fl_pg_order); + q->mapping_err++; goto out; /* do not try small pages for this error */ } mapping |= RX_LARGE_PG_BUF; @@ -642,6 +643,7 @@ alloc_small_pages: PCI_DMA_FROMDEVICE); if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) { put_page(pg); + q->mapping_err++; goto out; } *d++ = cpu_to_be64(mapping); @@ -663,6 +665,7 @@ out: cred = q->avail - cred; if (unlikely(fl_starving(adap, q))) { smp_wmb(); + q->low++; set_bit(q->cntxt_id - adap->sge.egr_start, adap->sge.starving_fl); } @@ -1029,6 +1032,30 @@ static void inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *q, *p = 0; } +static void *inline_tx_skb_header(const struct sk_buff *skb, + const struct sge_txq *q, void *pos, + int length) +{ + u64 *p; + int left = (void *)q->stat - pos; + + if (likely(length <= left)) { + memcpy(pos, skb->data, length); + pos += length; + } else { + memcpy(pos, skb->data, left); + memcpy(q->desc, skb->data + left, length - left); + pos = (void *)q->desc + (length - left); + } + /* 0-pad to multiple of 16 */ + p = PTR_ALIGN(pos, 8); + if ((uintptr_t)p & 8) { + *p = 0; + return p + 1; + } + return p; +} + /* * Figure out what HW csum a packet wants and return the appropriate control * bits. @@ -1320,7 +1347,7 @@ out_free: dev_kfree_skb_any(skb); */ static inline void reclaim_completed_tx_imm(struct sge_txq *q) { - int hw_cidx = ntohs(q->stat->cidx); + int hw_cidx = ntohs(ACCESS_ONCE(q->stat->cidx)); int reclaim = hw_cidx - q->cidx; if (reclaim < 0) @@ -1542,24 +1569,50 @@ static void ofldtxq_stop(struct sge_ofld_txq *q, struct sk_buff *skb) } /** - * service_ofldq - restart a suspended offload queue + * service_ofldq - service/restart a suspended offload queue * @q: the offload queue * - * Services an offload Tx queue by moving packets from its packet queue - * to the HW Tx ring. The function starts and ends with the queue locked. + * Services an offload Tx queue by moving packets from its Pending Send + * Queue to the Hardware TX ring. The function starts and ends with the + * Send Queue locked, but drops the lock while putting the skb at the + * head of the Send Queue onto the Hardware TX Ring. Dropping the lock + * allows more skbs to be added to the Send Queue by other threads. + * The packet being processed at the head of the Pending Send Queue is + * left on the queue in case we experience DMA Mapping errors, etc. + * and need to give up and restart later. + * + * service_ofldq() can be thought of as a task which opportunistically + * uses other threads execution contexts. We use the Offload Queue + * boolean "service_ofldq_running" to make sure that only one instance + * is ever running at a time ... */ static void service_ofldq(struct sge_ofld_txq *q) { - u64 *pos; + u64 *pos, *before, *end; int credits; struct sk_buff *skb; + struct sge_txq *txq; + unsigned int left; unsigned int written = 0; unsigned int flits, ndesc; + /* If another thread is currently in service_ofldq() processing the + * Pending Send Queue then there's nothing to do. Otherwise, flag + * that we're doing the work and continue. Examining/modifying + * the Offload Queue boolean "service_ofldq_running" must be done + * while holding the Pending Send Queue Lock. + */ + if (q->service_ofldq_running) + return; + q->service_ofldq_running = true; + while ((skb = skb_peek(&q->sendq)) != NULL && !q->full) { - /* - * We drop the lock but leave skb on sendq, thus retaining - * exclusive access to the state of the queue. + /* We drop the lock while we're working with the skb at the + * head of the Pending Send Queue. This allows more skbs to + * be added to the Pending Send Queue while we're working on + * this one. We don't need to lock to guard the TX Ring + * updates because only one thread of execution is ever + * allowed into service_ofldq() at a time. */ spin_unlock(&q->sendq.lock); @@ -1583,9 +1636,32 @@ static void service_ofldq(struct sge_ofld_txq *q) } else { int last_desc, hdr_len = skb_transport_offset(skb); - memcpy(pos, skb->data, hdr_len); - write_sgl(skb, &q->q, (void *)pos + hdr_len, - pos + flits, hdr_len, + /* The WR headers may not fit within one descriptor. + * So we need to deal with wrap-around here. + */ + before = (u64 *)pos; + end = (u64 *)pos + flits; + txq = &q->q; + pos = (void *)inline_tx_skb_header(skb, &q->q, + (void *)pos, + hdr_len); + if (before > (u64 *)pos) { + left = (u8 *)end - (u8 *)txq->stat; + end = (void *)txq->desc + left; + } + + /* If current position is already at the end of the + * ofld queue, reset the current to point to + * start of the queue and update the end ptr as well. + */ + if (pos == (u64 *)txq->stat) { + left = (u8 *)end - (u8 *)txq->stat; + end = (void *)txq->desc + left; + pos = (void *)txq->desc; + } + + write_sgl(skb, &q->q, (void *)pos, + end, hdr_len, (dma_addr_t *)skb->head); #ifdef CONFIG_NEED_DMA_MAP_STATE skb->dev = q->adap->port[0]; @@ -1604,6 +1680,11 @@ static void service_ofldq(struct sge_ofld_txq *q) written = 0; } + /* Reacquire the Pending Send Queue Lock so we can unlink the + * skb we've just successfully transferred to the TX Ring and + * loop for the next skb which may be at the head of the + * Pending Send Queue. + */ spin_lock(&q->sendq.lock); __skb_unlink(skb, &q->sendq); if (is_ofld_imm(skb)) @@ -1611,6 +1692,11 @@ static void service_ofldq(struct sge_ofld_txq *q) } if (likely(written)) ring_tx_db(q->adap, &q->q, written); + + /*Indicate that no thread is processing the Pending Send Queue + * currently. + */ + q->service_ofldq_running = false; } /** @@ -1624,9 +1710,19 @@ static int ofld_xmit(struct sge_ofld_txq *q, struct sk_buff *skb) { skb->priority = calc_tx_flits_ofld(skb); /* save for restart */ spin_lock(&q->sendq.lock); + + /* Queue the new skb onto the Offload Queue's Pending Send Queue. If + * that results in this new skb being the only one on the queue, start + * servicing it. If there are other skbs already on the list, then + * either the queue is currently being processed or it's been stopped + * for some reason and it'll be restarted at a later time. Restart + * paths are triggered by events like experiencing a DMA Mapping Error + * or filling the Hardware TX Ring. + */ __skb_queue_tail(&q->sendq, skb); if (q->sendq.qlen == 1) service_ofldq(q); + spin_unlock(&q->sendq.lock); return NET_XMIT_SUCCESS; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h index 03ed00c498230d..a8dda635456dc1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h @@ -162,6 +162,9 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN CH_PCI_ID_TABLE_FENTRY(0x5095), /* Custom T540-CR-SO */ CH_PCI_ID_TABLE_FENTRY(0x5096), /* Custom T580-CR */ CH_PCI_ID_TABLE_FENTRY(0x5097), /* Custom T520-KR */ + CH_PCI_ID_TABLE_FENTRY(0x5098), /* Custom 2x40G QSFP */ + CH_PCI_ID_TABLE_FENTRY(0x5099), /* Custom 2x40G QSFP */ + CH_PCI_ID_TABLE_FENTRY(0x509a), /* Custom T520-CR */ /* T6 adapters: */ diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c index afe7f39cdd7cc2..25553ee857b467 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c @@ -1084,27 +1084,23 @@ static struct platform_driver mpc52xx_fec_driver = { /* Module */ /* ======================================================================== */ +static struct platform_driver * const drivers[] = { +#ifdef CONFIG_FEC_MPC52xx_MDIO + &mpc52xx_fec_mdio_driver, +#endif + &mpc52xx_fec_driver, +}; + static int __init mpc52xx_fec_init(void) { -#ifdef CONFIG_FEC_MPC52xx_MDIO - int ret; - ret = platform_driver_register(&mpc52xx_fec_mdio_driver); - if (ret) { - pr_err("failed to register mdio driver\n"); - return ret; - } -#endif - return platform_driver_register(&mpc52xx_fec_driver); + return platform_register_drivers(drivers, ARRAY_SIZE(drivers)); } static void __exit mpc52xx_fec_exit(void) { - platform_driver_unregister(&mpc52xx_fec_driver); -#ifdef CONFIG_FEC_MPC52xx_MDIO - platform_driver_unregister(&mpc52xx_fec_mdio_driver); -#endif + platform_unregister_drivers(drivers, ARRAY_SIZE(drivers)); } diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h index cec95ac8687df4..6ca94dc3dda3c2 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.h +++ b/drivers/net/ethernet/hisilicon/hns/hnae.h @@ -35,7 +35,7 @@ #include <linux/phy.h> #include <linux/types.h> -#define HNAE_DRIVER_VERSION "1.3.0" +#define HNAE_DRIVER_VERSION "2.0" #define HNAE_DRIVER_NAME "hns" #define HNAE_COPYRIGHT "Copyright(c) 2015 Huawei Corporation." #define HNAE_DRIVER_STRING "Hisilicon Network Subsystem Driver" @@ -63,6 +63,7 @@ do { \ #define AE_VERSION_1 ('6' << 16 | '6' << 8 | '0') #define AE_VERSION_2 ('1' << 24 | '6' << 16 | '1' << 8 | '0') +#define AE_IS_VER1(ver) ((ver) == AE_VERSION_1) #define AE_NAME_SIZE 16 /* some said the RX and TX RCB format should not be the same in the future. But @@ -144,23 +145,61 @@ enum hnae_led_state { #define HNS_RXD_ASID_S 24 #define HNS_RXD_ASID_M (0xff << HNS_RXD_ASID_S) +#define HNSV2_TXD_BUFNUM_S 0 +#define HNSV2_TXD_BUFNUM_M (0x7 << HNSV2_TXD_BUFNUM_S) +#define HNSV2_TXD_RI_B 1 +#define HNSV2_TXD_L4CS_B 2 +#define HNSV2_TXD_L3CS_B 3 +#define HNSV2_TXD_FE_B 4 +#define HNSV2_TXD_VLD_B 5 + +#define HNSV2_TXD_TSE_B 0 +#define HNSV2_TXD_VLAN_EN_B 1 +#define HNSV2_TXD_SNAP_B 2 +#define HNSV2_TXD_IPV6_B 3 +#define HNSV2_TXD_SCTP_B 4 + /* hardware spec ring buffer format */ struct __packed hnae_desc { __le64 addr; union { struct { - __le16 asid_bufnum_pid; + union { + __le16 asid_bufnum_pid; + __le16 asid; + }; __le16 send_size; - __le32 flag_ipoffset; - __le32 reserved_3[4]; + union { + __le32 flag_ipoffset; + struct { + __u8 bn_pid; + __u8 ra_ri_cs_fe_vld; + __u8 ip_offset; + __u8 tse_vlan_snap_v6_sctp_nth; + }; + }; + __le16 mss; + __u8 l4_len; + __u8 reserved1; + __le16 paylen; + __u8 vmid; + __u8 qid; + __le32 reserved2[2]; } tx; struct { __le32 ipoff_bnum_pid_flag; __le16 pkt_len; __le16 size; - __le32 vlan_pri_asid; - __le32 reserved_2[3]; + union { + __le32 vlan_pri_asid; + struct { + __le16 asid; + __le16 vlan_cfi_pri; + }; + }; + __le32 rss_hash; + __le32 reserved_1[2]; } rx; }; }; @@ -302,7 +341,8 @@ struct hnae_queue { void __iomem *io_base; phys_addr_t phy_base; struct hnae_ae_dev *dev; /* the device who use this queue */ - struct hnae_ring rx_ring, tx_ring; + struct hnae_ring rx_ring ____cacheline_internodealigned_in_smp; + struct hnae_ring tx_ring ____cacheline_internodealigned_in_smp; struct hnae_handle *handle; }; @@ -435,6 +475,7 @@ struct hnae_ae_ops { int (*set_mac_addr)(struct hnae_handle *handle, void *p); int (*set_mc_addr)(struct hnae_handle *handle, void *addr); int (*set_mtu)(struct hnae_handle *handle, int new_mtu); + void (*set_tso_stats)(struct hnae_handle *handle, int enable); void (*update_stats)(struct hnae_handle *handle, struct net_device_stats *net_stats); void (*get_stats)(struct hnae_handle *handle, u64 *data); @@ -446,6 +487,12 @@ struct hnae_ae_ops { enum hnae_led_state status); void (*get_regs)(struct hnae_handle *handle, void *data); int (*get_regs_len)(struct hnae_handle *handle); + u32 (*get_rss_key_size)(struct hnae_handle *handle); + u32 (*get_rss_indir_size)(struct hnae_handle *handle); + int (*get_rss)(struct hnae_handle *handle, u32 *indir, u8 *key, + u8 *hfunc); + int (*set_rss)(struct hnae_handle *handle, const u32 *indir, + const u8 *key, const u8 hfunc); }; struct hnae_ae_dev { @@ -551,11 +598,9 @@ static inline void hnae_replace_buffer(struct hnae_ring *ring, int i, struct hnae_desc_cb *res_cb) { struct hnae_buf_ops *bops = ring->q->handle->bops; - struct hnae_desc_cb tmp_cb = ring->desc_cb[i]; bops->unmap_buffer(ring, &ring->desc_cb[i]); ring->desc_cb[i] = *res_cb; - *res_cb = tmp_cb; ring->desc[i].addr = (__le64)ring->desc_cb[i].dma; ring->desc[i].rx.ipoff_bnum_pid_flag = 0; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c index 1a16c0307b475b..522b264866b42e 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c @@ -252,7 +252,7 @@ static int hns_ae_set_multicast_one(struct hnae_handle *handle, void *addr) if (mac_cb->mac_type != HNAE_PORT_SERVICE) return 0; - ret = hns_mac_set_multi(mac_cb, mac_cb->mac_id, mac_addr, ENABLE); + ret = hns_mac_set_multi(mac_cb, mac_cb->mac_id, mac_addr, true); if (ret) { dev_err(handle->owner_dev, "mac add mul_mac:%pM port%d fail, ret = %#x!\n", @@ -261,7 +261,7 @@ static int hns_ae_set_multicast_one(struct hnae_handle *handle, void *addr) } ret = hns_mac_set_multi(mac_cb, DSAF_BASE_INNER_PORT_NUM, - mac_addr, ENABLE); + mac_addr, true); if (ret) dev_err(handle->owner_dev, "mac add mul_mac:%pM port%d fail, ret = %#x!\n", @@ -277,12 +277,19 @@ static int hns_ae_set_mtu(struct hnae_handle *handle, int new_mtu) return hns_mac_set_mtu(mac_cb, new_mtu); } +static void hns_ae_set_tso_stats(struct hnae_handle *handle, int enable) +{ + struct hns_ppe_cb *ppe_cb = hns_get_ppe_cb(handle); + + hns_ppe_set_tso_enable(ppe_cb, enable); +} + static int hns_ae_start(struct hnae_handle *handle) { int ret; struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle); - ret = hns_mac_vm_config_bc_en(mac_cb, 0, ENABLE); + ret = hns_mac_vm_config_bc_en(mac_cb, 0, true); if (ret) return ret; @@ -309,7 +316,7 @@ void hns_ae_stop(struct hnae_handle *handle) hns_ae_ring_enable_all(handle, 0); - (void)hns_mac_vm_config_bc_en(mac_cb, 0, DISABLE); + (void)hns_mac_vm_config_bc_en(mac_cb, 0, false); } static void hns_ae_reset(struct hnae_handle *handle) @@ -334,12 +341,30 @@ void hns_ae_toggle_ring_irq(struct hnae_ring *ring, u32 mask) else flag = RCB_INT_FLAG_RX; - hns_rcb_int_clr_hw(ring->q, flag); hns_rcb_int_ctrl_hw(ring->q, flag, mask); } +static void hns_aev2_toggle_ring_irq(struct hnae_ring *ring, u32 mask) +{ + u32 flag; + + if (is_tx_ring(ring)) + flag = RCB_INT_FLAG_TX; + else + flag = RCB_INT_FLAG_RX; + + hns_rcbv2_int_ctrl_hw(ring->q, flag, mask); +} + static void hns_ae_toggle_queue_status(struct hnae_queue *queue, u32 val) { + struct dsaf_device *dsaf_dev = hns_ae_get_dsaf_dev(queue->dev); + + if (AE_IS_VER1(dsaf_dev->dsaf_ver)) + hns_rcb_int_clr_hw(queue, RCB_INT_FLAG_TX | RCB_INT_FLAG_RX); + else + hns_rcbv2_int_clr_hw(queue, RCB_INT_FLAG_TX | RCB_INT_FLAG_RX); + hns_rcb_start(queue, val); } @@ -730,6 +755,53 @@ int hns_ae_get_regs_len(struct hnae_handle *handle) return total_num; } +static u32 hns_ae_get_rss_key_size(struct hnae_handle *handle) +{ + return HNS_PPEV2_RSS_KEY_SIZE; +} + +static u32 hns_ae_get_rss_indir_size(struct hnae_handle *handle) +{ + return HNS_PPEV2_RSS_IND_TBL_SIZE; +} + +static int hns_ae_get_rss(struct hnae_handle *handle, u32 *indir, u8 *key, + u8 *hfunc) +{ + struct hns_ppe_cb *ppe_cb = hns_get_ppe_cb(handle); + + /* currently we support only one type of hash function i.e. Toep hash */ + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + + /* get the RSS Key required by the user */ + if (key) + memcpy(key, ppe_cb->rss_key, HNS_PPEV2_RSS_KEY_SIZE); + + /* update the current hash->queue mappings from the shadow RSS table */ + memcpy(indir, ppe_cb->rss_indir_table, HNS_PPEV2_RSS_IND_TBL_SIZE); + + return 0; +} + +static int hns_ae_set_rss(struct hnae_handle *handle, const u32 *indir, + const u8 *key, const u8 hfunc) +{ + struct hns_ppe_cb *ppe_cb = hns_get_ppe_cb(handle); + + /* set the RSS Hash Key if specififed by the user */ + if (key) + hns_ppe_set_rss_key(ppe_cb, (int *)key); + + /* update the shadow RSS table with user specified qids */ + memcpy(ppe_cb->rss_indir_table, indir, HNS_PPEV2_RSS_IND_TBL_SIZE); + + /* now update the hardware */ + hns_ppe_set_indir_table(ppe_cb, ppe_cb->rss_indir_table); + + return 0; +} + static struct hnae_ae_ops hns_dsaf_ops = { .get_handle = hns_ae_get_handle, .put_handle = hns_ae_put_handle, @@ -758,19 +830,34 @@ static struct hnae_ae_ops hns_dsaf_ops = { .set_mc_addr = hns_ae_set_multicast_one, .set_mtu = hns_ae_set_mtu, .update_stats = hns_ae_update_stats, + .set_tso_stats = hns_ae_set_tso_stats, .get_stats = hns_ae_get_stats, .get_strings = hns_ae_get_strings, .get_sset_count = hns_ae_get_sset_count, .update_led_status = hns_ae_update_led_status, .set_led_id = hns_ae_cpld_set_led_id, .get_regs = hns_ae_get_regs, - .get_regs_len = hns_ae_get_regs_len + .get_regs_len = hns_ae_get_regs_len, + .get_rss_key_size = hns_ae_get_rss_key_size, + .get_rss_indir_size = hns_ae_get_rss_indir_size, + .get_rss = hns_ae_get_rss, + .set_rss = hns_ae_set_rss }; int hns_dsaf_ae_init(struct dsaf_device *dsaf_dev) { struct hnae_ae_dev *ae_dev = &dsaf_dev->ae_dev; + switch (dsaf_dev->dsaf_ver) { + case AE_VERSION_1: + hns_dsaf_ops.toggle_ring_irq = hns_ae_toggle_ring_irq; + break; + case AE_VERSION_2: + hns_dsaf_ops.toggle_ring_irq = hns_aev2_toggle_ring_irq; + break; + default: + break; + } ae_dev->ops = &hns_dsaf_ops; ae_dev->dev = dsaf_dev->dev; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index 026b38676cbaf2..5ef0e96e918a7d 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -283,7 +283,7 @@ int hns_mac_change_vf_addr(struct hns_mac_cb *mac_cb, } int hns_mac_set_multi(struct hns_mac_cb *mac_cb, - u32 port_num, char *addr, u8 en) + u32 port_num, char *addr, bool enable) { int ret; struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev; @@ -295,7 +295,7 @@ int hns_mac_set_multi(struct hns_mac_cb *mac_cb, mac_entry.in_port_num = mac_cb->mac_id; mac_entry.port_num = port_num; - if (en == DISABLE) + if (!enable) ret = hns_dsaf_del_mac_mc_port(dsaf_dev, &mac_entry); else ret = hns_dsaf_add_mac_mc_port(dsaf_dev, &mac_entry); @@ -368,7 +368,7 @@ static void hns_mac_param_get(struct mac_params *param, *retuen 0 - success , negative --fail */ static int hns_mac_port_config_bc_en(struct hns_mac_cb *mac_cb, - u32 port_num, u16 vlan_id, u8 en) + u32 port_num, u16 vlan_id, bool enable) { int ret; struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev; @@ -386,7 +386,7 @@ static int hns_mac_port_config_bc_en(struct hns_mac_cb *mac_cb, mac_entry.in_port_num = mac_cb->mac_id; mac_entry.port_num = port_num; - if (en == DISABLE) + if (!enable) ret = hns_dsaf_del_mac_mc_port(dsaf_dev, &mac_entry); else ret = hns_dsaf_add_mac_mc_port(dsaf_dev, &mac_entry); @@ -403,7 +403,7 @@ static int hns_mac_port_config_bc_en(struct hns_mac_cb *mac_cb, *@en:enable *retuen 0 - success , negative --fail */ -int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vmid, u8 en) +int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vmid, bool enable) { int ret; struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev; @@ -427,7 +427,7 @@ int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vmid, u8 en) return ret; mac_entry.port_num = port_num; - if (en == DISABLE) + if (!enable) ret = hns_dsaf_del_mac_mc_port(dsaf_dev, &mac_entry); else ret = hns_dsaf_add_mac_mc_port(dsaf_dev, &mac_entry); @@ -648,7 +648,7 @@ static int hns_mac_init_ex(struct hns_mac_cb *mac_cb) hns_mac_adjust_link(mac_cb, mac_cb->speed, !mac_cb->half_duplex); - ret = hns_mac_port_config_bc_en(mac_cb, mac_cb->mac_id, 0, ENABLE); + ret = hns_mac_port_config_bc_en(mac_cb, mac_cb->mac_id, 0, true); if (ret) goto free_mac_drv; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h index 7da95a7581f932..0b052191d75109 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h @@ -425,8 +425,8 @@ void mac_adjust_link(struct net_device *net_dev); void hns_mac_get_link_status(struct hns_mac_cb *mac_cb, u32 *link_status); int hns_mac_change_vf_addr(struct hns_mac_cb *mac_cb, u32 vmid, char *addr); int hns_mac_set_multi(struct hns_mac_cb *mac_cb, - u32 port_num, char *addr, u8 en); -int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vm, u8 en); + u32 port_num, char *addr, bool enable); +int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vm, bool enable); void hns_mac_start(struct hns_mac_cb *mac_cb); void hns_mac_stop(struct hns_mac_cb *mac_cb); int hns_mac_del_mac(struct hns_mac_cb *mac_cb, u32 vfn, char *mac); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index 2a98eba660c06a..636b205a236608 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -38,10 +38,10 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev) const char *name, *mode_str; struct device_node *np = dsaf_dev->dev->of_node; - if (of_device_is_compatible(np, "hisilicon,hns-dsaf-v2")) - dsaf_dev->dsaf_ver = AE_VERSION_2; - else + if (of_device_is_compatible(np, "hisilicon,hns-dsaf-v1")) dsaf_dev->dsaf_ver = AE_VERSION_1; + else + dsaf_dev->dsaf_ver = AE_VERSION_2; ret = of_property_read_string(np, "dsa_name", &name); if (ret) { @@ -274,6 +274,8 @@ static void hns_dsaf_stp_port_type_cfg(struct dsaf_device *dsaf_dev, } } +#define HNS_DSAF_SBM_NUM(dev) \ + (AE_IS_VER1((dev)->dsaf_ver) ? DSAF_SBM_NUM : DSAFV2_SBM_NUM) /** * hns_dsaf_sbm_cfg - config sbm * @dsaf_id: dsa fabric id @@ -283,7 +285,7 @@ static void hns_dsaf_sbm_cfg(struct dsaf_device *dsaf_dev) u32 o_sbm_cfg; u32 i; - for (i = 0; i < DSAF_SBM_NUM; i++) { + for (i = 0; i < HNS_DSAF_SBM_NUM(dsaf_dev); i++) { o_sbm_cfg = dsaf_read_dev(dsaf_dev, DSAF_SBM_CFG_REG_0_REG + 0x80 * i); dsaf_set_bit(o_sbm_cfg, DSAF_SBM_CFG_EN_S, 1); @@ -304,13 +306,19 @@ static int hns_dsaf_sbm_cfg_mib_en(struct dsaf_device *dsaf_dev) u32 reg; u32 read_cnt; - for (i = 0; i < DSAF_SBM_NUM; i++) { + /* validate configure by setting SBM_CFG_MIB_EN bit from 0 to 1. */ + for (i = 0; i < HNS_DSAF_SBM_NUM(dsaf_dev); i++) { + reg = DSAF_SBM_CFG_REG_0_REG + 0x80 * i; + dsaf_set_dev_bit(dsaf_dev, reg, DSAF_SBM_CFG_MIB_EN_S, 0); + } + + for (i = 0; i < HNS_DSAF_SBM_NUM(dsaf_dev); i++) { reg = DSAF_SBM_CFG_REG_0_REG + 0x80 * i; dsaf_set_dev_bit(dsaf_dev, reg, DSAF_SBM_CFG_MIB_EN_S, 1); } /* waitint for all sbm enable finished */ - for (i = 0; i < DSAF_SBM_NUM; i++) { + for (i = 0; i < HNS_DSAF_SBM_NUM(dsaf_dev); i++) { read_cnt = 0; reg = DSAF_SBM_CFG_REG_0_REG + 0x80 * i; do { @@ -338,83 +346,156 @@ static int hns_dsaf_sbm_cfg_mib_en(struct dsaf_device *dsaf_dev) */ static void hns_dsaf_sbm_bp_wl_cfg(struct dsaf_device *dsaf_dev) { - u32 o_sbm_bp_cfg0; - u32 o_sbm_bp_cfg1; - u32 o_sbm_bp_cfg2; - u32 o_sbm_bp_cfg3; + u32 o_sbm_bp_cfg; u32 reg; u32 i; /* XGE */ for (i = 0; i < DSAF_XGE_NUM; i++) { reg = DSAF_SBM_BP_CFG_0_XGE_REG_0_REG + 0x80 * i; - o_sbm_bp_cfg0 = dsaf_read_dev(dsaf_dev, reg); - dsaf_set_field(o_sbm_bp_cfg0, DSAF_SBM_CFG0_COM_MAX_BUF_NUM_M, + o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg); + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG0_COM_MAX_BUF_NUM_M, DSAF_SBM_CFG0_COM_MAX_BUF_NUM_S, 512); - dsaf_set_field(o_sbm_bp_cfg0, DSAF_SBM_CFG0_VC0_MAX_BUF_NUM_M, + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG0_VC0_MAX_BUF_NUM_M, DSAF_SBM_CFG0_VC0_MAX_BUF_NUM_S, 0); - dsaf_set_field(o_sbm_bp_cfg0, DSAF_SBM_CFG0_VC1_MAX_BUF_NUM_M, + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG0_VC1_MAX_BUF_NUM_M, DSAF_SBM_CFG0_VC1_MAX_BUF_NUM_S, 0); - dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg0); + dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg); reg = DSAF_SBM_BP_CFG_1_REG_0_REG + 0x80 * i; - o_sbm_bp_cfg1 = dsaf_read_dev(dsaf_dev, reg); - dsaf_set_field(o_sbm_bp_cfg1, DSAF_SBM_CFG1_TC4_MAX_BUF_NUM_M, + o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg); + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG1_TC4_MAX_BUF_NUM_M, DSAF_SBM_CFG1_TC4_MAX_BUF_NUM_S, 0); - dsaf_set_field(o_sbm_bp_cfg1, DSAF_SBM_CFG1_TC0_MAX_BUF_NUM_M, + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG1_TC0_MAX_BUF_NUM_M, DSAF_SBM_CFG1_TC0_MAX_BUF_NUM_S, 0); - dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg1); + dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg); reg = DSAF_SBM_BP_CFG_2_XGE_REG_0_REG + 0x80 * i; - o_sbm_bp_cfg2 = dsaf_read_dev(dsaf_dev, reg); - dsaf_set_field(o_sbm_bp_cfg2, DSAF_SBM_CFG2_SET_BUF_NUM_M, + o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg); + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG2_SET_BUF_NUM_M, DSAF_SBM_CFG2_SET_BUF_NUM_S, 104); - dsaf_set_field(o_sbm_bp_cfg2, DSAF_SBM_CFG2_RESET_BUF_NUM_M, + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG2_RESET_BUF_NUM_M, DSAF_SBM_CFG2_RESET_BUF_NUM_S, 128); - dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg2); + dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg); reg = DSAF_SBM_BP_CFG_3_REG_0_REG + 0x80 * i; - o_sbm_bp_cfg3 = dsaf_read_dev(dsaf_dev, reg); - dsaf_set_field(o_sbm_bp_cfg3, + o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg); + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG3_SET_BUF_NUM_NO_PFC_M, DSAF_SBM_CFG3_SET_BUF_NUM_NO_PFC_S, 110); - dsaf_set_field(o_sbm_bp_cfg3, + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG3_RESET_BUF_NUM_NO_PFC_M, DSAF_SBM_CFG3_RESET_BUF_NUM_NO_PFC_S, 160); - dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg3); + dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg); /* for no enable pfc mode */ reg = DSAF_SBM_BP_CFG_4_REG_0_REG + 0x80 * i; - o_sbm_bp_cfg3 = dsaf_read_dev(dsaf_dev, reg); - dsaf_set_field(o_sbm_bp_cfg3, + o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg); + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG3_SET_BUF_NUM_NO_PFC_M, DSAF_SBM_CFG3_SET_BUF_NUM_NO_PFC_S, 128); - dsaf_set_field(o_sbm_bp_cfg3, + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG3_RESET_BUF_NUM_NO_PFC_M, DSAF_SBM_CFG3_RESET_BUF_NUM_NO_PFC_S, 192); - dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg3); + dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg); } /* PPE */ for (i = 0; i < DSAF_COMM_CHN; i++) { reg = DSAF_SBM_BP_CFG_2_PPE_REG_0_REG + 0x80 * i; - o_sbm_bp_cfg2 = dsaf_read_dev(dsaf_dev, reg); - dsaf_set_field(o_sbm_bp_cfg2, DSAF_SBM_CFG2_SET_BUF_NUM_M, + o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg); + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG2_SET_BUF_NUM_M, DSAF_SBM_CFG2_SET_BUF_NUM_S, 10); - dsaf_set_field(o_sbm_bp_cfg2, DSAF_SBM_CFG2_RESET_BUF_NUM_M, + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG2_RESET_BUF_NUM_M, DSAF_SBM_CFG2_RESET_BUF_NUM_S, 12); - dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg2); + dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg); } /* RoCEE */ for (i = 0; i < DSAF_COMM_CHN; i++) { reg = DSAF_SBM_BP_CFG_2_ROCEE_REG_0_REG + 0x80 * i; - o_sbm_bp_cfg2 = dsaf_read_dev(dsaf_dev, reg); - dsaf_set_field(o_sbm_bp_cfg2, DSAF_SBM_CFG2_SET_BUF_NUM_M, + o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg); + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG2_SET_BUF_NUM_M, DSAF_SBM_CFG2_SET_BUF_NUM_S, 2); - dsaf_set_field(o_sbm_bp_cfg2, DSAF_SBM_CFG2_RESET_BUF_NUM_M, + dsaf_set_field(o_sbm_bp_cfg, DSAF_SBM_CFG2_RESET_BUF_NUM_M, DSAF_SBM_CFG2_RESET_BUF_NUM_S, 4); - dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg2); + dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg); + } +} + +static void hns_dsafv2_sbm_bp_wl_cfg(struct dsaf_device *dsaf_dev) +{ + u32 o_sbm_bp_cfg; + u32 reg; + u32 i; + + /* XGE */ + for (i = 0; i < DSAFV2_SBM_XGE_CHN; i++) { + reg = DSAF_SBM_BP_CFG_0_XGE_REG_0_REG + 0x80 * i; + o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg); + dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG0_COM_MAX_BUF_NUM_M, + DSAFV2_SBM_CFG0_COM_MAX_BUF_NUM_S, 256); + dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG0_VC0_MAX_BUF_NUM_M, + DSAFV2_SBM_CFG0_VC0_MAX_BUF_NUM_S, 0); + dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG0_VC1_MAX_BUF_NUM_M, + DSAFV2_SBM_CFG0_VC1_MAX_BUF_NUM_S, 0); + dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg); + + reg = DSAF_SBM_BP_CFG_1_REG_0_REG + 0x80 * i; + o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg); + dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG1_TC4_MAX_BUF_NUM_M, + DSAFV2_SBM_CFG1_TC4_MAX_BUF_NUM_S, 0); + dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG1_TC0_MAX_BUF_NUM_M, + DSAFV2_SBM_CFG1_TC0_MAX_BUF_NUM_S, 0); + dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg); + + reg = DSAF_SBM_BP_CFG_2_XGE_REG_0_REG + 0x80 * i; + o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg); + dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG2_SET_BUF_NUM_M, + DSAFV2_SBM_CFG2_SET_BUF_NUM_S, 104); + dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG2_RESET_BUF_NUM_M, + DSAFV2_SBM_CFG2_RESET_BUF_NUM_S, 128); + dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg); + + reg = DSAF_SBM_BP_CFG_3_REG_0_REG + 0x80 * i; + o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg); + dsaf_set_field(o_sbm_bp_cfg, + DSAFV2_SBM_CFG3_SET_BUF_NUM_NO_PFC_M, + DSAFV2_SBM_CFG3_SET_BUF_NUM_NO_PFC_S, 110); + dsaf_set_field(o_sbm_bp_cfg, + DSAFV2_SBM_CFG3_RESET_BUF_NUM_NO_PFC_M, + DSAFV2_SBM_CFG3_RESET_BUF_NUM_NO_PFC_S, 160); + dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg); + + /* for no enable pfc mode */ + reg = DSAF_SBM_BP_CFG_4_REG_0_REG + 0x80 * i; + o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg); + dsaf_set_field(o_sbm_bp_cfg, + DSAFV2_SBM_CFG4_SET_BUF_NUM_NO_PFC_M, + DSAFV2_SBM_CFG4_SET_BUF_NUM_NO_PFC_S, 128); + dsaf_set_field(o_sbm_bp_cfg, + DSAFV2_SBM_CFG4_RESET_BUF_NUM_NO_PFC_M, + DSAFV2_SBM_CFG4_RESET_BUF_NUM_NO_PFC_S, 192); + dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg); + } + + /* PPE */ + reg = DSAF_SBM_BP_CFG_2_PPE_REG_0_REG + 0x80 * i; + o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg); + dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG2_SET_BUF_NUM_M, + DSAFV2_SBM_CFG2_SET_BUF_NUM_S, 10); + dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG2_RESET_BUF_NUM_M, + DSAFV2_SBM_CFG2_RESET_BUF_NUM_S, 12); + dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg); + /* RoCEE */ + for (i = 0; i < DASFV2_ROCEE_CRD_NUM; i++) { + reg = DSAFV2_SBM_BP_CFG_2_ROCEE_REG_0_REG + 0x80 * i; + o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg); + dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG2_SET_BUF_NUM_M, + DSAFV2_SBM_CFG2_SET_BUF_NUM_S, 2); + dsaf_set_field(o_sbm_bp_cfg, DSAFV2_SBM_CFG2_RESET_BUF_NUM_M, + DSAFV2_SBM_CFG2_RESET_BUF_NUM_S, 4); + dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg); } } @@ -985,11 +1066,38 @@ static void hns_dsaf_inode_init(struct dsaf_device *dsaf_dev) else tc_cfg = HNS_DSAF_I8TC_CFG; + if (AE_IS_VER1(dsaf_dev->dsaf_ver)) { + for (i = 0; i < DSAF_INODE_NUM; i++) { + reg = DSAF_INODE_IN_PORT_NUM_0_REG + 0x80 * i; + dsaf_set_dev_field(dsaf_dev, reg, + DSAF_INODE_IN_PORT_NUM_M, + DSAF_INODE_IN_PORT_NUM_S, + i % DSAF_XGE_NUM); + } + } else { + for (i = 0; i < DSAF_PORT_TYPE_NUM; i++) { + reg = DSAF_INODE_IN_PORT_NUM_0_REG + 0x80 * i; + dsaf_set_dev_field(dsaf_dev, reg, + DSAF_INODE_IN_PORT_NUM_M, + DSAF_INODE_IN_PORT_NUM_S, 0); + dsaf_set_dev_field(dsaf_dev, reg, + DSAFV2_INODE_IN_PORT1_NUM_M, + DSAFV2_INODE_IN_PORT1_NUM_S, 1); + dsaf_set_dev_field(dsaf_dev, reg, + DSAFV2_INODE_IN_PORT2_NUM_M, + DSAFV2_INODE_IN_PORT2_NUM_S, 2); + dsaf_set_dev_field(dsaf_dev, reg, + DSAFV2_INODE_IN_PORT3_NUM_M, + DSAFV2_INODE_IN_PORT3_NUM_S, 3); + dsaf_set_dev_field(dsaf_dev, reg, + DSAFV2_INODE_IN_PORT4_NUM_M, + DSAFV2_INODE_IN_PORT4_NUM_S, 4); + dsaf_set_dev_field(dsaf_dev, reg, + DSAFV2_INODE_IN_PORT5_NUM_M, + DSAFV2_INODE_IN_PORT5_NUM_S, 5); + } + } for (i = 0; i < DSAF_INODE_NUM; i++) { - reg = DSAF_INODE_IN_PORT_NUM_0_REG + 0x80 * i; - dsaf_set_dev_field(dsaf_dev, reg, DSAF_INODE_IN_PORT_NUM_M, - DSAF_INODE_IN_PORT_NUM_S, i % DSAF_XGE_NUM); - reg = DSAF_INODE_PRI_TC_CFG_0_REG + 0x80 * i; dsaf_write_dev(dsaf_dev, reg, tc_cfg); } @@ -1002,10 +1110,17 @@ static void hns_dsaf_inode_init(struct dsaf_device *dsaf_dev) static int hns_dsaf_sbm_init(struct dsaf_device *dsaf_dev) { u32 flag; + u32 finish_msk; u32 cnt = 0; int ret; - hns_dsaf_sbm_bp_wl_cfg(dsaf_dev); + if (AE_IS_VER1(dsaf_dev->dsaf_ver)) { + hns_dsaf_sbm_bp_wl_cfg(dsaf_dev); + finish_msk = DSAF_SRAM_INIT_OVER_M; + } else { + hns_dsafv2_sbm_bp_wl_cfg(dsaf_dev); + finish_msk = DSAFV2_SRAM_INIT_OVER_M; + } /* enable sbm chanel, disable sbm chanel shcut function*/ hns_dsaf_sbm_cfg(dsaf_dev); @@ -1024,11 +1139,13 @@ static int hns_dsaf_sbm_init(struct dsaf_device *dsaf_dev) do { usleep_range(200, 210);/*udelay(200);*/ - flag = dsaf_read_dev(dsaf_dev, DSAF_SRAM_INIT_OVER_0_REG); + flag = dsaf_get_dev_field(dsaf_dev, DSAF_SRAM_INIT_OVER_0_REG, + finish_msk, DSAF_SRAM_INIT_OVER_S); cnt++; - } while (flag != DSAF_SRAM_INIT_FINISH_FLAG && cnt < DSAF_CFG_READ_CNT); + } while (flag != (finish_msk >> DSAF_SRAM_INIT_OVER_S) && + cnt < DSAF_CFG_READ_CNT); - if (flag != DSAF_SRAM_INIT_FINISH_FLAG) { + if (flag != (finish_msk >> DSAF_SRAM_INIT_OVER_S)) { dev_err(dsaf_dev->dev, "hns_dsaf_sbm_init fail %s, flag=%d, cnt=%d\n", dsaf_dev->ae_dev.name, flag, cnt); @@ -2032,7 +2149,7 @@ void hns_dsaf_get_regs(struct dsaf_device *ddev, u32 port, void *data) DSAF_INODE_VC1_IN_PKT_NUM_0_REG + port * 4); /* dsaf inode registers */ - for (i = 0; i < DSAF_SBM_NUM / DSAF_COMM_CHN; i++) { + for (i = 0; i < HNS_DSAF_SBM_NUM(ddev) / DSAF_COMM_CHN; i++) { j = i * DSAF_COMM_CHN + port; p[232 + i] = dsaf_read_dev(ddev, DSAF_SBM_CFG_REG_0_REG + j * 0x80); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h index b2b93484995ca7..31c312f9826e9d 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h @@ -19,24 +19,20 @@ struct hns_mac_cb; #define DSAF_DRV_NAME "hns_dsaf" #define DSAF_MOD_VERSION "v1.0" -#define ENABLE (0x1) -#define DISABLE (0x0) +#define HNS_DSAF_DEBUG_NW_REG_OFFSET 0x100000 -#define HNS_DSAF_DEBUG_NW_REG_OFFSET (0x100000) +#define DSAF_BASE_INNER_PORT_NUM 127/* mac tbl qid*/ -#define DSAF_BASE_INNER_PORT_NUM (127) /* mac tbl qid*/ +#define DSAF_MAX_CHIP_NUM 2 /*max 2 chips */ -#define DSAF_MAX_CHIP_NUM (2) /*max 2 chips */ +#define DSAF_DEFAUTL_QUEUE_NUM_PER_PPE 22 -#define DSAF_DEFAUTL_QUEUE_NUM_PER_PPE (22) +#define HNS_DSAF_MAX_DESC_CNT 1024 +#define HNS_DSAF_MIN_DESC_CNT 16 -#define HNS_DSAF_MAX_DESC_CNT (1024) -#define HNS_DSAF_MIN_DESC_CNT (16) +#define DSAF_INVALID_ENTRY_IDX 0xffff -#define DSAF_INVALID_ENTRY_IDX (0xffff) - -#define DSAF_CFG_READ_CNT (30) -#define DSAF_SRAM_INIT_FINISH_FLAG (0xff) +#define DSAF_CFG_READ_CNT 30 #define MAC_NUM_OCTETS_PER_ADDR 6 @@ -274,10 +270,6 @@ struct dsaf_device { struct device *dev; struct hnae_ae_dev ae_dev; - void *priv; - - int virq[DSAF_IRQ_NUM]; - u8 __iomem *sc_base; u8 __iomem *sds_base; u8 __iomem *ppe_base; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c index 523e9b83d30429..607c3be4224101 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c @@ -149,7 +149,11 @@ void hns_dsaf_ge_srst_by_port(struct dsaf_device *dsaf_dev, u32 port, u32 val) if (port < DSAF_SERVICE_NW_NUM) { reg_val_1 = 0x1 << port; - reg_val_2 = 0x1041041 << port; + /* there is difference between V1 and V2 in register.*/ + if (AE_IS_VER1(dsaf_dev->dsaf_ver)) + reg_val_2 = 0x1041041 << port; + else + reg_val_2 = 0x2082082 << port; if (val == 0) { dsaf_write_reg(dsaf_dev->sc_base, diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c index 67f33f185a44da..f302ef9073c6b0 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c @@ -19,6 +19,48 @@ #include "hns_dsaf_ppe.h" +void hns_ppe_set_tso_enable(struct hns_ppe_cb *ppe_cb, u32 value) +{ + dsaf_set_dev_bit(ppe_cb, PPEV2_CFG_TSO_EN_REG, 0, !!value); +} + +void hns_ppe_set_rss_key(struct hns_ppe_cb *ppe_cb, + const u32 rss_key[HNS_PPEV2_RSS_KEY_NUM]) +{ + int key_item = 0; + + for (key_item = 0; key_item < HNS_PPEV2_RSS_KEY_NUM; key_item++) + dsaf_write_dev(ppe_cb, PPEV2_RSS_KEY_REG + key_item * 0x4, + rss_key[key_item]); +} + +void hns_ppe_set_indir_table(struct hns_ppe_cb *ppe_cb, + const u32 rss_tab[HNS_PPEV2_RSS_IND_TBL_SIZE]) +{ + int i; + int reg_value; + + for (i = 0; i < (HNS_PPEV2_RSS_IND_TBL_SIZE / 4); i++) { + reg_value = dsaf_read_dev(ppe_cb, + PPEV2_INDRECTION_TBL_REG + i * 0x4); + + dsaf_set_field(reg_value, PPEV2_CFG_RSS_TBL_4N0_M, + PPEV2_CFG_RSS_TBL_4N0_S, + rss_tab[i * 4 + 0] & 0x1F); + dsaf_set_field(reg_value, PPEV2_CFG_RSS_TBL_4N1_M, + PPEV2_CFG_RSS_TBL_4N1_S, + rss_tab[i * 4 + 1] & 0x1F); + dsaf_set_field(reg_value, PPEV2_CFG_RSS_TBL_4N2_M, + PPEV2_CFG_RSS_TBL_4N2_S, + rss_tab[i * 4 + 2] & 0x1F); + dsaf_set_field(reg_value, PPEV2_CFG_RSS_TBL_4N3_M, + PPEV2_CFG_RSS_TBL_4N3_S, + rss_tab[i * 4 + 3] & 0x1F); + dsaf_write_dev( + ppe_cb, PPEV2_INDRECTION_TBL_REG + i * 0x4, reg_value); + } +} + static void __iomem *hns_ppe_common_get_ioaddr( struct ppe_common_cb *ppe_common) { @@ -134,6 +176,11 @@ static void hns_ppe_cnt_clr_ce(struct hns_ppe_cb *ppe_cb) PPE_CNT_CLR_CE_B, 1); } +static void hns_ppe_set_vlan_strip(struct hns_ppe_cb *ppe_cb, int en) +{ + dsaf_write_dev(ppe_cb, PPEV2_VLAN_STRIP_EN_REG, en); +} + /** * hns_ppe_checksum_hw - set ppe checksum caculate * @ppe_device: ppe device @@ -266,13 +313,17 @@ static void hns_ppe_exc_irq_en(struct hns_ppe_cb *ppe_cb, int en) /** * ppe_init_hw - init ppe - * @ppe_device: ppe device + * @ppe_cb: ppe device */ static void hns_ppe_init_hw(struct hns_ppe_cb *ppe_cb) { struct ppe_common_cb *ppe_common_cb = ppe_cb->ppe_common_cb; u32 port = ppe_cb->port; struct dsaf_device *dsaf_dev = ppe_common_cb->dsaf_dev; + int i; + + /* get default RSS key */ + netdev_rss_key_fill(ppe_cb->rss_key, HNS_PPEV2_RSS_KEY_SIZE); hns_ppe_srst_by_port(dsaf_dev, port, 0); mdelay(10); @@ -285,8 +336,21 @@ static void hns_ppe_init_hw(struct hns_ppe_cb *ppe_cb) hns_ppe_set_port_mode(ppe_cb, PPE_MODE_GE); else hns_ppe_set_port_mode(ppe_cb, PPE_MODE_XGE); + hns_ppe_checksum_hw(ppe_cb, 0xffffffff); hns_ppe_cnt_clr_ce(ppe_cb); + + if (!AE_IS_VER1(dsaf_dev->dsaf_ver)) { + hns_ppe_set_vlan_strip(ppe_cb, 0); + + /* set default RSS key in h/w */ + hns_ppe_set_rss_key(ppe_cb, ppe_cb->rss_key); + + /* Set default indrection table in h/w */ + for (i = 0; i < HNS_PPEV2_RSS_IND_TBL_SIZE; i++) + ppe_cb->rss_indir_table[i] = i; + hns_ppe_set_indir_table(ppe_cb, ppe_cb->rss_indir_table); + } } /** @@ -341,13 +405,13 @@ void hns_ppe_reset_common(struct dsaf_device *dsaf_dev, u8 ppe_common_index) if (ret) return; + for (i = 0; i < ppe_common->ppe_num; i++) + hns_ppe_init_hw(&ppe_common->ppe_cb[i]); + ret = hns_rcb_common_init_hw(dsaf_dev->rcb_common[ppe_common_index]); if (ret) return; - for (i = 0; i < ppe_common->ppe_num; i++) - hns_ppe_init_hw(&ppe_common->ppe_cb[i]); - hns_rcb_common_init_commit_hw(dsaf_dev->rcb_common[ppe_common_index]); } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h index 4894f9a0d39ff5..0f5cb6962acf7b 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h @@ -25,15 +25,24 @@ #define ETH_PPE_DUMP_NUM 576 #define ETH_PPE_STATIC_NUM 12 + +#define HNS_PPEV2_RSS_IND_TBL_SIZE 256 +#define HNS_PPEV2_RSS_KEY_SIZE 40 /* in bytes or 320 bits */ +#define HNS_PPEV2_RSS_KEY_NUM (HNS_PPEV2_RSS_KEY_SIZE / sizeof(u32)) + enum ppe_qid_mode { - PPE_QID_MODE0 = 0, /* fixed queue id mode */ - PPE_QID_MODE1, /* switch:128VM non switch:6Port/4VM/4TC */ - PPE_QID_MODE2, /* switch:32VM/4TC non switch:6Port/16VM */ - PPE_QID_MODE3, /* switch:4TC/8TAG non switch:2Port/64VM */ - PPE_QID_MODE4, /* switch:8VM/16TAG non switch:2Port/16VM/4TC */ - PPE_QID_MODE5, /* non switch:6Port/16TAG */ - PPE_QID_MODE6, /* non switch:6Port/2VM/8TC */ - PPE_QID_MODE7, /* non switch:2Port/8VM/8TC */ + PPE_QID_MODE0 = 0, /* fixed queue id mode */ + PPE_QID_MODE1, /* switch:128VM non switch:6Port/4VM/4TC */ + PPE_QID_MODE2, /* switch:32VM/4TC non switch:6Port/16VM */ + PPE_QID_MODE3, /* switch:4TC/8RSS non switch:2Port/64VM */ + PPE_QID_MODE4, /* switch:8VM/16RSS non switch:2Port/16VM/4TC */ + PPE_QID_MODE5, /* switch:16VM/8TC non switch:6Port/16RSS */ + PPE_QID_MODE6, /* switch:32VM/4RSS non switch:6Port/2VM/8TC */ + PPE_QID_MODE7, /* switch:32RSS non switch:2Port/8VM/8TC */ + PPE_QID_MODE8, /* switch:6VM/4TC/4RSS non switch:2Port/16VM/4RSS */ + PPE_QID_MODE9, /* non switch:2Port/32VM/2RSS */ + PPE_QID_MODE10, /* non switch:2Port/32RSS */ + PPE_QID_MODE11, /* non switch:2Port/4TC/16RSS */ }; enum ppe_port_mode { @@ -72,6 +81,8 @@ struct hns_ppe_cb { u8 port; /* port id in dsaf */ void __iomem *io_base; int virq; + u32 rss_indir_table[HNS_PPEV2_RSS_IND_TBL_SIZE]; /*shadow indir tab */ + u32 rss_key[HNS_PPEV2_RSS_KEY_NUM]; /* rss hash key */ }; struct ppe_common_cb { @@ -102,4 +113,9 @@ void hns_ppe_get_regs(struct hns_ppe_cb *ppe_cb, void *data); void hns_ppe_get_strings(struct hns_ppe_cb *ppe_cb, int stringset, u8 *data); void hns_ppe_get_stats(struct hns_ppe_cb *ppe_cb, u64 *data); +void hns_ppe_set_tso_enable(struct hns_ppe_cb *ppe_cb, u32 value); +void hns_ppe_set_rss_key(struct hns_ppe_cb *ppe_cb, + const u32 rss_key[HNS_PPEV2_RSS_KEY_NUM]); +void hns_ppe_set_indir_table(struct hns_ppe_cb *ppe_cb, + const u32 rss_tab[HNS_PPEV2_RSS_IND_TBL_SIZE]); #endif /* _HNS_DSAF_PPE_H */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c index 4db32c62f06242..8c30cec8850a8d 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c @@ -136,19 +136,37 @@ void hns_rcb_int_ctrl_hw(struct hnae_queue *q, u32 flag, u32 mask) void hns_rcb_int_clr_hw(struct hnae_queue *q, u32 flag) { - u32 clr = 1; - if (flag & RCB_INT_FLAG_TX) { - dsaf_write_dev(q, RCB_RING_INTSTS_TX_RING_REG, clr); - dsaf_write_dev(q, RCB_RING_INTSTS_TX_OVERTIME_REG, clr); + dsaf_write_dev(q, RCB_RING_INTSTS_TX_RING_REG, 1); + dsaf_write_dev(q, RCB_RING_INTSTS_TX_OVERTIME_REG, 1); } if (flag & RCB_INT_FLAG_RX) { - dsaf_write_dev(q, RCB_RING_INTSTS_RX_RING_REG, clr); - dsaf_write_dev(q, RCB_RING_INTSTS_RX_OVERTIME_REG, clr); + dsaf_write_dev(q, RCB_RING_INTSTS_RX_RING_REG, 1); + dsaf_write_dev(q, RCB_RING_INTSTS_RX_OVERTIME_REG, 1); } } +void hns_rcbv2_int_ctrl_hw(struct hnae_queue *q, u32 flag, u32 mask) +{ + u32 int_mask_en = !!mask; + + if (flag & RCB_INT_FLAG_TX) + dsaf_write_dev(q, RCB_RING_INTMSK_TXWL_REG, int_mask_en); + + if (flag & RCB_INT_FLAG_RX) + dsaf_write_dev(q, RCB_RING_INTMSK_RXWL_REG, int_mask_en); +} + +void hns_rcbv2_int_clr_hw(struct hnae_queue *q, u32 flag) +{ + if (flag & RCB_INT_FLAG_TX) + dsaf_write_dev(q, RCBV2_TX_RING_INT_STS_REG, 1); + + if (flag & RCB_INT_FLAG_RX) + dsaf_write_dev(q, RCBV2_RX_RING_INT_STS_REG, 1); +} + /** *hns_rcb_ring_enable_hw - enable ring *@ring: rcb ring @@ -193,6 +211,7 @@ static void hns_rcb_ring_init(struct ring_pair_cb *ring_pair, int ring_type) (u32)dma); dsaf_write_dev(q, RCB_RING_RX_RING_BASEADDR_H_REG, (u32)((dma >> 31) >> 1)); + dsaf_write_dev(q, RCB_RING_RX_RING_BD_LEN_REG, bd_size_type); dsaf_write_dev(q, RCB_RING_RX_RING_BD_NUM_REG, @@ -204,6 +223,7 @@ static void hns_rcb_ring_init(struct ring_pair_cb *ring_pair, int ring_type) (u32)dma); dsaf_write_dev(q, RCB_RING_TX_RING_BASEADDR_H_REG, (u32)((dma >> 31) >> 1)); + dsaf_write_dev(q, RCB_RING_TX_RING_BD_LEN_REG, bd_size_type); dsaf_write_dev(q, RCB_RING_TX_RING_BD_NUM_REG, @@ -232,9 +252,6 @@ void hns_rcb_init_hw(struct ring_pair_cb *ring) static void hns_rcb_set_port_desc_cnt(struct rcb_common_cb *rcb_common, u32 port_idx, u32 desc_cnt) { - if (port_idx >= HNS_RCB_SERVICE_NW_ENGINE_NUM) - port_idx = 0; - dsaf_write_dev(rcb_common, RCB_CFG_BD_NUM_REG + port_idx * 4, desc_cnt); } @@ -249,8 +266,6 @@ static int hns_rcb_set_port_coalesced_frames(struct rcb_common_cb *rcb_common, u32 port_idx, u32 coalesced_frames) { - if (port_idx >= HNS_RCB_SERVICE_NW_ENGINE_NUM) - port_idx = 0; if (coalesced_frames >= rcb_common->desc_num || coalesced_frames > HNS_RCB_MAX_COALESCED_FRAMES) return -EINVAL; @@ -354,6 +369,9 @@ int hns_rcb_common_init_hw(struct rcb_common_cb *rcb_common) dsaf_write_dev(rcb_common, RCB_COM_CFG_ENDIAN_REG, HNS_RCB_COMMON_ENDIAN); + dsaf_write_dev(rcb_common, RCB_COM_CFG_FNA_REG, 0x0); + dsaf_write_dev(rcb_common, RCB_COM_CFG_FA_REG, 0x1); + return 0; } @@ -387,19 +405,23 @@ static void hns_rcb_ring_get_cfg(struct hnae_queue *q, int ring_type) struct rcb_common_cb *rcb_common; struct ring_pair_cb *ring_pair_cb; u32 buf_size; - u16 desc_num; - int irq_idx; + u16 desc_num, mdnum_ppkt; + bool irq_idx, is_ver1; ring_pair_cb = container_of(q, struct ring_pair_cb, q); + is_ver1 = AE_IS_VER1(ring_pair_cb->rcb_common->dsaf_dev->dsaf_ver); if (ring_type == RX_RING) { ring = &q->rx_ring; ring->io_base = ring_pair_cb->q.io_base; irq_idx = HNS_RCB_IRQ_IDX_RX; + mdnum_ppkt = HNS_RCB_RING_MAX_BD_PER_PKT; } else { ring = &q->tx_ring; ring->io_base = (u8 __iomem *)ring_pair_cb->q.io_base + HNS_RCB_TX_REG_OFFSET; irq_idx = HNS_RCB_IRQ_IDX_TX; + mdnum_ppkt = is_ver1 ? HNS_RCB_RING_MAX_TXBD_PER_PKT : + HNS_RCBV2_RING_MAX_TXBD_PER_PKT; } rcb_common = ring_pair_cb->rcb_common; @@ -414,7 +436,7 @@ static void hns_rcb_ring_get_cfg(struct hnae_queue *q, int ring_type) ring->buf_size = buf_size; ring->desc_num = desc_num; - ring->max_desc_num_per_pkt = HNS_RCB_RING_MAX_BD_PER_PKT; + ring->max_desc_num_per_pkt = mdnum_ppkt; ring->max_raw_data_sz_per_desc = HNS_RCB_MAX_PKT_SIZE; ring->max_pkt_size = HNS_RCB_MAX_PKT_SIZE; ring->next_to_use = 0; @@ -445,14 +467,22 @@ static int hns_rcb_get_port(struct rcb_common_cb *rcb_common, int ring_idx) return port; } +#define SERVICE_RING_IRQ_IDX(v1) \ + ((v1) ? HNS_SERVICE_RING_IRQ_IDX : HNSV2_SERVICE_RING_IRQ_IDX) +#define DEBUG_RING_IRQ_IDX(v1) \ + ((v1) ? HNS_DEBUG_RING_IRQ_IDX : HNSV2_DEBUG_RING_IRQ_IDX) +#define DEBUG_RING_IRQ_OFFSET(v1) \ + ((v1) ? HNS_DEBUG_RING_IRQ_OFFSET : HNSV2_DEBUG_RING_IRQ_OFFSET) static int hns_rcb_get_base_irq_idx(struct rcb_common_cb *rcb_common) { int comm_index = rcb_common->comm_index; + bool is_ver1 = AE_IS_VER1(rcb_common->dsaf_dev->dsaf_ver); if (comm_index == HNS_DSAF_COMM_SERVICE_NW_IDX) - return HNS_SERVICE_RING_IRQ_IDX; + return SERVICE_RING_IRQ_IDX(is_ver1); else - return HNS_DEBUG_RING_IRQ_IDX + (comm_index - 1) * 2; + return DEBUG_RING_IRQ_IDX(is_ver1) + + (comm_index - 1) * DEBUG_RING_IRQ_OFFSET(is_ver1); } #define RCB_COMM_BASE_TO_RING_BASE(base, ringid)\ @@ -468,6 +498,10 @@ void hns_rcb_get_cfg(struct rcb_common_cb *rcb_common) u32 ring_num = rcb_common->ring_num; int base_irq_idx = hns_rcb_get_base_irq_idx(rcb_common); struct device_node *np = rcb_common->dsaf_dev->dev->of_node; + struct platform_device *pdev = + container_of(rcb_common->dsaf_dev->dev, + struct platform_device, dev); + bool is_ver1 = AE_IS_VER1(rcb_common->dsaf_dev->dsaf_ver); for (i = 0; i < ring_num; i++) { ring_pair_cb = &rcb_common->ring_pair_cb[i]; @@ -477,10 +511,12 @@ void hns_rcb_get_cfg(struct rcb_common_cb *rcb_common) ring_pair_cb->q.io_base = RCB_COMM_BASE_TO_RING_BASE(rcb_common->io_base, i); ring_pair_cb->port_id_in_dsa = hns_rcb_get_port(rcb_common, i); - ring_pair_cb->virq[HNS_RCB_IRQ_IDX_TX] - = irq_of_parse_and_map(np, base_irq_idx + i * 2); - ring_pair_cb->virq[HNS_RCB_IRQ_IDX_RX] - = irq_of_parse_and_map(np, base_irq_idx + i * 2 + 1); + ring_pair_cb->virq[HNS_RCB_IRQ_IDX_TX] = + is_ver1 ? irq_of_parse_and_map(np, base_irq_idx + i * 2) : + platform_get_irq(pdev, base_irq_idx + i * 3 + 1); + ring_pair_cb->virq[HNS_RCB_IRQ_IDX_RX] = + is_ver1 ? irq_of_parse_and_map(np, base_irq_idx + i * 2 + 1) : + platform_get_irq(pdev, base_irq_idx + i * 3); ring_pair_cb->q.phy_base = RCB_COMM_BASE_TO_RING_BASE(rcb_common->phy_base, i); hns_rcb_ring_pair_get_cfg(ring_pair_cb); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h index 3a2afe2dd8bba9..29041b18741aa0 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h @@ -26,6 +26,8 @@ struct rcb_common_cb; #define HNS_RCB_SERVICE_NW_ENGINE_NUM DSAF_COMM_CHN #define HNS_RCB_DEBUG_NW_ENGINE_NUM 1 #define HNS_RCB_RING_MAX_BD_PER_PKT 3 +#define HNS_RCB_RING_MAX_TXBD_PER_PKT 3 +#define HNS_RCBV2_RING_MAX_TXBD_PER_PKT 8 #define HNS_RCB_MAX_PKT_SIZE MAC_MAX_MTU #define HNS_RCB_RING_MAX_PENDING_BD 1024 @@ -106,13 +108,17 @@ void hns_rcb_common_free_cfg(struct dsaf_device *dsaf_dev, u32 comm_index); int hns_rcb_common_init_hw(struct rcb_common_cb *rcb_common); void hns_rcb_start(struct hnae_queue *q, u32 val); void hns_rcb_get_cfg(struct rcb_common_cb *rcb_common); -void hns_rcb_common_init_commit_hw(struct rcb_common_cb *rcb_common); void hns_rcb_get_queue_mode(enum dsaf_mode dsaf_mode, int comm_index, u16 *max_vfn, u16 *max_q_per_vf); +void hns_rcb_common_init_commit_hw(struct rcb_common_cb *rcb_common); + void hns_rcb_ring_enable_hw(struct hnae_queue *q, u32 val); void hns_rcb_int_clr_hw(struct hnae_queue *q, u32 flag); void hns_rcb_int_ctrl_hw(struct hnae_queue *q, u32 flag, u32 enable); +void hns_rcbv2_int_ctrl_hw(struct hnae_queue *q, u32 flag, u32 mask); +void hns_rcbv2_int_clr_hw(struct hnae_queue *q, u32 flag); + void hns_rcb_init_hw(struct ring_pair_cb *ring); void hns_rcb_reset_ring_hw(struct hnae_queue *q); void hns_rcb_wait_fbd_clean(struct hnae_queue **qs, int q_num, u32 flag); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h index b475e1bf2e6fdb..c4d7c26952c42a 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h @@ -10,21 +10,12 @@ #ifndef _DSAF_REG_H_ #define _DSAF_REG_H_ -#define HNS_GE_FIFO_ERR_INTNUM 8 -#define HNS_XGE_ERR_INTNUM 6 -#define HNS_RCB_COMM_ERR_INTNUM 12 -#define HNS_PPE_TNL_ERR_INTNUM 8 -#define HNS_DSAF_EVENT_INTNUM 21 -#define HNS_DEBUG_RING_INTNUM 4 -#define HNS_SERVICE_RING_INTNUM 256 - -#define HNS_DEBUG_RING_IRQ_IDX (HNS_GE_FIFO_ERR_INTNUM + HNS_XGE_ERR_INTNUM +\ - HNS_RCB_COMM_ERR_INTNUM + HNS_PPE_TNL_ERR_INTNUM +\ - HNS_DSAF_EVENT_INTNUM) -#define HNS_SERVICE_RING_IRQ_IDX (HNS_DEBUG_RING_IRQ_IDX +\ - HNS_DEBUG_RING_INTNUM) - -#define DSAF_IRQ_NUM 18 +#define HNS_DEBUG_RING_IRQ_IDX 55 +#define HNS_SERVICE_RING_IRQ_IDX 59 +#define HNS_DEBUG_RING_IRQ_OFFSET 2 +#define HNSV2_DEBUG_RING_IRQ_IDX 409 +#define HNSV2_SERVICE_RING_IRQ_IDX 25 +#define HNSV2_DEBUG_RING_IRQ_OFFSET 9 #define DSAF_MAX_PORT_NUM_PER_CHIP 8 #define DSAF_SERVICE_PORT_NUM_PER_DSAF 6 @@ -39,9 +30,15 @@ #define DSAF_GE_NUM ((DSAF_SERVICE_NW_NUM) + (DSAF_DEBUG_NW_NUM)) #define DSAF_PORT_NUM ((DSAF_SERVICE_NW_NUM) + (DSAF_DEBUG_NW_NUM)) #define DSAF_XGE_NUM DSAF_SERVICE_NW_NUM +#define DSAF_PORT_TYPE_NUM 3 #define DSAF_NODE_NUM 18 #define DSAF_XOD_BIG_NUM DSAF_NODE_NUM #define DSAF_SBM_NUM DSAF_NODE_NUM +#define DSAFV2_SBM_NUM 8 +#define DSAFV2_SBM_XGE_CHN 6 +#define DSAFV2_SBM_PPE_CHN 1 +#define DASFV2_ROCEE_CRD_NUM 8 + #define DSAF_VOQ_NUM DSAF_NODE_NUM #define DSAF_INODE_NUM DSAF_NODE_NUM #define DSAF_XOD_NUM 8 @@ -52,56 +49,56 @@ #define DSAF_TCAM_SUM 512 #define DSAF_LINE_SUM (2048 * 14) -#define DSAF_SUB_SC_NT_SRAM_CLK_SEL_REG 0x100 -#define DSAF_SUB_SC_HILINK3_CRG_CTRL0_REG 0x180 -#define DSAF_SUB_SC_HILINK3_CRG_CTRL1_REG 0x184 -#define DSAF_SUB_SC_HILINK3_CRG_CTRL2_REG 0x188 -#define DSAF_SUB_SC_HILINK3_CRG_CTRL3_REG 0x18C -#define DSAF_SUB_SC_HILINK4_CRG_CTRL0_REG 0x190 -#define DSAF_SUB_SC_HILINK4_CRG_CTRL1_REG 0x194 -#define DSAF_SUB_SC_DSAF_CLK_EN_REG 0x300 -#define DSAF_SUB_SC_DSAF_CLK_DIS_REG 0x304 -#define DSAF_SUB_SC_NT_CLK_EN_REG 0x308 -#define DSAF_SUB_SC_NT_CLK_DIS_REG 0x30C -#define DSAF_SUB_SC_XGE_CLK_EN_REG 0x310 -#define DSAF_SUB_SC_XGE_CLK_DIS_REG 0x314 -#define DSAF_SUB_SC_GE_CLK_EN_REG 0x318 -#define DSAF_SUB_SC_GE_CLK_DIS_REG 0x31C -#define DSAF_SUB_SC_PPE_CLK_EN_REG 0x320 -#define DSAF_SUB_SC_PPE_CLK_DIS_REG 0x324 -#define DSAF_SUB_SC_RCB_PPE_COM_CLK_EN_REG 0x350 -#define DSAF_SUB_SC_RCB_PPE_COM_CLK_DIS_REG 0x354 -#define DSAF_SUB_SC_XBAR_RESET_REQ_REG 0xA00 -#define DSAF_SUB_SC_XBAR_RESET_DREQ_REG 0xA04 -#define DSAF_SUB_SC_NT_RESET_REQ_REG 0xA08 -#define DSAF_SUB_SC_NT_RESET_DREQ_REG 0xA0C -#define DSAF_SUB_SC_XGE_RESET_REQ_REG 0xA10 -#define DSAF_SUB_SC_XGE_RESET_DREQ_REG 0xA14 -#define DSAF_SUB_SC_GE_RESET_REQ0_REG 0xA18 -#define DSAF_SUB_SC_GE_RESET_DREQ0_REG 0xA1C -#define DSAF_SUB_SC_GE_RESET_REQ1_REG 0xA20 -#define DSAF_SUB_SC_GE_RESET_DREQ1_REG 0xA24 -#define DSAF_SUB_SC_PPE_RESET_REQ_REG 0xA48 -#define DSAF_SUB_SC_PPE_RESET_DREQ_REG 0xA4C -#define DSAF_SUB_SC_RCB_PPE_COM_RESET_REQ_REG 0xA88 -#define DSAF_SUB_SC_RCB_PPE_COM_RESET_DREQ_REG 0xA8C -#define DSAF_SUB_SC_LIGHT_MODULE_DETECT_EN_REG 0x2060 -#define DSAF_SUB_SC_TCAM_MBIST_EN_REG 0x2300 -#define DSAF_SUB_SC_DSAF_CLK_ST_REG 0x5300 -#define DSAF_SUB_SC_NT_CLK_ST_REG 0x5304 -#define DSAF_SUB_SC_XGE_CLK_ST_REG 0x5308 -#define DSAF_SUB_SC_GE_CLK_ST_REG 0x530C -#define DSAF_SUB_SC_PPE_CLK_ST_REG 0x5310 -#define DSAF_SUB_SC_ROCEE_CLK_ST_REG 0x5314 -#define DSAF_SUB_SC_CPU_CLK_ST_REG 0x5318 -#define DSAF_SUB_SC_RCB_PPE_COM_CLK_ST_REG 0x5328 -#define DSAF_SUB_SC_XBAR_RESET_ST_REG 0x5A00 -#define DSAF_SUB_SC_NT_RESET_ST_REG 0x5A04 -#define DSAF_SUB_SC_XGE_RESET_ST_REG 0x5A08 -#define DSAF_SUB_SC_GE_RESET_ST0_REG 0x5A0C -#define DSAF_SUB_SC_GE_RESET_ST1_REG 0x5A10 -#define DSAF_SUB_SC_PPE_RESET_ST_REG 0x5A24 -#define DSAF_SUB_SC_RCB_PPE_COM_RESET_ST_REG 0x5A44 +#define DSAF_SUB_SC_NT_SRAM_CLK_SEL_REG 0x100 +#define DSAF_SUB_SC_HILINK3_CRG_CTRL0_REG 0x180 +#define DSAF_SUB_SC_HILINK3_CRG_CTRL1_REG 0x184 +#define DSAF_SUB_SC_HILINK3_CRG_CTRL2_REG 0x188 +#define DSAF_SUB_SC_HILINK3_CRG_CTRL3_REG 0x18C +#define DSAF_SUB_SC_HILINK4_CRG_CTRL0_REG 0x190 +#define DSAF_SUB_SC_HILINK4_CRG_CTRL1_REG 0x194 +#define DSAF_SUB_SC_DSAF_CLK_EN_REG 0x300 +#define DSAF_SUB_SC_DSAF_CLK_DIS_REG 0x304 +#define DSAF_SUB_SC_NT_CLK_EN_REG 0x308 +#define DSAF_SUB_SC_NT_CLK_DIS_REG 0x30C +#define DSAF_SUB_SC_XGE_CLK_EN_REG 0x310 +#define DSAF_SUB_SC_XGE_CLK_DIS_REG 0x314 +#define DSAF_SUB_SC_GE_CLK_EN_REG 0x318 +#define DSAF_SUB_SC_GE_CLK_DIS_REG 0x31C +#define DSAF_SUB_SC_PPE_CLK_EN_REG 0x320 +#define DSAF_SUB_SC_PPE_CLK_DIS_REG 0x324 +#define DSAF_SUB_SC_RCB_PPE_COM_CLK_EN_REG 0x350 +#define DSAF_SUB_SC_RCB_PPE_COM_CLK_DIS_REG 0x354 +#define DSAF_SUB_SC_XBAR_RESET_REQ_REG 0xA00 +#define DSAF_SUB_SC_XBAR_RESET_DREQ_REG 0xA04 +#define DSAF_SUB_SC_NT_RESET_REQ_REG 0xA08 +#define DSAF_SUB_SC_NT_RESET_DREQ_REG 0xA0C +#define DSAF_SUB_SC_XGE_RESET_REQ_REG 0xA10 +#define DSAF_SUB_SC_XGE_RESET_DREQ_REG 0xA14 +#define DSAF_SUB_SC_GE_RESET_REQ0_REG 0xA18 +#define DSAF_SUB_SC_GE_RESET_DREQ0_REG 0xA1C +#define DSAF_SUB_SC_GE_RESET_REQ1_REG 0xA20 +#define DSAF_SUB_SC_GE_RESET_DREQ1_REG 0xA24 +#define DSAF_SUB_SC_PPE_RESET_REQ_REG 0xA48 +#define DSAF_SUB_SC_PPE_RESET_DREQ_REG 0xA4C +#define DSAF_SUB_SC_RCB_PPE_COM_RESET_REQ_REG 0xA88 +#define DSAF_SUB_SC_RCB_PPE_COM_RESET_DREQ_REG 0xA8C +#define DSAF_SUB_SC_LIGHT_MODULE_DETECT_EN_REG 0x2060 +#define DSAF_SUB_SC_TCAM_MBIST_EN_REG 0x2300 +#define DSAF_SUB_SC_DSAF_CLK_ST_REG 0x5300 +#define DSAF_SUB_SC_NT_CLK_ST_REG 0x5304 +#define DSAF_SUB_SC_XGE_CLK_ST_REG 0x5308 +#define DSAF_SUB_SC_GE_CLK_ST_REG 0x530C +#define DSAF_SUB_SC_PPE_CLK_ST_REG 0x5310 +#define DSAF_SUB_SC_ROCEE_CLK_ST_REG 0x5314 +#define DSAF_SUB_SC_CPU_CLK_ST_REG 0x5318 +#define DSAF_SUB_SC_RCB_PPE_COM_CLK_ST_REG 0x5328 +#define DSAF_SUB_SC_XBAR_RESET_ST_REG 0x5A00 +#define DSAF_SUB_SC_NT_RESET_ST_REG 0x5A04 +#define DSAF_SUB_SC_XGE_RESET_ST_REG 0x5A08 +#define DSAF_SUB_SC_GE_RESET_ST0_REG 0x5A0C +#define DSAF_SUB_SC_GE_RESET_ST1_REG 0x5A10 +#define DSAF_SUB_SC_PPE_RESET_ST_REG 0x5A24 +#define DSAF_SUB_SC_RCB_PPE_COM_RESET_ST_REG 0x5A44 /*serdes offset**/ #define HNS_MAC_HILINK3_REG DSAF_SUB_SC_HILINK3_CRG_CTRL0_REG @@ -178,6 +175,7 @@ #define DSAF_SBM_BP_CFG_2_XGE_REG_0_REG 0x200C #define DSAF_SBM_BP_CFG_2_PPE_REG_0_REG 0x230C #define DSAF_SBM_BP_CFG_2_ROCEE_REG_0_REG 0x260C +#define DSAFV2_SBM_BP_CFG_2_ROCEE_REG_0_REG 0x238C #define DSAF_SBM_FREE_CNT_0_0_REG 0x2010 #define DSAF_SBM_FREE_CNT_1_0_REG 0x2014 #define DSAF_SBM_BP_CNT_0_0_REG 0x2018 @@ -319,6 +317,8 @@ #define PPE_CFG_TAG_GEN_REG 0x90 #define PPE_CFG_PARSE_TAG_REG 0x94 #define PPE_CFG_PRO_CHECK_EN_REG 0x98 +#define PPEV2_CFG_TSO_EN_REG 0xA0 +#define PPEV2_VLAN_STRIP_EN_REG 0xAC #define PPE_INTEN_REG 0x100 #define PPE_RINT_REG 0x104 #define PPE_INTSTS_REG 0x108 @@ -351,6 +351,8 @@ #define PPE_ECO0_REG 0x32C #define PPE_ECO1_REG 0x330 #define PPE_ECO2_REG 0x334 +#define PPEV2_INDRECTION_TBL_REG 0x800 +#define PPEV2_RSS_KEY_REG 0x900 #define RCB_COM_CFG_ENDIAN_REG 0x0 #define RCB_COM_CFG_SYS_FSH_REG 0xC @@ -431,8 +433,10 @@ #define RCB_RING_INTMSK_RXWL_REG 0x000A0 #define RCB_RING_INTSTS_RX_RING_REG 0x000A4 +#define RCBV2_RX_RING_INT_STS_REG 0x000A8 #define RCB_RING_INTMSK_TXWL_REG 0x000AC #define RCB_RING_INTSTS_TX_RING_REG 0x000B0 +#define RCBV2_TX_RING_INT_STS_REG 0x000B4 #define RCB_RING_INTMSK_RX_OVERTIME_REG 0x000B8 #define RCB_RING_INTSTS_RX_OVERTIME_REG 0x000BC #define RCB_RING_INTMSK_TX_OVERTIME_REG 0x000C4 @@ -678,6 +682,10 @@ #define XGMAC_TRX_CORE_SRST_M 0x2080 +#define DSAF_SRAM_INIT_OVER_M 0xff +#define DSAFV2_SRAM_INIT_OVER_M 0x3ff +#define DSAF_SRAM_INIT_OVER_S 0 + #define DSAF_CFG_EN_S 0 #define DSAF_CFG_TC_MODE_S 1 #define DSAF_CFG_CRC_EN_S 2 @@ -685,6 +693,7 @@ #define DSAF_CFG_MIX_MODE_S 4 #define DSAF_CFG_STP_MODE_S 5 #define DSAF_CFG_LOCA_ADDR_EN_S 6 +#define DSAFV2_CFG_VLAN_TAG_MODE_S 17 #define DSAF_CNT_CLR_CE_S 0 #define DSAF_SNAP_EN_S 1 @@ -707,6 +716,16 @@ #define DSAF_INODE_IN_PORT_NUM_M 7 #define DSAF_INODE_IN_PORT_NUM_S 0 +#define DSAFV2_INODE_IN_PORT1_NUM_M (7ULL << 3) +#define DSAFV2_INODE_IN_PORT1_NUM_S 3 +#define DSAFV2_INODE_IN_PORT2_NUM_M (7ULL << 6) +#define DSAFV2_INODE_IN_PORT2_NUM_S 6 +#define DSAFV2_INODE_IN_PORT3_NUM_M (7ULL << 9) +#define DSAFV2_INODE_IN_PORT3_NUM_S 9 +#define DSAFV2_INODE_IN_PORT4_NUM_M (7ULL << 12) +#define DSAFV2_INODE_IN_PORT4_NUM_S 12 +#define DSAFV2_INODE_IN_PORT5_NUM_M (7ULL << 15) +#define DSAFV2_INODE_IN_PORT5_NUM_S 15 #define HNS_DSAF_I4TC_CFG 0x18688688 #define HNS_DSAF_I8TC_CFG 0x18FAC688 @@ -738,6 +757,33 @@ #define DSAF_SBM_CFG3_RESET_BUF_NUM_NO_PFC_S 10 #define DSAF_SBM_CFG3_RESET_BUF_NUM_NO_PFC_M (((1ULL << 10) - 1) << 10) +#define DSAFV2_SBM_CFG0_VC1_MAX_BUF_NUM_S 0 +#define DSAFV2_SBM_CFG0_VC1_MAX_BUF_NUM_M (((1ULL << 9) - 1) << 0) +#define DSAFV2_SBM_CFG0_VC0_MAX_BUF_NUM_S 9 +#define DSAFV2_SBM_CFG0_VC0_MAX_BUF_NUM_M (((1ULL << 9) - 1) << 9) +#define DSAFV2_SBM_CFG0_COM_MAX_BUF_NUM_S 18 +#define DSAFV2_SBM_CFG0_COM_MAX_BUF_NUM_M (((1ULL << 10) - 1) << 18) + +#define DSAFV2_SBM_CFG1_TC4_MAX_BUF_NUM_S 0 +#define DSAFV2_SBM_CFG1_TC4_MAX_BUF_NUM_M (((1ULL << 9) - 1) << 0) +#define DSAFV2_SBM_CFG1_TC0_MAX_BUF_NUM_S 9 +#define DSAFV2_SBM_CFG1_TC0_MAX_BUF_NUM_M (((1ULL << 9) - 1) << 9) + +#define DSAFV2_SBM_CFG2_SET_BUF_NUM_S 0 +#define DSAFV2_SBM_CFG2_SET_BUF_NUM_M (((1ULL << 9) - 1) << 0) +#define DSAFV2_SBM_CFG2_RESET_BUF_NUM_S 9 +#define DSAFV2_SBM_CFG2_RESET_BUF_NUM_M (((1ULL << 9) - 1) << 9) + +#define DSAFV2_SBM_CFG3_SET_BUF_NUM_NO_PFC_S 0 +#define DSAFV2_SBM_CFG3_SET_BUF_NUM_NO_PFC_M (((1ULL << 9) - 1) << 0) +#define DSAFV2_SBM_CFG3_RESET_BUF_NUM_NO_PFC_S 9 +#define DSAFV2_SBM_CFG3_RESET_BUF_NUM_NO_PFC_M (((1ULL << 9) - 1) << 9) + +#define DSAFV2_SBM_CFG4_SET_BUF_NUM_NO_PFC_S 0 +#define DSAFV2_SBM_CFG4_SET_BUF_NUM_NO_PFC_M (((1ULL << 9) - 1) << 0) +#define DSAFV2_SBM_CFG4_RESET_BUF_NUM_NO_PFC_S 9 +#define DSAFV2_SBM_CFG4_RESET_BUF_NUM_NO_PFC_M (((1ULL << 9) - 1) << 9) + #define DSAF_TBL_TCAM_ADDR_S 0 #define DSAF_TBL_TCAM_ADDR_M ((1ULL << 9) - 1) @@ -797,6 +843,18 @@ #define PPE_CFG_QID_MODE_CF_QID_MODE_S 8 #define PPE_CFG_QID_MODE_CF_QID_MODE_M (0x7 << PPE_CFG_QID_MODE_CF_QID_MODE_S) +#define PPEV2_CFG_RSS_TBL_4N0_S 0 +#define PPEV2_CFG_RSS_TBL_4N0_M (((1UL << 5) - 1) << PPEV2_CFG_RSS_TBL_4N0_S) + +#define PPEV2_CFG_RSS_TBL_4N1_S 8 +#define PPEV2_CFG_RSS_TBL_4N1_M (((1UL << 5) - 1) << PPEV2_CFG_RSS_TBL_4N1_S) + +#define PPEV2_CFG_RSS_TBL_4N2_S 16 +#define PPEV2_CFG_RSS_TBL_4N2_M (((1UL << 5) - 1) << PPEV2_CFG_RSS_TBL_4N2_S) + +#define PPEV2_CFG_RSS_TBL_4N3_S 24 +#define PPEV2_CFG_RSS_TBL_4N3_M (((1UL << 5) - 1) << PPEV2_CFG_RSS_TBL_4N3_S) + #define PPE_CNT_CLR_CE_B 0 #define PPE_CNT_CLR_SNAP_EN_B 1 diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 08cef0dfb5db29..5a81dafd725e29 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -33,10 +33,105 @@ #define RCB_IRQ_NOT_INITED 0 #define RCB_IRQ_INITED 1 +#define HNS_BUFFER_SIZE_2048 2048 + +#define BD_MAX_SEND_SIZE 8191 +#define SKB_TMP_LEN(SKB) \ + (((SKB)->transport_header - (SKB)->mac_header) + tcp_hdrlen(SKB)) + +static void fill_v2_desc(struct hnae_ring *ring, void *priv, + int size, dma_addr_t dma, int frag_end, + int buf_num, enum hns_desc_type type, int mtu) +{ + struct hnae_desc *desc = &ring->desc[ring->next_to_use]; + struct hnae_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use]; + struct iphdr *iphdr; + struct ipv6hdr *ipv6hdr; + struct sk_buff *skb; + int skb_tmp_len; + __be16 protocol; + u8 bn_pid = 0; + u8 rrcfv = 0; + u8 ip_offset = 0; + u8 tvsvsn = 0; + u16 mss = 0; + u8 l4_len = 0; + u16 paylen = 0; + + desc_cb->priv = priv; + desc_cb->length = size; + desc_cb->dma = dma; + desc_cb->type = type; + + desc->addr = cpu_to_le64(dma); + desc->tx.send_size = cpu_to_le16((u16)size); + + /*config bd buffer end */ + hnae_set_bit(rrcfv, HNSV2_TXD_VLD_B, 1); + hnae_set_field(bn_pid, HNSV2_TXD_BUFNUM_M, 0, buf_num - 1); + + if (type == DESC_TYPE_SKB) { + skb = (struct sk_buff *)priv; + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + skb_reset_mac_len(skb); + protocol = skb->protocol; + ip_offset = ETH_HLEN; + + if (protocol == htons(ETH_P_8021Q)) { + ip_offset += VLAN_HLEN; + protocol = vlan_get_protocol(skb); + skb->protocol = protocol; + } + + if (skb->protocol == htons(ETH_P_IP)) { + iphdr = ip_hdr(skb); + hnae_set_bit(rrcfv, HNSV2_TXD_L3CS_B, 1); + hnae_set_bit(rrcfv, HNSV2_TXD_L4CS_B, 1); + + /* check for tcp/udp header */ + if (iphdr->protocol == IPPROTO_TCP) { + hnae_set_bit(tvsvsn, + HNSV2_TXD_TSE_B, 1); + skb_tmp_len = SKB_TMP_LEN(skb); + l4_len = tcp_hdrlen(skb); + mss = mtu - skb_tmp_len - ETH_FCS_LEN; + paylen = skb->len - skb_tmp_len; + } + } else if (skb->protocol == htons(ETH_P_IPV6)) { + hnae_set_bit(tvsvsn, HNSV2_TXD_IPV6_B, 1); + ipv6hdr = ipv6_hdr(skb); + hnae_set_bit(rrcfv, HNSV2_TXD_L4CS_B, 1); + + /* check for tcp/udp header */ + if (ipv6hdr->nexthdr == IPPROTO_TCP) { + hnae_set_bit(tvsvsn, + HNSV2_TXD_TSE_B, 1); + skb_tmp_len = SKB_TMP_LEN(skb); + l4_len = tcp_hdrlen(skb); + mss = mtu - skb_tmp_len - ETH_FCS_LEN; + paylen = skb->len - skb_tmp_len; + } + } + desc->tx.ip_offset = ip_offset; + desc->tx.tse_vlan_snap_v6_sctp_nth = tvsvsn; + desc->tx.mss = cpu_to_le16(mss); + desc->tx.l4_len = l4_len; + desc->tx.paylen = cpu_to_le16(paylen); + } + } + + hnae_set_bit(rrcfv, HNSV2_TXD_FE_B, frag_end); + + desc->tx.bn_pid = bn_pid; + desc->tx.ra_ri_cs_fe_vld = rrcfv; + + ring_ptr_move_fw(ring, next_to_use); +} static void fill_desc(struct hnae_ring *ring, void *priv, int size, dma_addr_t dma, int frag_end, - int buf_num, enum hns_desc_type type) + int buf_num, enum hns_desc_type type, int mtu) { struct hnae_desc *desc = &ring->desc[ring->next_to_use]; struct hnae_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use]; @@ -100,47 +195,129 @@ static void unfill_desc(struct hnae_ring *ring) ring_ptr_move_bw(ring, next_to_use); } -int hns_nic_net_xmit_hw(struct net_device *ndev, - struct sk_buff *skb, - struct hns_nic_ring_data *ring_data) +static int hns_nic_maybe_stop_tx( + struct sk_buff **out_skb, int *bnum, struct hnae_ring *ring) { - struct hns_nic_priv *priv = netdev_priv(ndev); - struct device *dev = priv->dev; - struct hnae_ring *ring = ring_data->ring; - struct netdev_queue *dev_queue; - struct skb_frag_struct *frag; + struct sk_buff *skb = *out_skb; + struct sk_buff *new_skb = NULL; int buf_num; - dma_addr_t dma; - int size, next_to_use; - int i, j; - struct sk_buff *new_skb; - - assert(ring->max_desc_num_per_pkt <= ring->desc_num); /* no. of segments (plus a header) */ buf_num = skb_shinfo(skb)->nr_frags + 1; if (unlikely(buf_num > ring->max_desc_num_per_pkt)) { - if (ring_space(ring) < 1) { - ring->stats.tx_busy++; - goto out_net_tx_busy; - } + if (ring_space(ring) < 1) + return -EBUSY; new_skb = skb_copy(skb, GFP_ATOMIC); - if (!new_skb) { - ring->stats.sw_err_cnt++; - netdev_err(ndev, "no memory to xmit!\n"); - goto out_err_tx_ok; - } + if (!new_skb) + return -ENOMEM; dev_kfree_skb_any(skb); - skb = new_skb; + *out_skb = new_skb; buf_num = 1; - assert(skb_shinfo(skb)->nr_frags == 1); } else if (buf_num > ring_space(ring)) { + return -EBUSY; + } + + *bnum = buf_num; + return 0; +} + +static int hns_nic_maybe_stop_tso( + struct sk_buff **out_skb, int *bnum, struct hnae_ring *ring) +{ + int i; + int size; + int buf_num; + int frag_num; + struct sk_buff *skb = *out_skb; + struct sk_buff *new_skb = NULL; + struct skb_frag_struct *frag; + + size = skb_headlen(skb); + buf_num = (size + BD_MAX_SEND_SIZE - 1) / BD_MAX_SEND_SIZE; + + frag_num = skb_shinfo(skb)->nr_frags; + for (i = 0; i < frag_num; i++) { + frag = &skb_shinfo(skb)->frags[i]; + size = skb_frag_size(frag); + buf_num += (size + BD_MAX_SEND_SIZE - 1) / BD_MAX_SEND_SIZE; + } + + if (unlikely(buf_num > ring->max_desc_num_per_pkt)) { + buf_num = (skb->len + BD_MAX_SEND_SIZE - 1) / BD_MAX_SEND_SIZE; + if (ring_space(ring) < buf_num) + return -EBUSY; + /* manual split the send packet */ + new_skb = skb_copy(skb, GFP_ATOMIC); + if (!new_skb) + return -ENOMEM; + dev_kfree_skb_any(skb); + *out_skb = new_skb; + + } else if (ring_space(ring) < buf_num) { + return -EBUSY; + } + + *bnum = buf_num; + return 0; +} + +static void fill_tso_desc(struct hnae_ring *ring, void *priv, + int size, dma_addr_t dma, int frag_end, + int buf_num, enum hns_desc_type type, int mtu) +{ + int frag_buf_num; + int sizeoflast; + int k; + + frag_buf_num = (size + BD_MAX_SEND_SIZE - 1) / BD_MAX_SEND_SIZE; + sizeoflast = size % BD_MAX_SEND_SIZE; + sizeoflast = sizeoflast ? sizeoflast : BD_MAX_SEND_SIZE; + + /* when the frag size is bigger than hardware, split this frag */ + for (k = 0; k < frag_buf_num; k++) + fill_v2_desc(ring, priv, + (k == frag_buf_num - 1) ? + sizeoflast : BD_MAX_SEND_SIZE, + dma + BD_MAX_SEND_SIZE * k, + frag_end && (k == frag_buf_num - 1) ? 1 : 0, + buf_num, + (type == DESC_TYPE_SKB && !k) ? + DESC_TYPE_SKB : DESC_TYPE_PAGE, + mtu); +} + +int hns_nic_net_xmit_hw(struct net_device *ndev, + struct sk_buff *skb, + struct hns_nic_ring_data *ring_data) +{ + struct hns_nic_priv *priv = netdev_priv(ndev); + struct device *dev = priv->dev; + struct hnae_ring *ring = ring_data->ring; + struct netdev_queue *dev_queue; + struct skb_frag_struct *frag; + int buf_num; + int seg_num; + dma_addr_t dma; + int size, next_to_use; + int i; + + switch (priv->ops.maybe_stop_tx(&skb, &buf_num, ring)) { + case -EBUSY: ring->stats.tx_busy++; goto out_net_tx_busy; + case -ENOMEM: + ring->stats.sw_err_cnt++; + netdev_err(ndev, "no memory to xmit!\n"); + goto out_err_tx_ok; + default: + break; } + + /* no. of segments (plus a header) */ + seg_num = skb_shinfo(skb)->nr_frags + 1; next_to_use = ring->next_to_use; /* fill the first part */ @@ -151,11 +328,11 @@ int hns_nic_net_xmit_hw(struct net_device *ndev, ring->stats.sw_err_cnt++; goto out_err_tx_ok; } - fill_desc(ring, skb, size, dma, buf_num == 1 ? 1 : 0, buf_num, - DESC_TYPE_SKB); + priv->ops.fill_desc(ring, skb, size, dma, seg_num == 1 ? 1 : 0, + buf_num, DESC_TYPE_SKB, ndev->mtu); /* fill the fragments */ - for (i = 1; i < buf_num; i++) { + for (i = 1; i < seg_num; i++) { frag = &skb_shinfo(skb)->frags[i - 1]; size = skb_frag_size(frag); dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE); @@ -164,8 +341,9 @@ int hns_nic_net_xmit_hw(struct net_device *ndev, ring->stats.sw_err_cnt++; goto out_map_frag_fail; } - fill_desc(ring, skb_frag_page(frag), size, dma, - buf_num - 1 == i ? 1 : 0, buf_num, DESC_TYPE_PAGE); + priv->ops.fill_desc(ring, skb_frag_page(frag), size, dma, + seg_num - 1 == i ? 1 : 0, buf_num, + DESC_TYPE_PAGE, ndev->mtu); } /*complete translate all packets*/ @@ -182,19 +360,20 @@ int hns_nic_net_xmit_hw(struct net_device *ndev, out_map_frag_fail: - for (j = i - 1; j > 0; j--) { + while (ring->next_to_use != next_to_use) { unfill_desc(ring); - next_to_use = ring->next_to_use; - dma_unmap_page(dev, ring->desc_cb[next_to_use].dma, - ring->desc_cb[next_to_use].length, - DMA_TO_DEVICE); + if (ring->next_to_use != next_to_use) + dma_unmap_page(dev, + ring->desc_cb[ring->next_to_use].dma, + ring->desc_cb[ring->next_to_use].length, + DMA_TO_DEVICE); + else + dma_unmap_single(dev, + ring->desc_cb[next_to_use].dma, + ring->desc_cb[next_to_use].length, + DMA_TO_DEVICE); } - unfill_desc(ring); - next_to_use = ring->next_to_use; - dma_unmap_single(dev, ring->desc_cb[next_to_use].dma, - ring->desc_cb[next_to_use].length, DMA_TO_DEVICE); - out_err_tx_ok: dev_kfree_skb_any(skb); @@ -313,13 +492,51 @@ static unsigned int hns_nic_get_headlen(unsigned char *data, u32 flag, return max_size; } -static void -hns_nic_reuse_page(struct hnae_desc_cb *desc_cb, int tsize, int last_offset) +static void hns_nic_reuse_page(struct sk_buff *skb, int i, + struct hnae_ring *ring, int pull_len, + struct hnae_desc_cb *desc_cb) { + struct hnae_desc *desc; + int truesize, size; + int last_offset; + + desc = &ring->desc[ring->next_to_clean]; + size = le16_to_cpu(desc->rx.size); + +#if (PAGE_SIZE < 8192) + if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) { + truesize = hnae_buf_size(ring); + } else { + truesize = ALIGN(size, L1_CACHE_BYTES); + last_offset = hnae_page_size(ring) - hnae_buf_size(ring); + } + +#else + truesize = ALIGN(size, L1_CACHE_BYTES); + last_offset = hnae_page_size(ring) - hnae_buf_size(ring); +#endif + + skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len, + size - pull_len, truesize - pull_len); + /* avoid re-using remote pages,flag default unreuse */ if (likely(page_to_nid(desc_cb->priv) == numa_node_id())) { +#if (PAGE_SIZE < 8192) + if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) { + /* if we are only owner of page we can reuse it */ + if (likely(page_count(desc_cb->priv) == 1)) { + /* flip page offset to other buffer */ + desc_cb->page_offset ^= truesize; + + desc_cb->reuse_flag = 1; + /* bump ref count on page before it is given*/ + get_page(desc_cb->priv); + } + return; + } +#endif /* move offset up to the next cache line */ - desc_cb->page_offset += tsize; + desc_cb->page_offset += truesize; if (desc_cb->page_offset <= last_offset) { desc_cb->reuse_flag = 1; @@ -329,35 +546,59 @@ hns_nic_reuse_page(struct hnae_desc_cb *desc_cb, int tsize, int last_offset) } } +static void get_v2rx_desc_bnum(u32 bnum_flag, int *out_bnum) +{ + *out_bnum = hnae_get_field(bnum_flag, + HNS_RXD_BUFNUM_M, HNS_RXD_BUFNUM_S) + 1; +} + +static void get_rx_desc_bnum(u32 bnum_flag, int *out_bnum) +{ + *out_bnum = hnae_get_field(bnum_flag, + HNS_RXD_BUFNUM_M, HNS_RXD_BUFNUM_S); +} + static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data, struct sk_buff **out_skb, int *out_bnum) { struct hnae_ring *ring = ring_data->ring; struct net_device *ndev = ring_data->napi.dev; + struct hns_nic_priv *priv = netdev_priv(ndev); struct sk_buff *skb; struct hnae_desc *desc; struct hnae_desc_cb *desc_cb; unsigned char *va; - int bnum, length, size, i, truesize, last_offset; + int bnum, length, i; int pull_len; u32 bnum_flag; - last_offset = hnae_page_size(ring) - hnae_buf_size(ring); desc = &ring->desc[ring->next_to_clean]; desc_cb = &ring->desc_cb[ring->next_to_clean]; - length = le16_to_cpu(desc->rx.pkt_len); - bnum_flag = le32_to_cpu(desc->rx.ipoff_bnum_pid_flag); - bnum = hnae_get_field(bnum_flag, HNS_RXD_BUFNUM_M, HNS_RXD_BUFNUM_S); - *out_bnum = bnum; + + prefetch(desc); + va = (unsigned char *)desc_cb->buf + desc_cb->page_offset; - skb = *out_skb = napi_alloc_skb(&ring_data->napi, HNS_RX_HEAD_SIZE); + /* prefetch first cache line of first page */ + prefetch(va); +#if L1_CACHE_BYTES < 128 + prefetch(va + L1_CACHE_BYTES); +#endif + + skb = *out_skb = napi_alloc_skb(&ring_data->napi, + HNS_RX_HEAD_SIZE); if (unlikely(!skb)) { netdev_err(ndev, "alloc rx skb fail\n"); ring->stats.sw_err_cnt++; return -ENOMEM; } + prefetchw(skb->data); + length = le16_to_cpu(desc->rx.pkt_len); + bnum_flag = le32_to_cpu(desc->rx.ipoff_bnum_pid_flag); + priv->ops.get_rxd_bnum(bnum_flag, &bnum); + *out_bnum = bnum; + if (length <= HNS_RX_HEAD_SIZE) { memcpy(__skb_put(skb, length), va, ALIGN(length, sizeof(long))); @@ -380,13 +621,7 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data, memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long))); - size = le16_to_cpu(desc->rx.size); - truesize = ALIGN(size, L1_CACHE_BYTES); - skb_add_rx_frag(skb, 0, desc_cb->priv, - desc_cb->page_offset + pull_len, - size - pull_len, truesize - pull_len); - - hns_nic_reuse_page(desc_cb, truesize, last_offset); + hns_nic_reuse_page(skb, 0, ring, pull_len, desc_cb); ring_ptr_move_fw(ring, next_to_clean); if (unlikely(bnum >= (int)MAX_SKB_FRAGS)) { /* check err*/ @@ -396,13 +631,8 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data, for (i = 1; i < bnum; i++) { desc = &ring->desc[ring->next_to_clean]; desc_cb = &ring->desc_cb[ring->next_to_clean]; - size = le16_to_cpu(desc->rx.size); - truesize = ALIGN(size, L1_CACHE_BYTES); - skb_add_rx_frag(skb, i, desc_cb->priv, - desc_cb->page_offset, - size, truesize); - hns_nic_reuse_page(desc_cb, truesize, last_offset); + hns_nic_reuse_page(skb, i, ring, 0, desc_cb); ring_ptr_move_fw(ring, next_to_clean); } } @@ -540,20 +770,20 @@ recv: } /* make all data has been write before submit */ - if (clean_count > 0) { - hns_nic_alloc_rx_buffers(ring_data, clean_count); - clean_count = 0; - } - if (recv_pkts < budget) { ex_num = readl_relaxed(ring->io_base + RCB_REG_FBDNUM); - rmb(); /*complete read rx ring bd number*/ - if (ex_num > 0) { - num += ex_num; + + if (ex_num > clean_count) { + num += ex_num - clean_count; + rmb(); /*complete read rx ring bd number*/ goto recv; } } + /* make all data has been write before submit */ + if (clean_count > 0) + hns_nic_alloc_rx_buffers(ring_data, clean_count); + return recv_pkts; } @@ -642,14 +872,20 @@ static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data, bytes = 0; pkts = 0; - while (head != ring->next_to_clean) + while (head != ring->next_to_clean) { hns_nic_reclaim_one_desc(ring, &bytes, &pkts); + /* issue prefetch for next Tx descriptor */ + prefetch(&ring->desc_cb[ring->next_to_clean]); + } NETIF_TX_UNLOCK(ndev); dev_queue = netdev_get_tx_queue(ndev, ring_data->queue_index); netdev_tx_completed_queue(dev_queue, pkts, bytes); + if (unlikely(priv->link && !netif_carrier_ok(ndev))) + netif_carrier_on(ndev); + if (unlikely(pkts && netif_carrier_ok(ndev) && (ring_space(ring) >= ring->max_desc_num_per_pkt * 2))) { /* Make sure that anybody stopping the queue after this @@ -716,6 +952,7 @@ static int hns_nic_common_poll(struct napi_struct *napi, int budget) ring_data->ring, 0); ring_data->fini_process(ring_data); + return 0; } return clean_complete; @@ -848,15 +1085,58 @@ static void hns_nic_ring_close(struct net_device *netdev, int idx) napi_disable(&priv->ring_data[idx].napi); } -static int hns_nic_init_irq(struct hns_nic_priv *priv) +static void hns_set_irq_affinity(struct hns_nic_priv *priv) { struct hnae_handle *h = priv->ae_handle; struct hns_nic_ring_data *rd; int i; - int ret; int cpu; cpumask_t mask; + /*diffrent irq banlance for 16core and 32core*/ + if (h->q_num == num_possible_cpus()) { + for (i = 0; i < h->q_num * 2; i++) { + rd = &priv->ring_data[i]; + if (cpu_online(rd->queue_index)) { + cpumask_clear(&mask); + cpu = rd->queue_index; + cpumask_set_cpu(cpu, &mask); + (void)irq_set_affinity_hint(rd->ring->irq, + &mask); + } + } + } else { + for (i = 0; i < h->q_num; i++) { + rd = &priv->ring_data[i]; + if (cpu_online(rd->queue_index * 2)) { + cpumask_clear(&mask); + cpu = rd->queue_index * 2; + cpumask_set_cpu(cpu, &mask); + (void)irq_set_affinity_hint(rd->ring->irq, + &mask); + } + } + + for (i = h->q_num; i < h->q_num * 2; i++) { + rd = &priv->ring_data[i]; + if (cpu_online(rd->queue_index * 2 + 1)) { + cpumask_clear(&mask); + cpu = rd->queue_index * 2 + 1; + cpumask_set_cpu(cpu, &mask); + (void)irq_set_affinity_hint(rd->ring->irq, + &mask); + } + } + } +} + +static int hns_nic_init_irq(struct hns_nic_priv *priv) +{ + struct hnae_handle *h = priv->ae_handle; + struct hns_nic_ring_data *rd; + int i; + int ret; + for (i = 0; i < h->q_num * 2; i++) { rd = &priv->ring_data[i]; @@ -878,16 +1158,11 @@ static int hns_nic_init_irq(struct hns_nic_priv *priv) } disable_irq(rd->ring->irq); rd->ring->irq_init_flag = RCB_IRQ_INITED; - - /*set cpu affinity*/ - if (cpu_online(rd->queue_index)) { - cpumask_clear(&mask); - cpu = rd->queue_index; - cpumask_set_cpu(cpu, &mask); - irq_set_affinity_hint(rd->ring->irq, &mask); - } } + /*set cpu affinity*/ + hns_set_irq_affinity(priv); + return 0; } @@ -1136,6 +1411,51 @@ static int hns_nic_change_mtu(struct net_device *ndev, int new_mtu) return ret; } +static int hns_nic_set_features(struct net_device *netdev, + netdev_features_t features) +{ + struct hns_nic_priv *priv = netdev_priv(netdev); + struct hnae_handle *h = priv->ae_handle; + + switch (priv->enet_ver) { + case AE_VERSION_1: + if (features & (NETIF_F_TSO | NETIF_F_TSO6)) + netdev_info(netdev, "enet v1 do not support tso!\n"); + break; + default: + if (features & (NETIF_F_TSO | NETIF_F_TSO6)) { + priv->ops.fill_desc = fill_tso_desc; + priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tso; + /* The chip only support 7*4096 */ + netif_set_gso_max_size(netdev, 7 * 4096); + h->dev->ops->set_tso_stats(h, 1); + } else { + priv->ops.fill_desc = fill_v2_desc; + priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tx; + h->dev->ops->set_tso_stats(h, 0); + } + break; + } + netdev->features = features; + return 0; +} + +static netdev_features_t hns_nic_fix_features( + struct net_device *netdev, netdev_features_t features) +{ + struct hns_nic_priv *priv = netdev_priv(netdev); + + switch (priv->enet_ver) { + case AE_VERSION_1: + features &= ~(NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_HW_VLAN_CTAG_FILTER); + break; + default: + break; + } + return features; +} + /** * nic_set_multicast_list - set mutl mac address * @netdev: net device @@ -1231,6 +1551,8 @@ static const struct net_device_ops hns_nic_netdev_ops = { .ndo_set_mac_address = hns_nic_net_set_mac_address, .ndo_change_mtu = hns_nic_change_mtu, .ndo_do_ioctl = hns_nic_do_ioctl, + .ndo_set_features = hns_nic_set_features, + .ndo_fix_features = hns_nic_fix_features, .ndo_get_stats64 = hns_nic_get_stats64, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = hns_nic_poll_controller, @@ -1315,22 +1637,26 @@ static void hns_nic_reset_subtask(struct hns_nic_priv *priv) return; hns_nic_dump(priv); - netdev_info(priv->netdev, "Reset %s port\n", - (type == HNAE_PORT_DEBUG ? "debug" : "business")); + netdev_info(priv->netdev, "try to reset %s port!\n", + (type == HNAE_PORT_DEBUG ? "debug" : "service")); rtnl_lock(); /* put off any impending NetWatchDogTimeout */ priv->netdev->trans_start = jiffies; - if (type == HNAE_PORT_DEBUG) + if (type == HNAE_PORT_DEBUG) { hns_nic_net_reinit(priv->netdev); + } else { + netif_carrier_off(priv->netdev); + netif_tx_disable(priv->netdev); + } rtnl_unlock(); } /* for doing service complete*/ static void hns_nic_service_event_complete(struct hns_nic_priv *priv) { - assert(!test_bit(NIC_STATE_SERVICE_SCHED, &priv->state)); + WARN_ON(!test_bit(NIC_STATE_SERVICE_SCHED, &priv->state)); smp_mb__before_atomic(); clear_bit(NIC_STATE_SERVICE_SCHED, &priv->state); @@ -1435,8 +1761,9 @@ static void hns_nic_uninit_ring_data(struct hns_nic_priv *priv) for (i = 0; i < h->q_num * 2; i++) { netif_napi_del(&priv->ring_data[i].napi); if (priv->ring_data[i].ring->irq_init_flag == RCB_IRQ_INITED) { - irq_set_affinity_hint(priv->ring_data[i].ring->irq, - NULL); + (void)irq_set_affinity_hint( + priv->ring_data[i].ring->irq, + NULL); free_irq(priv->ring_data[i].ring->irq, &priv->ring_data[i]); } @@ -1446,6 +1773,31 @@ static void hns_nic_uninit_ring_data(struct hns_nic_priv *priv) kfree(priv->ring_data); } +static void hns_nic_set_priv_ops(struct net_device *netdev) +{ + struct hns_nic_priv *priv = netdev_priv(netdev); + struct hnae_handle *h = priv->ae_handle; + + if (AE_IS_VER1(priv->enet_ver)) { + priv->ops.fill_desc = fill_desc; + priv->ops.get_rxd_bnum = get_rx_desc_bnum; + priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tx; + } else { + priv->ops.get_rxd_bnum = get_v2rx_desc_bnum; + if ((netdev->features & NETIF_F_TSO) || + (netdev->features & NETIF_F_TSO6)) { + priv->ops.fill_desc = fill_tso_desc; + priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tso; + /* This chip only support 7*4096 */ + netif_set_gso_max_size(netdev, 7 * 4096); + h->dev->ops->set_tso_stats(h, 1); + } else { + priv->ops.fill_desc = fill_v2_desc; + priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tx; + } + } +} + static int hns_nic_try_get_ae(struct net_device *ndev) { struct hns_nic_priv *priv = netdev_priv(ndev); @@ -1473,6 +1825,8 @@ static int hns_nic_try_get_ae(struct net_device *ndev) goto out_init_ring_data; } + hns_nic_set_priv_ops(ndev); + ret = register_netdev(ndev); if (ret) { dev_err(priv->dev, "probe register netdev fail!\n"); @@ -1524,10 +1878,10 @@ static int hns_nic_dev_probe(struct platform_device *pdev) priv->dev = dev; priv->netdev = ndev; - if (of_device_is_compatible(node, "hisilicon,hns-nic-v2")) - priv->enet_ver = AE_VERSION_2; - else + if (of_device_is_compatible(node, "hisilicon,hns-nic-v1")) priv->enet_ver = AE_VERSION_1; + else + priv->enet_ver = AE_VERSION_2; ret = of_property_read_string(node, "ae-name", &priv->ae_name); if (ret) @@ -1543,6 +1897,7 @@ static int hns_nic_dev_probe(struct platform_device *pdev) ndev->priv_flags |= IFF_UNICAST_FLT; ndev->netdev_ops = &hns_nic_netdev_ops; hns_ethtool_set_ops(ndev); + ndev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO; @@ -1550,6 +1905,17 @@ static int hns_nic_dev_probe(struct platform_device *pdev) NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM; ndev->vlan_features |= NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO; + switch (priv->enet_ver) { + case AE_VERSION_2: + ndev->features |= NETIF_F_TSO | NETIF_F_TSO6; + ndev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | + NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO | + NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6; + break; + default: + break; + } + SET_NETDEV_DEV(ndev, dev); if (!dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64))) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.h b/drivers/net/ethernet/hisilicon/hns/hns_enet.h index dae0ed19ac6d73..4b75270f014e76 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.h @@ -40,6 +40,16 @@ struct hns_nic_ring_data { void (*fini_process)(struct hns_nic_ring_data *); }; +/* compatible the difference between two versions */ +struct hns_nic_ops { + void (*fill_desc)(struct hnae_ring *ring, void *priv, + int size, dma_addr_t dma, int frag_end, + int buf_num, enum hns_desc_type type, int mtu); + int (*maybe_stop_tx)(struct sk_buff **out_skb, + int *bnum, struct hnae_ring *ring); + void (*get_rxd_bnum)(u32 bnum_flag, int *out_bnum); +}; + struct hns_nic_priv { const char *ae_name; u32 enet_ver; @@ -51,6 +61,8 @@ struct hns_nic_priv { struct device *dev; struct hnae_handle *ae_handle; + struct hns_nic_ops ops; + /* the cb for nic to manage the ring buffer, the first half of the * array is for tx_ring and vice versa for the second half */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index a0332129970ba5..3b234176dd36bd 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -11,7 +11,6 @@ #include <linux/interrupt.h> #include <linux/module.h> #include <linux/platform_device.h> - #include "hns_enet.h" #define HNS_PHY_PAGE_MDIX 0 @@ -667,6 +666,7 @@ static void hns_nic_get_drvinfo(struct net_device *net_dev, drvinfo->bus_info[ETHTOOL_BUSINFO_LEN - 1] = '\0'; strncpy(drvinfo->fw_version, "N/A", ETHTOOL_FWVERS_LEN); + drvinfo->eedump_len = 0; } /** @@ -1187,6 +1187,95 @@ static int hns_nic_nway_reset(struct net_device *netdev) return ret; } +static u32 +hns_get_rss_key_size(struct net_device *netdev) +{ + struct hns_nic_priv *priv = netdev_priv(netdev); + struct hnae_ae_ops *ops; + u32 ret; + + if (AE_IS_VER1(priv->enet_ver)) { + netdev_err(netdev, + "RSS feature is not supported on this hardware\n"); + return -EOPNOTSUPP; + } + + ops = priv->ae_handle->dev->ops; + ret = ops->get_rss_key_size(priv->ae_handle); + + return ret; +} + +static u32 +hns_get_rss_indir_size(struct net_device *netdev) +{ + struct hns_nic_priv *priv = netdev_priv(netdev); + struct hnae_ae_ops *ops; + u32 ret; + + if (AE_IS_VER1(priv->enet_ver)) { + netdev_err(netdev, + "RSS feature is not supported on this hardware\n"); + return -EOPNOTSUPP; + } + + ops = priv->ae_handle->dev->ops; + ret = ops->get_rss_indir_size(priv->ae_handle); + + return ret; +} + +static int +hns_get_rss(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) +{ + struct hns_nic_priv *priv = netdev_priv(netdev); + struct hnae_ae_ops *ops; + int ret; + + if (AE_IS_VER1(priv->enet_ver)) { + netdev_err(netdev, + "RSS feature is not supported on this hardware\n"); + return -EOPNOTSUPP; + } + + ops = priv->ae_handle->dev->ops; + + if (!indir) + return 0; + + ret = ops->get_rss(priv->ae_handle, indir, key, hfunc); + + return 0; +} + +static int +hns_set_rss(struct net_device *netdev, const u32 *indir, const u8 *key, + const u8 hfunc) +{ + struct hns_nic_priv *priv = netdev_priv(netdev); + struct hnae_ae_ops *ops; + int ret; + + if (AE_IS_VER1(priv->enet_ver)) { + netdev_err(netdev, + "RSS feature is not supported on this hardware\n"); + return -EOPNOTSUPP; + } + + ops = priv->ae_handle->dev->ops; + + /* currently hfunc can only be Toeplitz hash */ + if (key || + (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)) + return -EOPNOTSUPP; + if (!indir) + return 0; + + ret = ops->set_rss(priv->ae_handle, indir, key, hfunc); + + return 0; +} + static struct ethtool_ops hns_ethtool_ops = { .get_drvinfo = hns_nic_get_drvinfo, .get_link = hns_nic_get_link, @@ -1206,6 +1295,10 @@ static struct ethtool_ops hns_ethtool_ops = { .get_regs_len = hns_get_regs_len, .get_regs = hns_get_regs, .nway_reset = hns_nic_nway_reset, + .get_rxfh_key_size = hns_get_rss_key_size, + .get_rxfh_indir_size = hns_get_rss_indir_size, + .get_rxfh = hns_get_rss, + .set_rxfh = hns_set_rss, }; void hns_ethtool_set_ops(struct net_device *ndev) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index 48809e5d3f7975..fa26e20445a530 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -164,14 +164,20 @@ struct fm10k_ring_container { unsigned int total_packets; /* total packets processed this int */ u16 work_limit; /* total work allowed per interrupt */ u16 itr; /* interrupt throttle rate value */ + u8 itr_scale; /* ITR adjustment scaler based on PCI speed */ u8 count; /* total number of rings in vector */ }; #define FM10K_ITR_MAX 0x0FFF /* maximum value for ITR */ #define FM10K_ITR_10K 100 /* 100us */ #define FM10K_ITR_20K 50 /* 50us */ +#define FM10K_ITR_40K 25 /* 25us */ #define FM10K_ITR_ADAPTIVE 0x8000 /* adaptive interrupt moderation flag */ +#define ITR_IS_ADAPTIVE(itr) (!!(itr & FM10K_ITR_ADAPTIVE)) + +#define FM10K_TX_ITR_DEFAULT FM10K_ITR_40K +#define FM10K_RX_ITR_DEFAULT FM10K_ITR_20K #define FM10K_ITR_ENABLE (FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR) static inline struct netdev_queue *txring_txq(const struct fm10k_ring *ring) @@ -484,7 +490,7 @@ void fm10k_netpoll(struct net_device *netdev); #endif /* Netdev */ -struct net_device *fm10k_alloc_netdev(void); +struct net_device *fm10k_alloc_netdev(const struct fm10k_info *info); int fm10k_setup_rx_resources(struct fm10k_ring *); int fm10k_setup_tx_resources(struct fm10k_ring *); void fm10k_free_rx_resources(struct fm10k_ring *); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c index 2ce0eba5e04034..109e2111bddaac 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c @@ -111,12 +111,14 @@ static const struct fm10k_stats fm10k_gstrings_pf_stats[] = { static const struct fm10k_stats fm10k_gstrings_mbx_stats[] = { FM10K_MBX_STAT("mbx_tx_busy", tx_busy), - FM10K_MBX_STAT("mbx_tx_oversized", tx_dropped), + FM10K_MBX_STAT("mbx_tx_dropped", tx_dropped), FM10K_MBX_STAT("mbx_tx_messages", tx_messages), FM10K_MBX_STAT("mbx_tx_dwords", tx_dwords), + FM10K_MBX_STAT("mbx_tx_mbmem_pulled", tx_mbmem_pulled), FM10K_MBX_STAT("mbx_rx_messages", rx_messages), FM10K_MBX_STAT("mbx_rx_dwords", rx_dwords), FM10K_MBX_STAT("mbx_rx_parse_err", rx_parse_err), + FM10K_MBX_STAT("mbx_rx_mbmem_pushed", rx_mbmem_pushed), }; #define FM10K_GLOBAL_STATS_LEN ARRAY_SIZE(fm10k_gstrings_global_stats) @@ -699,12 +701,10 @@ static int fm10k_get_coalesce(struct net_device *dev, { struct fm10k_intfc *interface = netdev_priv(dev); - ec->use_adaptive_tx_coalesce = - !!(interface->tx_itr & FM10K_ITR_ADAPTIVE); + ec->use_adaptive_tx_coalesce = ITR_IS_ADAPTIVE(interface->tx_itr); ec->tx_coalesce_usecs = interface->tx_itr & ~FM10K_ITR_ADAPTIVE; - ec->use_adaptive_rx_coalesce = - !!(interface->rx_itr & FM10K_ITR_ADAPTIVE); + ec->use_adaptive_rx_coalesce = ITR_IS_ADAPTIVE(interface->rx_itr); ec->rx_coalesce_usecs = interface->rx_itr & ~FM10K_ITR_ADAPTIVE; return 0; @@ -729,10 +729,10 @@ static int fm10k_set_coalesce(struct net_device *dev, /* set initial values for adaptive ITR */ if (ec->use_adaptive_tx_coalesce) - tx_itr = FM10K_ITR_ADAPTIVE | FM10K_ITR_10K; + tx_itr = FM10K_ITR_ADAPTIVE | FM10K_TX_ITR_DEFAULT; if (ec->use_adaptive_rx_coalesce) - rx_itr = FM10K_ITR_ADAPTIVE | FM10K_ITR_20K; + rx_itr = FM10K_ITR_ADAPTIVE | FM10K_RX_ITR_DEFAULT; /* update interface */ interface->tx_itr = tx_itr; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 746a1986690b18..1c17b6284daaa1 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -1094,11 +1094,11 @@ dma_error: netdev_tx_t fm10k_xmit_frame_ring(struct sk_buff *skb, struct fm10k_ring *tx_ring) { + u16 count = TXD_USE_COUNT(skb_headlen(skb)); struct fm10k_tx_buffer *first; - int tso; - u32 tx_flags = 0; unsigned short f; - u16 count = TXD_USE_COUNT(skb_headlen(skb)); + u32 tx_flags = 0; + int tso; /* need: 1 descriptor per page * PAGE_SIZE/FM10K_MAX_DATA_PER_TXD, * + 1 desc for skb_headlen/FM10K_MAX_DATA_PER_TXD, @@ -1363,10 +1363,10 @@ static bool fm10k_clean_tx_irq(struct fm10k_q_vector *q_vector, **/ static void fm10k_update_itr(struct fm10k_ring_container *ring_container) { - unsigned int avg_wire_size, packets; + unsigned int avg_wire_size, packets, itr_round; /* Only update ITR if we are using adaptive setting */ - if (!(ring_container->itr & FM10K_ITR_ADAPTIVE)) + if (!ITR_IS_ADAPTIVE(ring_container->itr)) goto clear_counts; packets = ring_container->total_packets; @@ -1375,18 +1375,44 @@ static void fm10k_update_itr(struct fm10k_ring_container *ring_container) avg_wire_size = ring_container->total_bytes / packets; - /* Add 24 bytes to size to account for CRC, preamble, and gap */ - avg_wire_size += 24; - - /* Don't starve jumbo frames */ - if (avg_wire_size > 3000) - avg_wire_size = 3000; + /* The following is a crude approximation of: + * wmem_default / (size + overhead) = desired_pkts_per_int + * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate + * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value + * + * Assuming wmem_default is 212992 and overhead is 640 bytes per + * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the + * formula down to + * + * (34 * (size + 24)) / (size + 640) = ITR + * + * We first do some math on the packet size and then finally bitshift + * by 8 after rounding up. We also have to account for PCIe link speed + * difference as ITR scales based on this. + */ + if (avg_wire_size <= 360) { + /* Start at 250K ints/sec and gradually drop to 77K ints/sec */ + avg_wire_size *= 8; + avg_wire_size += 376; + } else if (avg_wire_size <= 1152) { + /* 77K ints/sec to 45K ints/sec */ + avg_wire_size *= 3; + avg_wire_size += 2176; + } else if (avg_wire_size <= 1920) { + /* 45K ints/sec to 38K ints/sec */ + avg_wire_size += 4480; + } else { + /* plateau at a limit of 38K ints/sec */ + avg_wire_size = 6656; + } - /* Give a little boost to mid-size frames */ - if ((avg_wire_size > 300) && (avg_wire_size < 1200)) - avg_wire_size /= 3; - else - avg_wire_size /= 2; + /* Perform final bitshift for division after rounding up to ensure + * that the calculation will never get below a 1. The bit shift + * accounts for changes in the ITR due to PCIe link speed. + */ + itr_round = ACCESS_ONCE(ring_container->itr_scale) + 8; + avg_wire_size += (1 << itr_round) - 1; + avg_wire_size >>= itr_round; /* write back value and retain adaptive flag */ ring_container->itr = avg_wire_size | FM10K_ITR_ADAPTIVE; @@ -1604,6 +1630,7 @@ static int fm10k_alloc_q_vector(struct fm10k_intfc *interface, q_vector->tx.ring = ring; q_vector->tx.work_limit = FM10K_DEFAULT_TX_WORK; q_vector->tx.itr = interface->tx_itr; + q_vector->tx.itr_scale = interface->hw.mac.itr_scale; q_vector->tx.count = txr_count; while (txr_count) { @@ -1632,6 +1659,7 @@ static int fm10k_alloc_q_vector(struct fm10k_intfc *interface, /* save Rx ring container info */ q_vector->rx.ring = ring; q_vector->rx.itr = interface->rx_itr; + q_vector->rx.itr_scale = interface->hw.mac.itr_scale; q_vector->rx.count = rxr_count; while (rxr_count) { diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c index af09a1b272e681..2bce47490723ff 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c @@ -375,6 +375,8 @@ static void fm10k_mbx_write_copy(struct fm10k_hw *hw, if (!tail) tail++; + mbx->tx_mbmem_pulled++; + /* write message to hardware FIFO */ fm10k_write_reg(hw, mbmem + tail++, *(head++)); } while (--len && --end); @@ -459,6 +461,8 @@ static void fm10k_mbx_read_copy(struct fm10k_hw *hw, if (!head) head++; + mbx->rx_mbmem_pushed++; + /* read message from hardware FIFO */ *(tail++) = fm10k_read_reg(hw, mbmem + head++); } while (--len && --end); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h index 0419a7f0035e57..c4f18a8f176c9d 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h @@ -1,5 +1,5 @@ /* Intel Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2015 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -291,8 +291,10 @@ struct fm10k_mbx_info { u64 tx_dropped; u64 tx_messages; u64 tx_dwords; + u64 tx_mbmem_pulled; u64 rx_messages; u64 rx_dwords; + u64 rx_mbmem_pushed; u64 rx_parse_err; /* Buffer to store messages */ diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 7781e80896a60a..79f6b7dd23621e 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -1388,8 +1388,9 @@ static const struct net_device_ops fm10k_netdev_ops = { #define DEFAULT_DEBUG_LEVEL_SHIFT 3 -struct net_device *fm10k_alloc_netdev(void) +struct net_device *fm10k_alloc_netdev(const struct fm10k_info *info) { + netdev_features_t hw_features; struct fm10k_intfc *interface; struct net_device *dev; @@ -1412,27 +1413,31 @@ struct net_device *fm10k_alloc_netdev(void) NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN | - NETIF_F_GSO_UDP_TUNNEL | NETIF_F_RXHASH | NETIF_F_RXCSUM; + /* Only the PF can support VXLAN and NVGRE tunnel offloads */ + if (info->mac == fm10k_mac_pf) { + dev->hw_enc_features = NETIF_F_IP_CSUM | + NETIF_F_TSO | + NETIF_F_TSO6 | + NETIF_F_TSO_ECN | + NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_IPV6_CSUM | + NETIF_F_SG; + + dev->features |= NETIF_F_GSO_UDP_TUNNEL; + } + /* all features defined to this point should be changeable */ - dev->hw_features |= dev->features; + hw_features = dev->features; /* allow user to enable L2 forwarding acceleration */ - dev->hw_features |= NETIF_F_HW_L2FW_DOFFLOAD; + hw_features |= NETIF_F_HW_L2FW_DOFFLOAD; /* configure VLAN features */ dev->vlan_features |= dev->features; - /* configure tunnel offloads */ - dev->hw_enc_features |= NETIF_F_IP_CSUM | - NETIF_F_TSO | - NETIF_F_TSO6 | - NETIF_F_TSO_ECN | - NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_IPV6_CSUM; - /* we want to leave these both on as we cannot disable VLAN tag * insertion or stripping on the hardware since it is contained * in the FTAG and not in the frame itself. @@ -1443,5 +1448,7 @@ struct net_device *fm10k_alloc_netdev(void) dev->priv_flags |= IFF_UNICAST_FLT; + dev->hw_features |= hw_features; + return dev; } diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 74be792f3f1b7e..15d8e10c250485 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -159,13 +159,30 @@ static void fm10k_reinit(struct fm10k_intfc *interface) fm10k_mbx_free_irq(interface); + /* free interrupts */ + fm10k_clear_queueing_scheme(interface); + /* delay any future reset requests */ interface->last_reset = jiffies + (10 * HZ); /* reset and initialize the hardware so it is in a known state */ - err = hw->mac.ops.reset_hw(hw) ? : hw->mac.ops.init_hw(hw); - if (err) + err = hw->mac.ops.reset_hw(hw); + if (err) { + dev_err(&interface->pdev->dev, "reset_hw failed: %d\n", err); + goto reinit_err; + } + + err = hw->mac.ops.init_hw(hw); + if (err) { dev_err(&interface->pdev->dev, "init_hw failed: %d\n", err); + goto reinit_err; + } + + err = fm10k_init_queueing_scheme(interface); + if (err) { + dev_err(&interface->pdev->dev, "init_queueing_scheme failed: %d\n", err); + goto reinit_err; + } /* reassociate interrupts */ fm10k_mbx_request_irq(interface); @@ -193,6 +210,10 @@ static void fm10k_reinit(struct fm10k_intfc *interface) fm10k_iov_resume(interface->pdev); +reinit_err: + if (err) + netif_device_detach(netdev); + rtnl_unlock(); clear_bit(__FM10K_RESETTING, &interface->state); @@ -563,7 +584,7 @@ static void fm10k_configure_tx_ring(struct fm10k_intfc *interface, /* store tail pointer */ ring->tail = &interface->uc_addr[FM10K_TDT(reg_idx)]; - /* reset ntu and ntc to place SW in sync with hardwdare */ + /* reset ntu and ntc to place SW in sync with hardware */ ring->next_to_clean = 0; ring->next_to_use = 0; @@ -669,7 +690,7 @@ static void fm10k_configure_rx_ring(struct fm10k_intfc *interface, /* store tail pointer */ ring->tail = &interface->uc_addr[FM10K_RDT(reg_idx)]; - /* reset ntu and ntc to place SW in sync with hardwdare */ + /* reset ntu and ntc to place SW in sync with hardware */ ring->next_to_clean = 0; ring->next_to_use = 0; ring->next_to_alloc = 0; @@ -846,7 +867,7 @@ static irqreturn_t fm10k_msix_clean_rings(int __always_unused irq, void *data) struct fm10k_q_vector *q_vector = data; if (q_vector->rx.count || q_vector->tx.count) - napi_schedule(&q_vector->napi); + napi_schedule_irqoff(&q_vector->napi); return IRQ_HANDLED; } @@ -859,7 +880,8 @@ static irqreturn_t fm10k_msix_mbx_vf(int __always_unused irq, void *data) /* re-enable mailbox interrupt and indicate 20us delay */ fm10k_write_reg(hw, FM10K_VFITR(FM10K_MBX_VECTOR), - FM10K_ITR_ENABLE | FM10K_MBX_INT_DELAY); + FM10K_ITR_ENABLE | (FM10K_MBX_INT_DELAY >> + hw->mac.itr_scale)); /* service upstream mailbox */ if (fm10k_mbx_trylock(interface)) { @@ -1090,7 +1112,8 @@ static irqreturn_t fm10k_msix_mbx_pf(int __always_unused irq, void *data) /* re-enable mailbox interrupt and indicate 20us delay */ fm10k_write_reg(hw, FM10K_ITR(FM10K_MBX_VECTOR), - FM10K_ITR_ENABLE | FM10K_MBX_INT_DELAY); + FM10K_ITR_ENABLE | (FM10K_MBX_INT_DELAY >> + hw->mac.itr_scale)); return IRQ_HANDLED; } @@ -1684,7 +1707,13 @@ static int fm10k_sw_init(struct fm10k_intfc *interface, interface->last_reset = jiffies + (10 * HZ); /* reset and initialize the hardware so it is in a known state */ - err = hw->mac.ops.reset_hw(hw) ? : hw->mac.ops.init_hw(hw); + err = hw->mac.ops.reset_hw(hw); + if (err) { + dev_err(&pdev->dev, "reset_hw failed: %d\n", err); + return err; + } + + err = hw->mac.ops.init_hw(hw); if (err) { dev_err(&pdev->dev, "init_hw failed: %d\n", err); return err; @@ -1722,13 +1751,6 @@ static int fm10k_sw_init(struct fm10k_intfc *interface, pci_resource_len(pdev, 4)); hw->sw_addr = interface->sw_addr; - /* Only the PF can support VXLAN and NVGRE offloads */ - if (hw->mac.type != fm10k_mac_pf) { - netdev->hw_enc_features = 0; - netdev->features &= ~NETIF_F_GSO_UDP_TUNNEL; - netdev->hw_features &= ~NETIF_F_GSO_UDP_TUNNEL; - } - /* initialize DCBNL interface */ fm10k_dcbnl_set_ops(netdev); @@ -1749,8 +1771,8 @@ static int fm10k_sw_init(struct fm10k_intfc *interface, interface->rx_ring_count = FM10K_DEFAULT_RXD; /* set default interrupt moderation */ - interface->tx_itr = FM10K_ITR_10K; - interface->rx_itr = FM10K_ITR_ADAPTIVE | FM10K_ITR_20K; + interface->tx_itr = FM10K_TX_ITR_DEFAULT; + interface->rx_itr = FM10K_ITR_ADAPTIVE | FM10K_RX_ITR_DEFAULT; /* initialize vxlan_port list */ INIT_LIST_HEAD(&interface->vxlan_port); @@ -1894,7 +1916,7 @@ static int fm10k_probe(struct pci_dev *pdev, pci_set_master(pdev); pci_save_state(pdev); - netdev = fm10k_alloc_netdev(); + netdev = fm10k_alloc_netdev(fm10k_info_tbl[ent->driver_data]); if (!netdev) { err = -ENOMEM; goto err_alloc_netdev; @@ -2071,8 +2093,10 @@ static int fm10k_resume(struct pci_dev *pdev) /* reset hardware to known state */ err = hw->mac.ops.init_hw(&interface->hw); - if (err) + if (err) { + dev_err(&pdev->dev, "init_hw failed: %d\n", err); return err; + } /* reset statistics starting values */ hw->mac.ops.rebind_hw_stats(hw, &interface->stats); @@ -2185,6 +2209,9 @@ static pci_ers_result_t fm10k_io_error_detected(struct pci_dev *pdev, if (netif_running(netdev)) fm10k_close(netdev); + /* free interrupts */ + fm10k_clear_queueing_scheme(interface); + fm10k_mbx_free_irq(interface); pci_disable_device(pdev); @@ -2248,11 +2275,21 @@ static void fm10k_io_resume(struct pci_dev *pdev) int err = 0; /* reset hardware to known state */ - hw->mac.ops.init_hw(&interface->hw); + err = hw->mac.ops.init_hw(&interface->hw); + if (err) { + dev_err(&pdev->dev, "init_hw failed: %d\n", err); + return; + } /* reset statistics starting values */ hw->mac.ops.rebind_hw_stats(hw, &interface->stats); + err = fm10k_init_queueing_scheme(interface); + if (err) { + dev_err(&interface->pdev->dev, "init_queueing_scheme failed: %d\n", err); + return; + } + /* reassociate interrupts */ fm10k_mbx_request_irq(interface); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c index 8c0bdc4e4edd44..8b9b6ba5b92bfb 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c @@ -1,5 +1,5 @@ /* Intel Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2015 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -150,19 +150,26 @@ static s32 fm10k_init_hw_pf(struct fm10k_hw *hw) FM10K_TPH_RXCTRL_HDR_WROEN); } - /* set max hold interval to align with 1.024 usec in all modes */ + /* set max hold interval to align with 1.024 usec in all modes and + * store ITR scale + */ switch (hw->bus.speed) { case fm10k_bus_speed_2500: dma_ctrl = FM10K_DMA_CTRL_MAX_HOLD_1US_GEN1; + hw->mac.itr_scale = FM10K_TDLEN_ITR_SCALE_GEN1; break; case fm10k_bus_speed_5000: dma_ctrl = FM10K_DMA_CTRL_MAX_HOLD_1US_GEN2; + hw->mac.itr_scale = FM10K_TDLEN_ITR_SCALE_GEN2; break; case fm10k_bus_speed_8000: dma_ctrl = FM10K_DMA_CTRL_MAX_HOLD_1US_GEN3; + hw->mac.itr_scale = FM10K_TDLEN_ITR_SCALE_GEN3; break; default: dma_ctrl = 0; + /* just in case, assume Gen3 ITR scale */ + hw->mac.itr_scale = FM10K_TDLEN_ITR_SCALE_GEN3; break; } @@ -334,8 +341,8 @@ static s32 fm10k_update_xc_addr_pf(struct fm10k_hw *hw, u16 glort, ((u32)mac[3] << 16) | ((u32)mac[4] << 8) | ((u32)mac[5])); - mac_update.mac_upper = cpu_to_le16(((u32)mac[0] << 8) | - ((u32)mac[1])); + mac_update.mac_upper = cpu_to_le16(((u16)mac[0] << 8) | + ((u16)mac[1])); mac_update.vlan = cpu_to_le16(vid); mac_update.glort = cpu_to_le16(glort); mac_update.action = add ? 0 : 1; @@ -903,6 +910,13 @@ static s32 fm10k_iov_assign_default_mac_vlan_pf(struct fm10k_hw *hw, fm10k_write_reg(hw, FM10K_TDBAL(vf_q_idx), tdbal); fm10k_write_reg(hw, FM10K_TDBAH(vf_q_idx), tdbah); + /* Provide the VF the ITR scale, using software-defined fields in TDLEN + * to pass the information during VF initialization. See definition of + * FM10K_TDLEN_ITR_SCALE_SHIFT for more details. + */ + fm10k_write_reg(hw, FM10K_TDLEN(vf_q_idx), hw->mac.itr_scale << + FM10K_TDLEN_ITR_SCALE_SHIFT); + err_out: /* configure Queue control register */ txqctl = ((u32)vf_vid << FM10K_TXQCTL_VID_SHIFT) & @@ -1035,6 +1049,12 @@ static s32 fm10k_iov_reset_resources_pf(struct fm10k_hw *hw, for (i = queues_per_pool; i--;) { fm10k_write_reg(hw, FM10K_TDBAL(vf_q_idx + i), tdbal); fm10k_write_reg(hw, FM10K_TDBAH(vf_q_idx + i), tdbah); + /* See definition of FM10K_TDLEN_ITR_SCALE_SHIFT for an + * explanation of how TDLEN is used. + */ + fm10k_write_reg(hw, FM10K_TDLEN(vf_q_idx + i), + hw->mac.itr_scale << + FM10K_TDLEN_ITR_SCALE_SHIFT); fm10k_write_reg(hw, FM10K_TQMAP(qmap_idx + i), vf_q_idx + i); fm10k_write_reg(hw, FM10K_RQMAP(qmap_idx + i), vf_q_idx + i); } @@ -1212,7 +1232,7 @@ s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *hw, u32 **results, set = !(vid & FM10K_VLAN_CLEAR); vid &= ~FM10K_VLAN_CLEAR; - err = fm10k_iov_select_vid(vf_info, vid); + err = fm10k_iov_select_vid(vf_info, (u16)vid); if (err < 0) return err; else @@ -1242,7 +1262,7 @@ s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *hw, u32 **results, if (err < 0) return err; else - vlan = err; + vlan = (u16)err; /* notify switch of request for new unicast address */ err = hw->mac.ops.update_uc_addr(hw, vf_info->glort, @@ -1268,7 +1288,7 @@ s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *hw, u32 **results, if (err < 0) return err; else - vlan = err; + vlan = (u16)err; /* notify switch of request for new multicast address */ err = hw->mac.ops.update_mc_addr(hw, vf_info->glort, diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_type.h b/drivers/net/ethernet/intel/fm10k/fm10k_type.h index 35afd711d14460..02727250ce1f59 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_type.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_type.h @@ -1,5 +1,5 @@ /* Intel Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2015 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -272,6 +272,20 @@ struct fm10k_hw; #define FM10K_TDBAL(_n) ((0x40 * (_n)) + 0x8000) #define FM10K_TDBAH(_n) ((0x40 * (_n)) + 0x8001) #define FM10K_TDLEN(_n) ((0x40 * (_n)) + 0x8002) +/* When fist initialized, VFs need to know the Interrupt Throttle Rate (ITR) + * scale which is based on the PCIe speed but the speed information in the PCI + * configuration space may not be accurate. The PF already knows the ITR scale + * but there is no defined method to pass that information from the PF to the + * VF. This is accomplished during VF initialization by temporarily co-opting + * the yet-to-be-used TDLEN register to have the PF store the ITR shift for + * the VF to retrieve before the VF needs to use the TDLEN register for its + * intended purpose, i.e. before the Tx resources are allocated. + */ +#define FM10K_TDLEN_ITR_SCALE_SHIFT 9 +#define FM10K_TDLEN_ITR_SCALE_MASK 0x00000E00 +#define FM10K_TDLEN_ITR_SCALE_GEN1 2 +#define FM10K_TDLEN_ITR_SCALE_GEN2 1 +#define FM10K_TDLEN_ITR_SCALE_GEN3 0 #define FM10K_TPH_TXCTRL(_n) ((0x40 * (_n)) + 0x8003) #define FM10K_TPH_TXCTRL_DESC_TPHEN 0x00000020 #define FM10K_TPH_TXCTRL_DESC_RROEN 0x00000200 @@ -560,6 +574,7 @@ struct fm10k_mac_info { bool get_host_state; bool tx_ready; u32 dglort_map; + u8 itr_scale; }; struct fm10k_swapi_table_info { diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c index 3a18ef1cc01786..2af697df5abc1b 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c @@ -28,7 +28,7 @@ static s32 fm10k_stop_hw_vf(struct fm10k_hw *hw) { u8 *perm_addr = hw->mac.perm_addr; - u32 bal = 0, bah = 0; + u32 bal = 0, bah = 0, tdlen; s32 err; u16 i; @@ -48,6 +48,9 @@ static s32 fm10k_stop_hw_vf(struct fm10k_hw *hw) ((u32)perm_addr[2]); } + /* restore default itr_scale for next VF initialization */ + tdlen = hw->mac.itr_scale << FM10K_TDLEN_ITR_SCALE_SHIFT; + /* The queues have already been disabled so we just need to * update their base address registers */ @@ -56,6 +59,12 @@ static s32 fm10k_stop_hw_vf(struct fm10k_hw *hw) fm10k_write_reg(hw, FM10K_TDBAH(i), bah); fm10k_write_reg(hw, FM10K_RDBAL(i), bal); fm10k_write_reg(hw, FM10K_RDBAH(i), bah); + /* Restore ITR scale in software-defined mechanism in TDLEN + * for next VF initialization. See definition of + * FM10K_TDLEN_ITR_SCALE_SHIFT for more details on the use of + * TDLEN here. + */ + fm10k_write_reg(hw, FM10K_TDLEN(i), tdlen); } return 0; @@ -105,8 +114,10 @@ static s32 fm10k_init_hw_vf(struct fm10k_hw *hw) /* verify we have at least 1 queue */ if (!~fm10k_read_reg(hw, FM10K_TXQCTL(0)) || - !~fm10k_read_reg(hw, FM10K_RXQCTL(0))) - return FM10K_ERR_NO_RESOURCES; + !~fm10k_read_reg(hw, FM10K_RXQCTL(0))) { + err = FM10K_ERR_NO_RESOURCES; + goto reset_max_queues; + } /* determine how many queues we have */ for (i = 1; tqdloc0 && (i < FM10K_MAX_QUEUES_POOL); i++) { @@ -124,16 +135,28 @@ static s32 fm10k_init_hw_vf(struct fm10k_hw *hw) /* shut down queues we own and reset DMA configuration */ err = fm10k_disable_queues_generic(hw, i); if (err) - return err; + goto reset_max_queues; /* record maximum queue count */ hw->mac.max_queues = i; - /* fetch default VLAN */ + /* fetch default VLAN and ITR scale */ hw->mac.default_vid = (fm10k_read_reg(hw, FM10K_TXQCTL(0)) & FM10K_TXQCTL_VID_MASK) >> FM10K_TXQCTL_VID_SHIFT; + /* Read the ITR scale from TDLEN. See the definition of + * FM10K_TDLEN_ITR_SCALE_SHIFT for more information about how TDLEN is + * used here. + */ + hw->mac.itr_scale = (fm10k_read_reg(hw, FM10K_TDLEN(0)) & + FM10K_TDLEN_ITR_SCALE_MASK) >> + FM10K_TDLEN_ITR_SCALE_SHIFT; return 0; + +reset_max_queues: + hw->mac.max_queues = 0; + + return err; } /* This structure defines the attibutes to be parsed below */ diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 4dd3e26129b446..bd6d9c002accac 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -42,7 +42,6 @@ #include <linux/string.h> #include <linux/in.h> #include <linux/ip.h> -#include <linux/tcp.h> #include <linux/sctp.h> #include <linux/pkt_sched.h> #include <linux/ipv6.h> @@ -104,6 +103,7 @@ #define I40E_PRIV_FLAGS_LINKPOLL_FLAG BIT(1) #define I40E_PRIV_FLAGS_FD_ATR BIT(2) #define I40E_PRIV_FLAGS_VEB_STATS BIT(3) +#define I40E_PRIV_FLAGS_PS BIT(4) #define I40E_NVM_VERSION_LO_SHIFT 0 #define I40E_NVM_VERSION_LO_MASK (0xff << I40E_NVM_VERSION_LO_SHIFT) @@ -187,6 +187,7 @@ struct i40e_lump_tracking { #define I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR (I40E_FDIR_BUFFER_HEAD_ROOM * 4) #define I40E_HKEY_ARRAY_SIZE ((I40E_PFQF_HKEY_MAX_INDEX + 1) * 4) +#define I40E_HLUT_ARRAY_SIZE ((I40E_PFQF_HLUT_MAX_INDEX + 1) * 4) enum i40e_fd_stat_idx { I40E_FD_STAT_ATR, @@ -265,7 +266,7 @@ struct i40e_pf { u16 num_lan_qps; /* num lan queues this PF has set up */ u16 num_lan_msix; /* num queue vectors for the base PF vsi */ int queues_left; /* queues left unclaimed */ - u16 rss_size; /* num queues in the RSS array */ + u16 alloc_rss_size; /* allocated RSS queues */ u16 rss_size_max; /* HW defined max RSS queues */ u16 fdir_pf_filter_count; /* num of guaranteed filters for this PF */ u16 num_alloc_vsi; /* num VSIs this driver supports */ @@ -412,7 +413,7 @@ struct i40e_pf { u32 rx_hwtstamp_cleared; bool ptp_tx; bool ptp_rx; - u16 rss_table_size; + u16 rss_table_size; /* HW RSS table size */ /* These are only valid in NPAR modes */ u32 npar_max_bw; u32 npar_min_bw; @@ -487,6 +488,7 @@ struct i40e_vsi { u32 tx_restart; u32 tx_busy; u64 tx_linearize; + u64 tx_force_wb; u32 rx_buf_failed; u32 rx_page_failed; @@ -504,8 +506,10 @@ struct i40e_vsi { u16 tx_itr_setting; u16 int_rate_limit; /* value in usecs */ - u16 rss_table_size; - u16 rss_size; + u16 rss_table_size; /* HW RSS table size */ + u16 rss_size; /* Allocated RSS queues */ + u8 *rss_hkey_user; /* User configured hash keys */ + u8 *rss_lut_user; /* User configured lookup table entries */ u16 max_frame; u16 rx_hdr_len; @@ -575,6 +579,9 @@ struct i40e_q_vector { u8 num_ringpairs; /* total number of ring pairs in vector */ +#define I40E_Q_VECTOR_HUNG_DETECT 0 /* Bit Index for hung detection logic */ + unsigned long hung_detected; /* Set/Reset for hung_detection logic */ + cpumask_t affinity_mask; struct rcu_head rcu; /* to avoid race with update stats on free */ char name[I40E_INT_NAME_STR_LEN]; @@ -602,8 +609,8 @@ static inline char *i40e_nvm_version_str(struct i40e_hw *hw) full_ver = hw->nvm.oem_ver; ver = (u8)(full_ver >> I40E_OEM_VER_SHIFT); - build = (u16)((full_ver >> I40E_OEM_VER_BUILD_SHIFT) - & I40E_OEM_VER_BUILD_MASK); + build = (u16)((full_ver >> I40E_OEM_VER_BUILD_SHIFT) & + I40E_OEM_VER_BUILD_MASK); patch = (u8)(full_ver & I40E_OEM_VER_PATCH_MASK); snprintf(buf, sizeof(buf), @@ -668,6 +675,8 @@ extern const char i40e_driver_name[]; extern const char i40e_driver_version_str[]; void i40e_do_reset_safe(struct i40e_pf *pf, u32 reset_flags); void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags); +int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); +int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id); void i40e_update_stats(struct i40e_vsi *vsi); void i40e_update_eth_stats(struct i40e_vsi *vsi); @@ -691,7 +700,7 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi, bool is_vf, bool is_netdev); void i40e_del_filter(struct i40e_vsi *vsi, u8 *macaddr, s16 vlan, bool is_vf, bool is_netdev); -int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl); +int i40e_sync_vsi_filters(struct i40e_vsi *vsi); struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, u16 uplink, u32 param1); int i40e_vsi_release(struct i40e_vsi *vsi); @@ -709,7 +718,7 @@ struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 flags, u16 uplink_seid, void i40e_veb_release(struct i40e_veb *veb); int i40e_veb_config_tc(struct i40e_veb *veb, u8 enabled_tc); -i40e_status i40e_vsi_add_pvid(struct i40e_vsi *vsi, u16 vid); +int i40e_vsi_add_pvid(struct i40e_vsi *vsi, u16 vid); void i40e_vsi_remove_pvid(struct i40e_vsi *vsi); void i40e_vsi_reset_stats(struct i40e_vsi *vsi); void i40e_pf_reset_stats(struct i40e_pf *pf); diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 6584b6cd73fd30..61a49793594120 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -2403,4 +2403,4 @@ struct i40e_aqc_debug_modify_internals { I40E_CHECK_CMD_LENGTH(i40e_aqc_debug_modify_internals); -#endif +#endif /* _I40E_ADMINQ_CMD_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index d1a91c8178d6dc..10744a698d6f9d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -1138,7 +1138,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp, spin_lock_bh(&vsi->mac_filter_list_lock); f = i40e_add_filter(vsi, ma, vlan, false, false); spin_unlock_bh(&vsi->mac_filter_list_lock); - ret = i40e_sync_vsi_filters(vsi, true); + ret = i40e_sync_vsi_filters(vsi); if (f && !ret) dev_info(&pf->pdev->dev, "add macaddr: %pM vlan=%d added to VSI %d\n", @@ -1177,7 +1177,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp, spin_lock_bh(&vsi->mac_filter_list_lock); i40e_del_filter(vsi, ma, vlan, false, false); spin_unlock_bh(&vsi->mac_filter_list_lock); - ret = i40e_sync_vsi_filters(vsi, true); + ret = i40e_sync_vsi_filters(vsi); if (!ret) dev_info(&pf->pdev->dev, "del macaddr: %pM vlan=%d removed from VSI %d\n", diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 3f385ffe420f71..29d5833e24a3ff 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -88,6 +88,7 @@ static const struct i40e_stats i40e_gstrings_misc_stats[] = { I40E_VSI_STAT("tx_broadcast", eth_stats.tx_broadcast), I40E_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol), I40E_VSI_STAT("tx_linearize", tx_linearize), + I40E_VSI_STAT("tx_force_wb", tx_force_wb), }; /* These PF_STATs might look like duplicates of some NETDEV_STATs, @@ -230,6 +231,7 @@ static const char i40e_priv_flags_strings[][ETH_GSTRING_LEN] = { "LinkPolling", "flow-director-atr", "veb-stats", + "packet-split", }; #define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_priv_flags_strings) @@ -2110,7 +2112,7 @@ static int i40e_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, switch (cmd->cmd) { case ETHTOOL_GRXRINGS: - cmd->data = vsi->alloc_queue_pairs; + cmd->data = vsi->num_queue_pairs; ret = 0; break; case ETHTOOL_GRXFH: @@ -2583,7 +2585,6 @@ static int i40e_set_channels(struct net_device *dev, return -EINVAL; } -#define I40E_HLUT_ARRAY_SIZE ((I40E_PFQF_HLUT_MAX_INDEX + 1) * 4) /** * i40e_get_rxfh_key_size - get the RSS hash key size * @netdev: network interface device structure @@ -2611,10 +2612,9 @@ static int i40e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; - struct i40e_pf *pf = vsi->back; - struct i40e_hw *hw = &pf->hw; - u32 reg_val; - int i, j; + u8 *lut, *seed = NULL; + int ret; + u16 i; if (hfunc) *hfunc = ETH_RSS_HASH_TOP; @@ -2622,24 +2622,20 @@ static int i40e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, if (!indir) return 0; - for (i = 0, j = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++) { - reg_val = rd32(hw, I40E_PFQF_HLUT(i)); - indir[j++] = reg_val & 0xff; - indir[j++] = (reg_val >> 8) & 0xff; - indir[j++] = (reg_val >> 16) & 0xff; - indir[j++] = (reg_val >> 24) & 0xff; - } + seed = key; + lut = kzalloc(I40E_HLUT_ARRAY_SIZE, GFP_KERNEL); + if (!lut) + return -ENOMEM; + ret = i40e_get_rss(vsi, seed, lut, I40E_HLUT_ARRAY_SIZE); + if (ret) + goto out; + for (i = 0; i < I40E_HLUT_ARRAY_SIZE; i++) + indir[i] = (u32)(lut[i]); - if (key) { - for (i = 0, j = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) { - reg_val = rd32(hw, I40E_PFQF_HKEY(i)); - key[j++] = (u8)(reg_val & 0xff); - key[j++] = (u8)((reg_val >> 8) & 0xff); - key[j++] = (u8)((reg_val >> 16) & 0xff); - key[j++] = (u8)((reg_val >> 24) & 0xff); - } - } - return 0; +out: + kfree(lut); + + return ret; } /** @@ -2656,10 +2652,8 @@ static int i40e_set_rxfh(struct net_device *netdev, const u32 *indir, { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; - struct i40e_pf *pf = vsi->back; - struct i40e_hw *hw = &pf->hw; - u32 reg_val; - int i, j; + u8 *seed = NULL; + u16 i; if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; @@ -2667,24 +2661,28 @@ static int i40e_set_rxfh(struct net_device *netdev, const u32 *indir, if (!indir) return 0; - for (i = 0, j = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++) { - reg_val = indir[j++]; - reg_val |= indir[j++] << 8; - reg_val |= indir[j++] << 16; - reg_val |= indir[j++] << 24; - wr32(hw, I40E_PFQF_HLUT(i), reg_val); - } - if (key) { - for (i = 0, j = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) { - reg_val = key[j++]; - reg_val |= key[j++] << 8; - reg_val |= key[j++] << 16; - reg_val |= key[j++] << 24; - wr32(hw, I40E_PFQF_HKEY(i), reg_val); + if (!vsi->rss_hkey_user) { + vsi->rss_hkey_user = kzalloc(I40E_HKEY_ARRAY_SIZE, + GFP_KERNEL); + if (!vsi->rss_hkey_user) + return -ENOMEM; } + memcpy(vsi->rss_hkey_user, key, I40E_HKEY_ARRAY_SIZE); + seed = vsi->rss_hkey_user; } - return 0; + if (!vsi->rss_lut_user) { + vsi->rss_lut_user = kzalloc(I40E_HLUT_ARRAY_SIZE, GFP_KERNEL); + if (!vsi->rss_lut_user) + return -ENOMEM; + } + + /* Each 32 bits pointed by 'indir' is stored with a lut entry */ + for (i = 0; i < I40E_HLUT_ARRAY_SIZE; i++) + vsi->rss_lut_user[i] = (u8)(indir[i]); + + return i40e_config_rss(vsi, seed, vsi->rss_lut_user, + I40E_HLUT_ARRAY_SIZE); } /** @@ -2712,6 +2710,8 @@ static u32 i40e_get_priv_flags(struct net_device *dev) I40E_PRIV_FLAGS_FD_ATR : 0; ret_flags |= pf->flags & I40E_FLAG_VEB_STATS_ENABLED ? I40E_PRIV_FLAGS_VEB_STATS : 0; + ret_flags |= pf->flags & I40E_FLAG_RX_PS_ENABLED ? + I40E_PRIV_FLAGS_PS : 0; return ret_flags; } @@ -2726,6 +2726,26 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) struct i40e_netdev_priv *np = netdev_priv(dev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; + bool reset_required = false; + + /* NOTE: MFP is not settable */ + + /* allow the user to control the method of receive + * buffer DMA, whether the packet is split at header + * boundaries into two separate buffers. In some cases + * one routine or the other will perform better. + */ + if ((flags & I40E_PRIV_FLAGS_PS) && + !(pf->flags & I40E_FLAG_RX_PS_ENABLED)) { + pf->flags |= I40E_FLAG_RX_PS_ENABLED; + pf->flags &= ~I40E_FLAG_RX_1BUF_ENABLED; + reset_required = true; + } else if (!(flags & I40E_PRIV_FLAGS_PS) && + (pf->flags & I40E_FLAG_RX_PS_ENABLED)) { + pf->flags &= ~I40E_FLAG_RX_PS_ENABLED; + pf->flags |= I40E_FLAG_RX_1BUF_ENABLED; + reset_required = true; + } if (flags & I40E_PRIV_FLAGS_LINKPOLL_FLAG) pf->flags |= I40E_FLAG_LINK_POLLING_ENABLED; @@ -2748,6 +2768,10 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) else pf->flags &= ~I40E_FLAG_VEB_STATS_ENABLED; + /* if needed, issue reset to cause things to take effect */ + if (reset_required) + i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED)); + return 0; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c index fe5d9bf3ed6d80..579a46ca82dfa3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c +++ b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c @@ -1544,8 +1544,6 @@ void i40e_fcoe_vsi_setup(struct i40e_pf *pf) if (!(pf->flags & I40E_FLAG_FCOE_ENABLED)) return; - BUG_ON(!pf->vsi[pf->lan_vsi]); - for (i = 0; i < pf->num_alloc_vsi; i++) { vsi = pf->vsi[i]; if (vsi && vsi->type == I40E_VSI_FCOE) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index b825f978d441d1..2b1b655a3b5251 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -38,8 +38,8 @@ static const char i40e_driver_string[] = #define DRV_KERN "-k" #define DRV_VERSION_MAJOR 1 -#define DRV_VERSION_MINOR 3 -#define DRV_VERSION_BUILD 46 +#define DRV_VERSION_MINOR 4 +#define DRV_VERSION_BUILD 7 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) DRV_KERN @@ -55,6 +55,8 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit); static int i40e_setup_misc_vector(struct i40e_pf *pf); static void i40e_determine_queue_usage(struct i40e_pf *pf); static int i40e_setup_pf_filter_control(struct i40e_pf *pf); +static void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut, + u16 rss_table_size, u16 rss_size); static void i40e_fdir_sb_setup(struct i40e_pf *pf); static int i40e_veb_get_bw_info(struct i40e_veb *veb); @@ -790,75 +792,6 @@ static void i40e_update_fcoe_stats(struct i40e_vsi *vsi) #endif /** - * i40e_update_link_xoff_rx - Update XOFF received in link flow control mode - * @pf: the corresponding PF - * - * Update the Rx XOFF counter (PAUSE frames) in link flow control mode - **/ -static void i40e_update_link_xoff_rx(struct i40e_pf *pf) -{ - struct i40e_hw_port_stats *osd = &pf->stats_offsets; - struct i40e_hw_port_stats *nsd = &pf->stats; - struct i40e_hw *hw = &pf->hw; - u64 xoff = 0; - - if ((hw->fc.current_mode != I40E_FC_FULL) && - (hw->fc.current_mode != I40E_FC_RX_PAUSE)) - return; - - xoff = nsd->link_xoff_rx; - i40e_stat_update32(hw, I40E_GLPRT_LXOFFRXC(hw->port), - pf->stat_offsets_loaded, - &osd->link_xoff_rx, &nsd->link_xoff_rx); - - /* No new LFC xoff rx */ - if (!(nsd->link_xoff_rx - xoff)) - return; - -} - -/** - * i40e_update_prio_xoff_rx - Update XOFF received in PFC mode - * @pf: the corresponding PF - * - * Update the Rx XOFF counter (PAUSE frames) in PFC mode - **/ -static void i40e_update_prio_xoff_rx(struct i40e_pf *pf) -{ - struct i40e_hw_port_stats *osd = &pf->stats_offsets; - struct i40e_hw_port_stats *nsd = &pf->stats; - bool xoff[I40E_MAX_TRAFFIC_CLASS] = {false}; - struct i40e_dcbx_config *dcb_cfg; - struct i40e_hw *hw = &pf->hw; - u16 i; - u8 tc; - - dcb_cfg = &hw->local_dcbx_config; - - /* Collect Link XOFF stats when PFC is disabled */ - if (!dcb_cfg->pfc.pfcenable) { - i40e_update_link_xoff_rx(pf); - return; - } - - for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) { - u64 prio_xoff = nsd->priority_xoff_rx[i]; - - i40e_stat_update32(hw, I40E_GLPRT_PXOFFRXC(hw->port, i), - pf->stat_offsets_loaded, - &osd->priority_xoff_rx[i], - &nsd->priority_xoff_rx[i]); - - /* No new PFC xoff rx */ - if (!(nsd->priority_xoff_rx[i] - prio_xoff)) - continue; - /* Get the TC for given priority */ - tc = dcb_cfg->etscfg.prioritytable[i]; - xoff[tc] = true; - } -} - -/** * i40e_update_vsi_stats - Update the vsi statistics counters. * @vsi: the VSI to be updated * @@ -881,6 +814,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) u64 bytes, packets; unsigned int start; u64 tx_linearize; + u64 tx_force_wb; u64 rx_p, rx_b; u64 tx_p, tx_b; u16 q; @@ -899,7 +833,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) */ rx_b = rx_p = 0; tx_b = tx_p = 0; - tx_restart = tx_busy = tx_linearize = 0; + tx_restart = tx_busy = tx_linearize = tx_force_wb = 0; rx_page = 0; rx_buf = 0; rcu_read_lock(); @@ -917,6 +851,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) tx_restart += p->tx_stats.restart_queue; tx_busy += p->tx_stats.tx_busy; tx_linearize += p->tx_stats.tx_linearize; + tx_force_wb += p->tx_stats.tx_force_wb; /* Rx queue is part of the same block as Tx queue */ p = &p[1]; @@ -934,6 +869,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) vsi->tx_restart = tx_restart; vsi->tx_busy = tx_busy; vsi->tx_linearize = tx_linearize; + vsi->tx_force_wb = tx_force_wb; vsi->rx_page_failed = rx_page; vsi->rx_buf_failed = rx_buf; @@ -1049,12 +985,18 @@ static void i40e_update_pf_stats(struct i40e_pf *pf) i40e_stat_update32(hw, I40E_GLPRT_LXONTXC(hw->port), pf->stat_offsets_loaded, &osd->link_xon_tx, &nsd->link_xon_tx); - i40e_update_prio_xoff_rx(pf); /* handles I40E_GLPRT_LXOFFRXC */ + i40e_stat_update32(hw, I40E_GLPRT_LXOFFRXC(hw->port), + pf->stat_offsets_loaded, + &osd->link_xoff_rx, &nsd->link_xoff_rx); i40e_stat_update32(hw, I40E_GLPRT_LXOFFTXC(hw->port), pf->stat_offsets_loaded, &osd->link_xoff_tx, &nsd->link_xoff_tx); for (i = 0; i < 8; i++) { + i40e_stat_update32(hw, I40E_GLPRT_PXOFFRXC(hw->port, i), + pf->stat_offsets_loaded, + &osd->priority_xoff_rx[i], + &nsd->priority_xoff_rx[i]); i40e_stat_update32(hw, I40E_GLPRT_PXONRXC(hw->port, i), pf->stat_offsets_loaded, &osd->priority_xon_rx[i], @@ -1547,10 +1489,9 @@ static int i40e_set_mac(struct net_device *netdev, void *p) spin_unlock_bh(&vsi->mac_filter_list_lock); } - i40e_sync_vsi_filters(vsi, false); ether_addr_copy(netdev->dev_addr, addr->sa_data); - return 0; + return i40e_sync_vsi_filters(vsi); } /** @@ -1625,7 +1566,8 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, switch (vsi->type) { case I40E_VSI_MAIN: - qcount = min_t(int, pf->rss_size, num_tc_qps); + qcount = min_t(int, pf->alloc_rss_size, + num_tc_qps); break; #ifdef I40E_FCOE case I40E_VSI_FCOE: @@ -1851,13 +1793,12 @@ static void i40e_cleanup_add_list(struct list_head *add_list) /** * i40e_sync_vsi_filters - Update the VSI filter list to the HW * @vsi: ptr to the VSI - * @grab_rtnl: whether RTNL needs to be grabbed * * Push any outstanding VSI filter changes through the AdminQ. * * Returns 0 or error value **/ -int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) +int i40e_sync_vsi_filters(struct i40e_vsi *vsi) { struct list_head tmp_del_list, tmp_add_list; struct i40e_mac_filter *f, *ftmp, *fclone; @@ -1865,8 +1806,9 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) bool add_happened = false; int filter_list_len = 0; u32 changed_flags = 0; + i40e_status aq_ret = 0; bool err_cond = false; - i40e_status ret = 0; + int retval = 0; struct i40e_pf *pf; int num_add = 0; int num_del = 0; @@ -1929,8 +1871,11 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) } spin_unlock_bh(&vsi->mac_filter_list_lock); - if (err_cond) + if (err_cond) { i40e_cleanup_add_list(&tmp_add_list); + retval = -ENOMEM; + goto out; + } } /* Now process 'del_list' outside the lock */ @@ -1948,7 +1893,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) i40e_undo_del_filter_entries(vsi, &tmp_del_list); i40e_undo_add_filter_entries(vsi); spin_unlock_bh(&vsi->mac_filter_list_lock); - return -ENOMEM; + retval = -ENOMEM; + goto out; } list_for_each_entry_safe(f, ftmp, &tmp_del_list, list) { @@ -1966,18 +1912,22 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) /* flush a full buffer */ if (num_del == filter_list_len) { - ret = i40e_aq_remove_macvlan(&pf->hw, - vsi->seid, del_list, num_del, - NULL); + aq_ret = i40e_aq_remove_macvlan(&pf->hw, + vsi->seid, + del_list, + num_del, + NULL); aq_err = pf->hw.aq.asq_last_status; num_del = 0; memset(del_list, 0, sizeof(*del_list)); - if (ret && aq_err != I40E_AQ_RC_ENOENT) + if (aq_ret && aq_err != I40E_AQ_RC_ENOENT) { + retval = -EIO; dev_err(&pf->pdev->dev, "ignoring delete macvlan error, err %s, aq_err %s while flushing a full buffer\n", - i40e_stat_str(&pf->hw, ret), + i40e_stat_str(&pf->hw, aq_ret), i40e_aq_str(&pf->hw, aq_err)); + } } /* Release memory for MAC filter entries which were * synced up with HW. @@ -1987,15 +1937,16 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) } if (num_del) { - ret = i40e_aq_remove_macvlan(&pf->hw, vsi->seid, - del_list, num_del, NULL); + aq_ret = i40e_aq_remove_macvlan(&pf->hw, vsi->seid, + del_list, num_del, + NULL); aq_err = pf->hw.aq.asq_last_status; num_del = 0; - if (ret && aq_err != I40E_AQ_RC_ENOENT) + if (aq_ret && aq_err != I40E_AQ_RC_ENOENT) dev_info(&pf->pdev->dev, "ignoring delete macvlan error, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), + i40e_stat_str(&pf->hw, aq_ret), i40e_aq_str(&pf->hw, aq_err)); } @@ -2019,7 +1970,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) spin_lock_bh(&vsi->mac_filter_list_lock); i40e_undo_add_filter_entries(vsi); spin_unlock_bh(&vsi->mac_filter_list_lock); - return -ENOMEM; + retval = -ENOMEM; + goto out; } list_for_each_entry_safe(f, ftmp, &tmp_add_list, list) { @@ -2040,13 +1992,13 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) /* flush a full buffer */ if (num_add == filter_list_len) { - ret = i40e_aq_add_macvlan(&pf->hw, vsi->seid, - add_list, num_add, - NULL); + aq_ret = i40e_aq_add_macvlan(&pf->hw, vsi->seid, + add_list, num_add, + NULL); aq_err = pf->hw.aq.asq_last_status; num_add = 0; - if (ret) + if (aq_ret) break; memset(add_list, 0, sizeof(*add_list)); } @@ -2058,18 +2010,19 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) } if (num_add) { - ret = i40e_aq_add_macvlan(&pf->hw, vsi->seid, - add_list, num_add, NULL); + aq_ret = i40e_aq_add_macvlan(&pf->hw, vsi->seid, + add_list, num_add, NULL); aq_err = pf->hw.aq.asq_last_status; num_add = 0; } kfree(add_list); add_list = NULL; - if (add_happened && ret && aq_err != I40E_AQ_RC_EINVAL) { + if (add_happened && aq_ret && aq_err != I40E_AQ_RC_EINVAL) { + retval = i40e_aq_rc_to_posix(aq_ret, aq_err); dev_info(&pf->pdev->dev, "add filter failed, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), + i40e_stat_str(&pf->hw, aq_ret), i40e_aq_str(&pf->hw, aq_err)); if ((pf->hw.aq.asq_last_status == I40E_AQ_RC_ENOSPC) && !test_bit(__I40E_FILTER_OVERFLOW_PROMISC, @@ -2087,16 +2040,19 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) bool cur_multipromisc; cur_multipromisc = !!(vsi->current_netdev_flags & IFF_ALLMULTI); - ret = i40e_aq_set_vsi_multicast_promiscuous(&vsi->back->hw, - vsi->seid, - cur_multipromisc, - NULL); - if (ret) + aq_ret = i40e_aq_set_vsi_multicast_promiscuous(&vsi->back->hw, + vsi->seid, + cur_multipromisc, + NULL); + if (aq_ret) { + retval = i40e_aq_rc_to_posix(aq_ret, + pf->hw.aq.asq_last_status); dev_info(&pf->pdev->dev, "set multi promisc failed, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), + i40e_stat_str(&pf->hw, aq_ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + } } if ((changed_flags & IFF_PROMISC) || promisc_forced_on) { bool cur_promisc; @@ -2112,44 +2068,50 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) */ if (pf->cur_promisc != cur_promisc) { pf->cur_promisc = cur_promisc; - if (grab_rtnl) - i40e_do_reset_safe(pf, - BIT(__I40E_PF_RESET_REQUESTED)); - else - i40e_do_reset(pf, - BIT(__I40E_PF_RESET_REQUESTED)); + set_bit(__I40E_PF_RESET_REQUESTED, &pf->state); } } else { - ret = i40e_aq_set_vsi_unicast_promiscuous( + aq_ret = i40e_aq_set_vsi_unicast_promiscuous( &vsi->back->hw, vsi->seid, cur_promisc, NULL); - if (ret) + if (aq_ret) { + retval = + i40e_aq_rc_to_posix(aq_ret, + pf->hw.aq.asq_last_status); dev_info(&pf->pdev->dev, "set unicast promisc failed, err %d, aq_err %d\n", - ret, pf->hw.aq.asq_last_status); - ret = i40e_aq_set_vsi_multicast_promiscuous( + aq_ret, pf->hw.aq.asq_last_status); + } + aq_ret = i40e_aq_set_vsi_multicast_promiscuous( &vsi->back->hw, vsi->seid, cur_promisc, NULL); - if (ret) + if (aq_ret) { + retval = + i40e_aq_rc_to_posix(aq_ret, + pf->hw.aq.asq_last_status); dev_info(&pf->pdev->dev, "set multicast promisc failed, err %d, aq_err %d\n", - ret, pf->hw.aq.asq_last_status); + aq_ret, pf->hw.aq.asq_last_status); + } } - ret = i40e_aq_set_vsi_broadcast(&vsi->back->hw, - vsi->seid, - cur_promisc, NULL); - if (ret) + aq_ret = i40e_aq_set_vsi_broadcast(&vsi->back->hw, + vsi->seid, + cur_promisc, NULL); + if (aq_ret) { + retval = i40e_aq_rc_to_posix(aq_ret, + pf->hw.aq.asq_last_status); dev_info(&pf->pdev->dev, "set brdcast promisc failed, err %s, aq_err %s\n", - i40e_stat_str(&pf->hw, ret), + i40e_stat_str(&pf->hw, aq_ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + } } - +out: clear_bit(__I40E_CONFIG_BUSY, &vsi->state); - return 0; + return retval; } /** @@ -2166,8 +2128,15 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf) for (v = 0; v < pf->num_alloc_vsi; v++) { if (pf->vsi[v] && - (pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED)) - i40e_sync_vsi_filters(pf->vsi[v], true); + (pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED)) { + int ret = i40e_sync_vsi_filters(pf->vsi[v]); + + if (ret) { + /* come back and try again later */ + pf->flags |= I40E_FLAG_FILTER_SYNC; + break; + } + } } } @@ -2377,16 +2346,13 @@ int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid) } } - /* Make sure to release before sync_vsi_filter because that - * function will lock/unlock as necessary - */ spin_unlock_bh(&vsi->mac_filter_list_lock); - if (test_bit(__I40E_DOWN, &vsi->back->state) || - test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state)) - return 0; - - return i40e_sync_vsi_filters(vsi, false); + /* schedule our worker thread which will take care of + * applying the new filter changes + */ + i40e_service_event_schedule(vsi->back); + return 0; } /** @@ -2459,16 +2425,13 @@ int i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid) } } - /* Make sure to release before sync_vsi_filter because that - * function with lock/unlock as necessary - */ spin_unlock_bh(&vsi->mac_filter_list_lock); - if (test_bit(__I40E_DOWN, &vsi->back->state) || - test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state)) - return 0; - - return i40e_sync_vsi_filters(vsi, false); + /* schedule our worker thread which will take care of + * applying the new filter changes + */ + i40e_service_event_schedule(vsi->back); + return 0; } /** @@ -2711,6 +2674,11 @@ static void i40e_config_xps_tx_ring(struct i40e_ring *ring) netif_set_xps_queue(ring->netdev, mask, ring->queue_index); free_cpumask_var(mask); } + + /* schedule our worker thread which will take care of + * applying the new filter changes + */ + i40e_service_event_schedule(vsi->back); } /** @@ -4360,17 +4328,41 @@ static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi) else val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0); + /* Bail out if interrupts are disabled because napi_poll + * execution in-progress or will get scheduled soon. + * napi_poll cleans TX and RX queues and updates 'next_to_clean'. + */ + if (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK)) + return; + head = i40e_get_head(tx_ring); tx_pending = i40e_get_tx_pending(tx_ring); - /* Interrupts are disabled and TX pending is non-zero, - * trigger the SW interrupt (don't wait). Worst case - * there will be one extra interrupt which may result - * into not cleaning any queues because queues are cleaned. + /* HW is done executing descriptors, updated HEAD write back, + * but SW hasn't processed those descriptors. If interrupt is + * not generated from this point ON, it could result into + * dev_watchdog detecting timeout on those netdev_queue, + * hence proactively trigger SW interrupt. */ - if (tx_pending && (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK))) - i40e_force_wb(vsi, tx_ring->q_vector); + if (tx_pending) { + /* NAPI Poll didn't run and clear since it was set */ + if (test_and_clear_bit(I40E_Q_VECTOR_HUNG_DETECT, + &tx_ring->q_vector->hung_detected)) { + netdev_info(vsi->netdev, "VSI_seid %d, Hung TX queue %d, tx_pending: %d, NTC:0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x\n", + vsi->seid, q_idx, tx_pending, + tx_ring->next_to_clean, head, + tx_ring->next_to_use, + readl(tx_ring->tail)); + netdev_info(vsi->netdev, "VSI_seid %d, Issuing force_wb for TX queue %d, Interrupt Reg: 0x%x\n", + vsi->seid, q_idx, val); + i40e_force_wb(vsi, tx_ring->q_vector); + } else { + /* First Chance - detected possible hung */ + set_bit(I40E_Q_VECTOR_HUNG_DETECT, + &tx_ring->q_vector->hung_detected); + } + } } /** @@ -5738,7 +5730,7 @@ static void i40e_handle_lan_overflow_event(struct i40e_pf *pf, **/ static void i40e_service_event_complete(struct i40e_pf *pf) { - BUG_ON(!test_bit(__I40E_SERVICE_SCHED, &pf->state)); + WARN_ON(!test_bit(__I40E_SERVICE_SCHED, &pf->state)); /* flush memory to make sure state is correct before next watchog */ smp_mb__before_atomic(); @@ -6013,6 +6005,9 @@ static void i40e_link_event(struct i40e_pf *pf) i40e_status status; bool new_link, old_link; + /* save off old link status information */ + pf->hw.phy.link_info_old = pf->hw.phy.link_info; + /* set this to force the get_link_status call to refresh state */ pf->hw.phy.get_link_info = true; @@ -6147,13 +6142,9 @@ unlock: static void i40e_handle_link_event(struct i40e_pf *pf, struct i40e_arq_event_info *e) { - struct i40e_hw *hw = &pf->hw; struct i40e_aqc_get_link_status *status = (struct i40e_aqc_get_link_status *)&e->desc.params.raw; - /* save off old link status information */ - hw->phy.link_info_old = hw->phy.link_info; - /* Do a new status request to re-enable LSE reporting * and load new status information into the hw struct * This completely ignores any state information @@ -6685,6 +6676,7 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit) struct i40e_hw *hw = &pf->hw; u8 set_fc_aq_fail = 0; i40e_status ret; + u32 val; u32 v; /* Now we wait for GRST to settle out. @@ -6823,6 +6815,20 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit) } } + /* Reconfigure hardware for allowing smaller MSS in the case + * of TSO, so that we avoid the MDD being fired and causing + * a reset in the case of small MSS+TSO. + */ +#define I40E_REG_MSS 0x000E64DC +#define I40E_REG_MSS_MIN_MASK 0x3FF0000 +#define I40E_64BYTE_MSS 0x400000 + val = rd32(hw, I40E_REG_MSS); + if ((val & I40E_REG_MSS_MIN_MASK) > I40E_64BYTE_MSS) { + val &= ~I40E_REG_MSS_MIN_MASK; + val |= I40E_64BYTE_MSS; + wr32(hw, I40E_REG_MSS, val); + } + if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) || (pf->hw.aq.fw_maj_ver < 4)) { msleep(75); @@ -7282,6 +7288,23 @@ static void i40e_vsi_free_arrays(struct i40e_vsi *vsi, bool free_qvectors) } /** + * i40e_clear_rss_config_user - clear the user configured RSS hash keys + * and lookup table + * @vsi: Pointer to VSI structure + */ +static void i40e_clear_rss_config_user(struct i40e_vsi *vsi) +{ + if (!vsi) + return; + + kfree(vsi->rss_hkey_user); + vsi->rss_hkey_user = NULL; + + kfree(vsi->rss_lut_user); + vsi->rss_lut_user = NULL; +} + +/** * i40e_vsi_clear - Deallocate the VSI provided * @vsi: the VSI being un-configured **/ @@ -7318,6 +7341,7 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi) i40e_put_lump(pf->irq_pile, vsi->base_vector, vsi->idx); i40e_vsi_free_arrays(vsi, true); + i40e_clear_rss_config_user(vsi); pf->vsi[vsi->idx] = NULL; if (vsi->idx < pf->next_vsi) @@ -7780,7 +7804,8 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf) * @vsi: vsi structure * @seed: RSS hash seed **/ -static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed) +static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed, + u8 *lut, u16 lut_size) { struct i40e_aqc_get_set_rss_key_data rss_key; struct i40e_pf *pf = vsi->back; @@ -7833,43 +7858,57 @@ static int i40e_vsi_config_rss(struct i40e_vsi *vsi) { u8 seed[I40E_HKEY_ARRAY_SIZE]; struct i40e_pf *pf = vsi->back; + u8 *lut; + int ret; - netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); - vsi->rss_size = min_t(int, pf->rss_size, vsi->num_queue_pairs); + if (!(pf->flags & I40E_FLAG_RSS_AQ_CAPABLE)) + return 0; - if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) - return i40e_config_rss_aq(vsi, seed); + lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); + if (!lut) + return -ENOMEM; - return 0; + i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size); + netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); + vsi->rss_size = min_t(int, pf->alloc_rss_size, vsi->num_queue_pairs); + ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size); + kfree(lut); + + return ret; } /** - * i40e_config_rss_reg - Prepare for RSS if used - * @pf: board private structure + * i40e_config_rss_reg - Configure RSS keys and lut by writing registers + * @vsi: Pointer to vsi structure * @seed: RSS hash seed + * @lut: Lookup table + * @lut_size: Lookup table size + * + * Returns 0 on success, negative on failure **/ -static int i40e_config_rss_reg(struct i40e_pf *pf, const u8 *seed) +static int i40e_config_rss_reg(struct i40e_vsi *vsi, const u8 *seed, + const u8 *lut, u16 lut_size) { - struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; + struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; - u32 *seed_dw = (u32 *)seed; - u32 current_queue = 0; - u32 lut = 0; - int i, j; + u8 i; /* Fill out hash function seed */ - for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) - wr32(hw, I40E_PFQF_HKEY(i), seed_dw[i]); + if (seed) { + u32 *seed_dw = (u32 *)seed; - for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++) { - lut = 0; - for (j = 0; j < 4; j++) { - if (current_queue == vsi->rss_size) - current_queue = 0; - lut |= ((current_queue) << (8 * j)); - current_queue++; - } - wr32(&pf->hw, I40E_PFQF_HLUT(i), lut); + for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) + wr32(hw, I40E_PFQF_HKEY(i), seed_dw[i]); + } + + if (lut) { + u32 *lut_dw = (u32 *)lut; + + if (lut_size != I40E_HLUT_ARRAY_SIZE) + return -EINVAL; + + for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++) + wr32(hw, I40E_PFQF_HLUT(i), lut_dw[i]); } i40e_flush(hw); @@ -7877,18 +7916,101 @@ static int i40e_config_rss_reg(struct i40e_pf *pf, const u8 *seed) } /** - * i40e_config_rss - Prepare for RSS if used + * i40e_get_rss_reg - Get the RSS keys and lut by reading registers + * @vsi: Pointer to VSI structure + * @seed: Buffer to store the keys + * @lut: Buffer to store the lookup table entries + * @lut_size: Size of buffer to store the lookup table entries + * + * Returns 0 on success, negative on failure + */ +static int i40e_get_rss_reg(struct i40e_vsi *vsi, u8 *seed, + u8 *lut, u16 lut_size) +{ + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + u16 i; + + if (seed) { + u32 *seed_dw = (u32 *)seed; + + for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) + seed_dw[i] = rd32(hw, I40E_PFQF_HKEY(i)); + } + if (lut) { + u32 *lut_dw = (u32 *)lut; + + if (lut_size != I40E_HLUT_ARRAY_SIZE) + return -EINVAL; + for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++) + lut_dw[i] = rd32(hw, I40E_PFQF_HLUT(i)); + } + + return 0; +} + +/** + * i40e_config_rss - Configure RSS keys and lut + * @vsi: Pointer to VSI structure + * @seed: RSS hash seed + * @lut: Lookup table + * @lut_size: Lookup table size + * + * Returns 0 on success, negative on failure + */ +int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) +{ + struct i40e_pf *pf = vsi->back; + + if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) + return i40e_config_rss_aq(vsi, seed, lut, lut_size); + else + return i40e_config_rss_reg(vsi, seed, lut, lut_size); +} + +/** + * i40e_get_rss - Get RSS keys and lut + * @vsi: Pointer to VSI structure + * @seed: Buffer to store the keys + * @lut: Buffer to store the lookup table entries + * lut_size: Size of buffer to store the lookup table entries + * + * Returns 0 on success, negative on failure + */ +int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) +{ + return i40e_get_rss_reg(vsi, seed, lut, lut_size); +} + +/** + * i40e_fill_rss_lut - Fill the RSS lookup table with default values + * @pf: Pointer to board private structure + * @lut: Lookup table + * @rss_table_size: Lookup table size + * @rss_size: Range of queue number for hashing + */ +static void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut, + u16 rss_table_size, u16 rss_size) +{ + u16 i; + + for (i = 0; i < rss_table_size; i++) + lut[i] = i % rss_size; +} + +/** + * i40e_pf_config_rss - Prepare for RSS if used * @pf: board private structure **/ -static int i40e_config_rss(struct i40e_pf *pf) +static int i40e_pf_config_rss(struct i40e_pf *pf) { struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; u8 seed[I40E_HKEY_ARRAY_SIZE]; + u8 *lut; struct i40e_hw *hw = &pf->hw; u32 reg_val; u64 hena; - - netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); + int ret; /* By default we enable TCP/UDP with IPv4/IPv6 ptypes */ hena = (u64)rd32(hw, I40E_PFQF_HENA(0)) | @@ -7898,8 +8020,6 @@ static int i40e_config_rss(struct i40e_pf *pf) wr32(hw, I40E_PFQF_HENA(0), (u32)hena); wr32(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32)); - vsi->rss_size = min_t(int, pf->rss_size, vsi->num_queue_pairs); - /* Determine the RSS table size based on the hardware capabilities */ reg_val = rd32(hw, I40E_PFQF_CTL_0); reg_val = (pf->rss_table_size == 512) ? @@ -7907,10 +8027,32 @@ static int i40e_config_rss(struct i40e_pf *pf) (reg_val & ~I40E_PFQF_CTL_0_HASHLUTSIZE_512); wr32(hw, I40E_PFQF_CTL_0, reg_val); - if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) - return i40e_config_rss_aq(pf->vsi[pf->lan_vsi], seed); + /* Determine the RSS size of the VSI */ + if (!vsi->rss_size) + vsi->rss_size = min_t(int, pf->alloc_rss_size, + vsi->num_queue_pairs); + + lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); + if (!lut) + return -ENOMEM; + + /* Use user configured lut if there is one, otherwise use default */ + if (vsi->rss_lut_user) + memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size); + else + i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size); + + /* Use user configured hash key if there is one, otherwise + * use default. + */ + if (vsi->rss_hkey_user) + memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE); else - return i40e_config_rss_reg(pf, seed); + netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); + ret = i40e_config_rss(vsi, seed, lut, vsi->rss_table_size); + kfree(lut); + + return ret; } /** @@ -7935,13 +8077,28 @@ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count) vsi->req_queue_pairs = queue_count; i40e_prep_for_reset(pf); - pf->rss_size = new_rss_size; + pf->alloc_rss_size = new_rss_size; i40e_reset_and_rebuild(pf, true); - i40e_config_rss(pf); + + /* Discard the user configured hash keys and lut, if less + * queues are enabled. + */ + if (queue_count < vsi->rss_size) { + i40e_clear_rss_config_user(vsi); + dev_dbg(&pf->pdev->dev, + "discard user configured hash keys and lut\n"); + } + + /* Reset vsi->rss_size, as number of enabled queues changed */ + vsi->rss_size = min_t(int, pf->alloc_rss_size, + vsi->num_queue_pairs); + + i40e_pf_config_rss(pf); } - dev_info(&pf->pdev->dev, "RSS count: %d\n", pf->rss_size); - return pf->rss_size; + dev_info(&pf->pdev->dev, "RSS count/HW max RSS count: %d/%d\n", + pf->alloc_rss_size, pf->rss_size_max); + return pf->alloc_rss_size; } /** @@ -8112,13 +8269,14 @@ static int i40e_sw_init(struct i40e_pf *pf) * maximum might end up larger than the available queues */ pf->rss_size_max = BIT(pf->hw.func_caps.rss_table_entry_width); - pf->rss_size = 1; + pf->alloc_rss_size = 1; pf->rss_table_size = pf->hw.func_caps.rss_table_size; pf->rss_size_max = min_t(int, pf->rss_size_max, pf->hw.func_caps.num_tx_qp); if (pf->hw.func_caps.rss) { pf->flags |= I40E_FLAG_RSS_ENABLED; - pf->rss_size = min_t(int, pf->rss_size_max, num_online_cpus()); + pf->alloc_rss_size = min_t(int, pf->rss_size_max, + num_online_cpus()); } /* MFP mode enabled */ @@ -9051,7 +9209,7 @@ int i40e_vsi_release(struct i40e_vsi *vsi) f->is_vf, f->is_netdev); spin_unlock_bh(&vsi->mac_filter_list_lock); - i40e_sync_vsi_filters(vsi, false); + i40e_sync_vsi_filters(vsi); i40e_vsi_delete(vsi); i40e_vsi_free_q_vectors(vsi); @@ -9947,7 +10105,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit) * the hash */ if ((pf->flags & I40E_FLAG_RSS_ENABLED)) - i40e_config_rss(pf); + i40e_pf_config_rss(pf); /* fill in link information and enable LSE reporting */ i40e_update_link_info(&pf->hw); @@ -9985,7 +10143,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf) !(pf->flags & I40E_FLAG_MSIX_ENABLED)) { /* one qp for PF, no queues for anything else */ queues_left = 0; - pf->rss_size = pf->num_lan_qps = 1; + pf->alloc_rss_size = pf->num_lan_qps = 1; /* make sure all the fancies are disabled */ pf->flags &= ~(I40E_FLAG_RSS_ENABLED | @@ -10002,7 +10160,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf) I40E_FLAG_FD_ATR_ENABLED | I40E_FLAG_DCB_CAPABLE))) { /* one qp for PF */ - pf->rss_size = pf->num_lan_qps = 1; + pf->alloc_rss_size = pf->num_lan_qps = 1; queues_left -= pf->num_lan_qps; pf->flags &= ~(I40E_FLAG_RSS_ENABLED | @@ -10072,8 +10230,9 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf) "qs_avail=%d FD SB=%d lan_qs=%d lan_tc0=%d vf=%d*%d vmdq=%d*%d, remaining=%d\n", pf->hw.func_caps.num_tx_qp, !!(pf->flags & I40E_FLAG_FD_SB_ENABLED), - pf->num_lan_qps, pf->rss_size, pf->num_req_vfs, pf->num_vf_qps, - pf->num_vmdq_vsis, pf->num_vmdq_qps, queues_left); + pf->num_lan_qps, pf->alloc_rss_size, pf->num_req_vfs, + pf->num_vf_qps, pf->num_vmdq_vsis, pf->num_vmdq_qps, + queues_left); #ifdef I40E_FCOE dev_dbg(&pf->pdev->dev, "fcoe queues = %d\n", pf->num_fcoe_qps); #endif @@ -10111,55 +10270,53 @@ static int i40e_setup_pf_filter_control(struct i40e_pf *pf) } #define INFO_STRING_LEN 255 +#define REMAIN(__x) (INFO_STRING_LEN - (__x)) static void i40e_print_features(struct i40e_pf *pf) { struct i40e_hw *hw = &pf->hw; - char *buf, *string; + char *buf; + int i; - string = kzalloc(INFO_STRING_LEN, GFP_KERNEL); - if (!string) { - dev_err(&pf->pdev->dev, "Features string allocation failed\n"); + buf = kmalloc(INFO_STRING_LEN, GFP_KERNEL); + if (!buf) return; - } - - buf = string; - buf += sprintf(string, "Features: PF-id[%d] ", hw->pf_id); + i = snprintf(buf, INFO_STRING_LEN, "Features: PF-id[%d]", hw->pf_id); #ifdef CONFIG_PCI_IOV - buf += sprintf(buf, "VFs: %d ", pf->num_req_vfs); + i += snprintf(&buf[i], REMAIN(i), " VFs: %d", pf->num_req_vfs); #endif - buf += sprintf(buf, "VSIs: %d QP: %d RX: %s ", - pf->hw.func_caps.num_vsis, - pf->vsi[pf->lan_vsi]->num_queue_pairs, - pf->flags & I40E_FLAG_RX_PS_ENABLED ? "PS" : "1BUF"); + i += snprintf(&buf[i], REMAIN(i), " VSIs: %d QP: %d RX: %s", + pf->hw.func_caps.num_vsis, + pf->vsi[pf->lan_vsi]->num_queue_pairs, + pf->flags & I40E_FLAG_RX_PS_ENABLED ? "PS" : "1BUF"); if (pf->flags & I40E_FLAG_RSS_ENABLED) - buf += sprintf(buf, "RSS "); + i += snprintf(&buf[i], REMAIN(i), " RSS"); if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) - buf += sprintf(buf, "FD_ATR "); + i += snprintf(&buf[i], REMAIN(i), " FD_ATR"); if (pf->flags & I40E_FLAG_FD_SB_ENABLED) { - buf += sprintf(buf, "FD_SB "); - buf += sprintf(buf, "NTUPLE "); + i += snprintf(&buf[i], REMAIN(i), " FD_SB"); + i += snprintf(&buf[i], REMAIN(i), " NTUPLE"); } if (pf->flags & I40E_FLAG_DCB_CAPABLE) - buf += sprintf(buf, "DCB "); + i += snprintf(&buf[i], REMAIN(i), " DCB"); #if IS_ENABLED(CONFIG_VXLAN) - buf += sprintf(buf, "VxLAN "); + i += snprintf(&buf[i], REMAIN(i), " VxLAN"); #endif if (pf->flags & I40E_FLAG_PTP) - buf += sprintf(buf, "PTP "); + i += snprintf(&buf[i], REMAIN(i), " PTP"); #ifdef I40E_FCOE if (pf->flags & I40E_FLAG_FCOE_ENABLED) - buf += sprintf(buf, "FCOE "); + i += snprintf(&buf[i], REMAIN(i), " FCOE"); #endif if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED) - buf += sprintf(buf, "VEB "); + i += snprintf(&buf[i], REMAIN(i), " VEB"); else - buf += sprintf(buf, "VEPA "); + i += snprintf(&buf[i], REMAIN(i), " VEPA"); - BUG_ON(buf > (string + INFO_STRING_LEN)); - dev_info(&pf->pdev->dev, "%s\n", string); - kfree(string); + dev_info(&pf->pdev->dev, "%s\n", buf); + kfree(buf); + WARN_ON(i > INFO_STRING_LEN); } /** @@ -10183,6 +10340,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) u16 link_status; int err; u32 len; + u32 val; u32 i; u8 set_fc_aq_fail; @@ -10296,6 +10454,16 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pf->hw.fc.requested_mode = I40E_FC_NONE; err = i40e_init_adminq(hw); + if (err) { + if (err == I40E_ERR_FIRMWARE_API_VERSION) + dev_info(&pdev->dev, + "The driver for the device stopped because the NVM image is newer than expected. You must install the most recent version of the network driver.\n"); + else + dev_info(&pdev->dev, + "The driver for the device stopped because the device firmware failed to init. Try updating your NVM image.\n"); + + goto err_pf_reset; + } /* provide nvm, fw, api versions */ dev_info(&pdev->dev, "fw %d.%d.%05d api %d.%d nvm %s\n", @@ -10303,12 +10471,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->aq.api_maj_ver, hw->aq.api_min_ver, i40e_nvm_version_str(hw)); - if (err) { - dev_info(&pdev->dev, - "The driver for the device stopped because the NVM image is newer than expected. You must install the most recent version of the network driver.\n"); - goto err_pf_reset; - } - if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && hw->aq.api_min_ver > I40E_FW_API_VERSION_MINOR) dev_info(&pdev->dev, @@ -10487,6 +10649,17 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) i40e_stat_str(&pf->hw, err), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + /* Reconfigure hardware for allowing smaller MSS in the case + * of TSO, so that we avoid the MDD being fired and causing + * a reset in the case of small MSS+TSO. + */ + val = rd32(hw, I40E_REG_MSS); + if ((val & I40E_REG_MSS_MIN_MASK) > I40E_64BYTE_MSS) { + val &= ~I40E_REG_MSS_MIN_MASK; + val |= I40E_64BYTE_MSS; + wr32(hw, I40E_REG_MSS, val); + } + if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) || (pf->hw.aq.fw_maj_ver < 4)) { msleep(75); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 6649ce4ba2de2a..b0ae3e69578398 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -235,6 +235,9 @@ static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi, "Filter deleted for PCTYPE %d loc = %d\n", fd_data->pctype, fd_data->fd_id); } + if (err) + kfree(raw_packet); + return err ? -EOPNOTSUPP : 0; } @@ -312,6 +315,9 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi, fd_data->pctype, fd_data->fd_id); } + if (err) + kfree(raw_packet); + return err ? -EOPNOTSUPP : 0; } @@ -322,7 +328,7 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi, * @fd_data: the flow director data required for the FDir descriptor * @add: true adds a filter, false removes it * - * Always returns -EOPNOTSUPP + * Returns 0 if the filters were successfully added or removed **/ static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi, struct i40e_fdir_filter *fd_data, @@ -387,6 +393,9 @@ static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi, } } + if (err) + kfree(raw_packet); + return err ? -EOPNOTSUPP : 0; } @@ -506,9 +515,6 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring, pf->auto_disable_flags |= I40E_FLAG_FD_SB_ENABLED; } - } else { - dev_info(&pdev->dev, - "FD filter programming failed due to incorrect filter parameters\n"); } } else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) { if (I40E_DEBUG_FD & pf->hw.debug_mask) @@ -526,11 +532,7 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, struct i40e_tx_buffer *tx_buffer) { if (tx_buffer->skb) { - if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) - kfree(tx_buffer->raw_buf); - else - dev_kfree_skb_any(tx_buffer->skb); - + dev_kfree_skb_any(tx_buffer->skb); if (dma_unmap_len(tx_buffer, len)) dma_unmap_single(ring->dev, dma_unmap_addr(tx_buffer, dma), @@ -542,6 +544,10 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); } + + if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) + kfree(tx_buffer->raw_buf); + tx_buffer->next_to_watch = NULL; tx_buffer->skb = NULL; dma_unmap_len_set(tx_buffer, len, 0); @@ -1863,7 +1869,6 @@ enable_int: q_vector->itr_countdown--; else q_vector->itr_countdown = ITR_COUNTDOWN_START; - } /** @@ -1891,12 +1896,14 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) return 0; } + /* Clear hung_detected bit */ + clear_bit(I40E_Q_VECTOR_HUNG_DETECT, &q_vector->hung_detected); /* Since the actual Tx work is minimal, we can give the Tx a larger * budget and be more aggressive about cleaning up the Tx descriptors. */ i40e_for_each_ring(ring, q_vector->tx) { clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit); - arm_wb |= ring->arm_wb; + arm_wb = arm_wb || ring->arm_wb; ring->arm_wb = false; } @@ -1925,8 +1932,10 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) /* If work not completed, return budget and polling will return */ if (!clean_complete) { tx_only: - if (arm_wb) + if (arm_wb) { + q_vector->tx.ring[0].tx_stats.tx_force_wb++; i40e_force_wb(vsi, q_vector); + } return budget; } @@ -2186,14 +2195,12 @@ out: * @tx_ring: ptr to the ring to send * @skb: ptr to the skb we're sending * @hdr_len: ptr to the size of the packet header - * @cd_type_cmd_tso_mss: ptr to u64 object - * @cd_tunneling: ptr to context descriptor bits + * @cd_type_cmd_tso_mss: Quad Word 1 * * Returns 0 if no TSO can happen, 1 if tso is going, or error **/ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, - u8 *hdr_len, u64 *cd_type_cmd_tso_mss, - u32 *cd_tunneling) + u8 *hdr_len, u64 *cd_type_cmd_tso_mss) { u32 cd_cmd, cd_tso_len, cd_mss; struct ipv6hdr *ipv6h; @@ -2246,7 +2253,7 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, * @tx_ring: ptr to the ring to send * @skb: ptr to the skb we're sending * @tx_flags: the collected send information - * @cd_type_cmd_tso_mss: ptr to u64 object + * @cd_type_cmd_tso_mss: Quad Word 1 * * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen **/ @@ -2806,6 +2813,9 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, int tsyn; int tso; + /* prefetch the data, we'll need it later */ + prefetch(skb->data); + if (0 == i40e_xmit_descriptor_count(skb, tx_ring)) return NETDEV_TX_BUSY; @@ -2825,8 +2835,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, else if (protocol == htons(ETH_P_IPV6)) tx_flags |= I40E_TX_FLAGS_IPV6; - tso = i40e_tso(tx_ring, skb, &hdr_len, - &cd_type_cmd_tso_mss, &cd_tunneling); + tso = i40e_tso(tx_ring, skb, &hdr_len, &cd_type_cmd_tso_mss); if (tso < 0) goto out_drop; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 6779fb771d6af9..dccc1eb576f251 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -202,6 +202,7 @@ struct i40e_tx_queue_stats { u64 tx_busy; u64 tx_done_old; u64 tx_linearize; + u64 tx_force_wb; }; struct i40e_rx_queue_stats { diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 44462b40f2d766..b3bd81c3e1ceec 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -290,8 +290,8 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id, next_q = find_first_bit(&linklistmap, (I40E_MAX_VSI_QP * I40E_VIRTCHNL_SUPPORTED_QTYPES)); - vsi_queue_id = next_q/I40E_VIRTCHNL_SUPPORTED_QTYPES; - qtype = next_q%I40E_VIRTCHNL_SUPPORTED_QTYPES; + vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES; + qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES; pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id); reg = ((qtype << I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT) | pf_queue_id); @@ -565,7 +565,7 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type) } /* program mac filter */ - ret = i40e_sync_vsi_filters(vsi, false); + ret = i40e_sync_vsi_filters(vsi); if (ret) dev_err(&pf->pdev->dev, "Unable to program ucast filters\n"); @@ -1094,8 +1094,8 @@ static int i40e_vc_send_msg_to_vf(struct i40e_vf *vf, u32 v_opcode, /* single place to detect unsuccessful return values */ if (v_retval) { vf->num_invalid_msgs++; - dev_err(&pf->pdev->dev, "Failed opcode %d Error: %d\n", - v_opcode, v_retval); + dev_err(&pf->pdev->dev, "VF %d failed opcode %d, error: %d\n", + vf->vf_id, v_opcode, v_retval); if (vf->num_invalid_msgs > I40E_DEFAULT_NUM_INVALID_MSGS_ALLOWED) { dev_err(&pf->pdev->dev, @@ -1623,7 +1623,8 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) if (!f) { dev_err(&pf->pdev->dev, - "Unable to add VF MAC filter\n"); + "Unable to add MAC filter %pM for VF %d\n", + al->list[i].addr, vf->vf_id); ret = I40E_ERR_PARAM; spin_unlock_bh(&vsi->mac_filter_list_lock); goto error_param; @@ -1632,8 +1633,10 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) spin_unlock_bh(&vsi->mac_filter_list_lock); /* program the updated filter list */ - if (i40e_sync_vsi_filters(vsi, false)) - dev_err(&pf->pdev->dev, "Unable to program VF MAC filters\n"); + ret = i40e_sync_vsi_filters(vsi); + if (ret) + dev_err(&pf->pdev->dev, "Unable to program VF %d MAC filters, error %d\n", + vf->vf_id, ret); error_param: /* send the response to the VF */ @@ -1669,8 +1672,8 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) for (i = 0; i < al->num_elements; i++) { if (is_broadcast_ether_addr(al->list[i].addr) || is_zero_ether_addr(al->list[i].addr)) { - dev_err(&pf->pdev->dev, "invalid VF MAC addr %pM\n", - al->list[i].addr); + dev_err(&pf->pdev->dev, "Invalid MAC addr %pM for VF %d\n", + al->list[i].addr, vf->vf_id); ret = I40E_ERR_INVALID_MAC_ADDR; goto error_param; } @@ -1685,8 +1688,10 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) spin_unlock_bh(&vsi->mac_filter_list_lock); /* program the updated filter list */ - if (i40e_sync_vsi_filters(vsi, false)) - dev_err(&pf->pdev->dev, "Unable to program VF MAC filters\n"); + ret = i40e_sync_vsi_filters(vsi); + if (ret) + dev_err(&pf->pdev->dev, "Unable to program VF %d MAC filters, error %d\n", + vf->vf_id, ret); error_param: /* send the response to the VF */ @@ -1740,8 +1745,8 @@ static int i40e_vc_add_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) if (ret) dev_err(&pf->pdev->dev, - "Unable to add VF vlan filter %d, error %d\n", - vfl->vlan_id[i], ret); + "Unable to add VLAN filter %d for VF %d, error %d\n", + vfl->vlan_id[i], vf->vf_id, ret); } error_param: @@ -1792,8 +1797,8 @@ static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) if (ret) dev_err(&pf->pdev->dev, - "Unable to delete VF vlan filter %d, error %d\n", - vfl->vlan_id[i], ret); + "Unable to delete VLAN filter %d for VF %d, error %d\n", + vfl->vlan_id[i], vf->vf_id, ret); } error_param: @@ -2099,7 +2104,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) dev_info(&pf->pdev->dev, "Setting MAC %pM on VF %d\n", mac, vf_id); /* program mac filter */ - if (i40e_sync_vsi_filters(vsi, false)) { + if (i40e_sync_vsi_filters(vsi)) { dev_err(&pf->pdev->dev, "Unable to program ucast filters\n"); ret = -EIO; goto error_param; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h index fcb9ef34cc7a28..1c76389bd88826 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h @@ -2311,4 +2311,4 @@ struct i40e_aqc_debug_modify_internals { I40E_CHECK_CMD_LENGTH(i40e_aqc_debug_modify_internals); -#endif +#endif /* _I40E_ADMINQ_CMD_H_ */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 77968b184b1fb9..4ca40651a228c9 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -51,11 +51,7 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, struct i40e_tx_buffer *tx_buffer) { if (tx_buffer->skb) { - if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) - kfree(tx_buffer->raw_buf); - else - dev_kfree_skb_any(tx_buffer->skb); - + dev_kfree_skb_any(tx_buffer->skb); if (dma_unmap_len(tx_buffer, len)) dma_unmap_single(ring->dev, dma_unmap_addr(tx_buffer, dma), @@ -67,6 +63,10 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); } + + if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) + kfree(tx_buffer->raw_buf); + tx_buffer->next_to_watch = NULL; tx_buffer->skb = NULL; dma_unmap_len_set(tx_buffer, len, 0); @@ -127,17 +127,24 @@ void i40evf_free_tx_resources(struct i40e_ring *tx_ring) } /** - * i40e_get_head - Retrieve head from head writeback - * @tx_ring: tx ring to fetch head of + * i40evf_get_tx_pending - how many Tx descriptors not processed + * @tx_ring: the ring of descriptors * - * Returns value of Tx ring head based on value stored - * in head write-back location + * Since there is no access to the ring head register + * in XL710, we need to use our local copies **/ -static inline u32 i40e_get_head(struct i40e_ring *tx_ring) +u32 i40evf_get_tx_pending(struct i40e_ring *ring) { - void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count; + u32 head, tail; - return le32_to_cpu(*(volatile __le32 *)head); + head = i40e_get_head(ring); + tail = readl(ring->tail); + + if (head != tail) + return (head < tail) ? + tail - head : (tail + ring->count - head); + + return 0; } #define WB_STRIDE 0x3 @@ -245,16 +252,6 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) tx_ring->q_vector->tx.total_bytes += total_bytes; tx_ring->q_vector->tx.total_packets += total_packets; - /* check to see if there are any non-cache aligned descriptors - * waiting to be written back, and kick the hardware to force - * them to be written back in case of napi polling - */ - if (budget && - !((i & WB_STRIDE) == WB_STRIDE) && - !test_bit(__I40E_DOWN, &tx_ring->vsi->state) && - (I40E_DESC_UNUSED(tx_ring) != tx_ring->count)) - tx_ring->arm_wb = true; - netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index), total_packets, total_bytes); @@ -414,7 +411,7 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) return false; } -/* +/** * i40evf_setup_tx_descriptors - Allocate the Tx descriptors * @tx_ring: the tx ring to set up * @@ -1262,10 +1259,12 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, rx = i40e_set_new_dynamic_itr(&q_vector->rx); rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); } + if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) { tx = i40e_set_new_dynamic_itr(&q_vector->tx); txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr); } + if (rx || tx) { /* get the higher of the two ITR adjustments and * use the same value for both ITR registers @@ -1301,7 +1300,6 @@ enable_int: q_vector->itr_countdown--; else q_vector->itr_countdown = ITR_COUNTDOWN_START; - } /** @@ -1334,7 +1332,7 @@ int i40evf_napi_poll(struct napi_struct *napi, int budget) */ i40e_for_each_ring(ring, q_vector->tx) { clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit); - arm_wb |= ring->arm_wb; + arm_wb = arm_wb || ring->arm_wb; ring->arm_wb = false; } @@ -1363,8 +1361,10 @@ int i40evf_napi_poll(struct napi_struct *napi, int budget) /* If work not completed, return budget and polling will return */ if (!clean_complete) { tx_only: - if (arm_wb) + if (arm_wb) { + q_vector->tx.ring[0].tx_stats.tx_force_wb++; i40evf_force_wb(vsi, q_vector); + } return budget; } @@ -1436,13 +1436,12 @@ out: * @tx_ring: ptr to the ring to send * @skb: ptr to the skb we're sending * @hdr_len: ptr to the size of the packet header - * @cd_tunneling: ptr to context descriptor bits + * @cd_type_cmd_tso_mss: Quad Word 1 * * Returns 0 if no TSO can happen, 1 if tso is going, or error **/ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, - u8 *hdr_len, u64 *cd_type_cmd_tso_mss, - u32 *cd_tunneling) + u8 *hdr_len, u64 *cd_type_cmd_tso_mss) { u32 cd_cmd, cd_tso_len, cd_mss; struct ipv6hdr *ipv6h; @@ -1554,7 +1553,6 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, *tx_flags |= I40E_TX_FLAGS_IPV6; } - if ((tx_ring->flags & I40E_TXR_FLAGS_OUTER_UDP_CSUM) && (l4_tunnel == I40E_TXD_CTX_UDP_TUNNELING) && (*cd_tunneling & I40E_TXD_CTX_QW0_EXT_IP_MASK)) { @@ -1653,7 +1651,7 @@ static void i40e_create_tx_ctx(struct i40e_ring *tx_ring, context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss); } - /** +/** * i40e_chk_linearize - Check if there are more than 8 fragments per packet * @skb: send buffer * @tx_flags: collected send information @@ -1769,6 +1767,9 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, u32 td_tag = 0; dma_addr_t dma; u16 gso_segs; + u16 desc_count = 0; + bool tail_bump = true; + bool do_rs = false; if (tx_flags & I40E_TX_FLAGS_HW_VLAN) { td_cmd |= I40E_TX_DESC_CMD_IL2TAG1; @@ -1809,6 +1810,8 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_desc++; i++; + desc_count++; + if (i == tx_ring->count) { tx_desc = I40E_TX_DESC(tx_ring, 0); i = 0; @@ -1828,6 +1831,8 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_desc++; i++; + desc_count++; + if (i == tx_ring->count) { tx_desc = I40E_TX_DESC(tx_ring, 0); i = 0; @@ -1842,35 +1847,6 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_bi = &tx_ring->tx_bi[i]; } - /* Place RS bit on last descriptor of any packet that spans across the - * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline. - */ -#define WB_STRIDE 0x3 - if (((i & WB_STRIDE) != WB_STRIDE) && - (first <= &tx_ring->tx_bi[i]) && - (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) { - tx_desc->cmd_type_offset_bsz = - build_ctob(td_cmd, td_offset, size, td_tag) | - cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP << - I40E_TXD_QW1_CMD_SHIFT); - } else { - tx_desc->cmd_type_offset_bsz = - build_ctob(td_cmd, td_offset, size, td_tag) | - cpu_to_le64((u64)I40E_TXD_CMD << - I40E_TXD_QW1_CMD_SHIFT); - } - - netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index), - first->bytecount); - - /* Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. (Only - * applicable for weak-ordered memory model archs, - * such as IA-64). - */ - wmb(); - /* set next_to_watch value indicating a packet is present */ first->next_to_watch = tx_desc; @@ -1880,15 +1856,72 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_ring->next_to_use = i; + netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->queue_index), + first->bytecount); i40evf_maybe_stop_tx(tx_ring, DESC_NEEDED); + + /* Algorithm to optimize tail and RS bit setting: + * if xmit_more is supported + * if xmit_more is true + * do not update tail and do not mark RS bit. + * if xmit_more is false and last xmit_more was false + * if every packet spanned less than 4 desc + * then set RS bit on 4th packet and update tail + * on every packet + * else + * update tail and set RS bit on every packet. + * if xmit_more is false and last_xmit_more was true + * update tail and set RS bit. + * + * Optimization: wmb to be issued only in case of tail update. + * Also optimize the Descriptor WB path for RS bit with the same + * algorithm. + * + * Note: If there are less than 4 packets + * pending and interrupts were disabled the service task will + * trigger a force WB. + */ + if (skb->xmit_more && + !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->queue_index))) { + tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET; + tail_bump = false; + } else if (!skb->xmit_more && + !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->queue_index)) && + (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) && + (tx_ring->packet_stride < WB_STRIDE) && + (desc_count < WB_STRIDE)) { + tx_ring->packet_stride++; + } else { + tx_ring->packet_stride = 0; + tx_ring->flags &= ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET; + do_rs = true; + } + if (do_rs) + tx_ring->packet_stride = 0; + + tx_desc->cmd_type_offset_bsz = + build_ctob(td_cmd, td_offset, size, td_tag) | + cpu_to_le64((u64)(do_rs ? I40E_TXD_CMD : + I40E_TX_DESC_CMD_EOP) << + I40E_TXD_QW1_CMD_SHIFT); + /* notify HW of packet */ - if (!skb->xmit_more || - netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index))) - writel(i, tx_ring->tail); - else + if (!tail_bump) prefetchw(tx_desc + 1); + if (tail_bump) { + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. (Only + * applicable for weak-ordered memory model archs, + * such as IA-64). + */ + wmb(); + writel(i, tx_ring->tail); + } + return; dma_error: @@ -1960,6 +1993,9 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, u8 hdr_len = 0; int tso; + /* prefetch the data, we'll need it later */ + prefetch(skb->data); + if (0 == i40evf_xmit_descriptor_count(skb, tx_ring)) return NETDEV_TX_BUSY; @@ -1979,8 +2015,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, else if (protocol == htons(ETH_P_IPV6)) tx_flags |= I40E_TX_FLAGS_IPV6; - tso = i40e_tso(tx_ring, skb, &hdr_len, - &cd_type_cmd_tso_mss, &cd_tunneling); + tso = i40e_tso(tx_ring, skb, &hdr_len, &cd_type_cmd_tso_mss); if (tso < 0) goto out_drop; @@ -2028,7 +2063,7 @@ out_drop: netdev_tx_t i40evf_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { struct i40evf_adapter *adapter = netdev_priv(netdev); - struct i40e_ring *tx_ring = adapter->tx_rings[skb->queue_mapping]; + struct i40e_ring *tx_ring = &adapter->tx_rings[skb->queue_mapping]; /* hardware can't handle really short frames, hardware padding works * beyond this point diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index ebc1bf77f03606..e29bb3e86cfdc8 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -201,6 +201,7 @@ struct i40e_tx_queue_stats { u64 tx_busy; u64 tx_done_old; u64 tx_linearize; + u64 tx_force_wb; }; struct i40e_rx_queue_stats { @@ -267,6 +268,8 @@ struct i40e_ring { bool ring_active; /* is ring online or not */ bool arm_wb; /* do something to arm write back */ + u8 packet_stride; +#define I40E_TXR_FLAGS_LAST_XMIT_MORE_SET BIT(2) u16 flags; #define I40E_TXR_FLAGS_WB_ON_ITR BIT(0) @@ -321,4 +324,19 @@ int i40evf_setup_rx_descriptors(struct i40e_ring *rx_ring); void i40evf_free_tx_resources(struct i40e_ring *tx_ring); void i40evf_free_rx_resources(struct i40e_ring *rx_ring); int i40evf_napi_poll(struct napi_struct *napi, int budget); +u32 i40evf_get_tx_pending(struct i40e_ring *ring); + +/** + * i40e_get_head - Retrieve head from head writeback + * @tx_ring: Tx ring to fetch head of + * + * Returns value of Tx ring head based on value stored + * in head write-back location + **/ +static inline u32 i40e_get_head(struct i40e_ring *tx_ring) +{ + void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count; + + return le32_to_cpu(*(volatile __le32 *)head); +} #endif /* _I40E_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index 22fc3d49c4b952..be1b72b938882d 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -67,6 +67,8 @@ struct i40e_vsi { u16 rx_itr_setting; u16 tx_itr_setting; u16 qs_handle; + u8 *rss_hkey_user; /* User configured hash keys */ + u8 *rss_lut_user; /* User configured lookup table entries */ }; /* How many Rx Buffers do we bundle into one write to the hardware ? */ @@ -95,10 +97,10 @@ struct i40e_vsi { #define I40E_TX_DESC(R, i) (&(((struct i40e_tx_desc *)((R)->desc))[i])) #define I40E_TX_CTXTDESC(R, i) \ (&(((struct i40e_tx_context_desc *)((R)->desc))[i])) -#define MAX_RX_QUEUES 8 -#define MAX_TX_QUEUES MAX_RX_QUEUES +#define MAX_QUEUES 16 #define I40EVF_HKEY_ARRAY_SIZE ((I40E_VFQF_HKEY_MAX_INDEX + 1) * 4) +#define I40EVF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT_MAX_INDEX + 1) * 4) /* MAX_MSIX_Q_VECTORS of these are allocated, * but we only use one per queue-specific vector. @@ -142,9 +144,6 @@ struct i40e_q_vector { #define OTHER_VECTOR 1 #define NONQ_VECS (OTHER_VECTOR) -#define MAX_MSIX_Q_VECTORS 4 -#define MAX_MSIX_COUNT 5 - #define MIN_MSIX_Q_VECTORS 1 #define MIN_MSIX_COUNT (MIN_MSIX_Q_VECTORS + NONQ_VECS) @@ -190,19 +189,19 @@ struct i40evf_adapter { struct work_struct reset_task; struct work_struct adminq_task; struct delayed_work init_task; - struct i40e_q_vector *q_vector[MAX_MSIX_Q_VECTORS]; + struct i40e_q_vector *q_vectors; struct list_head vlan_filter_list; char misc_vector_name[IFNAMSIZ + 9]; int num_active_queues; /* TX */ - struct i40e_ring *tx_rings[I40E_MAX_VSI_QP]; + struct i40e_ring *tx_rings; u32 tx_timeout_count; struct list_head mac_filter_list; u32 tx_desc_count; /* RX */ - struct i40e_ring *rx_rings[I40E_MAX_VSI_QP]; + struct i40e_ring *rx_rings; u64 hw_csum_rx_error; u32 rx_desc_count; int num_msix_vectors; @@ -313,4 +312,8 @@ void i40evf_request_reset(struct i40evf_adapter *adapter); void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, enum i40e_virtchnl_ops v_opcode, i40e_status v_retval, u8 *msg, u16 msglen); +int i40evf_config_rss(struct i40e_vsi *vsi, const u8 *seed, u8 *lut, + u16 lut_size); +int i40evf_get_rss(struct i40e_vsi *vsi, const u8 *seed, u8 *lut, + u16 lut_size); #endif /* _I40EVF_H_ */ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c index 4790437a50ac0d..a4c9feb589e702 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c @@ -121,12 +121,12 @@ static void i40evf_get_ethtool_stats(struct net_device *netdev, data[i] = *(u64 *)p; } for (j = 0; j < adapter->num_active_queues; j++) { - data[i++] = adapter->tx_rings[j]->stats.packets; - data[i++] = adapter->tx_rings[j]->stats.bytes; + data[i++] = adapter->tx_rings[j].stats.packets; + data[i++] = adapter->tx_rings[j].stats.bytes; } for (j = 0; j < adapter->num_active_queues; j++) { - data[i++] = adapter->rx_rings[j]->stats.packets; - data[i++] = adapter->rx_rings[j]->stats.bytes; + data[i++] = adapter->rx_rings[j].stats.packets; + data[i++] = adapter->rx_rings[j].stats.bytes; } } @@ -351,7 +351,7 @@ static int i40evf_set_coalesce(struct net_device *netdev, vsi->tx_itr_setting &= ~I40E_ITR_DYNAMIC; for (i = 0; i < adapter->num_msix_vectors - NONQ_VECS; i++) { - q_vector = adapter->q_vector[i]; + q_vector = &adapter->q_vectors[i]; q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); wr32(hw, I40E_VFINT_ITRN1(0, i), q_vector->rx.itr); q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting); @@ -634,25 +634,34 @@ static int i40evf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) { struct i40evf_adapter *adapter = netdev_priv(netdev); - struct i40e_hw *hw = &adapter->hw; - u32 hlut_val; - int i, j; + struct i40e_vsi *vsi = &adapter->vsi; + u8 *seed = NULL, *lut; + int ret; + u16 i; if (hfunc) *hfunc = ETH_RSS_HASH_TOP; if (!indir) return 0; - if (indir) { - for (i = 0, j = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) { - hlut_val = rd32(hw, I40E_VFQF_HLUT(i)); - indir[j++] = hlut_val & 0xff; - indir[j++] = (hlut_val >> 8) & 0xff; - indir[j++] = (hlut_val >> 16) & 0xff; - indir[j++] = (hlut_val >> 24) & 0xff; - } - } - return 0; + seed = key; + + lut = kzalloc(I40EVF_HLUT_ARRAY_SIZE, GFP_KERNEL); + if (!lut) + return -ENOMEM; + + ret = i40evf_get_rss(vsi, seed, lut, I40EVF_HLUT_ARRAY_SIZE); + if (ret) + goto out; + + /* Each 32 bits pointed by 'indir' is stored with a lut entry */ + for (i = 0; i < I40EVF_HLUT_ARRAY_SIZE; i++) + indir[i] = (u32)lut[i]; + +out: + kfree(lut); + + return ret; } /** @@ -668,9 +677,9 @@ static int i40evf_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key, const u8 hfunc) { struct i40evf_adapter *adapter = netdev_priv(netdev); - struct i40e_hw *hw = &adapter->hw; - u32 hlut_val; - int i, j; + struct i40e_vsi *vsi = &adapter->vsi; + u8 *seed = NULL; + u16 i; /* We do not allow change in unsupported parameters */ if (key || @@ -679,15 +688,29 @@ static int i40evf_set_rxfh(struct net_device *netdev, const u32 *indir, if (!indir) return 0; - for (i = 0, j = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) { - hlut_val = indir[j++]; - hlut_val |= indir[j++] << 8; - hlut_val |= indir[j++] << 16; - hlut_val |= indir[j++] << 24; - wr32(hw, I40E_VFQF_HLUT(i), hlut_val); + if (key) { + if (!vsi->rss_hkey_user) { + vsi->rss_hkey_user = kzalloc(I40EVF_HKEY_ARRAY_SIZE, + GFP_KERNEL); + if (!vsi->rss_hkey_user) + return -ENOMEM; + } + memcpy(vsi->rss_hkey_user, key, I40EVF_HKEY_ARRAY_SIZE); + seed = vsi->rss_hkey_user; + } + if (!vsi->rss_lut_user) { + vsi->rss_lut_user = kzalloc(I40EVF_HLUT_ARRAY_SIZE, + GFP_KERNEL); + if (!vsi->rss_lut_user) + return -ENOMEM; } - return 0; + /* Each 32 bits pointed by 'indir' is stored with a lut entry */ + for (i = 0; i < I40EVF_HLUT_ARRAY_SIZE; i++) + vsi->rss_lut_user[i] = (u8)(indir[i]); + + return i40evf_config_rss(vsi, seed, vsi->rss_lut_user, + I40EVF_HLUT_ARRAY_SIZE); } static const struct ethtool_ops i40evf_ethtool_ops = { diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 6ad62656c75ef0..b4c632f417f68a 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -34,7 +34,7 @@ char i40evf_driver_name[] = "i40evf"; static const char i40evf_driver_string[] = "Intel(R) XL710/X710 Virtual Function Network Driver"; -#define DRV_VERSION "1.3.33" +#define DRV_VERSION "1.4.3" const char i40evf_driver_version[] = DRV_VERSION; static const char i40evf_copyright[] = "Copyright (c) 2013 - 2015 Intel Corporation."; @@ -259,7 +259,7 @@ static void i40evf_fire_sw_int(struct i40evf_adapter *adapter, u32 mask) { struct i40e_hw *hw = &adapter->hw; int i; - uint32_t dyn_ctl; + u32 dyn_ctl; if (mask & 1) { dyn_ctl = rd32(hw, I40E_VFINT_DYN_CTL01); @@ -347,8 +347,8 @@ static irqreturn_t i40evf_msix_clean_rings(int irq, void *data) static void i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx) { - struct i40e_q_vector *q_vector = adapter->q_vector[v_idx]; - struct i40e_ring *rx_ring = adapter->rx_rings[r_idx]; + struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx]; + struct i40e_ring *rx_ring = &adapter->rx_rings[r_idx]; rx_ring->q_vector = q_vector; rx_ring->next = q_vector->rx.ring; @@ -368,8 +368,8 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx) static void i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx) { - struct i40e_q_vector *q_vector = adapter->q_vector[v_idx]; - struct i40e_ring *tx_ring = adapter->tx_rings[t_idx]; + struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx]; + struct i40e_ring *tx_ring = &adapter->tx_rings[t_idx]; tx_ring->q_vector = q_vector; tx_ring->next = q_vector->tx.ring; @@ -464,7 +464,7 @@ static void i40evf_netpoll(struct net_device *netdev) return; for (i = 0; i < q_vectors; i++) - i40evf_msix_clean_rings(0, adapter->q_vector[i]); + i40evf_msix_clean_rings(0, &adapter->q_vectors[i]); } #endif @@ -486,7 +486,7 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename) q_vectors = adapter->num_msix_vectors - NONQ_VECS; for (vector = 0; vector < q_vectors; vector++) { - struct i40e_q_vector *q_vector = adapter->q_vector[vector]; + struct i40e_q_vector *q_vector = &adapter->q_vectors[vector]; if (q_vector->tx.ring && q_vector->rx.ring) { snprintf(q_vector->name, sizeof(q_vector->name) - 1, @@ -531,7 +531,7 @@ free_queue_irqs: adapter->msix_entries[vector + NONQ_VECS].vector, NULL); free_irq(adapter->msix_entries[vector + NONQ_VECS].vector, - adapter->q_vector[vector]); + &adapter->q_vectors[vector]); } return err; } @@ -581,7 +581,7 @@ static void i40evf_free_traffic_irqs(struct i40evf_adapter *adapter) irq_set_affinity_hint(adapter->msix_entries[i+1].vector, NULL); free_irq(adapter->msix_entries[i+1].vector, - adapter->q_vector[i]); + &adapter->q_vectors[i]); } } @@ -610,7 +610,7 @@ static void i40evf_configure_tx(struct i40evf_adapter *adapter) int i; for (i = 0; i < adapter->num_active_queues; i++) - adapter->tx_rings[i]->tail = hw->hw_addr + I40E_QTX_TAIL1(i); + adapter->tx_rings[i].tail = hw->hw_addr + I40E_QTX_TAIL1(i); } /** @@ -655,8 +655,8 @@ static void i40evf_configure_rx(struct i40evf_adapter *adapter) } for (i = 0; i < adapter->num_active_queues; i++) { - adapter->rx_rings[i]->tail = hw->hw_addr + I40E_QRX_TAIL1(i); - adapter->rx_rings[i]->rx_buf_len = rx_buf_len; + adapter->rx_rings[i].tail = hw->hw_addr + I40E_QRX_TAIL1(i); + adapter->rx_rings[i].rx_buf_len = rx_buf_len; } } @@ -953,7 +953,7 @@ static void i40evf_napi_enable_all(struct i40evf_adapter *adapter) for (q_idx = 0; q_idx < q_vectors; q_idx++) { struct napi_struct *napi; - q_vector = adapter->q_vector[q_idx]; + q_vector = &adapter->q_vectors[q_idx]; napi = &q_vector->napi; napi_enable(napi); } @@ -970,7 +970,7 @@ static void i40evf_napi_disable_all(struct i40evf_adapter *adapter) int q_vectors = adapter->num_msix_vectors - NONQ_VECS; for (q_idx = 0; q_idx < q_vectors; q_idx++) { - q_vector = adapter->q_vector[q_idx]; + q_vector = &adapter->q_vectors[q_idx]; napi_disable(&q_vector->napi); } } @@ -991,7 +991,7 @@ static void i40evf_configure(struct i40evf_adapter *adapter) adapter->aq_required |= I40EVF_FLAG_AQ_CONFIGURE_QUEUES; for (i = 0; i < adapter->num_active_queues; i++) { - struct i40e_ring *ring = adapter->rx_rings[i]; + struct i40e_ring *ring = &adapter->rx_rings[i]; i40evf_alloc_rx_buffers_1buf(ring, ring->count); ring->next_to_use = ring->count - 1; @@ -1111,16 +1111,10 @@ i40evf_acquire_msix_vectors(struct i40evf_adapter *adapter, int vectors) **/ static void i40evf_free_queues(struct i40evf_adapter *adapter) { - int i; - if (!adapter->vsi_res) return; - for (i = 0; i < adapter->num_active_queues; i++) { - if (adapter->tx_rings[i]) - kfree_rcu(adapter->tx_rings[i], rcu); - adapter->tx_rings[i] = NULL; - adapter->rx_rings[i] = NULL; - } + kfree(adapter->tx_rings); + kfree(adapter->rx_rings); } /** @@ -1135,13 +1129,20 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) { int i; + adapter->tx_rings = kcalloc(adapter->num_active_queues, + sizeof(struct i40e_ring), GFP_KERNEL); + if (!adapter->tx_rings) + goto err_out; + adapter->rx_rings = kcalloc(adapter->num_active_queues, + sizeof(struct i40e_ring), GFP_KERNEL); + if (!adapter->rx_rings) + goto err_out; + for (i = 0; i < adapter->num_active_queues; i++) { struct i40e_ring *tx_ring; struct i40e_ring *rx_ring; - tx_ring = kzalloc(sizeof(*tx_ring) * 2, GFP_KERNEL); - if (!tx_ring) - goto err_out; + tx_ring = &adapter->tx_rings[i]; tx_ring->queue_index = i; tx_ring->netdev = adapter->netdev; @@ -1149,14 +1150,12 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) tx_ring->count = adapter->tx_desc_count; if (adapter->flags & I40E_FLAG_WB_ON_ITR_CAPABLE) tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR; - adapter->tx_rings[i] = tx_ring; - rx_ring = &tx_ring[1]; + rx_ring = &adapter->rx_rings[i]; rx_ring->queue_index = i; rx_ring->netdev = adapter->netdev; rx_ring->dev = &adapter->pdev->dev; rx_ring->count = adapter->rx_desc_count; - adapter->rx_rings[i] = rx_ring; } return 0; @@ -1206,115 +1205,273 @@ static int i40evf_set_interrupt_capability(struct i40evf_adapter *adapter) err = i40evf_acquire_msix_vectors(adapter, v_budget); out: - adapter->netdev->real_num_tx_queues = pairs; + netif_set_real_num_rx_queues(adapter->netdev, pairs); + netif_set_real_num_tx_queues(adapter->netdev, pairs); return err; } /** - * i40e_configure_rss_aq - Prepare for RSS using AQ commands + * i40e_config_rss_aq - Prepare for RSS using AQ commands * @vsi: vsi structure * @seed: RSS hash seed + * @lut: Lookup table + * @lut_size: Lookup table size + * + * Return 0 on success, negative on failure **/ -static void i40evf_configure_rss_aq(struct i40e_vsi *vsi, const u8 *seed) +static int i40evf_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed, + u8 *lut, u16 lut_size) { - struct i40e_aqc_get_set_rss_key_data rss_key; struct i40evf_adapter *adapter = vsi->back; struct i40e_hw *hw = &adapter->hw; - int ret = 0, i; - u8 *rss_lut; + int ret = 0; if (!vsi->id) - return; + return -EINVAL; if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ dev_err(&adapter->pdev->dev, "Cannot confiure RSS, command %d pending\n", adapter->current_op); - return; + return -EBUSY; } - memset(&rss_key, 0, sizeof(rss_key)); - memcpy(&rss_key, seed, sizeof(rss_key)); + if (seed) { + struct i40e_aqc_get_set_rss_key_data *rss_key = + (struct i40e_aqc_get_set_rss_key_data *)seed; + ret = i40evf_aq_set_rss_key(hw, vsi->id, rss_key); + if (ret) { + dev_err(&adapter->pdev->dev, "Cannot set RSS key, err %s aq_err %s\n", + i40evf_stat_str(hw, ret), + i40evf_aq_str(hw, hw->aq.asq_last_status)); + return ret; + } + } - rss_lut = kzalloc(((I40E_VFQF_HLUT_MAX_INDEX + 1) * 4), GFP_KERNEL); - if (!rss_lut) - return; + if (lut) { + ret = i40evf_aq_set_rss_lut(hw, vsi->id, false, lut, lut_size); + if (ret) { + dev_err(&adapter->pdev->dev, + "Cannot set RSS lut, err %s aq_err %s\n", + i40evf_stat_str(hw, ret), + i40evf_aq_str(hw, hw->aq.asq_last_status)); + return ret; + } + } - /* Populate the LUT with max no. PF queues in round robin fashion */ - for (i = 0; i <= (I40E_VFQF_HLUT_MAX_INDEX * 4); i++) - rss_lut[i] = i % adapter->num_active_queues; + return ret; +} - ret = i40evf_aq_set_rss_key(hw, vsi->id, &rss_key); - if (ret) { - dev_err(&adapter->pdev->dev, - "Cannot set RSS key, err %s aq_err %s\n", - i40evf_stat_str(hw, ret), - i40evf_aq_str(hw, hw->aq.asq_last_status)); - return; +/** + * i40evf_config_rss_reg - Configure RSS keys and lut by writing registers + * @vsi: Pointer to vsi structure + * @seed: RSS hash seed + * @lut: Lookup table + * @lut_size: Lookup table size + * + * Returns 0 on success, negative on failure + **/ +static int i40evf_config_rss_reg(struct i40e_vsi *vsi, const u8 *seed, + const u8 *lut, u16 lut_size) +{ + struct i40evf_adapter *adapter = vsi->back; + struct i40e_hw *hw = &adapter->hw; + u16 i; + + if (seed) { + u32 *seed_dw = (u32 *)seed; + + for (i = 0; i <= I40E_VFQF_HKEY_MAX_INDEX; i++) + wr32(hw, I40E_VFQF_HKEY(i), seed_dw[i]); } - ret = i40evf_aq_set_rss_lut(hw, vsi->id, false, rss_lut, - (I40E_VFQF_HLUT_MAX_INDEX + 1) * 4); - if (ret) - dev_err(&adapter->pdev->dev, - "Cannot set RSS lut, err %s aq_err %s\n", - i40evf_stat_str(hw, ret), - i40evf_aq_str(hw, hw->aq.asq_last_status)); + if (lut) { + u32 *lut_dw = (u32 *)lut; + + if (lut_size != I40EVF_HLUT_ARRAY_SIZE) + return -EINVAL; + + for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) + wr32(hw, I40E_VFQF_HLUT(i), lut_dw[i]); + } + i40e_flush(hw); + + return 0; } /** - * i40e_configure_rss_reg - Prepare for RSS if used - * @adapter: board private structure - * @seed: RSS hash seed + * * i40evf_get_rss_aq - Get RSS keys and lut by using AQ commands + * @vsi: Pointer to vsi structure + * @seed: RSS hash seed + * @lut: Lookup table + * @lut_size: Lookup table size + * + * Return 0 on success, negative on failure **/ -static void i40evf_configure_rss_reg(struct i40evf_adapter *adapter, - const u8 *seed) +static int i40evf_get_rss_aq(struct i40e_vsi *vsi, const u8 *seed, + u8 *lut, u16 lut_size) { + struct i40evf_adapter *adapter = vsi->back; struct i40e_hw *hw = &adapter->hw; - u32 *seed_dw = (u32 *)seed; - u32 cqueue = 0; - u32 lut = 0; - int i, j; + int ret = 0; - /* Fill out hash function seed */ - for (i = 0; i <= I40E_VFQF_HKEY_MAX_INDEX; i++) - wr32(hw, I40E_VFQF_HKEY(i), seed_dw[i]); - - /* Populate the LUT with max no. PF queues in round robin fashion */ - for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) { - lut = 0; - for (j = 0; j < 4; j++) { - if (cqueue == adapter->num_active_queues) - cqueue = 0; - lut |= ((cqueue) << (8 * j)); - cqueue++; + if (seed) { + ret = i40evf_aq_get_rss_key(hw, vsi->id, + (struct i40e_aqc_get_set_rss_key_data *)seed); + if (ret) { + dev_err(&adapter->pdev->dev, + "Cannot get RSS key, err %s aq_err %s\n", + i40evf_stat_str(hw, ret), + i40evf_aq_str(hw, hw->aq.asq_last_status)); + return ret; } - wr32(hw, I40E_VFQF_HLUT(i), lut); } - i40e_flush(hw); + + if (lut) { + ret = i40evf_aq_get_rss_lut(hw, vsi->id, seed, lut, lut_size); + if (ret) { + dev_err(&adapter->pdev->dev, + "Cannot get RSS lut, err %s aq_err %s\n", + i40evf_stat_str(hw, ret), + i40evf_aq_str(hw, hw->aq.asq_last_status)); + return ret; + } + } + + return ret; +} + +/** + * * i40evf_get_rss_reg - Get RSS keys and lut by reading registers + * @vsi: Pointer to vsi structure + * @seed: RSS hash seed + * @lut: Lookup table + * @lut_size: Lookup table size + * + * Returns 0 on success, negative on failure + **/ +static int i40evf_get_rss_reg(struct i40e_vsi *vsi, const u8 *seed, + const u8 *lut, u16 lut_size) +{ + struct i40evf_adapter *adapter = vsi->back; + struct i40e_hw *hw = &adapter->hw; + u16 i; + + if (seed) { + u32 *seed_dw = (u32 *)seed; + + for (i = 0; i <= I40E_VFQF_HKEY_MAX_INDEX; i++) + seed_dw[i] = rd32(hw, I40E_VFQF_HKEY(i)); + } + + if (lut) { + u32 *lut_dw = (u32 *)lut; + + if (lut_size != I40EVF_HLUT_ARRAY_SIZE) + return -EINVAL; + + for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) + lut_dw[i] = rd32(hw, I40E_VFQF_HLUT(i)); + } + + return 0; +} + +/** + * i40evf_config_rss - Configure RSS keys and lut + * @vsi: Pointer to vsi structure + * @seed: RSS hash seed + * @lut: Lookup table + * @lut_size: Lookup table size + * + * Returns 0 on success, negative on failure + **/ +int i40evf_config_rss(struct i40e_vsi *vsi, const u8 *seed, + u8 *lut, u16 lut_size) +{ + struct i40evf_adapter *adapter = vsi->back; + + if (RSS_AQ(adapter)) + return i40evf_config_rss_aq(vsi, seed, lut, lut_size); + else + return i40evf_config_rss_reg(vsi, seed, lut, lut_size); +} + +/** + * i40evf_get_rss - Get RSS keys and lut + * @vsi: Pointer to vsi structure + * @seed: RSS hash seed + * @lut: Lookup table + * @lut_size: Lookup table size + * + * Returns 0 on success, negative on failure + **/ +int i40evf_get_rss(struct i40e_vsi *vsi, const u8 *seed, u8 *lut, u16 lut_size) +{ + struct i40evf_adapter *adapter = vsi->back; + + if (RSS_AQ(adapter)) + return i40evf_get_rss_aq(vsi, seed, lut, lut_size); + else + return i40evf_get_rss_reg(vsi, seed, lut, lut_size); +} + +/** + * i40evf_fill_rss_lut - Fill the lut with default values + * @lut: Lookup table to be filled with + * @rss_table_size: Lookup table size + * @rss_size: Range of queue number for hashing + **/ +static void i40evf_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size) +{ + u16 i; + + for (i = 0; i < rss_table_size; i++) + lut[i] = i % rss_size; } /** - * i40evf_configure_rss - Prepare for RSS + * i40evf_init_rss - Prepare for RSS * @adapter: board private structure + * + * Return 0 on success, negative on failure **/ -static void i40evf_configure_rss(struct i40evf_adapter *adapter) +static int i40evf_init_rss(struct i40evf_adapter *adapter) { + struct i40e_vsi *vsi = &adapter->vsi; struct i40e_hw *hw = &adapter->hw; u8 seed[I40EVF_HKEY_ARRAY_SIZE]; u64 hena; - - netdev_rss_key_fill((void *)seed, I40EVF_HKEY_ARRAY_SIZE); + u8 *lut; + int ret; /* Enable PCTYPES for RSS, TCP/UDP with IPv4/IPv6 */ hena = I40E_DEFAULT_RSS_HENA; wr32(hw, I40E_VFQF_HENA(0), (u32)hena); wr32(hw, I40E_VFQF_HENA(1), (u32)(hena >> 32)); - if (RSS_AQ(adapter)) - i40evf_configure_rss_aq(&adapter->vsi, seed); + lut = kzalloc(I40EVF_HLUT_ARRAY_SIZE, GFP_KERNEL); + if (!lut) + return -ENOMEM; + + /* Use user configured lut if there is one, otherwise use default */ + if (vsi->rss_lut_user) + memcpy(lut, vsi->rss_lut_user, I40EVF_HLUT_ARRAY_SIZE); else - i40evf_configure_rss_reg(adapter, seed); + i40evf_fill_rss_lut(lut, I40EVF_HLUT_ARRAY_SIZE, + adapter->num_active_queues); + + /* Use user configured hash key if there is one, otherwise + * user default. + */ + if (vsi->rss_hkey_user) + memcpy(seed, vsi->rss_hkey_user, I40EVF_HKEY_ARRAY_SIZE); + else + netdev_rss_key_fill((void *)seed, I40EVF_HKEY_ARRAY_SIZE); + ret = i40evf_config_rss(vsi, seed, lut, I40EVF_HLUT_ARRAY_SIZE); + kfree(lut); + + return ret; } /** @@ -1326,21 +1483,22 @@ static void i40evf_configure_rss(struct i40evf_adapter *adapter) **/ static int i40evf_alloc_q_vectors(struct i40evf_adapter *adapter) { - int q_idx, num_q_vectors; + int q_idx = 0, num_q_vectors; struct i40e_q_vector *q_vector; num_q_vectors = adapter->num_msix_vectors - NONQ_VECS; + adapter->q_vectors = kcalloc(num_q_vectors, sizeof(*q_vector), + GFP_KERNEL); + if (!adapter->q_vectors) + goto err_out; for (q_idx = 0; q_idx < num_q_vectors; q_idx++) { - q_vector = kzalloc(sizeof(*q_vector), GFP_KERNEL); - if (!q_vector) - goto err_out; + q_vector = &adapter->q_vectors[q_idx]; q_vector->adapter = adapter; q_vector->vsi = &adapter->vsi; q_vector->v_idx = q_idx; netif_napi_add(adapter->netdev, &q_vector->napi, i40evf_napi_poll, NAPI_POLL_WEIGHT); - adapter->q_vector[q_idx] = q_vector; } return 0; @@ -1348,11 +1506,10 @@ static int i40evf_alloc_q_vectors(struct i40evf_adapter *adapter) err_out: while (q_idx) { q_idx--; - q_vector = adapter->q_vector[q_idx]; + q_vector = &adapter->q_vectors[q_idx]; netif_napi_del(&q_vector->napi); - kfree(q_vector); - adapter->q_vector[q_idx] = NULL; } + kfree(adapter->q_vectors); return -ENOMEM; } @@ -1373,13 +1530,11 @@ static void i40evf_free_q_vectors(struct i40evf_adapter *adapter) napi_vectors = adapter->num_active_queues; for (q_idx = 0; q_idx < num_q_vectors; q_idx++) { - struct i40e_q_vector *q_vector = adapter->q_vector[q_idx]; - - adapter->q_vector[q_idx] = NULL; + struct i40e_q_vector *q_vector = &adapter->q_vectors[q_idx]; if (q_idx < napi_vectors) netif_napi_del(&q_vector->napi); - kfree(q_vector); } + kfree(adapter->q_vectors); } /** @@ -1438,6 +1593,22 @@ err_set_interrupt: } /** + * i40evf_clear_rss_config_user - Clear user configurations of RSS + * @vsi: Pointer to VSI structure + **/ +static void i40evf_clear_rss_config_user(struct i40e_vsi *vsi) +{ + if (!vsi) + return; + + kfree(vsi->rss_hkey_user); + vsi->rss_hkey_user = NULL; + + kfree(vsi->rss_lut_user); + vsi->rss_lut_user = NULL; +} + +/** * i40evf_watchdog_timer - Periodic call-back timer * @data: pointer to adapter disguised as unsigned long **/ @@ -1564,7 +1735,7 @@ static void i40evf_watchdog_task(struct work_struct *work) * PF, so we don't have to set current_op as we will * not get a response through the ARQ. */ - i40evf_configure_rss(adapter); + i40evf_init_rss(adapter); adapter->aq_required &= ~I40EVF_FLAG_AQ_CONFIGURE_RSS; goto watchdog_done; } @@ -1864,8 +2035,8 @@ void i40evf_free_all_tx_resources(struct i40evf_adapter *adapter) int i; for (i = 0; i < adapter->num_active_queues; i++) - if (adapter->tx_rings[i]->desc) - i40evf_free_tx_resources(adapter->tx_rings[i]); + if (adapter->tx_rings[i].desc) + i40evf_free_tx_resources(&adapter->tx_rings[i]); } /** @@ -1883,8 +2054,8 @@ static int i40evf_setup_all_tx_resources(struct i40evf_adapter *adapter) int i, err = 0; for (i = 0; i < adapter->num_active_queues; i++) { - adapter->tx_rings[i]->count = adapter->tx_desc_count; - err = i40evf_setup_tx_descriptors(adapter->tx_rings[i]); + adapter->tx_rings[i].count = adapter->tx_desc_count; + err = i40evf_setup_tx_descriptors(&adapter->tx_rings[i]); if (!err) continue; dev_err(&adapter->pdev->dev, @@ -1910,8 +2081,8 @@ static int i40evf_setup_all_rx_resources(struct i40evf_adapter *adapter) int i, err = 0; for (i = 0; i < adapter->num_active_queues; i++) { - adapter->rx_rings[i]->count = adapter->rx_desc_count; - err = i40evf_setup_rx_descriptors(adapter->rx_rings[i]); + adapter->rx_rings[i].count = adapter->rx_desc_count; + err = i40evf_setup_rx_descriptors(&adapter->rx_rings[i]); if (!err) continue; dev_err(&adapter->pdev->dev, @@ -1932,8 +2103,8 @@ void i40evf_free_all_rx_resources(struct i40evf_adapter *adapter) int i; for (i = 0; i < adapter->num_active_queues; i++) - if (adapter->rx_rings[i]->desc) - i40evf_free_rx_resources(adapter->rx_rings[i]); + if (adapter->rx_rings[i].desc) + i40evf_free_rx_resources(&adapter->rx_rings[i]); } /** @@ -2262,6 +2433,14 @@ static void i40evf_init_task(struct work_struct *work) if (err == I40E_ERR_ADMIN_QUEUE_NO_WORK) { err = i40evf_send_vf_config_msg(adapter); goto err; + } else if (err == I40E_ERR_PARAM) { + /* We only get ERR_PARAM if the device is in a very bad + * state or if we've been disabled for previous bad + * behavior. Either way, we're done now. + */ + i40evf_shutdown_adminq(hw); + dev_err(&pdev->dev, "Unable to get VF config due to PF error condition, not retrying\n"); + return; } if (err) { dev_err(&pdev->dev, "Unable to get VF config (%d)\n", @@ -2312,7 +2491,7 @@ static void i40evf_init_task(struct work_struct *work) I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR) adapter->flags |= I40EVF_FLAG_WB_ON_ITR_CAPABLE; if (!RSS_AQ(adapter)) - i40evf_configure_rss(adapter); + i40evf_init_rss(adapter); err = i40evf_request_misc_irq(adapter); if (err) goto err_sw_init; @@ -2333,7 +2512,6 @@ static void i40evf_init_task(struct work_struct *work) if (netdev->features & NETIF_F_GRO) dev_info(&pdev->dev, "GRO is enabled\n"); - dev_info(&pdev->dev, "%s\n", i40evf_driver_string); adapter->state = __I40EVF_DOWN; set_bit(__I40E_DOWN, &adapter->vsi.state); i40evf_misc_irq_enable(adapter); @@ -2342,7 +2520,7 @@ static void i40evf_init_task(struct work_struct *work) adapter->aq_required |= I40EVF_FLAG_AQ_CONFIGURE_RSS; mod_timer_pending(&adapter->watchdog_timer, jiffies + 1); } else { - i40evf_configure_rss(adapter); + i40evf_init_rss(adapter); } return; restart: @@ -2437,8 +2615,7 @@ static int i40evf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_master(pdev); - netdev = alloc_etherdev_mq(sizeof(struct i40evf_adapter), - MAX_TX_QUEUES); + netdev = alloc_etherdev_mq(sizeof(struct i40evf_adapter), MAX_QUEUES); if (!netdev) { err = -ENOMEM; goto err_alloc_etherdev; @@ -2625,6 +2802,9 @@ static void i40evf_remove(struct pci_dev *pdev) flush_scheduled_work(); + /* Clear user configurations for RSS */ + i40evf_clear_rss_config_user(&adapter->vsi); + if (hw->aq.asq.count) i40evf_shutdown_adminq(hw); diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index 32e620e1eb5c95..3c9c008b168b75 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -242,7 +242,7 @@ void i40evf_configure_queues(struct i40evf_adapter *adapter) adapter->current_op = I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES; len = sizeof(struct i40e_virtchnl_vsi_queue_config_info) + (sizeof(struct i40e_virtchnl_queue_pair_info) * pairs); - vqci = kzalloc(len, GFP_ATOMIC); + vqci = kzalloc(len, GFP_KERNEL); if (!vqci) return; @@ -255,19 +255,19 @@ void i40evf_configure_queues(struct i40evf_adapter *adapter) for (i = 0; i < pairs; i++) { vqpi->txq.vsi_id = vqci->vsi_id; vqpi->txq.queue_id = i; - vqpi->txq.ring_len = adapter->tx_rings[i]->count; - vqpi->txq.dma_ring_addr = adapter->tx_rings[i]->dma; + vqpi->txq.ring_len = adapter->tx_rings[i].count; + vqpi->txq.dma_ring_addr = adapter->tx_rings[i].dma; vqpi->txq.headwb_enabled = 1; vqpi->txq.dma_headwb_addr = vqpi->txq.dma_ring_addr + (vqpi->txq.ring_len * sizeof(struct i40e_tx_desc)); vqpi->rxq.vsi_id = vqci->vsi_id; vqpi->rxq.queue_id = i; - vqpi->rxq.ring_len = adapter->rx_rings[i]->count; - vqpi->rxq.dma_ring_addr = adapter->rx_rings[i]->dma; + vqpi->rxq.ring_len = adapter->rx_rings[i].count; + vqpi->rxq.dma_ring_addr = adapter->rx_rings[i].dma; vqpi->rxq.max_pkt_size = adapter->netdev->mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN; - vqpi->rxq.databuffer_size = adapter->rx_rings[i]->rx_buf_len; + vqpi->rxq.databuffer_size = adapter->rx_rings[i].rx_buf_len; vqpi++; } @@ -353,14 +353,14 @@ void i40evf_map_queues(struct i40evf_adapter *adapter) len = sizeof(struct i40e_virtchnl_irq_map_info) + (adapter->num_msix_vectors * sizeof(struct i40e_virtchnl_vector_map)); - vimi = kzalloc(len, GFP_ATOMIC); + vimi = kzalloc(len, GFP_KERNEL); if (!vimi) return; vimi->num_vectors = adapter->num_msix_vectors; /* Queue vectors first */ for (v_idx = 0; v_idx < q_vectors; v_idx++) { - q_vector = adapter->q_vector[v_idx]; + q_vector = adapter->q_vectors + v_idx; vimi->vecmap[v_idx].vsi_id = adapter->vsi_res->vsi_id; vimi->vecmap[v_idx].vector_id = v_idx + NONQ_VECS; vimi->vecmap[v_idx].txq_map = q_vector->ring_mask; @@ -391,6 +391,7 @@ void i40evf_add_ether_addrs(struct i40evf_adapter *adapter) struct i40e_virtchnl_ether_addr_list *veal; int len, i = 0, count = 0; struct i40evf_mac_filter *f; + bool more = false; if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -415,10 +416,12 @@ void i40evf_add_ether_addrs(struct i40evf_adapter *adapter) count = (I40EVF_MAX_AQ_BUF_SIZE - sizeof(struct i40e_virtchnl_ether_addr_list)) / sizeof(struct i40e_virtchnl_ether_addr); - len = I40EVF_MAX_AQ_BUF_SIZE; + len = sizeof(struct i40e_virtchnl_ether_addr_list) + + (count * sizeof(struct i40e_virtchnl_ether_addr)); + more = true; } - veal = kzalloc(len, GFP_ATOMIC); + veal = kzalloc(len, GFP_KERNEL); if (!veal) return; @@ -431,7 +434,8 @@ void i40evf_add_ether_addrs(struct i40evf_adapter *adapter) f->add = false; } } - adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_MAC_FILTER; + if (!more) + adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_MAC_FILTER; i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS, (u8 *)veal, len); kfree(veal); @@ -450,6 +454,7 @@ void i40evf_del_ether_addrs(struct i40evf_adapter *adapter) struct i40e_virtchnl_ether_addr_list *veal; struct i40evf_mac_filter *f, *ftmp; int len, i = 0, count = 0; + bool more = false; if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -474,9 +479,11 @@ void i40evf_del_ether_addrs(struct i40evf_adapter *adapter) count = (I40EVF_MAX_AQ_BUF_SIZE - sizeof(struct i40e_virtchnl_ether_addr_list)) / sizeof(struct i40e_virtchnl_ether_addr); - len = I40EVF_MAX_AQ_BUF_SIZE; + len = sizeof(struct i40e_virtchnl_ether_addr_list) + + (count * sizeof(struct i40e_virtchnl_ether_addr)); + more = true; } - veal = kzalloc(len, GFP_ATOMIC); + veal = kzalloc(len, GFP_KERNEL); if (!veal) return; @@ -490,7 +497,8 @@ void i40evf_del_ether_addrs(struct i40evf_adapter *adapter) kfree(f); } } - adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_MAC_FILTER; + if (!more) + adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_MAC_FILTER; i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS, (u8 *)veal, len); kfree(veal); @@ -509,6 +517,7 @@ void i40evf_add_vlans(struct i40evf_adapter *adapter) struct i40e_virtchnl_vlan_filter_list *vvfl; int len, i = 0, count = 0; struct i40evf_vlan_filter *f; + bool more = false; if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -534,9 +543,11 @@ void i40evf_add_vlans(struct i40evf_adapter *adapter) count = (I40EVF_MAX_AQ_BUF_SIZE - sizeof(struct i40e_virtchnl_vlan_filter_list)) / sizeof(u16); - len = I40EVF_MAX_AQ_BUF_SIZE; + len = sizeof(struct i40e_virtchnl_vlan_filter_list) + + (count * sizeof(u16)); + more = true; } - vvfl = kzalloc(len, GFP_ATOMIC); + vvfl = kzalloc(len, GFP_KERNEL); if (!vvfl) return; @@ -549,7 +560,8 @@ void i40evf_add_vlans(struct i40evf_adapter *adapter) f->add = false; } } - adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_VLAN_FILTER; + if (!more) + adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_VLAN_FILTER; i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_ADD_VLAN, (u8 *)vvfl, len); kfree(vvfl); } @@ -567,6 +579,7 @@ void i40evf_del_vlans(struct i40evf_adapter *adapter) struct i40e_virtchnl_vlan_filter_list *vvfl; struct i40evf_vlan_filter *f, *ftmp; int len, i = 0, count = 0; + bool more = false; if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -592,9 +605,11 @@ void i40evf_del_vlans(struct i40evf_adapter *adapter) count = (I40EVF_MAX_AQ_BUF_SIZE - sizeof(struct i40e_virtchnl_vlan_filter_list)) / sizeof(u16); - len = I40EVF_MAX_AQ_BUF_SIZE; + len = sizeof(struct i40e_virtchnl_vlan_filter_list) + + (count * sizeof(u16)); + more = true; } - vvfl = kzalloc(len, GFP_ATOMIC); + vvfl = kzalloc(len, GFP_KERNEL); if (!vvfl) return; @@ -608,7 +623,8 @@ void i40evf_del_vlans(struct i40evf_adapter *adapter) kfree(f); } } - adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_VLAN_FILTER; + if (!more) + adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_VLAN_FILTER; i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_DEL_VLAN, (u8 *)vvfl, len); kfree(vvfl); } @@ -724,9 +740,29 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, return; } if (v_retval) { - dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n", - v_retval, i40evf_stat_str(&adapter->hw, v_retval), - v_opcode); + switch (v_opcode) { + case I40E_VIRTCHNL_OP_ADD_VLAN: + dev_err(&adapter->pdev->dev, "Failed to add VLAN filter, error %s\n", + i40evf_stat_str(&adapter->hw, v_retval)); + break; + case I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS: + dev_err(&adapter->pdev->dev, "Failed to add MAC filter, error %s\n", + i40evf_stat_str(&adapter->hw, v_retval)); + break; + case I40E_VIRTCHNL_OP_DEL_VLAN: + dev_err(&adapter->pdev->dev, "Failed to delete VLAN filter, error %s\n", + i40evf_stat_str(&adapter->hw, v_retval)); + break; + case I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS: + dev_err(&adapter->pdev->dev, "Failed to delete MAC filter, error %s\n", + i40evf_stat_str(&adapter->hw, v_retval)); + break; + default: + dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n", + v_retval, + i40evf_stat_str(&adapter->hw, v_retval), + v_opcode); + } } switch (v_opcode) { case I40E_VIRTCHNL_OP_GET_STATS: { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 1d2174526a4c90..445b4c9169b61d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -139,6 +139,7 @@ enum ixgbe_tx_flags { #define IXGBE_X540_VF_DEVICE_ID 0x1515 struct vf_data_storage { + struct pci_dev *vfdev; unsigned char vf_mac_addresses[ETH_ALEN]; u16 vf_mc_hashes[IXGBE_MAX_VF_MC_ENTRIES]; u16 num_vf_mc_hashes; @@ -224,6 +225,8 @@ struct ixgbe_rx_queue_stats { u64 csum_err; }; +#define IXGBE_TS_HDR_LEN 8 + enum ixgbe_ring_state_t { __IXGBE_TX_FDIR_INIT_DONE, __IXGBE_TX_XPS_INIT_DONE, @@ -282,6 +285,8 @@ struct ixgbe_ring { u16 next_to_use; u16 next_to_clean; + unsigned long last_rx_timestamp; + union { u16 next_to_alloc; struct { @@ -587,9 +592,10 @@ static inline u16 ixgbe_desc_unused(struct ixgbe_ring *ring) struct ixgbe_mac_addr { u8 addr[ETH_ALEN]; - u16 queue; + u16 pool; u16 state; /* bitmask */ }; + #define IXGBE_MAC_STATE_DEFAULT 0x1 #define IXGBE_MAC_STATE_MODIFIED 0x2 #define IXGBE_MAC_STATE_IN_USE 0x4 @@ -639,6 +645,8 @@ struct ixgbe_adapter { #define IXGBE_FLAG_SRIOV_CAPABLE (u32)(1 << 22) #define IXGBE_FLAG_SRIOV_ENABLED (u32)(1 << 23) #define IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE BIT(24) +#define IXGBE_FLAG_RX_HWTSTAMP_ENABLED BIT(25) +#define IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER BIT(26) u32 flags2; #define IXGBE_FLAG2_RSC_CAPABLE (u32)(1 << 0) @@ -755,9 +763,12 @@ struct ixgbe_adapter { unsigned long last_rx_ptp_check; unsigned long last_rx_timestamp; spinlock_t tmreg_lock; - struct cyclecounter cc; - struct timecounter tc; + struct cyclecounter hw_cc; + struct timecounter hw_tc; u32 base_incval; + u32 tx_hwtstamp_timeouts; + u32 rx_hwtstamp_cleared; + void (*ptp_setup_sdp)(struct ixgbe_adapter *); /* SR-IOV */ DECLARE_BITMAP(active_vfs, IXGBE_MAX_VF_FUNCTIONS); @@ -883,9 +894,9 @@ int ixgbe_wol_supported(struct ixgbe_adapter *adapter, u16 device_id, void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter); #endif int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter, - u8 *addr, u16 queue); + const u8 *addr, u16 queue); int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, - u8 *addr, u16 queue); + const u8 *addr, u16 queue); void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter); netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *, struct ixgbe_adapter *, struct ixgbe_ring *); @@ -968,12 +979,33 @@ void ixgbe_ptp_suspend(struct ixgbe_adapter *adapter); void ixgbe_ptp_stop(struct ixgbe_adapter *adapter); void ixgbe_ptp_overflow_check(struct ixgbe_adapter *adapter); void ixgbe_ptp_rx_hang(struct ixgbe_adapter *adapter); -void ixgbe_ptp_rx_hwtstamp(struct ixgbe_adapter *adapter, struct sk_buff *skb); +void ixgbe_ptp_rx_pktstamp(struct ixgbe_q_vector *, struct sk_buff *); +void ixgbe_ptp_rx_rgtstamp(struct ixgbe_q_vector *, struct sk_buff *skb); +static inline void ixgbe_ptp_rx_hwtstamp(struct ixgbe_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + if (unlikely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_TSIP))) { + ixgbe_ptp_rx_pktstamp(rx_ring->q_vector, skb); + return; + } + + if (unlikely(!ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_TS))) + return; + + ixgbe_ptp_rx_rgtstamp(rx_ring->q_vector, skb); + + /* Update the last_rx_timestamp timer in order to enable watchdog check + * for error case of latched timestamp on a dropped packet. + */ + rx_ring->last_rx_timestamp = jiffies; +} + int ixgbe_ptp_set_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr); int ixgbe_ptp_get_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr); void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter); void ixgbe_ptp_reset(struct ixgbe_adapter *adapter); -void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter, u32 eicr); +void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter); #ifdef CONFIG_PCI_IOV void ixgbe_sriov_reinit(struct ixgbe_adapter *adapter); #endif diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c index 65db69b862fb9d..8f09d291a0430a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2014 Intel Corporation. + Copyright(c) 1999 - 2015 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -765,13 +765,14 @@ mac_reset_top: ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL) | IXGBE_CTRL_RST; IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl); IXGBE_WRITE_FLUSH(hw); + usleep_range(1000, 1200); /* Poll for reset bit to self-clear indicating reset is complete */ for (i = 0; i < 10; i++) { - udelay(1); ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL); if (!(ctrl & IXGBE_CTRL_RST)) break; + udelay(1); } if (ctrl & IXGBE_CTRL_RST) { status = IXGBE_ERR_RESET_FAILED; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c index a39afcf03e2c4a..b8bd72589f7294 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c @@ -990,13 +990,14 @@ mac_reset_top: ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL); IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl); IXGBE_WRITE_FLUSH(hw); + usleep_range(1000, 1200); /* Poll for reset bit to self-clear indicating reset is complete */ for (i = 0; i < 10; i++) { - udelay(1); ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL); if (!(ctrl & IXGBE_CTRL_RST_MASK)) break; + udelay(1); } if (ctrl & IXGBE_CTRL_RST_MASK) { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index ce61b36b94f101..daec6aef5dc8bf 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2014 Intel Corporation. + Copyright(c) 1999 - 2015 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -2454,6 +2454,17 @@ static s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw) /* Always set this bit to ensure any future transactions are blocked */ IXGBE_WRITE_REG(hw, IXGBE_CTRL, IXGBE_CTRL_GIO_DIS); + /* Poll for bit to read as set */ + for (i = 0; i < IXGBE_PCI_MASTER_DISABLE_TIMEOUT; i++) { + if (IXGBE_READ_REG(hw, IXGBE_CTRL) & IXGBE_CTRL_GIO_DIS) + break; + usleep_range(100, 120); + } + if (i >= IXGBE_PCI_MASTER_DISABLE_TIMEOUT) { + hw_dbg(hw, "GIO disable did not set - requesting resets\n"); + goto gio_disable_fail; + } + /* Exit if master requests are blocked */ if (!(IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_GIO) || ixgbe_removed(hw->hw_addr)) @@ -2475,6 +2486,7 @@ static s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw) * again to clear out any effects they may have had on our device. */ hw_dbg(hw, "GIO Master Disable bit didn't clear - requesting resets\n"); +gio_disable_fail: hw->mac.flags |= IXGBE_FLAGS_DOUBLE_RESET_REQUIRED; if (hw->mac.type >= ixgbe_mac_X550) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index c95042ee30dec4..ebd4522e7879dc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -172,6 +172,8 @@ MODULE_DESCRIPTION("Intel(R) 10 Gigabit PCI Express Network Driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); +static struct workqueue_struct *ixgbe_wq; + static bool ixgbe_check_cfg_remove(struct ixgbe_hw *hw, struct pci_dev *pdev); static int ixgbe_read_pci_cfg_word_parent(struct ixgbe_adapter *adapter, @@ -313,7 +315,7 @@ static void ixgbe_service_event_schedule(struct ixgbe_adapter *adapter) if (!test_bit(__IXGBE_DOWN, &adapter->state) && !test_bit(__IXGBE_REMOVING, &adapter->state) && !test_and_set_bit(__IXGBE_SERVICE_SCHED, &adapter->state)) - schedule_work(&adapter->service_task); + queue_work(ixgbe_wq, &adapter->service_task); } static void ixgbe_remove_adapter(struct ixgbe_hw *hw) @@ -1632,6 +1634,7 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring, struct sk_buff *skb) { struct net_device *dev = rx_ring->netdev; + u32 flags = rx_ring->q_vector->adapter->flags; ixgbe_update_rsc_stats(rx_ring, skb); @@ -1639,8 +1642,8 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring, ixgbe_rx_checksum(rx_ring, rx_desc, skb); - if (unlikely(ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_TS))) - ixgbe_ptp_rx_hwtstamp(rx_ring->q_vector->adapter, skb); + if (unlikely(flags & IXGBE_FLAG_RX_HWTSTAMP_ENABLED)) + ixgbe_ptp_rx_hwtstamp(rx_ring, rx_desc, skb); if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) && ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) { @@ -2738,7 +2741,7 @@ static irqreturn_t ixgbe_msix_other(int irq, void *data) ixgbe_check_fan_failure(adapter, eicr); if (unlikely(eicr & IXGBE_EICR_TIMESYNC)) - ixgbe_ptp_check_pps_event(adapter, eicr); + ixgbe_ptp_check_pps_event(adapter); /* re-enable the original interrupt state, no lsc, no queues */ if (!test_bit(__IXGBE_DOWN, &adapter->state)) @@ -2945,7 +2948,7 @@ static irqreturn_t ixgbe_intr(int irq, void *data) ixgbe_check_fan_failure(adapter, eicr); if (unlikely(eicr & IXGBE_EICR_TIMESYNC)) - ixgbe_ptp_check_pps_event(adapter, eicr); + ixgbe_ptp_check_pps_event(adapter); /* would disable interrupts here but EIAM disabled it */ napi_schedule_irqoff(&q_vector->napi); @@ -4029,124 +4032,156 @@ static int ixgbe_write_mc_addr_list(struct net_device *netdev) #ifdef CONFIG_PCI_IOV void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter) { + struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0]; struct ixgbe_hw *hw = &adapter->hw; int i; - for (i = 0; i < hw->mac.num_rar_entries; i++) { - if (adapter->mac_table[i].state & IXGBE_MAC_STATE_IN_USE) - hw->mac.ops.set_rar(hw, i, adapter->mac_table[i].addr, - adapter->mac_table[i].queue, + + for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) { + mac_table->state &= ~IXGBE_MAC_STATE_MODIFIED; + + if (mac_table->state & IXGBE_MAC_STATE_IN_USE) + hw->mac.ops.set_rar(hw, i, + mac_table->addr, + mac_table->pool, IXGBE_RAH_AV); else hw->mac.ops.clear_rar(hw, i); - - adapter->mac_table[i].state &= ~(IXGBE_MAC_STATE_MODIFIED); } } -#endif +#endif static void ixgbe_sync_mac_table(struct ixgbe_adapter *adapter) { + struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0]; struct ixgbe_hw *hw = &adapter->hw; int i; - for (i = 0; i < hw->mac.num_rar_entries; i++) { - if (adapter->mac_table[i].state & IXGBE_MAC_STATE_MODIFIED) { - if (adapter->mac_table[i].state & - IXGBE_MAC_STATE_IN_USE) - hw->mac.ops.set_rar(hw, i, - adapter->mac_table[i].addr, - adapter->mac_table[i].queue, - IXGBE_RAH_AV); - else - hw->mac.ops.clear_rar(hw, i); - adapter->mac_table[i].state &= - ~(IXGBE_MAC_STATE_MODIFIED); - } + for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) { + if (!(mac_table->state & IXGBE_MAC_STATE_MODIFIED)) + continue; + + mac_table->state &= ~IXGBE_MAC_STATE_MODIFIED; + + if (mac_table->state & IXGBE_MAC_STATE_IN_USE) + hw->mac.ops.set_rar(hw, i, + mac_table->addr, + mac_table->pool, + IXGBE_RAH_AV); + else + hw->mac.ops.clear_rar(hw, i); } } static void ixgbe_flush_sw_mac_table(struct ixgbe_adapter *adapter) { - int i; + struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0]; struct ixgbe_hw *hw = &adapter->hw; + int i; - for (i = 0; i < hw->mac.num_rar_entries; i++) { - adapter->mac_table[i].state |= IXGBE_MAC_STATE_MODIFIED; - adapter->mac_table[i].state &= ~IXGBE_MAC_STATE_IN_USE; - eth_zero_addr(adapter->mac_table[i].addr); - adapter->mac_table[i].queue = 0; + for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) { + mac_table->state |= IXGBE_MAC_STATE_MODIFIED; + mac_table->state &= ~IXGBE_MAC_STATE_IN_USE; } + ixgbe_sync_mac_table(adapter); } -static int ixgbe_available_rars(struct ixgbe_adapter *adapter) +static int ixgbe_available_rars(struct ixgbe_adapter *adapter, u16 pool) { + struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0]; struct ixgbe_hw *hw = &adapter->hw; int i, count = 0; - for (i = 0; i < hw->mac.num_rar_entries; i++) { - if (adapter->mac_table[i].state == 0) - count++; + for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) { + /* do not count default RAR as available */ + if (mac_table->state & IXGBE_MAC_STATE_DEFAULT) + continue; + + /* only count unused and addresses that belong to us */ + if (mac_table->state & IXGBE_MAC_STATE_IN_USE) { + if (mac_table->pool != pool) + continue; + } + + count++; } + return count; } /* this function destroys the first RAR entry */ -static void ixgbe_mac_set_default_filter(struct ixgbe_adapter *adapter, - u8 *addr) +static void ixgbe_mac_set_default_filter(struct ixgbe_adapter *adapter) { + struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0]; struct ixgbe_hw *hw = &adapter->hw; - memcpy(&adapter->mac_table[0].addr, addr, ETH_ALEN); - adapter->mac_table[0].queue = VMDQ_P(0); - adapter->mac_table[0].state = (IXGBE_MAC_STATE_DEFAULT | - IXGBE_MAC_STATE_IN_USE); - hw->mac.ops.set_rar(hw, 0, adapter->mac_table[0].addr, - adapter->mac_table[0].queue, + memcpy(&mac_table->addr, hw->mac.addr, ETH_ALEN); + mac_table->pool = VMDQ_P(0); + + mac_table->state = IXGBE_MAC_STATE_DEFAULT | IXGBE_MAC_STATE_IN_USE; + + hw->mac.ops.set_rar(hw, 0, mac_table->addr, mac_table->pool, IXGBE_RAH_AV); } -int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter, u8 *addr, u16 queue) +int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter, + const u8 *addr, u16 pool) { + struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0]; struct ixgbe_hw *hw = &adapter->hw; int i; if (is_zero_ether_addr(addr)) return -EINVAL; - for (i = 0; i < hw->mac.num_rar_entries; i++) { - if (adapter->mac_table[i].state & IXGBE_MAC_STATE_IN_USE) + for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) { + if (mac_table->state & IXGBE_MAC_STATE_IN_USE) continue; - adapter->mac_table[i].state |= (IXGBE_MAC_STATE_MODIFIED | - IXGBE_MAC_STATE_IN_USE); - ether_addr_copy(adapter->mac_table[i].addr, addr); - adapter->mac_table[i].queue = queue; + + ether_addr_copy(mac_table->addr, addr); + mac_table->pool = pool; + + mac_table->state |= IXGBE_MAC_STATE_MODIFIED | + IXGBE_MAC_STATE_IN_USE; + ixgbe_sync_mac_table(adapter); + return i; } + return -ENOMEM; } -int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, u8 *addr, u16 queue) +int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, + const u8 *addr, u16 pool) { - /* search table for addr, if found, set to 0 and sync */ - int i; + struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0]; struct ixgbe_hw *hw = &adapter->hw; + int i; if (is_zero_ether_addr(addr)) return -EINVAL; - for (i = 0; i < hw->mac.num_rar_entries; i++) { - if (ether_addr_equal(addr, adapter->mac_table[i].addr) && - adapter->mac_table[i].queue == queue) { - adapter->mac_table[i].state |= IXGBE_MAC_STATE_MODIFIED; - adapter->mac_table[i].state &= ~IXGBE_MAC_STATE_IN_USE; - eth_zero_addr(adapter->mac_table[i].addr); - adapter->mac_table[i].queue = 0; - ixgbe_sync_mac_table(adapter); - return 0; - } + /* search table for addr, if found clear IN_USE flag and sync */ + for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) { + /* we can only delete an entry if it is in use */ + if (!(mac_table->state & IXGBE_MAC_STATE_IN_USE)) + continue; + /* we only care about entries that belong to the given pool */ + if (mac_table->pool != pool) + continue; + /* we only care about a specific MAC address */ + if (!ether_addr_equal(addr, mac_table->addr)) + continue; + + mac_table->state |= IXGBE_MAC_STATE_MODIFIED; + mac_table->state &= ~IXGBE_MAC_STATE_IN_USE; + + ixgbe_sync_mac_table(adapter); + + return 0; } + return -ENOMEM; } /** @@ -4164,7 +4199,7 @@ static int ixgbe_write_uc_addr_list(struct net_device *netdev, int vfn) int count = 0; /* return ENOMEM indicating insufficient memory for addresses */ - if (netdev_uc_count(netdev) > ixgbe_available_rars(adapter)) + if (netdev_uc_count(netdev) > ixgbe_available_rars(adapter, vfn)) return -ENOMEM; if (!netdev_uc_empty(netdev)) { @@ -4178,6 +4213,25 @@ static int ixgbe_write_uc_addr_list(struct net_device *netdev, int vfn) return count; } +static int ixgbe_uc_sync(struct net_device *netdev, const unsigned char *addr) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + int ret; + + ret = ixgbe_add_mac_filter(adapter, addr, VMDQ_P(0)); + + return min_t(int, ret, 0); +} + +static int ixgbe_uc_unsync(struct net_device *netdev, const unsigned char *addr) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + ixgbe_del_mac_filter(adapter, addr, VMDQ_P(0)); + + return 0; +} + /** * ixgbe_set_rx_mode - Unicast, Multicast and Promiscuous mode set * @netdev: network interface device structure @@ -4233,8 +4287,7 @@ void ixgbe_set_rx_mode(struct net_device *netdev) * sufficient space to store all the addresses then enable * unicast promiscuous mode */ - count = ixgbe_write_uc_addr_list(netdev, VMDQ_P(0)); - if (count < 0) { + if (__dev_uc_sync(netdev, ixgbe_uc_sync, ixgbe_uc_unsync)) { fctrl |= IXGBE_FCTRL_UPE; vmolr |= IXGBE_VMOLR_ROPE; } @@ -5037,7 +5090,6 @@ void ixgbe_reset(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; struct net_device *netdev = adapter->netdev; int err; - u8 old_addr[ETH_ALEN]; if (ixgbe_removed(hw->hw_addr)) return; @@ -5073,10 +5125,13 @@ void ixgbe_reset(struct ixgbe_adapter *adapter) } clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state); - /* do not flush user set addresses */ - memcpy(old_addr, &adapter->mac_table[0].addr, netdev->addr_len); + + /* flush entries out of MAC table */ ixgbe_flush_sw_mac_table(adapter); - ixgbe_mac_set_default_filter(adapter, old_addr); + __dev_uc_unsync(netdev, NULL); + + /* do not flush user set addresses */ + ixgbe_mac_set_default_filter(adapter); /* update SAN MAC vmdq pool selection */ if (hw->mac.san_mac_rar_index) @@ -6611,10 +6666,8 @@ static void ixgbe_check_for_bad_vf(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct pci_dev *pdev = adapter->pdev; - struct pci_dev *vfdev; + unsigned int vf; u32 gpc; - int pos; - unsigned short vf_id; if (!(netif_carrier_ok(adapter->netdev))) return; @@ -6631,26 +6684,17 @@ static void ixgbe_check_for_bad_vf(struct ixgbe_adapter *adapter) if (!pdev) return; - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); - if (!pos) - return; - - /* get the device ID for the VF */ - pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID, &vf_id); - /* check status reg for all VFs owned by this PF */ - vfdev = pci_get_device(pdev->vendor, vf_id, NULL); - while (vfdev) { - if (vfdev->is_virtfn && (vfdev->physfn == pdev)) { - u16 status_reg; - - pci_read_config_word(vfdev, PCI_STATUS, &status_reg); - if (status_reg & PCI_STATUS_REC_MASTER_ABORT) - /* issue VFLR */ - ixgbe_issue_vf_flr(adapter, vfdev); - } + for (vf = 0; vf < adapter->num_vfs; ++vf) { + struct pci_dev *vfdev = adapter->vfinfo[vf].vfdev; + u16 status_reg; - vfdev = pci_get_device(pdev->vendor, vf_id, vfdev); + if (!vfdev) + continue; + pci_read_config_word(vfdev, PCI_STATUS, &status_reg); + if (status_reg != IXGBE_FAILED_READ_CFG_WORD && + status_reg & PCI_STATUS_REC_MASTER_ABORT) + ixgbe_issue_vf_flr(adapter, vfdev); } } @@ -7019,6 +7063,7 @@ static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring, struct tcphdr *tcphdr; u8 *raw; } transport_hdr; + __be16 frag_off; if (skb->encapsulation) { network_hdr.raw = skb_inner_network_header(skb); @@ -7042,13 +7087,17 @@ static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring, case 6: vlan_macip_lens |= transport_hdr.raw - network_hdr.raw; l4_hdr = network_hdr.ipv6->nexthdr; + if (likely((transport_hdr.raw - network_hdr.raw) == + sizeof(struct ipv6hdr))) + break; + ipv6_skip_exthdr(skb, network_hdr.raw - skb->data + + sizeof(struct ipv6hdr), + &l4_hdr, &frag_off); + if (unlikely(frag_off)) + l4_hdr = NEXTHDR_FRAGMENT; break; default: - if (unlikely(net_ratelimit())) { - dev_warn(tx_ring->dev, - "partial checksum but version=%d\n", - network_hdr.ipv4->version); - } + break; } switch (l4_hdr) { @@ -7069,16 +7118,18 @@ static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring, default: if (unlikely(net_ratelimit())) { dev_warn(tx_ring->dev, - "partial checksum but l4 proto=%x!\n", - l4_hdr); + "partial checksum, version=%d, l4 proto=%x\n", + network_hdr.ipv4->version, l4_hdr); } - break; + skb_checksum_help(skb); + goto no_csum; } /* update TX checksum flag */ first->tx_flags |= IXGBE_TX_FLAGS_CSUM; } +no_csum: /* vlan_macip_lens: MACLEN, VLAN tag */ vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK; @@ -7654,17 +7705,16 @@ static int ixgbe_set_mac(struct net_device *netdev, void *p) struct ixgbe_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; struct sockaddr *addr = p; - int ret; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - ixgbe_del_mac_filter(adapter, hw->mac.addr, VMDQ_P(0)); memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); - ret = ixgbe_add_mac_filter(adapter, hw->mac.addr, VMDQ_P(0)); - return ret > 0 ? 0 : ret; + ixgbe_mac_set_default_filter(adapter); + + return 0; } static int @@ -8147,7 +8197,10 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], { /* guarantee we can provide a unique filter for the unicast address */ if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) { - if (IXGBE_MAX_PF_MACVLANS <= netdev_uc_count(dev)) + struct ixgbe_adapter *adapter = netdev_priv(dev); + u16 pool = VMDQ_P(0); + + if (netdev_uc_count(dev) >= ixgbe_available_rars(adapter, pool)) return -ENOMEM; } @@ -8865,7 +8918,7 @@ skip_sriov: goto err_sw_init; } - ixgbe_mac_set_default_filter(adapter, hw->mac.perm_addr); + ixgbe_mac_set_default_filter(adapter); setup_timer(&adapter->service_timer, &ixgbe_service_timer, (unsigned long) adapter); @@ -9320,6 +9373,12 @@ static int __init ixgbe_init_module(void) pr_info("%s - version %s\n", ixgbe_driver_string, ixgbe_driver_version); pr_info("%s\n", ixgbe_copyright); + ixgbe_wq = create_singlethread_workqueue(ixgbe_driver_name); + if (!ixgbe_wq) { + pr_err("%s: Failed to create workqueue\n", ixgbe_driver_name); + return -ENOMEM; + } + ixgbe_dbg_init(); ret = pci_register_driver(&ixgbe_driver); @@ -9351,6 +9410,10 @@ static void __exit ixgbe_exit_module(void) pci_unregister_driver(&ixgbe_driver); ixgbe_dbg_exit(); + if (ixgbe_wq) { + destroy_workqueue(ixgbe_wq); + ixgbe_wq = NULL; + } } #ifdef CONFIG_IXGBE_DCA diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index fb8673d6380689..db0731e05401e4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -2393,6 +2393,9 @@ s32 ixgbe_set_copper_phy_power(struct ixgbe_hw *hw, bool on) if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper) return 0; + if (!on && ixgbe_mng_present(hw)) + return 0; + status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_VENDOR_SPECIFIC_1_CONTROL, IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, ®); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index e5ba04025e2b9d..ef1504d41890f6 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2013 Intel Corporation. + Copyright(c) 1999 - 2015 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -27,6 +27,7 @@ *******************************************************************************/ #include "ixgbe.h" #include <linux/ptp_classify.h> +#include <linux/clocksource.h> /* * The 82599 and the X540 do not have true 64bit nanosecond scale @@ -93,7 +94,6 @@ #define IXGBE_INCVAL_SHIFT_82599 7 #define IXGBE_INCPER_SHIFT_82599 24 -#define IXGBE_MAX_TIMEADJ_VALUE 0x7FFFFFFFFFFFFFFFULL #define IXGBE_OVERFLOW_PERIOD (HZ * 30) #define IXGBE_PTP_TX_TIMEOUT (HZ * 15) @@ -104,8 +104,68 @@ */ #define IXGBE_PTP_PPS_HALF_SECOND 500000000ULL +/* In contrast, the X550 controller has two registers, SYSTIMEH and SYSTIMEL + * which contain measurements of seconds and nanoseconds respectively. This + * matches the standard linux representation of time in the kernel. In addition, + * the X550 also has a SYSTIMER register which represents residue, or + * subnanosecond overflow adjustments. To control clock adjustment, the TIMINCA + * register is used, but it is unlike the X540 and 82599 devices. TIMINCA + * represents units of 2^-32 nanoseconds, and uses 31 bits for this, with the + * high bit representing whether the adjustent is positive or negative. Every + * clock cycle, the X550 will add 12.5 ns + TIMINCA which can result in a range + * of 12 to 13 nanoseconds adjustment. Unlike the 82599 and X540 devices, the + * X550's clock for purposes of SYSTIME generation is constant and not dependent + * on the link speed. + * + * SYSTIMEH SYSTIMEL SYSTIMER + * +--------------+ +--------------+ +-------------+ + * X550 | 32 | | 32 | | 32 | + * *--------------+ +--------------+ +-------------+ + * \____seconds___/ \_nanoseconds_/ \__2^-32 ns__/ + * + * This results in a full 96 bits to represent the clock, with 32 bits for + * seconds, 32 bits for nanoseconds (largest value is 0d999999999 or just under + * 1 second) and an additional 32 bits to measure sub nanosecond adjustments for + * underflow of adjustments. + * + * The 32 bits of seconds for the X550 overflows every + * 2^32 / ( 365.25 * 24 * 60 * 60 ) = ~136 years. + * + * In order to adjust the clock frequency for the X550, the TIMINCA register is + * provided. This register represents a + or minus nearly 0.5 ns adjustment to + * the base frequency. It is measured in 2^-32 ns units, with the high bit being + * the sign bit. This register enables software to calculate frequency + * adjustments and apply them directly to the clock rate. + * + * The math for converting ppb into TIMINCA values is fairly straightforward. + * TIMINCA value = ( Base_Frequency * ppb ) / 1000000000ULL + * + * This assumes that ppb is never high enough to create a value bigger than + * TIMINCA's 31 bits can store. This is ensured by the stack. Calculating this + * value is also simple. + * Max ppb = ( Max Adjustment / Base Frequency ) / 1000000000ULL + * + * For the X550, the Max adjustment is +/- 0.5 ns, and the base frequency is + * 12.5 nanoseconds. This means that the Max ppb is 39999999 + * Note: We subtract one in order to ensure no overflow, because the TIMINCA + * register can only hold slightly under 0.5 nanoseconds. + * + * Because TIMINCA is measured in 2^-32 ns units, we have to convert 12.5 ns + * into 2^-32 units, which is + * + * 12.5 * 2^32 = C80000000 + * + * Some revisions of hardware have a faster base frequency than the registers + * were defined for. To fix this, we use a timecounter structure with the + * proper mult and shift to convert the cycles into nanoseconds of time. + */ +#define IXGBE_X550_BASE_PERIOD 0xC80000000ULL +#define INCVALUE_MASK 0x7FFFFFFF +#define ISGN 0x80000000 +#define MAX_TIMADJ 0x7FFFFFFF + /** - * ixgbe_ptp_setup_sdp + * ixgbe_ptp_setup_sdp_x540 * @hw: the hardware private structure * * this function enables or disables the clock out feature on SDP0 for @@ -116,83 +176,116 @@ * aligns the start of the PPS signal to that value. The shift is * necessary because it can change based on the link speed. */ -static void ixgbe_ptp_setup_sdp(struct ixgbe_adapter *adapter) +static void ixgbe_ptp_setup_sdp_x540(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; - int shift = adapter->cc.shift; + int shift = adapter->hw_cc.shift; u32 esdp, tsauxc, clktiml, clktimh, trgttiml, trgttimh, rem; u64 ns = 0, clock_edge = 0; - if ((adapter->flags2 & IXGBE_FLAG2_PTP_PPS_ENABLED) && - (hw->mac.type == ixgbe_mac_X540)) { + /* disable the pin first */ + IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, 0x0); + IXGBE_WRITE_FLUSH(hw); - /* disable the pin first */ - IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, 0x0); - IXGBE_WRITE_FLUSH(hw); + if (!(adapter->flags2 & IXGBE_FLAG2_PTP_PPS_ENABLED)) + return; - esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); + esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); - /* - * enable the SDP0 pin as output, and connected to the - * native function for Timesync (ClockOut) - */ - esdp |= (IXGBE_ESDP_SDP0_DIR | - IXGBE_ESDP_SDP0_NATIVE); + /* enable the SDP0 pin as output, and connected to the + * native function for Timesync (ClockOut) + */ + esdp |= IXGBE_ESDP_SDP0_DIR | + IXGBE_ESDP_SDP0_NATIVE; - /* - * enable the Clock Out feature on SDP0, and allow - * interrupts to occur when the pin changes - */ - tsauxc = (IXGBE_TSAUXC_EN_CLK | - IXGBE_TSAUXC_SYNCLK | - IXGBE_TSAUXC_SDP0_INT); + /* enable the Clock Out feature on SDP0, and allow + * interrupts to occur when the pin changes + */ + tsauxc = IXGBE_TSAUXC_EN_CLK | + IXGBE_TSAUXC_SYNCLK | + IXGBE_TSAUXC_SDP0_INT; - /* clock period (or pulse length) */ - clktiml = (u32)(IXGBE_PTP_PPS_HALF_SECOND << shift); - clktimh = (u32)((IXGBE_PTP_PPS_HALF_SECOND << shift) >> 32); + /* clock period (or pulse length) */ + clktiml = (u32)(IXGBE_PTP_PPS_HALF_SECOND << shift); + clktimh = (u32)((IXGBE_PTP_PPS_HALF_SECOND << shift) >> 32); - /* - * Account for the cyclecounter wrap-around value by - * using the converted ns value of the current time to - * check for when the next aligned second would occur. - */ - clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIML); - clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIMH) << 32; - ns = timecounter_cyc2time(&adapter->tc, clock_edge); + /* Account for the cyclecounter wrap-around value by + * using the converted ns value of the current time to + * check for when the next aligned second would occur. + */ + clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIML); + clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIMH) << 32; + ns = timecounter_cyc2time(&adapter->hw_tc, clock_edge); - div_u64_rem(ns, IXGBE_PTP_PPS_HALF_SECOND, &rem); - clock_edge += ((IXGBE_PTP_PPS_HALF_SECOND - (u64)rem) << shift); + div_u64_rem(ns, IXGBE_PTP_PPS_HALF_SECOND, &rem); + clock_edge += ((IXGBE_PTP_PPS_HALF_SECOND - (u64)rem) << shift); - /* specify the initial clock start time */ - trgttiml = (u32)clock_edge; - trgttimh = (u32)(clock_edge >> 32); + /* specify the initial clock start time */ + trgttiml = (u32)clock_edge; + trgttimh = (u32)(clock_edge >> 32); - IXGBE_WRITE_REG(hw, IXGBE_CLKTIML, clktiml); - IXGBE_WRITE_REG(hw, IXGBE_CLKTIMH, clktimh); - IXGBE_WRITE_REG(hw, IXGBE_TRGTTIML0, trgttiml); - IXGBE_WRITE_REG(hw, IXGBE_TRGTTIMH0, trgttimh); + IXGBE_WRITE_REG(hw, IXGBE_CLKTIML, clktiml); + IXGBE_WRITE_REG(hw, IXGBE_CLKTIMH, clktimh); + IXGBE_WRITE_REG(hw, IXGBE_TRGTTIML0, trgttiml); + IXGBE_WRITE_REG(hw, IXGBE_TRGTTIMH0, trgttimh); - IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp); - IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, tsauxc); - } else { - IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, 0x0); - } + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp); + IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, tsauxc); IXGBE_WRITE_FLUSH(hw); } /** - * ixgbe_ptp_read - read raw cycle counter (to be used by time counter) + * ixgbe_ptp_read_X550 - read cycle counter value + * @hw_cc: cyclecounter structure + * + * This function reads SYSTIME registers. It is called by the cyclecounter + * structure to convert from internal representation into nanoseconds. We need + * this for X550 since some skews do not have expected clock frequency and + * result of SYSTIME is 32bits of "billions of cycles" and 32 bits of + * "cycles", rather than seconds and nanoseconds. + */ +static cycle_t ixgbe_ptp_read_X550(const struct cyclecounter *hw_cc) +{ + struct ixgbe_adapter *adapter = + container_of(hw_cc, struct ixgbe_adapter, hw_cc); + struct ixgbe_hw *hw = &adapter->hw; + struct timespec64 ts; + + /* storage is 32 bits of 'billions of cycles' and 32 bits of 'cycles'. + * Some revisions of hardware run at a higher frequency and so the + * cycles are not guaranteed to be nanoseconds. The timespec64 created + * here is used for its math/conversions but does not necessarily + * represent nominal time. + * + * It should be noted that this cyclecounter will overflow at a + * non-bitmask field since we have to convert our billions of cycles + * into an actual cycles count. This results in some possible weird + * situations at high cycle counter stamps. However given that 32 bits + * of "seconds" is ~138 years this isn't a problem. Even at the + * increased frequency of some revisions, this is still ~103 years. + * Since the SYSTIME values start at 0 and we never write them, it is + * highly unlikely for the cyclecounter to overflow in practice. + */ + IXGBE_READ_REG(hw, IXGBE_SYSTIMR); + ts.tv_nsec = IXGBE_READ_REG(hw, IXGBE_SYSTIML); + ts.tv_sec = IXGBE_READ_REG(hw, IXGBE_SYSTIMH); + + return (u64)timespec64_to_ns(&ts); +} + +/** + * ixgbe_ptp_read_82599 - read raw cycle counter (to be used by time counter) * @cc: the cyclecounter structure * * this function reads the cyclecounter registers and is called by the * cyclecounter structure used to construct a ns counter from the * arbitrary fixed point registers */ -static cycle_t ixgbe_ptp_read(const struct cyclecounter *cc) +static cycle_t ixgbe_ptp_read_82599(const struct cyclecounter *cc) { struct ixgbe_adapter *adapter = - container_of(cc, struct ixgbe_adapter, cc); + container_of(cc, struct ixgbe_adapter, hw_cc); struct ixgbe_hw *hw = &adapter->hw; u64 stamp = 0; @@ -203,20 +296,79 @@ static cycle_t ixgbe_ptp_read(const struct cyclecounter *cc) } /** - * ixgbe_ptp_adjfreq + * ixgbe_ptp_convert_to_hwtstamp - convert register value to hw timestamp + * @adapter: private adapter structure + * @hwtstamp: stack timestamp structure + * @systim: unsigned 64bit system time value + * + * We need to convert the adapter's RX/TXSTMP registers into a hwtstamp value + * which can be used by the stack's ptp functions. + * + * The lock is used to protect consistency of the cyclecounter and the SYSTIME + * registers. However, it does not need to protect against the Rx or Tx + * timestamp registers, as there can't be a new timestamp until the old one is + * unlatched by reading. + * + * In addition to the timestamp in hardware, some controllers need a software + * overflow cyclecounter, and this function takes this into account as well. + **/ +static void ixgbe_ptp_convert_to_hwtstamp(struct ixgbe_adapter *adapter, + struct skb_shared_hwtstamps *hwtstamp, + u64 timestamp) +{ + unsigned long flags; + struct timespec64 systime; + u64 ns; + + memset(hwtstamp, 0, sizeof(*hwtstamp)); + + switch (adapter->hw.mac.type) { + /* X550 and later hardware supposedly represent time using a seconds + * and nanoseconds counter, instead of raw 64bits nanoseconds. We need + * to convert the timestamp into cycles before it can be fed to the + * cyclecounter. We need an actual cyclecounter because some revisions + * of hardware run at a higher frequency and thus the counter does + * not represent seconds/nanoseconds. Instead it can be thought of as + * cycles and billions of cycles. + */ + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + /* Upper 32 bits represent billions of cycles, lower 32 bits + * represent cycles. However, we use timespec64_to_ns for the + * correct math even though the units haven't been corrected + * yet. + */ + systime.tv_sec = timestamp >> 32; + systime.tv_nsec = timestamp & 0xFFFFFFFF; + + timestamp = timespec64_to_ns(&systime); + break; + default: + break; + } + + spin_lock_irqsave(&adapter->tmreg_lock, flags); + ns = timecounter_cyc2time(&adapter->hw_tc, timestamp); + spin_unlock_irqrestore(&adapter->tmreg_lock, flags); + + hwtstamp->hwtstamp = ns_to_ktime(ns); +} + +/** + * ixgbe_ptp_adjfreq_82599 * @ptp: the ptp clock structure * @ppb: parts per billion adjustment from base * * adjust the frequency of the ptp cycle counter by the * indicated ppb from the base frequency. */ -static int ixgbe_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) +static int ixgbe_ptp_adjfreq_82599(struct ptp_clock_info *ptp, s32 ppb) { struct ixgbe_adapter *adapter = container_of(ptp, struct ixgbe_adapter, ptp_caps); struct ixgbe_hw *hw = &adapter->hw; - u64 freq; - u32 diff, incval; + u64 freq, incval; + u32 diff; int neg_adj = 0; if (ppb < 0) { @@ -235,12 +387,16 @@ static int ixgbe_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) switch (hw->mac.type) { case ixgbe_mac_X540: - IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, incval); + if (incval > 0xFFFFFFFFULL) + e_dev_warn("PTP ppb adjusted SYSTIME rate overflowed!\n"); + IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, (u32)incval); break; case ixgbe_mac_82599EB: + if (incval > 0x00FFFFFFULL) + e_dev_warn("PTP ppb adjusted SYSTIME rate overflowed!\n"); IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, (1 << IXGBE_INCPER_SHIFT_82599) | - incval); + ((u32)incval & 0x00FFFFFFUL)); break; default: break; @@ -250,6 +406,43 @@ static int ixgbe_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) } /** + * ixgbe_ptp_adjfreq_X550 + * @ptp: the ptp clock structure + * @ppb: parts per billion adjustment from base + * + * adjust the frequency of the SYSTIME registers by the indicated ppb from base + * frequency + */ +static int ixgbe_ptp_adjfreq_X550(struct ptp_clock_info *ptp, s32 ppb) +{ + struct ixgbe_adapter *adapter = + container_of(ptp, struct ixgbe_adapter, ptp_caps); + struct ixgbe_hw *hw = &adapter->hw; + int neg_adj = 0; + u64 rate = IXGBE_X550_BASE_PERIOD; + u32 inca; + + if (ppb < 0) { + neg_adj = 1; + ppb = -ppb; + } + rate *= ppb; + rate = div_u64(rate, 1000000000ULL); + + /* warn if rate is too large */ + if (rate >= INCVALUE_MASK) + e_dev_warn("PTP ppb adjusted SYSTIME rate overflowed!\n"); + + inca = rate & INCVALUE_MASK; + if (neg_adj) + inca |= ISGN; + + IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, inca); + + return 0; +} + +/** * ixgbe_ptp_adjtime * @ptp: the ptp clock structure * @delta: offset to adjust the cycle counter by @@ -263,10 +456,11 @@ static int ixgbe_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) unsigned long flags; spin_lock_irqsave(&adapter->tmreg_lock, flags); - timecounter_adjtime(&adapter->tc, delta); + timecounter_adjtime(&adapter->hw_tc, delta); spin_unlock_irqrestore(&adapter->tmreg_lock, flags); - ixgbe_ptp_setup_sdp(adapter); + if (adapter->ptp_setup_sdp) + adapter->ptp_setup_sdp(adapter); return 0; } @@ -283,11 +477,11 @@ static int ixgbe_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) { struct ixgbe_adapter *adapter = container_of(ptp, struct ixgbe_adapter, ptp_caps); - u64 ns; unsigned long flags; + u64 ns; spin_lock_irqsave(&adapter->tmreg_lock, flags); - ns = timecounter_read(&adapter->tc); + ns = timecounter_read(&adapter->hw_tc); spin_unlock_irqrestore(&adapter->tmreg_lock, flags); *ts = ns_to_timespec64(ns); @@ -308,17 +502,16 @@ static int ixgbe_ptp_settime(struct ptp_clock_info *ptp, { struct ixgbe_adapter *adapter = container_of(ptp, struct ixgbe_adapter, ptp_caps); - u64 ns; unsigned long flags; - - ns = timespec64_to_ns(ts); + u64 ns = timespec64_to_ns(ts); /* reset the timecounter */ spin_lock_irqsave(&adapter->tmreg_lock, flags); - timecounter_init(&adapter->tc, &adapter->cc, ns); + timecounter_init(&adapter->hw_tc, &adapter->hw_cc, ns); spin_unlock_irqrestore(&adapter->tmreg_lock, flags); - ixgbe_ptp_setup_sdp(adapter); + if (adapter->ptp_setup_sdp) + adapter->ptp_setup_sdp(adapter); return 0; } @@ -343,33 +536,26 @@ static int ixgbe_ptp_feature_enable(struct ptp_clock_info *ptp, * event when the clock SDP triggers. Clear mask when PPS is * disabled */ - if (rq->type == PTP_CLK_REQ_PPS) { - switch (adapter->hw.mac.type) { - case ixgbe_mac_X540: - if (on) - adapter->flags2 |= IXGBE_FLAG2_PTP_PPS_ENABLED; - else - adapter->flags2 &= ~IXGBE_FLAG2_PTP_PPS_ENABLED; - - ixgbe_ptp_setup_sdp(adapter); - return 0; - default: - break; - } - } + if (rq->type != PTP_CLK_REQ_PPS || !adapter->ptp_setup_sdp) + return -ENOTSUPP; + + if (on) + adapter->flags2 |= IXGBE_FLAG2_PTP_PPS_ENABLED; + else + adapter->flags2 &= ~IXGBE_FLAG2_PTP_PPS_ENABLED; - return -ENOTSUPP; + adapter->ptp_setup_sdp(adapter); + return 0; } /** * ixgbe_ptp_check_pps_event * @adapter: the private adapter structure - * @eicr: the interrupt cause register value * * This function is called by the interrupt routine when checking for * interrupts. It will check and handle a pps event. */ -void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter, u32 eicr) +void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct ptp_clock_event event; @@ -425,7 +611,9 @@ void ixgbe_ptp_rx_hang(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 tsyncrxctl = IXGBE_READ_REG(hw, IXGBE_TSYNCRXCTL); + struct ixgbe_ring *rx_ring; unsigned long rx_event; + int n; /* if we don't have a valid timestamp in the registers, just update the * timeout counter and exit @@ -437,19 +625,43 @@ void ixgbe_ptp_rx_hang(struct ixgbe_adapter *adapter) /* determine the most recent watchdog or rx_timestamp event */ rx_event = adapter->last_rx_ptp_check; - if (time_after(adapter->last_rx_timestamp, rx_event)) - rx_event = adapter->last_rx_timestamp; + for (n = 0; n < adapter->num_rx_queues; n++) { + rx_ring = adapter->rx_ring[n]; + if (time_after(rx_ring->last_rx_timestamp, rx_event)) + rx_event = rx_ring->last_rx_timestamp; + } /* only need to read the high RXSTMP register to clear the lock */ - if (time_is_before_jiffies(rx_event + 5*HZ)) { + if (time_is_before_jiffies(rx_event + 5 * HZ)) { IXGBE_READ_REG(hw, IXGBE_RXSTMPH); adapter->last_rx_ptp_check = jiffies; + adapter->rx_hwtstamp_cleared++; e_warn(drv, "clearing RX Timestamp hang\n"); } } /** + * ixgbe_ptp_clear_tx_timestamp - utility function to clear Tx timestamp state + * @adapter: the private adapter structure + * + * This function should be called whenever the state related to a Tx timestamp + * needs to be cleared. This helps ensure that all related bits are reset for + * the next Tx timestamp event. + */ +static void ixgbe_ptp_clear_tx_timestamp(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + + IXGBE_READ_REG(hw, IXGBE_TXSTMPH); + if (adapter->ptp_tx_skb) { + dev_kfree_skb_any(adapter->ptp_tx_skb); + adapter->ptp_tx_skb = NULL; + } + clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state); +} + +/** * ixgbe_ptp_tx_hwtstamp - utility function which checks for TX time stamp * @adapter: the private adapter struct * @@ -461,23 +673,15 @@ static void ixgbe_ptp_tx_hwtstamp(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct skb_shared_hwtstamps shhwtstamps; - u64 regval = 0, ns; - unsigned long flags; + u64 regval = 0; regval |= (u64)IXGBE_READ_REG(hw, IXGBE_TXSTMPL); regval |= (u64)IXGBE_READ_REG(hw, IXGBE_TXSTMPH) << 32; - spin_lock_irqsave(&adapter->tmreg_lock, flags); - ns = timecounter_cyc2time(&adapter->tc, regval); - spin_unlock_irqrestore(&adapter->tmreg_lock, flags); - - memset(&shhwtstamps, 0, sizeof(shhwtstamps)); - shhwtstamps.hwtstamp = ns_to_ktime(ns); + ixgbe_ptp_convert_to_hwtstamp(adapter, &shhwtstamps, regval); skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps); - dev_kfree_skb_any(adapter->ptp_tx_skb); - adapter->ptp_tx_skb = NULL; - clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state); + ixgbe_ptp_clear_tx_timestamp(adapter); } /** @@ -497,38 +701,85 @@ static void ixgbe_ptp_tx_hwtstamp_work(struct work_struct *work) IXGBE_PTP_TX_TIMEOUT); u32 tsynctxctl; - if (timeout) { - dev_kfree_skb_any(adapter->ptp_tx_skb); - adapter->ptp_tx_skb = NULL; - clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state); - e_warn(drv, "clearing Tx Timestamp hang\n"); + /* we have to have a valid skb to poll for a timestamp */ + if (!adapter->ptp_tx_skb) { + ixgbe_ptp_clear_tx_timestamp(adapter); return; } + /* stop polling once we have a valid timestamp */ tsynctxctl = IXGBE_READ_REG(hw, IXGBE_TSYNCTXCTL); - if (tsynctxctl & IXGBE_TSYNCTXCTL_VALID) + if (tsynctxctl & IXGBE_TSYNCTXCTL_VALID) { ixgbe_ptp_tx_hwtstamp(adapter); - else + return; + } + + if (timeout) { + ixgbe_ptp_clear_tx_timestamp(adapter); + adapter->tx_hwtstamp_timeouts++; + e_warn(drv, "clearing Tx Timestamp hang\n"); + } else { /* reschedule to keep checking if it's not available yet */ schedule_work(&adapter->ptp_tx_work); + } } /** - * ixgbe_ptp_rx_hwtstamp - utility function which checks for RX time stamp - * @adapter: pointer to adapter struct + * ixgbe_ptp_rx_pktstamp - utility function to get RX time stamp from buffer + * @q_vector: structure containing interrupt and ring information + * @skb: the packet + * + * This function will be called by the Rx routine of the timestamp for this + * packet is stored in the buffer. The value is stored in little endian format + * starting at the end of the packet data. + */ +void ixgbe_ptp_rx_pktstamp(struct ixgbe_q_vector *q_vector, + struct sk_buff *skb) +{ + __le64 regval; + + /* copy the bits out of the skb, and then trim the skb length */ + skb_copy_bits(skb, skb->len - IXGBE_TS_HDR_LEN, ®val, + IXGBE_TS_HDR_LEN); + __pskb_trim(skb, skb->len - IXGBE_TS_HDR_LEN); + + /* The timestamp is recorded in little endian format, and is stored at + * the end of the packet. + * + * DWORD: N N + 1 N + 2 + * Field: End of Packet SYSTIMH SYSTIML + */ + ixgbe_ptp_convert_to_hwtstamp(q_vector->adapter, skb_hwtstamps(skb), + le64_to_cpu(regval)); +} + +/** + * ixgbe_ptp_rx_rgtstamp - utility function which checks for RX time stamp + * @q_vector: structure containing interrupt and ring information * @skb: particular skb to send timestamp with * * if the timestamp is valid, we convert it into the timecounter ns * value, then store that result into the shhwtstamps structure which * is passed up the network stack */ -void ixgbe_ptp_rx_hwtstamp(struct ixgbe_adapter *adapter, struct sk_buff *skb) +void ixgbe_ptp_rx_rgtstamp(struct ixgbe_q_vector *q_vector, + struct sk_buff *skb) { - struct ixgbe_hw *hw = &adapter->hw; - struct skb_shared_hwtstamps *shhwtstamps; - u64 regval = 0, ns; + struct ixgbe_adapter *adapter; + struct ixgbe_hw *hw; + u64 regval = 0; u32 tsyncrxctl; - unsigned long flags; + + /* we cannot process timestamps on a ring without a q_vector */ + if (!q_vector || !q_vector->adapter) + return; + + adapter = q_vector->adapter; + hw = &adapter->hw; + + /* Read the tsyncrxctl register afterwards in order to prevent taking an + * I/O hit on every packet. + */ tsyncrxctl = IXGBE_READ_REG(hw, IXGBE_TSYNCRXCTL); if (!(tsyncrxctl & IXGBE_TSYNCRXCTL_VALID)) @@ -537,17 +788,7 @@ void ixgbe_ptp_rx_hwtstamp(struct ixgbe_adapter *adapter, struct sk_buff *skb) regval |= (u64)IXGBE_READ_REG(hw, IXGBE_RXSTMPL); regval |= (u64)IXGBE_READ_REG(hw, IXGBE_RXSTMPH) << 32; - spin_lock_irqsave(&adapter->tmreg_lock, flags); - ns = timecounter_cyc2time(&adapter->tc, regval); - spin_unlock_irqrestore(&adapter->tmreg_lock, flags); - - shhwtstamps = skb_hwtstamps(skb); - shhwtstamps->hwtstamp = ns_to_ktime(ns); - - /* Update the last_rx_timestamp timer in order to enable watchdog check - * for error case of latched timestamp on a dropped packet. - */ - adapter->last_rx_timestamp = jiffies; + ixgbe_ptp_convert_to_hwtstamp(adapter, skb_hwtstamps(skb), regval); } int ixgbe_ptp_get_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr) @@ -610,14 +851,20 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, case HWTSTAMP_FILTER_NONE: tsync_rx_ctl = 0; tsync_rx_mtrl = 0; + adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | + IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); break; case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L4_V1; tsync_rx_mtrl |= IXGBE_RXMTRL_V1_SYNC_MSG; + adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | + IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); break; case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L4_V1; tsync_rx_mtrl |= IXGBE_RXMTRL_V1_DELAY_REQ_MSG; + adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | + IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); break; case HWTSTAMP_FILTER_PTP_V2_EVENT: case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: @@ -631,9 +878,21 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_EVENT_V2; is_l2 = true; config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | + IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); break; case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: case HWTSTAMP_FILTER_ALL: + /* The X550 controller is capable of timestamping all packets, + * which allows it to accept any filter. + */ + if (hw->mac.type >= ixgbe_mac_X550) { + tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_ALL; + config->rx_filter = HWTSTAMP_FILTER_ALL; + adapter->flags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED; + break; + } + /* fall through */ default: /* * register RXMTRL must be set in order to do V1 packets, @@ -641,16 +900,46 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, * Delay_Req messages and hardware does not support * timestamping all packets => return error */ + adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | + IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); config->rx_filter = HWTSTAMP_FILTER_NONE; return -ERANGE; } if (hw->mac.type == ixgbe_mac_82598EB) { + adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | + IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); if (tsync_rx_ctl | tsync_tx_ctl) return -ERANGE; return 0; } + /* Per-packet timestamping only works if the filter is set to all + * packets. Since this is desired, always timestamp all packets as long + * as any Rx filter was configured. + */ + switch (hw->mac.type) { + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + /* enable timestamping all packets only if at least some + * packets were requested. Otherwise, play nice and disable + * timestamping + */ + if (config->rx_filter == HWTSTAMP_FILTER_NONE) + break; + + tsync_rx_ctl = IXGBE_TSYNCRXCTL_ENABLED | + IXGBE_TSYNCRXCTL_TYPE_ALL | + IXGBE_TSYNCRXCTL_TSIP_UT_EN; + config->rx_filter = HWTSTAMP_FILTER_ALL; + adapter->flags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED; + adapter->flags &= ~IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER; + is_l2 = true; + break; + default: + break; + } + /* define ethertype filter for timestamping L2 packets */ if (is_l2) IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_1588), @@ -678,8 +967,8 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, IXGBE_WRITE_FLUSH(hw); /* clear TX/RX time stamp registers, just to be sure */ - regval = IXGBE_READ_REG(hw, IXGBE_TXSTMPH); - regval = IXGBE_READ_REG(hw, IXGBE_RXSTMPH); + ixgbe_ptp_clear_tx_timestamp(adapter); + IXGBE_READ_REG(hw, IXGBE_RXSTMPH); return 0; } @@ -712,23 +1001,9 @@ int ixgbe_ptp_set_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr) -EFAULT : 0; } -/** - * ixgbe_ptp_start_cyclecounter - create the cycle counter from hw - * @adapter: pointer to the adapter structure - * - * This function should be called to set the proper values for the TIMINCA - * register and tell the cyclecounter structure what the tick rate of SYSTIME - * is. It does not directly modify SYSTIME registers or the timecounter - * structure. It should be called whenever a new TIMINCA value is necessary, - * such as during initialization or when the link speed changes. - */ -void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) +static void ixgbe_ptp_link_speed_adjust(struct ixgbe_adapter *adapter, + u32 *shift, u32 *incval) { - struct ixgbe_hw *hw = &adapter->hw; - u32 incval = 0; - u32 shift = 0; - unsigned long flags; - /** * Scale the NIC cycle counter by a large factor so that * relatively small corrections to the frequency can be added @@ -745,36 +1020,98 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) */ switch (adapter->link_speed) { case IXGBE_LINK_SPEED_100_FULL: - incval = IXGBE_INCVAL_100; - shift = IXGBE_INCVAL_SHIFT_100; + *shift = IXGBE_INCVAL_SHIFT_100; + *incval = IXGBE_INCVAL_100; break; case IXGBE_LINK_SPEED_1GB_FULL: - incval = IXGBE_INCVAL_1GB; - shift = IXGBE_INCVAL_SHIFT_1GB; + *shift = IXGBE_INCVAL_SHIFT_1GB; + *incval = IXGBE_INCVAL_1GB; break; case IXGBE_LINK_SPEED_10GB_FULL: default: - incval = IXGBE_INCVAL_10GB; - shift = IXGBE_INCVAL_SHIFT_10GB; + *shift = IXGBE_INCVAL_SHIFT_10GB; + *incval = IXGBE_INCVAL_10GB; break; } +} - /** - * Modify the calculated values to fit within the correct - * number of bits specified by the hardware. The 82599 doesn't - * have the same space as the X540, so bitshift the calculated - * values to fit. +/** + * ixgbe_ptp_start_cyclecounter - create the cycle counter from hw + * @adapter: pointer to the adapter structure + * + * This function should be called to set the proper values for the TIMINCA + * register and tell the cyclecounter structure what the tick rate of SYSTIME + * is. It does not directly modify SYSTIME registers or the timecounter + * structure. It should be called whenever a new TIMINCA value is necessary, + * such as during initialization or when the link speed changes. + */ +void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct cyclecounter cc; + unsigned long flags; + u32 incval = 0; + u32 tsauxc = 0; + u32 fuse0 = 0; + + /* For some of the boards below this mask is technically incorrect. + * The timestamp mask overflows at approximately 61bits. However the + * particular hardware does not overflow on an even bitmask value. + * Instead, it overflows due to conversion of upper 32bits billions of + * cycles. Timecounters are not really intended for this purpose so + * they do not properly function if the overflow point isn't 2^N-1. + * However, the actual SYSTIME values in question take ~138 years to + * overflow. In practice this means they won't actually overflow. A + * proper fix to this problem would require modification of the + * timecounter delta calculations. */ + cc.mask = CLOCKSOURCE_MASK(64); + cc.mult = 1; + cc.shift = 0; + switch (hw->mac.type) { + case ixgbe_mac_X550EM_x: + /* SYSTIME assumes X550EM_x board frequency is 300Mhz, and is + * designed to represent seconds and nanoseconds when this is + * the case. However, some revisions of hardware have a 400Mhz + * clock and we have to compensate for this frequency + * variation using corrected mult and shift values. + */ + fuse0 = IXGBE_READ_REG(hw, IXGBE_FUSES0_GROUP(0)); + if (!(fuse0 & IXGBE_FUSES0_300MHZ)) { + cc.mult = 3; + cc.shift = 2; + } + /* fallthrough */ + case ixgbe_mac_X550: + cc.read = ixgbe_ptp_read_X550; + + /* enable SYSTIME counter */ + IXGBE_WRITE_REG(hw, IXGBE_SYSTIMR, 0); + IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0); + IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0); + tsauxc = IXGBE_READ_REG(hw, IXGBE_TSAUXC); + IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, + tsauxc & ~IXGBE_TSAUXC_DISABLE_SYSTIME); + IXGBE_WRITE_REG(hw, IXGBE_TSIM, IXGBE_TSIM_TXTS); + IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_TIMESYNC); + + IXGBE_WRITE_FLUSH(hw); + break; case ixgbe_mac_X540: + cc.read = ixgbe_ptp_read_82599; + + ixgbe_ptp_link_speed_adjust(adapter, &cc.shift, &incval); IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, incval); break; case ixgbe_mac_82599EB: + cc.read = ixgbe_ptp_read_82599; + + ixgbe_ptp_link_speed_adjust(adapter, &cc.shift, &incval); incval >>= IXGBE_INCVAL_SHIFT_82599; - shift -= IXGBE_INCVAL_SHIFT_82599; + cc.shift -= IXGBE_INCVAL_SHIFT_82599; IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, - (1 << IXGBE_INCPER_SHIFT_82599) | - incval); + (1 << IXGBE_INCPER_SHIFT_82599) | incval); break; default: /* other devices aren't supported */ @@ -787,13 +1124,7 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) /* need lock to prevent incorrect read while modifying cyclecounter */ spin_lock_irqsave(&adapter->tmreg_lock, flags); - - memset(&adapter->cc, 0, sizeof(adapter->cc)); - adapter->cc.read = ixgbe_ptp_read; - adapter->cc.mask = CYCLECOUNTER_MASK(64); - adapter->cc.shift = shift; - adapter->cc.mult = 1; - + memcpy(&adapter->hw_cc, &cc, sizeof(adapter->hw_cc)); spin_unlock_irqrestore(&adapter->tmreg_lock, flags); } @@ -814,29 +1145,27 @@ void ixgbe_ptp_reset(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; unsigned long flags; - /* set SYSTIME registers to 0 just in case */ - IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0x00000000); - IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0x00000000); - IXGBE_WRITE_FLUSH(hw); - /* reset the hardware timestamping mode */ ixgbe_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config); + /* 82598 does not support PTP */ + if (hw->mac.type == ixgbe_mac_82598EB) + return; + ixgbe_ptp_start_cyclecounter(adapter); spin_lock_irqsave(&adapter->tmreg_lock, flags); - - /* reset the ns time counter */ - timecounter_init(&adapter->tc, &adapter->cc, + timecounter_init(&adapter->hw_tc, &adapter->hw_cc, ktime_to_ns(ktime_get_real())); - spin_unlock_irqrestore(&adapter->tmreg_lock, flags); - /* - * Now that the shift has been calculated and the systime + adapter->last_overflow_check = jiffies; + + /* Now that the shift has been calculated and the systime * registers reset, (re-)enable the Clock out feature */ - ixgbe_ptp_setup_sdp(adapter); + if (adapter->ptp_setup_sdp) + adapter->ptp_setup_sdp(adapter); } /** @@ -845,11 +1174,11 @@ void ixgbe_ptp_reset(struct ixgbe_adapter *adapter) * * This function performs setup of the user entry point function table and * initializes the PTP clock device, which is used to access the clock-like - * features of the PTP core. It will be called by ixgbe_ptp_init, only if - * there isn't already a clock device (such as after a suspend/resume cycle, - * where the clock device wasn't destroyed). + * features of the PTP core. It will be called by ixgbe_ptp_init, and may + * reuse a previously initialized clock (such as during a suspend/resume + * cycle). */ -static int ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter) +static long ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter) { struct net_device *netdev = adapter->netdev; long err; @@ -869,11 +1198,12 @@ static int ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter) adapter->ptp_caps.n_ext_ts = 0; adapter->ptp_caps.n_per_out = 0; adapter->ptp_caps.pps = 1; - adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq; + adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_82599; adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime; adapter->ptp_caps.gettime64 = ixgbe_ptp_gettime; adapter->ptp_caps.settime64 = ixgbe_ptp_settime; adapter->ptp_caps.enable = ixgbe_ptp_feature_enable; + adapter->ptp_setup_sdp = ixgbe_ptp_setup_sdp_x540; break; case ixgbe_mac_82599EB: snprintf(adapter->ptp_caps.name, @@ -885,14 +1215,31 @@ static int ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter) adapter->ptp_caps.n_ext_ts = 0; adapter->ptp_caps.n_per_out = 0; adapter->ptp_caps.pps = 0; - adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq; + adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_82599; + adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime; + adapter->ptp_caps.gettime64 = ixgbe_ptp_gettime; + adapter->ptp_caps.settime64 = ixgbe_ptp_settime; + adapter->ptp_caps.enable = ixgbe_ptp_feature_enable; + break; + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + snprintf(adapter->ptp_caps.name, 16, "%s", netdev->name); + adapter->ptp_caps.owner = THIS_MODULE; + adapter->ptp_caps.max_adj = 30000000; + adapter->ptp_caps.n_alarm = 0; + adapter->ptp_caps.n_ext_ts = 0; + adapter->ptp_caps.n_per_out = 0; + adapter->ptp_caps.pps = 0; + adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_X550; adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime; adapter->ptp_caps.gettime64 = ixgbe_ptp_gettime; adapter->ptp_caps.settime64 = ixgbe_ptp_settime; adapter->ptp_caps.enable = ixgbe_ptp_feature_enable; + adapter->ptp_setup_sdp = NULL; break; default: adapter->ptp_clock = NULL; + adapter->ptp_setup_sdp = NULL; return -EOPNOTSUPP; } @@ -961,18 +1308,13 @@ void ixgbe_ptp_suspend(struct ixgbe_adapter *adapter) if (!test_and_clear_bit(__IXGBE_PTP_RUNNING, &adapter->state)) return; - /* since this might be called in suspend, we don't clear the state, - * but simply reset the auxiliary PPS signal control register - */ - IXGBE_WRITE_REG(&adapter->hw, IXGBE_TSAUXC, 0x0); + adapter->flags2 &= ~IXGBE_FLAG2_PTP_PPS_ENABLED; + if (adapter->ptp_setup_sdp) + adapter->ptp_setup_sdp(adapter); /* ensure that we cancel any pending PTP Tx work item in progress */ cancel_work_sync(&adapter->ptp_tx_work); - if (adapter->ptp_tx_skb) { - dev_kfree_skb_any(adapter->ptp_tx_skb); - adapter->ptp_tx_skb = NULL; - clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state); - } + ixgbe_ptp_clear_tx_timestamp(adapter); } /** diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index fcd8b27a0ccba9..31de6cf7adb010 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2014 Intel Corporation. + Copyright(c) 1999 - 2015 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -130,6 +130,38 @@ static int __ixgbe_enable_sriov(struct ixgbe_adapter *adapter) return -ENOMEM; } +/** + * ixgbe_get_vfs - Find and take references to all vf devices + * @adapter: Pointer to adapter struct + */ +static void ixgbe_get_vfs(struct ixgbe_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + u16 vendor = pdev->vendor; + struct pci_dev *vfdev; + int vf = 0; + u16 vf_id; + int pos; + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); + if (!pos) + return; + pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID, &vf_id); + + vfdev = pci_get_device(vendor, vf_id, NULL); + for (; vfdev; vfdev = pci_get_device(vendor, vf_id, vfdev)) { + if (!vfdev->is_virtfn) + continue; + if (vfdev->physfn != pdev) + continue; + if (vf >= adapter->num_vfs) + continue; + pci_dev_get(vfdev); + adapter->vfinfo[vf].vfdev = vfdev; + ++vf; + } +} + /* Note this function is called when the user wants to enable SR-IOV * VFs using the now deprecated module parameter */ @@ -170,8 +202,10 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter) } } - if (!__ixgbe_enable_sriov(adapter)) + if (!__ixgbe_enable_sriov(adapter)) { + ixgbe_get_vfs(adapter); return; + } /* If we have gotten to this point then there is no memory available * to manage the VF devices - print message and bail. @@ -184,6 +218,7 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter) #endif /* #ifdef CONFIG_PCI_IOV */ int ixgbe_disable_sriov(struct ixgbe_adapter *adapter) { + unsigned int num_vfs = adapter->num_vfs, vf; struct ixgbe_hw *hw = &adapter->hw; u32 gpie; u32 vmdctl; @@ -192,6 +227,16 @@ int ixgbe_disable_sriov(struct ixgbe_adapter *adapter) /* set num VFs to 0 to prevent access to vfinfo */ adapter->num_vfs = 0; + /* put the reference to all of the vf devices */ + for (vf = 0; vf < num_vfs; ++vf) { + struct pci_dev *vfdev = adapter->vfinfo[vf].vfdev; + + if (!vfdev) + continue; + adapter->vfinfo[vf].vfdev = NULL; + pci_dev_put(vfdev); + } + /* free VF control structures */ kfree(adapter->vfinfo); adapter->vfinfo = NULL; @@ -289,6 +334,7 @@ static int ixgbe_pci_sriov_enable(struct pci_dev *dev, int num_vfs) e_dev_warn("Failed to enable PCI sriov: %d\n", err); return err; } + ixgbe_get_vfs(adapter); ixgbe_sriov_reinit(adapter); return num_vfs; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 995f03107eacd0..1329eddfc9ce7e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -1020,6 +1020,7 @@ struct ixgbe_thermal_sensor_data { #define IXGBE_TXSTMPH 0x08C08 /* Tx timestamp value High - RO */ #define IXGBE_SYSTIML 0x08C0C /* System time register Low - RO */ #define IXGBE_SYSTIMH 0x08C10 /* System time register High - RO */ +#define IXGBE_SYSTIMR 0x08C58 /* System time register Residue - RO */ #define IXGBE_TIMINCA 0x08C14 /* Increment attributes register - RW */ #define IXGBE_TIMADJL 0x08C18 /* Time Adjustment Offset register Low - RW */ #define IXGBE_TIMADJH 0x08C1C /* Time Adjustment Offset register High - RW */ @@ -1036,6 +1037,7 @@ struct ixgbe_thermal_sensor_data { #define IXGBE_AUXSTMPH0 0x08C40 /* Auxiliary Time Stamp 0 register High - RO */ #define IXGBE_AUXSTMPL1 0x08C44 /* Auxiliary Time Stamp 1 register Low - RO */ #define IXGBE_AUXSTMPH1 0x08C48 /* Auxiliary Time Stamp 1 register High - RO */ +#define IXGBE_TSIM 0x08C68 /* TimeSync Interrupt Mask Register - RW */ /* Diagnostic Registers */ #define IXGBE_RDSTATCTL 0x02C20 @@ -1345,7 +1347,10 @@ struct ixgbe_thermal_sensor_data { #define IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_MASK 0xFF01 /* int chip-wide mask */ #define IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_FLAG 0xFC01 /* int chip-wide mask */ #define IXGBE_MDIO_GLOBAL_ALARM_1 0xCC00 /* Global alarm 1 */ +#define IXGBE_MDIO_GLOBAL_ALM_1_DEV_FAULT 0x0010 /* device fault */ #define IXGBE_MDIO_GLOBAL_ALM_1_HI_TMP_FAIL 0x4000 /* high temp failure */ +#define IXGBE_MDIO_GLOBAL_FAULT_MSG 0xC850 /* global fault msg */ +#define IXGBE_MDIO_GLOBAL_FAULT_MSG_HI_TMP 0x8007 /* high temp failure */ #define IXGBE_MDIO_GLOBAL_INT_MASK 0xD400 /* Global int mask */ /* autoneg vendor alarm int enable */ #define IXGBE_MDIO_GLOBAL_AN_VEN_ALM_INT_EN 0x1000 @@ -1353,6 +1358,7 @@ struct ixgbe_thermal_sensor_data { #define IXGBE_MDIO_GLOBAL_VEN_ALM_INT_EN 0x1 /* vendor alarm int enable */ #define IXGBE_MDIO_GLOBAL_STD_ALM2_INT 0x200 /* vendor alarm2 int mask */ #define IXGBE_MDIO_GLOBAL_INT_HI_TEMP_EN 0x4000 /* int high temp enable */ +#define IXGBE_MDIO_GLOBAL_INT_DEV_FAULT_EN 0x0010 /*int dev fault enable */ #define IXGBE_MDIO_PMA_PMD_SDA_SCL_ADDR 0xC30A /* PHY_XS SDA/SCL Addr Reg */ #define IXGBE_MDIO_PMA_PMD_SDA_SCL_DATA 0xC30B /* PHY_XS SDA/SCL Data Reg */ @@ -2209,6 +2215,7 @@ enum { #define IXGBE_TSAUXC_EN_CLK 0x00000004 #define IXGBE_TSAUXC_SYNCLK 0x00000008 #define IXGBE_TSAUXC_SDP0_INT 0x00000040 +#define IXGBE_TSAUXC_DISABLE_SYSTIME 0x80000000 #define IXGBE_TSYNCTXCTL_VALID 0x00000001 /* Tx timestamp valid */ #define IXGBE_TSYNCTXCTL_ENABLED 0x00000010 /* Tx timestamping enabled */ @@ -2218,8 +2225,12 @@ enum { #define IXGBE_TSYNCRXCTL_TYPE_L2_V2 0x00 #define IXGBE_TSYNCRXCTL_TYPE_L4_V1 0x02 #define IXGBE_TSYNCRXCTL_TYPE_L2_L4_V2 0x04 +#define IXGBE_TSYNCRXCTL_TYPE_ALL 0x08 #define IXGBE_TSYNCRXCTL_TYPE_EVENT_V2 0x0A #define IXGBE_TSYNCRXCTL_ENABLED 0x00000010 /* Rx Timestamping enabled */ +#define IXGBE_TSYNCRXCTL_TSIP_UT_EN 0x00800000 /* Rx Timestamp in Packet */ + +#define IXGBE_TSIM_TXTS 0x00000002 #define IXGBE_RXMTRL_V1_CTRLT_MASK 0x000000FF #define IXGBE_RXMTRL_V1_SYNC_MSG 0x00 @@ -2332,6 +2343,7 @@ enum { #define IXGBE_RXD_STAT_UDPV 0x400 /* Valid UDP checksum */ #define IXGBE_RXD_STAT_DYNINT 0x800 /* Pkt caused INT via DYNINT */ #define IXGBE_RXD_STAT_LLINT 0x800 /* Pkt caused Low Latency Interrupt */ +#define IXGBE_RXD_STAT_TSIP 0x08000 /* Time Stamp in packet buffer */ #define IXGBE_RXD_STAT_TS 0x10000 /* Time Stamp */ #define IXGBE_RXD_STAT_SECP 0x20000 /* Security Processing */ #define IXGBE_RXD_STAT_LB 0x40000 /* Loopback Status */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c index c1d4584f6469df..bf8225ceab8e4f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c @@ -57,8 +57,7 @@ s32 ixgbe_get_invariants_X540(struct ixgbe_hw *hw) struct ixgbe_phy_info *phy = &hw->phy; /* set_phy_power was set by default to NULL */ - if (!ixgbe_mng_present(hw)) - phy->ops.set_phy_power = ixgbe_set_copper_phy_power; + phy->ops.set_phy_power = ixgbe_set_copper_phy_power; mac->mcft_size = IXGBE_X540_MC_TBL_SIZE; mac->vft_size = IXGBE_X540_VFT_TBL_SIZE; @@ -110,13 +109,14 @@ mac_reset_top: ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL); IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl); IXGBE_WRITE_FLUSH(hw); + usleep_range(1000, 1200); /* Poll for reset bit to self-clear indicating reset is complete */ for (i = 0; i < 10; i++) { - udelay(1); ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL); if (!(ctrl & IXGBE_CTRL_RST_MASK)) break; + udelay(1); } if (ctrl & IXGBE_CTRL_RST_MASK) { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index bf2ae8daf7175e..f4ef0d1a5dbea3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -1444,7 +1444,7 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc) IXGBE_MDIO_GLOBAL_ALARM_1_INT))) return status; - /* High temperature failure alarm triggered */ + /* Global alarm triggered */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_ALARM_1, IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, ®); @@ -1458,6 +1458,21 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc) ixgbe_set_copper_phy_power(hw, false); return IXGBE_ERR_OVERTEMP; } + if (reg & IXGBE_MDIO_GLOBAL_ALM_1_DEV_FAULT) { + /* device fault alarm triggered */ + status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_FAULT_MSG, + IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + ®); + if (status) + return status; + + /* if device fault was due to high temp alarm handle and exit */ + if (reg == IXGBE_MDIO_GLOBAL_FAULT_MSG_HI_TMP) { + /* power down the PHY in case the PHY FW didn't */ + ixgbe_set_copper_phy_power(hw, false); + return IXGBE_ERR_OVERTEMP; + } + } /* Vendor alarm 2 triggered */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_CHIP_STD_INT_FLAG, @@ -1511,14 +1526,15 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw) if (status) return status; - /* Enables high temperature failure alarm */ + /* Enable high temperature failure and global fault alarms */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK, IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, ®); if (status) return status; - reg |= IXGBE_MDIO_GLOBAL_INT_HI_TEMP_EN; + reg |= (IXGBE_MDIO_GLOBAL_INT_HI_TEMP_EN | + IXGBE_MDIO_GLOBAL_INT_DEV_FAULT_EN); status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK, IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, @@ -1727,6 +1743,12 @@ static s32 ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw) if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper) return IXGBE_ERR_CONFIG; + if (hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE) { + speed = IXGBE_LINK_SPEED_10GB_FULL | + IXGBE_LINK_SPEED_1GB_FULL; + return ixgbe_setup_kr_speed_x550em(hw, speed); + } + /* If link is not up, then there is no setup necessary so return */ status = ixgbe_ext_phy_t_x550em_get_link(hw, &link_up); if (status) @@ -1931,7 +1953,6 @@ static s32 ixgbe_enter_lplu_t_x550em(struct ixgbe_hw *hw) static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) { struct ixgbe_phy_info *phy = &hw->phy; - ixgbe_link_speed speed; s32 ret_val; hw->mac.ops.set_lan_id(hw); @@ -1944,10 +1965,6 @@ static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) * to determine internal PHY mode. */ phy->nw_mng_if_sel = IXGBE_READ_REG(hw, IXGBE_NW_MNG_IF_SEL); - if (phy->nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE) { - speed = IXGBE_LINK_SPEED_10GB_FULL | - IXGBE_LINK_SPEED_1GB_FULL; - } } /* Identify the PHY or SFP module */ @@ -1979,14 +1996,8 @@ static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) /* If internal link mode is XFI, then setup iXFI internal link, * else setup KR now. */ - if (!(phy->nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) { - phy->ops.setup_internal_link = - ixgbe_setup_internal_phy_t_x550em; - } else { - speed = IXGBE_LINK_SPEED_10GB_FULL | - IXGBE_LINK_SPEED_1GB_FULL; - ret_val = ixgbe_setup_kr_speed_x550em(hw, speed); - } + phy->ops.setup_internal_link = + ixgbe_setup_internal_phy_t_x550em; /* setup SW LPLU only for first revision */ if (!(IXGBE_FUSES0_REV1 & IXGBE_READ_REG(hw, @@ -2135,13 +2146,14 @@ mac_reset_top: ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL); IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl); IXGBE_WRITE_FLUSH(hw); + usleep_range(1000, 1200); /* Poll for reset bit to self-clear meaning reset is complete */ for (i = 0; i < 10; i++) { - udelay(1); ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL); if (!(ctrl & IXGBE_CTRL_RST_MASK)) break; + udelay(1); } if (ctrl & IXGBE_CTRL_RST_MASK) { diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index dbbd1be474626f..f098952d4fb4ac 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -59,7 +59,7 @@ static const char ixgbevf_driver_string[] = #define DRV_VERSION "2.12.1-k" const char ixgbevf_driver_version[] = DRV_VERSION; static char ixgbevf_copyright[] = - "Copyright (c) 2009 - 2012 Intel Corporation."; + "Copyright (c) 2009 - 2015 Intel Corporation."; static const struct ixgbevf_info *ixgbevf_info_tbl[] = { [board_82599_vf] = &ixgbevf_82599_vf_info, @@ -96,12 +96,14 @@ static int debug = -1; module_param(debug, int, 0); MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); +static struct workqueue_struct *ixgbevf_wq; + static void ixgbevf_service_event_schedule(struct ixgbevf_adapter *adapter) { if (!test_bit(__IXGBEVF_DOWN, &adapter->state) && !test_bit(__IXGBEVF_REMOVING, &adapter->state) && !test_and_set_bit(__IXGBEVF_SERVICE_SCHED, &adapter->state)) - schedule_work(&adapter->service_task); + queue_work(ixgbevf_wq, &adapter->service_task); } static void ixgbevf_service_event_complete(struct ixgbevf_adapter *adapter) @@ -1332,7 +1334,6 @@ static int ixgbevf_map_rings_to_vectors(struct ixgbevf_adapter *adapter) int txr_remaining = adapter->num_tx_queues; int i, j; int rqpv, tqpv; - int err = 0; q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; @@ -1345,7 +1346,7 @@ static int ixgbevf_map_rings_to_vectors(struct ixgbevf_adapter *adapter) for (; txr_idx < txr_remaining; v_start++, txr_idx++) map_vector_to_txq(adapter, v_start, txr_idx); - goto out; + return 0; } /* If we don't have enough vectors for a 1-to-1 @@ -1370,8 +1371,7 @@ static int ixgbevf_map_rings_to_vectors(struct ixgbevf_adapter *adapter) } } -out: - return err; + return 0; } /** @@ -1469,9 +1469,7 @@ static inline void ixgbevf_reset_q_vectors(struct ixgbevf_adapter *adapter) **/ static int ixgbevf_request_irq(struct ixgbevf_adapter *adapter) { - int err = 0; - - err = ixgbevf_request_msix_irqs(adapter); + int err = ixgbevf_request_msix_irqs(adapter); if (err) hw_dbg(&adapter->hw, "request_irq failed, Error %d\n", err); @@ -1830,7 +1828,7 @@ static int ixgbevf_vlan_rx_kill_vid(struct net_device *netdev, { struct ixgbevf_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; - int err = -EOPNOTSUPP; + int err; spin_lock_bh(&adapter->mbx_lock); @@ -2046,7 +2044,7 @@ static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter) ixgbe_mbox_api_11, ixgbe_mbox_api_10, ixgbe_mbox_api_unknown }; - int err = 0, idx = 0; + int err, idx = 0; spin_lock_bh(&adapter->mbx_lock); @@ -2419,7 +2417,7 @@ err_allocation: static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter) { struct net_device *netdev = adapter->netdev; - int err = 0; + int err; int vector, v_budget; /* It's easy to be greedy for MSI-X vectors, but it really @@ -2437,26 +2435,21 @@ static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter) */ adapter->msix_entries = kcalloc(v_budget, sizeof(struct msix_entry), GFP_KERNEL); - if (!adapter->msix_entries) { - err = -ENOMEM; - goto out; - } + if (!adapter->msix_entries) + return -ENOMEM; for (vector = 0; vector < v_budget; vector++) adapter->msix_entries[vector].entry = vector; err = ixgbevf_acquire_msix_vectors(adapter, v_budget); if (err) - goto out; + return err; err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues); if (err) - goto out; - - err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues); + return err; -out: - return err; + return netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues); } /** @@ -3351,6 +3344,7 @@ static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring, if (skb->ip_summed == CHECKSUM_PARTIAL) { u8 l4_hdr = 0; + __be16 frag_off; switch (first->protocol) { case htons(ETH_P_IP): @@ -3361,13 +3355,16 @@ static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring, case htons(ETH_P_IPV6): vlan_macip_lens |= skb_network_header_len(skb); l4_hdr = ipv6_hdr(skb)->nexthdr; + if (likely(skb_network_header_len(skb) == + sizeof(struct ipv6hdr))) + break; + ipv6_skip_exthdr(skb, skb_network_offset(skb) + + sizeof(struct ipv6hdr), + &l4_hdr, &frag_off); + if (unlikely(frag_off)) + l4_hdr = NEXTHDR_FRAGMENT; break; default: - if (unlikely(net_ratelimit())) { - dev_warn(tx_ring->dev, - "partial checksum but proto=%x!\n", - first->protocol); - } break; } @@ -3389,16 +3386,18 @@ static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring, default: if (unlikely(net_ratelimit())) { dev_warn(tx_ring->dev, - "partial checksum but l4 proto=%x!\n", - l4_hdr); + "partial checksum, l3 proto=%x, l4 proto=%x\n", + first->protocol, l4_hdr); } - break; + skb_checksum_help(skb); + goto no_csum; } /* update TX checksum flag */ first->tx_flags |= IXGBE_TX_FLAGS_CSUM; } +no_csum: /* vlan_macip_lens: MACLEN, VLAN tag */ vlan_macip_lens |= skb_network_offset(skb) << IXGBE_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK; @@ -4244,15 +4243,17 @@ static struct pci_driver ixgbevf_driver = { **/ static int __init ixgbevf_init_module(void) { - int ret; - pr_info("%s - version %s\n", ixgbevf_driver_string, ixgbevf_driver_version); pr_info("%s\n", ixgbevf_copyright); + ixgbevf_wq = create_singlethread_workqueue(ixgbevf_driver_name); + if (!ixgbevf_wq) { + pr_err("%s: Failed to create workqueue\n", ixgbevf_driver_name); + return -ENOMEM; + } - ret = pci_register_driver(&ixgbevf_driver); - return ret; + return pci_register_driver(&ixgbevf_driver); } module_init(ixgbevf_init_module); @@ -4266,6 +4267,10 @@ module_init(ixgbevf_init_module); static void __exit ixgbevf_exit_module(void) { pci_unregister_driver(&ixgbevf_driver); + if (ixgbevf_wq) { + destroy_workqueue(ixgbevf_wq); + ixgbevf_wq = NULL; + } } #ifdef DEBUG diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 4182290fdbcf10..4eba2ed530525e 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -3257,25 +3257,20 @@ static struct platform_driver mv643xx_eth_driver = { }, }; +static struct platform_driver * const drivers[] = { + &mv643xx_eth_shared_driver, + &mv643xx_eth_driver, +}; + static int __init mv643xx_eth_init_module(void) { - int rc; - - rc = platform_driver_register(&mv643xx_eth_shared_driver); - if (!rc) { - rc = platform_driver_register(&mv643xx_eth_driver); - if (rc) - platform_driver_unregister(&mv643xx_eth_shared_driver); - } - - return rc; + return platform_register_drivers(drivers, ARRAY_SIZE(drivers)); } module_init(mv643xx_eth_init_module); static void __exit mv643xx_eth_cleanup_module(void) { - platform_driver_unregister(&mv643xx_eth_driver); - platform_driver_unregister(&mv643xx_eth_shared_driver); + platform_unregister_drivers(drivers, ARRAY_SIZE(drivers)); } module_exit(mv643xx_eth_cleanup_module); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index ed622fa29dfab6..528c2544389b90 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -371,7 +371,7 @@ struct mvneta_port { unsigned int duplex; unsigned int speed; unsigned int tx_csum_limit; - int use_inband_status:1; + unsigned int use_inband_status:1; u64 ethtool_stats[ARRAY_SIZE(mvneta_statistics)]; }; @@ -973,6 +973,44 @@ static void mvneta_set_other_mcast_table(struct mvneta_port *pp, int queue) mvreg_write(pp, MVNETA_DA_FILT_OTH_MCAST + offset, val); } +static void mvneta_set_autoneg(struct mvneta_port *pp, int enable) +{ + u32 val; + + if (enable) { + val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); + val &= ~(MVNETA_GMAC_FORCE_LINK_PASS | + MVNETA_GMAC_FORCE_LINK_DOWN | + MVNETA_GMAC_AN_FLOW_CTRL_EN); + val |= MVNETA_GMAC_INBAND_AN_ENABLE | + MVNETA_GMAC_AN_SPEED_EN | + MVNETA_GMAC_AN_DUPLEX_EN; + mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); + + val = mvreg_read(pp, MVNETA_GMAC_CLOCK_DIVIDER); + val |= MVNETA_GMAC_1MS_CLOCK_ENABLE; + mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER, val); + + val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); + val |= MVNETA_GMAC2_INBAND_AN_ENABLE; + mvreg_write(pp, MVNETA_GMAC_CTRL_2, val); + } else { + val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); + val &= ~(MVNETA_GMAC_INBAND_AN_ENABLE | + MVNETA_GMAC_AN_SPEED_EN | + MVNETA_GMAC_AN_DUPLEX_EN); + mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); + + val = mvreg_read(pp, MVNETA_GMAC_CLOCK_DIVIDER); + val &= ~MVNETA_GMAC_1MS_CLOCK_ENABLE; + mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER, val); + + val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); + val &= ~MVNETA_GMAC2_INBAND_AN_ENABLE; + mvreg_write(pp, MVNETA_GMAC_CTRL_2, val); + } +} + /* This method sets defaults to the NETA port: * Clears interrupt Cause and Mask registers. * Clears all MAC tables. @@ -1058,26 +1096,7 @@ static void mvneta_defaults_set(struct mvneta_port *pp) val &= ~MVNETA_PHY_POLLING_ENABLE; mvreg_write(pp, MVNETA_UNIT_CONTROL, val); - if (pp->use_inband_status) { - val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); - val &= ~(MVNETA_GMAC_FORCE_LINK_PASS | - MVNETA_GMAC_FORCE_LINK_DOWN | - MVNETA_GMAC_AN_FLOW_CTRL_EN); - val |= MVNETA_GMAC_INBAND_AN_ENABLE | - MVNETA_GMAC_AN_SPEED_EN | - MVNETA_GMAC_AN_DUPLEX_EN; - mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); - val = mvreg_read(pp, MVNETA_GMAC_CLOCK_DIVIDER); - val |= MVNETA_GMAC_1MS_CLOCK_ENABLE; - mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER, val); - } else { - val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); - val &= ~(MVNETA_GMAC_INBAND_AN_ENABLE | - MVNETA_GMAC_AN_SPEED_EN | - MVNETA_GMAC_AN_DUPLEX_EN); - mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); - } - + mvneta_set_autoneg(pp, pp->use_inband_status); mvneta_set_ucast_table(pp, -1); mvneta_set_special_mcast_table(pp, -1); mvneta_set_other_mcast_table(pp, -1); @@ -2943,10 +2962,43 @@ int mvneta_ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) int mvneta_ethtool_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) { struct mvneta_port *pp = netdev_priv(dev); + struct phy_device *phydev = pp->phy_dev; - if (!pp->phy_dev) + if (!phydev) return -ENODEV; + if ((cmd->autoneg == AUTONEG_ENABLE) != pp->use_inband_status) { + u32 val; + + mvneta_set_autoneg(pp, cmd->autoneg == AUTONEG_ENABLE); + + if (cmd->autoneg == AUTONEG_DISABLE) { + val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); + val &= ~(MVNETA_GMAC_CONFIG_MII_SPEED | + MVNETA_GMAC_CONFIG_GMII_SPEED | + MVNETA_GMAC_CONFIG_FULL_DUPLEX); + + if (phydev->duplex) + val |= MVNETA_GMAC_CONFIG_FULL_DUPLEX; + + if (phydev->speed == SPEED_1000) + val |= MVNETA_GMAC_CONFIG_GMII_SPEED; + else if (phydev->speed == SPEED_100) + val |= MVNETA_GMAC_CONFIG_MII_SPEED; + + mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); + } + + pp->use_inband_status = (cmd->autoneg == AUTONEG_ENABLE); + netdev_info(pp->dev, "autoneg status set to %i\n", + pp->use_inband_status); + + if (netif_running(dev)) { + mvneta_port_down(pp); + mvneta_port_up(pp); + } + } + return phy_ethtool_sset(pp->phy_dev, cmd); } @@ -3230,9 +3282,6 @@ static int mvneta_port_power_up(struct mvneta_port *pp, int phy_mode) return -EINVAL; } - if (pp->use_inband_status) - ctrl |= MVNETA_GMAC2_INBAND_AN_ENABLE; - /* Cancel Port Reset */ ctrl &= ~MVNETA_GMAC2_PORT_RESET; mvreg_write(pp, MVNETA_GMAC_CTRL_2, ctrl); diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 603d1c3d3b2ea1..4696053165f8ca 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -151,6 +151,17 @@ void mlx4_gen_slave_eqe(struct work_struct *work) eqe = next_slave_event_eqe(slave_eq)) { slave = eqe->slave_id; + if (eqe->type == MLX4_EVENT_TYPE_PORT_CHANGE && + eqe->subtype == MLX4_PORT_CHANGE_SUBTYPE_DOWN && + mlx4_is_bonded(dev)) { + struct mlx4_port_cap port_cap; + + if (!mlx4_QUERY_PORT(dev, 1, &port_cap) && port_cap.link_state) + goto consume; + + if (!mlx4_QUERY_PORT(dev, 2, &port_cap) && port_cap.link_state) + goto consume; + } /* All active slaves need to receive the event */ if (slave == ALL_SLAVES) { for (i = 0; i <= dev->persist->num_vfs; i++) { @@ -174,6 +185,7 @@ void mlx4_gen_slave_eqe(struct work_struct *work) mlx4_warn(dev, "Failed to generate event for slave %d\n", slave); } +consume: ++slave_eq->cons; } } @@ -594,7 +606,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) break; for (i = 0; i < dev->persist->num_vfs + 1; i++) { - if (!test_bit(i, slaves_port.slaves)) + int reported_port = mlx4_is_bonded(dev) ? 1 : mlx4_phys_to_slave_port(dev, i, port); + + if (!test_bit(i, slaves_port.slaves) && !mlx4_is_bonded(dev)) continue; if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) { if (i == mlx4_master_func_num(dev)) @@ -606,7 +620,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) eqe->event.port_change.port = cpu_to_be32( (be32_to_cpu(eqe->event.port_change.port) & 0xFFFFFFF) - | (mlx4_phys_to_slave_port(dev, i, port) << 28)); + | (reported_port << 28)); mlx4_slave_event(dev, i, eqe); } } else { /* IB port */ @@ -636,7 +650,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) for (i = 0; i < dev->persist->num_vfs + 1; i++) { - if (!test_bit(i, slaves_port.slaves)) + int reported_port = mlx4_is_bonded(dev) ? 1 : mlx4_phys_to_slave_port(dev, i, port); + + if (!test_bit(i, slaves_port.slaves) && !mlx4_is_bonded(dev)) continue; if (i == mlx4_master_func_num(dev)) continue; @@ -645,7 +661,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) eqe->event.port_change.port = cpu_to_be32( (be32_to_cpu(eqe->event.port_change.port) & 0xFFFFFFF) - | (mlx4_phys_to_slave_port(dev, i, port) << 28)); + | (reported_port << 28)); mlx4_slave_event(dev, i, eqe); } } diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 90db94e83fdeef..2c2baab9d88044 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -1104,6 +1104,7 @@ int mlx4_QUERY_PORT(struct mlx4_dev *dev, int port, struct mlx4_port_cap *port_c goto out; MLX4_GET(field, outbox, QUERY_PORT_SUPPORTED_TYPE_OFFSET); + port_cap->link_state = (field & 0x80) >> 7; port_cap->supported_port_types = field & 3; port_cap->suggested_type = (field >> 3) & 1; port_cap->default_sense = (field >> 4) & 1; @@ -1310,6 +1311,15 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, port_type |= MLX4_PORT_LINK_UP_MASK; else if (IFLA_VF_LINK_STATE_DISABLE == admin_link_state) port_type &= ~MLX4_PORT_LINK_UP_MASK; + else if (IFLA_VF_LINK_STATE_AUTO == admin_link_state && mlx4_is_bonded(dev)) { + int other_port = (port == 1) ? 2 : 1; + struct mlx4_port_cap port_cap; + + err = mlx4_QUERY_PORT(dev, other_port, &port_cap); + if (err) + goto out; + port_type |= (port_cap.link_state << 7); + } MLX4_PUT(outbox->buf, port_type, QUERY_PORT_SUPPORTED_TYPE_OFFSET); @@ -1325,7 +1335,7 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, MLX4_PUT(outbox->buf, short_field, QUERY_PORT_CUR_MAX_PKEY_OFFSET); } - +out: return err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 08de5555c2f4d0..7ea258af636aec 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -44,6 +44,7 @@ struct mlx4_mod_stat_cfg { }; struct mlx4_port_cap { + u8 link_state; u8 supported_port_types; u8 suggested_type; u8 default_sense; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 31c491e02e69f1..f1b6d219e44543 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1221,6 +1221,76 @@ err_set_port: return err ? err : count; } +/* bond for multi-function device */ +#define MAX_MF_BOND_ALLOWED_SLAVES 63 +static int mlx4_mf_bond(struct mlx4_dev *dev) +{ + int err = 0; + struct mlx4_slaves_pport slaves_port1; + struct mlx4_slaves_pport slaves_port2; + DECLARE_BITMAP(slaves_port_1_2, MLX4_MFUNC_MAX); + + slaves_port1 = mlx4_phys_to_slaves_pport(dev, 1); + slaves_port2 = mlx4_phys_to_slaves_pport(dev, 2); + bitmap_and(slaves_port_1_2, + slaves_port1.slaves, slaves_port2.slaves, + dev->persist->num_vfs + 1); + + /* only single port vfs are allowed */ + if (bitmap_weight(slaves_port_1_2, dev->persist->num_vfs + 1) > 1) { + mlx4_warn(dev, "HA mode unsupported for dual ported VFs\n"); + return -EINVAL; + } + + /* limit on maximum allowed VFs */ + if ((bitmap_weight(slaves_port1.slaves, dev->persist->num_vfs + 1) + + bitmap_weight(slaves_port2.slaves, dev->persist->num_vfs + 1)) > + MAX_MF_BOND_ALLOWED_SLAVES) + return -EINVAL; + + if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) { + mlx4_warn(dev, "HA mode unsupported for NON DMFS steering\n"); + return -EINVAL; + } + + err = mlx4_bond_mac_table(dev); + if (err) + return err; + err = mlx4_bond_vlan_table(dev); + if (err) + goto err1; + err = mlx4_bond_fs_rules(dev); + if (err) + goto err2; + + return 0; +err2: + (void)mlx4_unbond_vlan_table(dev); +err1: + (void)mlx4_unbond_mac_table(dev); + return err; +} + +static int mlx4_mf_unbond(struct mlx4_dev *dev) +{ + int ret, ret1; + + ret = mlx4_unbond_fs_rules(dev); + if (ret) + mlx4_warn(dev, "multifunction unbond for flow rules failedi (%d)\n", ret); + ret1 = mlx4_unbond_mac_table(dev); + if (ret1) { + mlx4_warn(dev, "multifunction unbond for MAC table failed (%d)\n", ret1); + ret = ret1; + } + ret1 = mlx4_unbond_vlan_table(dev); + if (ret1) { + mlx4_warn(dev, "multifunction unbond for VLAN table failed (%d)\n", ret1); + ret = ret1; + } + return ret; +} + int mlx4_bond(struct mlx4_dev *dev) { int ret = 0; @@ -1228,16 +1298,23 @@ int mlx4_bond(struct mlx4_dev *dev) mutex_lock(&priv->bond_mutex); - if (!mlx4_is_bonded(dev)) + if (!mlx4_is_bonded(dev)) { ret = mlx4_do_bond(dev, true); - else - ret = 0; + if (ret) + mlx4_err(dev, "Failed to bond device: %d\n", ret); + if (!ret && mlx4_is_master(dev)) { + ret = mlx4_mf_bond(dev); + if (ret) { + mlx4_err(dev, "bond for multifunction failed\n"); + mlx4_do_bond(dev, false); + } + } + } mutex_unlock(&priv->bond_mutex); - if (ret) - mlx4_err(dev, "Failed to bond device: %d\n", ret); - else + if (!ret) mlx4_dbg(dev, "Device is bonded\n"); + return ret; } EXPORT_SYMBOL_GPL(mlx4_bond); @@ -1249,14 +1326,24 @@ int mlx4_unbond(struct mlx4_dev *dev) mutex_lock(&priv->bond_mutex); - if (mlx4_is_bonded(dev)) + if (mlx4_is_bonded(dev)) { + int ret2 = 0; + ret = mlx4_do_bond(dev, false); + if (ret) + mlx4_err(dev, "Failed to unbond device: %d\n", ret); + if (mlx4_is_master(dev)) + ret2 = mlx4_mf_unbond(dev); + if (ret2) { + mlx4_warn(dev, "Failed to unbond device for multifunction (%d)\n", ret2); + ret = ret2; + } + } mutex_unlock(&priv->bond_mutex); - if (ret) - mlx4_err(dev, "Failed to unbond device: %d\n", ret); - else + if (!ret) mlx4_dbg(dev, "Device is unbonded\n"); + return ret; } EXPORT_SYMBOL_GPL(mlx4_unbond); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index e1cf9036af2259..2404c22ad2b2ac 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -736,6 +736,7 @@ struct mlx4_catas_err { struct mlx4_mac_table { __be64 entries[MLX4_MAX_MAC_NUM]; int refs[MLX4_MAX_MAC_NUM]; + bool is_dup[MLX4_MAX_MAC_NUM]; struct mutex mutex; int total; int max; @@ -758,6 +759,7 @@ struct mlx4_roce_gid_table { struct mlx4_vlan_table { __be32 entries[MLX4_MAX_VLAN_NUM]; int refs[MLX4_MAX_VLAN_NUM]; + int is_dup[MLX4_MAX_VLAN_NUM]; struct mutex mutex; int total; int max; @@ -1225,6 +1227,10 @@ void mlx4_init_roce_gid_table(struct mlx4_dev *dev, struct mlx4_roce_gid_table *table); void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan); int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); +int mlx4_bond_vlan_table(struct mlx4_dev *dev); +int mlx4_unbond_vlan_table(struct mlx4_dev *dev); +int mlx4_bond_mac_table(struct mlx4_dev *dev); +int mlx4_unbond_mac_table(struct mlx4_dev *dev); int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz); /* resource tracker functions*/ @@ -1385,6 +1391,8 @@ int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port); int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave); int mlx4_config_mad_demux(struct mlx4_dev *dev); int mlx4_do_bond(struct mlx4_dev *dev, bool enable); +int mlx4_bond_fs_rules(struct mlx4_dev *dev); +int mlx4_unbond_fs_rules(struct mlx4_dev *dev); enum mlx4_zone_flags { MLX4_ZONE_ALLOW_ALLOC_FROM_LOWER_PRIO = 1UL << 0, diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index c2b21313dba7f6..f2550425c25147 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -61,6 +61,7 @@ void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table) for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { table->entries[i] = 0; table->refs[i] = 0; + table->is_dup[i] = false; } table->max = 1 << dev->caps.log_num_macs; table->total = 0; @@ -74,6 +75,7 @@ void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table) for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) { table->entries[i] = 0; table->refs[i] = 0; + table->is_dup[i] = false; } table->max = (1 << dev->caps.log_num_vlans) - MLX4_VLAN_REGULAR; table->total = 0; @@ -159,21 +161,94 @@ int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx) } EXPORT_SYMBOL_GPL(mlx4_find_cached_mac); +static bool mlx4_need_mf_bond(struct mlx4_dev *dev) +{ + int i, num_eth_ports = 0; + + if (!mlx4_is_mfunc(dev)) + return false; + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) + ++num_eth_ports; + + return (num_eth_ports == 2) ? true : false; +} + int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac) { struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; struct mlx4_mac_table *table = &info->mac_table; int i, err = 0; int free = -1; + int free_for_dup = -1; + bool dup = mlx4_is_mf_bonded(dev); + u8 dup_port = (port == 1) ? 2 : 1; + struct mlx4_mac_table *dup_table = &mlx4_priv(dev)->port[dup_port].mac_table; + bool need_mf_bond = mlx4_need_mf_bond(dev); + bool can_mf_bond = true; + + mlx4_dbg(dev, "Registering MAC: 0x%llx for port %d %s duplicate\n", + (unsigned long long)mac, port, + dup ? "with" : "without"); + + if (need_mf_bond) { + if (port == 1) { + mutex_lock(&table->mutex); + mutex_lock(&dup_table->mutex); + } else { + mutex_lock(&dup_table->mutex); + mutex_lock(&table->mutex); + } + } else { + mutex_lock(&table->mutex); + } + + if (need_mf_bond) { + int index_at_port = -1; + int index_at_dup_port = -1; - mlx4_dbg(dev, "Registering MAC: 0x%llx for port %d\n", - (unsigned long long) mac, port); + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + if (((MLX4_MAC_MASK & mac) == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i])))) + index_at_port = i; + if (((MLX4_MAC_MASK & mac) == (MLX4_MAC_MASK & be64_to_cpu(dup_table->entries[i])))) + index_at_dup_port = i; + } + + /* check that same mac is not in the tables at different indices */ + if ((index_at_port != index_at_dup_port) && + (index_at_port >= 0) && + (index_at_dup_port >= 0)) + can_mf_bond = false; + + /* If the mac is already in the primary table, the slot must be + * available in the duplicate table as well. + */ + if (index_at_port >= 0 && index_at_dup_port < 0 && + dup_table->refs[index_at_port]) { + can_mf_bond = false; + } + /* If the mac is already in the duplicate table, check that the + * corresponding index is not occupied in the primary table, or + * the primary table already contains the mac at the same index. + * Otherwise, you cannot bond (primary contains a different mac + * at that index). + */ + if (index_at_dup_port >= 0) { + if (!table->refs[index_at_dup_port] || + ((MLX4_MAC_MASK & mac) == (MLX4_MAC_MASK & be64_to_cpu(table->entries[index_at_dup_port])))) + free_for_dup = index_at_dup_port; + else + can_mf_bond = false; + } + } - mutex_lock(&table->mutex); for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { if (!table->refs[i]) { if (free < 0) free = i; + if (free_for_dup < 0 && need_mf_bond && can_mf_bond) { + if (!dup_table->refs[i]) + free_for_dup = i; + } continue; } @@ -182,10 +257,30 @@ int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac) /* MAC already registered, increment ref count */ err = i; ++table->refs[i]; + if (dup) { + u64 dup_mac = MLX4_MAC_MASK & be64_to_cpu(dup_table->entries[i]); + + if (dup_mac != mac || !dup_table->is_dup[i]) { + mlx4_warn(dev, "register mac: expect duplicate mac 0x%llx on port %d index %d\n", + mac, dup_port, i); + } + } goto out; } } + if (need_mf_bond && (free_for_dup < 0)) { + if (dup) { + mlx4_warn(dev, "Fail to allocate duplicate MAC table entry\n"); + mlx4_warn(dev, "High Availability for virtual functions may not work as expected\n"); + dup = false; + } + can_mf_bond = false; + } + + if (need_mf_bond && can_mf_bond) + free = free_for_dup; + mlx4_dbg(dev, "Free MAC index is %d\n", free); if (table->total == table->max) { @@ -205,10 +300,35 @@ int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac) goto out; } table->refs[free] = 1; - err = free; + table->is_dup[free] = false; ++table->total; + if (dup) { + dup_table->refs[free] = 0; + dup_table->is_dup[free] = true; + dup_table->entries[free] = cpu_to_be64(mac | MLX4_MAC_VALID); + + err = mlx4_set_port_mac_table(dev, dup_port, dup_table->entries); + if (unlikely(err)) { + mlx4_warn(dev, "Failed adding duplicate mac: 0x%llx\n", mac); + dup_table->is_dup[free] = false; + dup_table->entries[free] = 0; + goto out; + } + ++dup_table->total; + } + err = free; out: - mutex_unlock(&table->mutex); + if (need_mf_bond) { + if (port == 2) { + mutex_unlock(&table->mutex); + mutex_unlock(&dup_table->mutex); + } else { + mutex_unlock(&dup_table->mutex); + mutex_unlock(&table->mutex); + } + } else { + mutex_unlock(&table->mutex); + } return err; } EXPORT_SYMBOL_GPL(__mlx4_register_mac); @@ -255,6 +375,9 @@ void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac) struct mlx4_port_info *info; struct mlx4_mac_table *table; int index; + bool dup = mlx4_is_mf_bonded(dev); + u8 dup_port = (port == 1) ? 2 : 1; + struct mlx4_mac_table *dup_table = &mlx4_priv(dev)->port[dup_port].mac_table; if (port < 1 || port > dev->caps.num_ports) { mlx4_warn(dev, "invalid port number (%d), aborting...\n", port); @@ -262,22 +385,59 @@ void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac) } info = &mlx4_priv(dev)->port[port]; table = &info->mac_table; - mutex_lock(&table->mutex); + + if (dup) { + if (port == 1) { + mutex_lock(&table->mutex); + mutex_lock(&dup_table->mutex); + } else { + mutex_lock(&dup_table->mutex); + mutex_lock(&table->mutex); + } + } else { + mutex_lock(&table->mutex); + } + index = find_index(dev, table, mac); if (validate_index(dev, table, index)) goto out; - if (--table->refs[index]) { + + if (--table->refs[index] || table->is_dup[index]) { mlx4_dbg(dev, "Have more references for index %d, no need to modify mac table\n", index); + if (!table->refs[index]) + dup_table->is_dup[index] = false; goto out; } table->entries[index] = 0; - mlx4_set_port_mac_table(dev, port, table->entries); + if (mlx4_set_port_mac_table(dev, port, table->entries)) + mlx4_warn(dev, "Fail to set mac in port %d during unregister\n", port); --table->total; + + if (dup) { + dup_table->is_dup[index] = false; + if (dup_table->refs[index]) + goto out; + dup_table->entries[index] = 0; + if (mlx4_set_port_mac_table(dev, dup_port, dup_table->entries)) + mlx4_warn(dev, "Fail to set mac in duplicate port %d during unregister\n", dup_port); + + --table->total; + } out: - mutex_unlock(&table->mutex); + if (dup) { + if (port == 2) { + mutex_unlock(&table->mutex); + mutex_unlock(&dup_table->mutex); + } else { + mutex_unlock(&dup_table->mutex); + mutex_unlock(&table->mutex); + } + } else { + mutex_unlock(&table->mutex); + } } EXPORT_SYMBOL_GPL(__mlx4_unregister_mac); @@ -311,9 +471,22 @@ int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac) struct mlx4_mac_table *table = &info->mac_table; int index = qpn - info->base_qpn; int err = 0; + bool dup = mlx4_is_mf_bonded(dev); + u8 dup_port = (port == 1) ? 2 : 1; + struct mlx4_mac_table *dup_table = &mlx4_priv(dev)->port[dup_port].mac_table; /* CX1 doesn't support multi-functions */ - mutex_lock(&table->mutex); + if (dup) { + if (port == 1) { + mutex_lock(&table->mutex); + mutex_lock(&dup_table->mutex); + } else { + mutex_lock(&dup_table->mutex); + mutex_lock(&table->mutex); + } + } else { + mutex_lock(&table->mutex); + } err = validate_index(dev, table, index); if (err) @@ -326,9 +499,30 @@ int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac) mlx4_err(dev, "Failed adding MAC: 0x%llx\n", (unsigned long long) new_mac); table->entries[index] = 0; + } else { + if (dup) { + dup_table->entries[index] = cpu_to_be64(new_mac | MLX4_MAC_VALID); + + err = mlx4_set_port_mac_table(dev, dup_port, dup_table->entries); + if (unlikely(err)) { + mlx4_err(dev, "Failed adding duplicate MAC: 0x%llx\n", + (unsigned long long)new_mac); + dup_table->entries[index] = 0; + } + } } out: - mutex_unlock(&table->mutex); + if (dup) { + if (port == 2) { + mutex_unlock(&table->mutex); + mutex_unlock(&dup_table->mutex); + } else { + mutex_unlock(&dup_table->mutex); + mutex_unlock(&table->mutex); + } + } else { + mutex_unlock(&table->mutex); + } return err; } EXPORT_SYMBOL_GPL(__mlx4_replace_mac); @@ -380,8 +574,28 @@ int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table; int i, err = 0; int free = -1; - - mutex_lock(&table->mutex); + int free_for_dup = -1; + bool dup = mlx4_is_mf_bonded(dev); + u8 dup_port = (port == 1) ? 2 : 1; + struct mlx4_vlan_table *dup_table = &mlx4_priv(dev)->port[dup_port].vlan_table; + bool need_mf_bond = mlx4_need_mf_bond(dev); + bool can_mf_bond = true; + + mlx4_dbg(dev, "Registering VLAN: %d for port %d %s duplicate\n", + vlan, port, + dup ? "with" : "without"); + + if (need_mf_bond) { + if (port == 1) { + mutex_lock(&table->mutex); + mutex_lock(&dup_table->mutex); + } else { + mutex_lock(&dup_table->mutex); + mutex_lock(&table->mutex); + } + } else { + mutex_lock(&table->mutex); + } if (table->total == table->max) { /* No free vlan entries */ @@ -389,22 +603,85 @@ int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, goto out; } + if (need_mf_bond) { + int index_at_port = -1; + int index_at_dup_port = -1; + + for (i = MLX4_VLAN_REGULAR; i < MLX4_MAX_VLAN_NUM; i++) { + if ((vlan == (MLX4_VLAN_MASK & be32_to_cpu(table->entries[i])))) + index_at_port = i; + if ((vlan == (MLX4_VLAN_MASK & be32_to_cpu(dup_table->entries[i])))) + index_at_dup_port = i; + } + /* check that same vlan is not in the tables at different indices */ + if ((index_at_port != index_at_dup_port) && + (index_at_port >= 0) && + (index_at_dup_port >= 0)) + can_mf_bond = false; + + /* If the vlan is already in the primary table, the slot must be + * available in the duplicate table as well. + */ + if (index_at_port >= 0 && index_at_dup_port < 0 && + dup_table->refs[index_at_port]) { + can_mf_bond = false; + } + /* If the vlan is already in the duplicate table, check that the + * corresponding index is not occupied in the primary table, or + * the primary table already contains the vlan at the same index. + * Otherwise, you cannot bond (primary contains a different vlan + * at that index). + */ + if (index_at_dup_port >= 0) { + if (!table->refs[index_at_dup_port] || + (vlan == (MLX4_VLAN_MASK & be32_to_cpu(dup_table->entries[index_at_dup_port])))) + free_for_dup = index_at_dup_port; + else + can_mf_bond = false; + } + } + for (i = MLX4_VLAN_REGULAR; i < MLX4_MAX_VLAN_NUM; i++) { - if (free < 0 && (table->refs[i] == 0)) { - free = i; - continue; + if (!table->refs[i]) { + if (free < 0) + free = i; + if (free_for_dup < 0 && need_mf_bond && can_mf_bond) { + if (!dup_table->refs[i]) + free_for_dup = i; + } } - if (table->refs[i] && + if ((table->refs[i] || table->is_dup[i]) && (vlan == (MLX4_VLAN_MASK & be32_to_cpu(table->entries[i])))) { /* Vlan already registered, increase references count */ + mlx4_dbg(dev, "vlan %u is already registered.\n", vlan); *index = i; ++table->refs[i]; + if (dup) { + u16 dup_vlan = MLX4_VLAN_MASK & be32_to_cpu(dup_table->entries[i]); + + if (dup_vlan != vlan || !dup_table->is_dup[i]) { + mlx4_warn(dev, "register vlan: expected duplicate vlan %u on port %d index %d\n", + vlan, dup_port, i); + } + } goto out; } } + if (need_mf_bond && (free_for_dup < 0)) { + if (dup) { + mlx4_warn(dev, "Fail to allocate duplicate VLAN table entry\n"); + mlx4_warn(dev, "High Availability for virtual functions may not work as expected\n"); + dup = false; + } + can_mf_bond = false; + } + + if (need_mf_bond && can_mf_bond) + free = free_for_dup; + if (free < 0) { err = -ENOMEM; goto out; @@ -412,6 +689,7 @@ int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, /* Register new VLAN */ table->refs[free] = 1; + table->is_dup[free] = false; table->entries[free] = cpu_to_be32(vlan | MLX4_VLAN_VALID); err = mlx4_set_port_vlan_table(dev, port, table->entries); @@ -421,11 +699,35 @@ int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, table->entries[free] = 0; goto out; } + ++table->total; + if (dup) { + dup_table->refs[free] = 0; + dup_table->is_dup[free] = true; + dup_table->entries[free] = cpu_to_be32(vlan | MLX4_VLAN_VALID); + + err = mlx4_set_port_vlan_table(dev, dup_port, dup_table->entries); + if (unlikely(err)) { + mlx4_warn(dev, "Failed adding duplicate vlan: %u\n", vlan); + dup_table->is_dup[free] = false; + dup_table->entries[free] = 0; + goto out; + } + ++dup_table->total; + } *index = free; - ++table->total; out: - mutex_unlock(&table->mutex); + if (need_mf_bond) { + if (port == 2) { + mutex_unlock(&table->mutex); + mutex_unlock(&dup_table->mutex); + } else { + mutex_unlock(&dup_table->mutex); + mutex_unlock(&table->mutex); + } + } else { + mutex_unlock(&table->mutex); + } return err; } @@ -455,8 +757,22 @@ void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan) { struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table; int index; + bool dup = mlx4_is_mf_bonded(dev); + u8 dup_port = (port == 1) ? 2 : 1; + struct mlx4_vlan_table *dup_table = &mlx4_priv(dev)->port[dup_port].vlan_table; + + if (dup) { + if (port == 1) { + mutex_lock(&table->mutex); + mutex_lock(&dup_table->mutex); + } else { + mutex_lock(&dup_table->mutex); + mutex_lock(&table->mutex); + } + } else { + mutex_lock(&table->mutex); + } - mutex_lock(&table->mutex); if (mlx4_find_cached_vlan(dev, port, vlan, &index)) { mlx4_warn(dev, "vlan 0x%x is not in the vlan table\n", vlan); goto out; @@ -467,16 +783,38 @@ void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan) goto out; } - if (--table->refs[index]) { + if (--table->refs[index] || table->is_dup[index]) { mlx4_dbg(dev, "Have %d more references for index %d, no need to modify vlan table\n", table->refs[index], index); + if (!table->refs[index]) + dup_table->is_dup[index] = false; goto out; } table->entries[index] = 0; - mlx4_set_port_vlan_table(dev, port, table->entries); + if (mlx4_set_port_vlan_table(dev, port, table->entries)) + mlx4_warn(dev, "Fail to set vlan in port %d during unregister\n", port); --table->total; + if (dup) { + dup_table->is_dup[index] = false; + if (dup_table->refs[index]) + goto out; + dup_table->entries[index] = 0; + if (mlx4_set_port_vlan_table(dev, dup_port, dup_table->entries)) + mlx4_warn(dev, "Fail to set vlan in duplicate port %d during unregister\n", dup_port); + --dup_table->total; + } out: - mutex_unlock(&table->mutex); + if (dup) { + if (port == 2) { + mutex_unlock(&table->mutex); + mutex_unlock(&dup_table->mutex); + } else { + mutex_unlock(&dup_table->mutex); + mutex_unlock(&table->mutex); + } + } else { + mutex_unlock(&table->mutex); + } } void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan) @@ -495,6 +833,220 @@ void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan) } EXPORT_SYMBOL_GPL(mlx4_unregister_vlan); +int mlx4_bond_mac_table(struct mlx4_dev *dev) +{ + struct mlx4_mac_table *t1 = &mlx4_priv(dev)->port[1].mac_table; + struct mlx4_mac_table *t2 = &mlx4_priv(dev)->port[2].mac_table; + int ret = 0; + int i; + bool update1 = false; + bool update2 = false; + + mutex_lock(&t1->mutex); + mutex_lock(&t2->mutex); + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + if ((t1->entries[i] != t2->entries[i]) && + t1->entries[i] && t2->entries[i]) { + mlx4_warn(dev, "can't duplicate entry %d in mac table\n", i); + ret = -EINVAL; + goto unlock; + } + } + + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + if (t1->entries[i] && !t2->entries[i]) { + t2->entries[i] = t1->entries[i]; + t2->is_dup[i] = true; + update2 = true; + } else if (!t1->entries[i] && t2->entries[i]) { + t1->entries[i] = t2->entries[i]; + t1->is_dup[i] = true; + update1 = true; + } else if (t1->entries[i] && t2->entries[i]) { + t1->is_dup[i] = true; + t2->is_dup[i] = true; + } + } + + if (update1) { + ret = mlx4_set_port_mac_table(dev, 1, t1->entries); + if (ret) + mlx4_warn(dev, "failed to set MAC table for port 1 (%d)\n", ret); + } + if (!ret && update2) { + ret = mlx4_set_port_mac_table(dev, 2, t2->entries); + if (ret) + mlx4_warn(dev, "failed to set MAC table for port 2 (%d)\n", ret); + } + + if (ret) + mlx4_warn(dev, "failed to create mirror MAC tables\n"); +unlock: + mutex_unlock(&t2->mutex); + mutex_unlock(&t1->mutex); + return ret; +} + +int mlx4_unbond_mac_table(struct mlx4_dev *dev) +{ + struct mlx4_mac_table *t1 = &mlx4_priv(dev)->port[1].mac_table; + struct mlx4_mac_table *t2 = &mlx4_priv(dev)->port[2].mac_table; + int ret = 0; + int ret1; + int i; + bool update1 = false; + bool update2 = false; + + mutex_lock(&t1->mutex); + mutex_lock(&t2->mutex); + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + if (t1->entries[i] != t2->entries[i]) { + mlx4_warn(dev, "mac table is in an unexpected state when trying to unbond\n"); + ret = -EINVAL; + goto unlock; + } + } + + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + if (!t1->entries[i]) + continue; + t1->is_dup[i] = false; + if (!t1->refs[i]) { + t1->entries[i] = 0; + update1 = true; + } + t2->is_dup[i] = false; + if (!t2->refs[i]) { + t2->entries[i] = 0; + update2 = true; + } + } + + if (update1) { + ret = mlx4_set_port_mac_table(dev, 1, t1->entries); + if (ret) + mlx4_warn(dev, "failed to unmirror MAC tables for port 1(%d)\n", ret); + } + if (update2) { + ret1 = mlx4_set_port_mac_table(dev, 2, t2->entries); + if (ret1) { + mlx4_warn(dev, "failed to unmirror MAC tables for port 2(%d)\n", ret1); + ret = ret1; + } + } +unlock: + mutex_unlock(&t2->mutex); + mutex_unlock(&t1->mutex); + return ret; +} + +int mlx4_bond_vlan_table(struct mlx4_dev *dev) +{ + struct mlx4_vlan_table *t1 = &mlx4_priv(dev)->port[1].vlan_table; + struct mlx4_vlan_table *t2 = &mlx4_priv(dev)->port[2].vlan_table; + int ret = 0; + int i; + bool update1 = false; + bool update2 = false; + + mutex_lock(&t1->mutex); + mutex_lock(&t2->mutex); + for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) { + if ((t1->entries[i] != t2->entries[i]) && + t1->entries[i] && t2->entries[i]) { + mlx4_warn(dev, "can't duplicate entry %d in vlan table\n", i); + ret = -EINVAL; + goto unlock; + } + } + + for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) { + if (t1->entries[i] && !t2->entries[i]) { + t2->entries[i] = t1->entries[i]; + t2->is_dup[i] = true; + update2 = true; + } else if (!t1->entries[i] && t2->entries[i]) { + t1->entries[i] = t2->entries[i]; + t1->is_dup[i] = true; + update1 = true; + } else if (t1->entries[i] && t2->entries[i]) { + t1->is_dup[i] = true; + t2->is_dup[i] = true; + } + } + + if (update1) { + ret = mlx4_set_port_vlan_table(dev, 1, t1->entries); + if (ret) + mlx4_warn(dev, "failed to set VLAN table for port 1 (%d)\n", ret); + } + if (!ret && update2) { + ret = mlx4_set_port_vlan_table(dev, 2, t2->entries); + if (ret) + mlx4_warn(dev, "failed to set VLAN table for port 2 (%d)\n", ret); + } + + if (ret) + mlx4_warn(dev, "failed to create mirror VLAN tables\n"); +unlock: + mutex_unlock(&t2->mutex); + mutex_unlock(&t1->mutex); + return ret; +} + +int mlx4_unbond_vlan_table(struct mlx4_dev *dev) +{ + struct mlx4_vlan_table *t1 = &mlx4_priv(dev)->port[1].vlan_table; + struct mlx4_vlan_table *t2 = &mlx4_priv(dev)->port[2].vlan_table; + int ret = 0; + int ret1; + int i; + bool update1 = false; + bool update2 = false; + + mutex_lock(&t1->mutex); + mutex_lock(&t2->mutex); + for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) { + if (t1->entries[i] != t2->entries[i]) { + mlx4_warn(dev, "vlan table is in an unexpected state when trying to unbond\n"); + ret = -EINVAL; + goto unlock; + } + } + + for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) { + if (!t1->entries[i]) + continue; + t1->is_dup[i] = false; + if (!t1->refs[i]) { + t1->entries[i] = 0; + update1 = true; + } + t2->is_dup[i] = false; + if (!t2->refs[i]) { + t2->entries[i] = 0; + update2 = true; + } + } + + if (update1) { + ret = mlx4_set_port_vlan_table(dev, 1, t1->entries); + if (ret) + mlx4_warn(dev, "failed to unmirror VLAN tables for port 1(%d)\n", ret); + } + if (update2) { + ret1 = mlx4_set_port_vlan_table(dev, 2, t2->entries); + if (ret1) { + mlx4_warn(dev, "failed to unmirror VLAN tables for port 2(%d)\n", ret1); + ret = ret1; + } + } +unlock: + mutex_unlock(&t2->mutex); + mutex_unlock(&t1->mutex); + return ret; +} + int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps) { struct mlx4_cmd_mailbox *inmailbox, *outmailbox; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 6fec3e993d020e..da7f578a3fe13f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -222,6 +222,13 @@ enum res_fs_rule_states { struct res_fs_rule { struct res_common com; int qpn; + /* VF DMFS mbox with port flipped */ + void *mirr_mbox; + /* > 0 --> apply mirror when getting into HA mode */ + /* = 0 --> un-apply mirror when getting out of HA mode */ + u32 mirr_mbox_size; + struct list_head mirr_list; + u64 mirr_rule_id; }; static void *res_tracker_lookup(struct rb_root *root, u64 res_id) @@ -4284,6 +4291,22 @@ err_mac: return err; } +static u32 qp_attach_mbox_size(void *mbox) +{ + u32 size = sizeof(struct mlx4_net_trans_rule_hw_ctrl); + struct _rule_hw *rule_header; + + rule_header = (struct _rule_hw *)(mbox + size); + + while (rule_header->size) { + size += rule_header->size * sizeof(u32); + rule_header += 1; + } + return size; +} + +static int mlx4_do_mirror_rule(struct mlx4_dev *dev, struct res_fs_rule *fs_rule); + int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -4300,6 +4323,8 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_net_trans_rule_hw_ctrl *ctrl; struct _rule_hw *rule_header; int header_id; + struct res_fs_rule *rrule; + u32 mbox_size; if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) @@ -4328,7 +4353,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, case MLX4_NET_TRANS_RULE_ID_ETH: if (validate_eth_header_mac(slave, rule_header, rlist)) { err = -EINVAL; - goto err_put; + goto err_put_qp; } break; case MLX4_NET_TRANS_RULE_ID_IB: @@ -4339,7 +4364,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, pr_warn("Can't attach FS rule without L2 headers, adding L2 header\n"); if (add_eth_header(dev, slave, inbox, rlist, header_id)) { err = -EINVAL; - goto err_put; + goto err_put_qp; } vhcr->in_modifier += sizeof(struct mlx4_net_trans_rule_hw_eth) >> 2; @@ -4347,7 +4372,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, default: pr_err("Corrupted mailbox\n"); err = -EINVAL; - goto err_put; + goto err_put_qp; } execute: @@ -4356,23 +4381,69 @@ execute: MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) - goto err_put; + goto err_put_qp; + err = add_res_range(dev, slave, vhcr->out_param, 1, RES_FS_RULE, qpn); if (err) { mlx4_err(dev, "Fail to add flow steering resources\n"); - /* detach rule*/ + goto err_detach; + } + + err = get_res(dev, slave, vhcr->out_param, RES_FS_RULE, &rrule); + if (err) + goto err_detach; + + mbox_size = qp_attach_mbox_size(inbox->buf); + rrule->mirr_mbox = kmalloc(mbox_size, GFP_KERNEL); + if (!rrule->mirr_mbox) { + err = -ENOMEM; + goto err_put_rule; + } + rrule->mirr_mbox_size = mbox_size; + rrule->mirr_rule_id = 0; + memcpy(rrule->mirr_mbox, inbox->buf, mbox_size); + + /* set different port */ + ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)rrule->mirr_mbox; + if (ctrl->port == 1) + ctrl->port = 2; + else + ctrl->port = 1; + + if (mlx4_is_bonded(dev)) + mlx4_do_mirror_rule(dev, rrule); + + atomic_inc(&rqp->ref_count); + +err_put_rule: + put_res(dev, slave, vhcr->out_param, RES_FS_RULE); +err_detach: + /* detach rule on error */ + if (err) mlx4_cmd(dev, vhcr->out_param, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); - goto err_put; - } - atomic_inc(&rqp->ref_count); -err_put: +err_put_qp: put_res(dev, slave, qpn, RES_QP); return err; } +static int mlx4_undo_mirror_rule(struct mlx4_dev *dev, struct res_fs_rule *fs_rule) +{ + int err; + + err = rem_res_range(dev, fs_rule->com.owner, fs_rule->com.res_id, 1, RES_FS_RULE, 0); + if (err) { + mlx4_err(dev, "Fail to remove flow steering resources\n"); + return err; + } + + mlx4_cmd(dev, fs_rule->com.res_id, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + return 0; +} + int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -4382,6 +4453,7 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, int err; struct res_qp *rqp; struct res_fs_rule *rrule; + u64 mirr_reg_id; if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) @@ -4390,12 +4462,30 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, err = get_res(dev, slave, vhcr->in_param, RES_FS_RULE, &rrule); if (err) return err; + + if (!rrule->mirr_mbox) { + mlx4_err(dev, "Mirror rules cannot be removed explicitly\n"); + put_res(dev, slave, vhcr->in_param, RES_FS_RULE); + return -EINVAL; + } + mirr_reg_id = rrule->mirr_rule_id; + kfree(rrule->mirr_mbox); + /* Release the rule form busy state before removal */ put_res(dev, slave, vhcr->in_param, RES_FS_RULE); err = get_res(dev, slave, rrule->qpn, RES_QP, &rqp); if (err) return err; + if (mirr_reg_id && mlx4_is_bonded(dev)) { + err = get_res(dev, slave, mirr_reg_id, RES_FS_RULE, &rrule); + if (err) { + mlx4_err(dev, "Fail to get resource of mirror rule\n"); + } else { + put_res(dev, slave, mirr_reg_id, RES_FS_RULE); + mlx4_undo_mirror_rule(dev, rrule); + } + } err = rem_res_range(dev, slave, vhcr->in_param, 1, RES_FS_RULE, 0); if (err) { mlx4_err(dev, "Fail to remove flow steering resources\n"); @@ -4833,6 +4923,91 @@ static void rem_slave_mtts(struct mlx4_dev *dev, int slave) spin_unlock_irq(mlx4_tlock(dev)); } +static int mlx4_do_mirror_rule(struct mlx4_dev *dev, struct res_fs_rule *fs_rule) +{ + struct mlx4_cmd_mailbox *mailbox; + int err; + struct res_fs_rule *mirr_rule; + u64 reg_id; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + if (!fs_rule->mirr_mbox) { + mlx4_err(dev, "rule mirroring mailbox is null\n"); + return -EINVAL; + } + memcpy(mailbox->buf, fs_rule->mirr_mbox, fs_rule->mirr_mbox_size); + err = mlx4_cmd_imm(dev, mailbox->dma, ®_id, fs_rule->mirr_mbox_size >> 2, 0, + MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + mlx4_free_cmd_mailbox(dev, mailbox); + + if (err) + goto err; + + err = add_res_range(dev, fs_rule->com.owner, reg_id, 1, RES_FS_RULE, fs_rule->qpn); + if (err) + goto err_detach; + + err = get_res(dev, fs_rule->com.owner, reg_id, RES_FS_RULE, &mirr_rule); + if (err) + goto err_rem; + + fs_rule->mirr_rule_id = reg_id; + mirr_rule->mirr_rule_id = 0; + mirr_rule->mirr_mbox_size = 0; + mirr_rule->mirr_mbox = NULL; + put_res(dev, fs_rule->com.owner, reg_id, RES_FS_RULE); + + return 0; +err_rem: + rem_res_range(dev, fs_rule->com.owner, reg_id, 1, RES_FS_RULE, 0); +err_detach: + mlx4_cmd(dev, reg_id, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); +err: + return err; +} + +static int mlx4_mirror_fs_rules(struct mlx4_dev *dev, bool bond) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = + &priv->mfunc.master.res_tracker; + struct rb_root *root = &tracker->res_tree[RES_FS_RULE]; + struct rb_node *p; + struct res_fs_rule *fs_rule; + int err = 0; + LIST_HEAD(mirr_list); + + for (p = rb_first(root); p; p = rb_next(p)) { + fs_rule = rb_entry(p, struct res_fs_rule, com.node); + if ((bond && fs_rule->mirr_mbox_size) || + (!bond && !fs_rule->mirr_mbox_size)) + list_add_tail(&fs_rule->mirr_list, &mirr_list); + } + + list_for_each_entry(fs_rule, &mirr_list, mirr_list) { + if (bond) + err += mlx4_do_mirror_rule(dev, fs_rule); + else + err += mlx4_undo_mirror_rule(dev, fs_rule); + } + return err; +} + +int mlx4_bond_fs_rules(struct mlx4_dev *dev) +{ + return mlx4_mirror_fs_rules(dev, true); +} + +int mlx4_unbond_fs_rules(struct mlx4_dev *dev) +{ + return mlx4_mirror_fs_rules(dev, false); +} + static void rem_slave_fs_rule(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 26a68b8af2c5c6..a0755919ccaf0f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ - mad.o transobj.o vport.o -mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o \ + mad.o transobj.o vport.o sriov.o +mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o eswitch.o \ en_main.o en_flow_table.o en_ethtool.o en_tx.o en_rx.o \ en_txrx.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 69f1c1a412b4ef..89313d46952ded 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -465,6 +465,7 @@ enum { }; struct mlx5e_vlan_db { + unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; u32 active_vlans_ft_ix[VLAN_N_VID]; u32 untagged_rule_ft_ix; u32 any_vlan_rule_ft_ix; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c b/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c index 22d603f7827333..5b93c9c6e341d0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c @@ -502,6 +502,49 @@ add_eth_addr_rule_out: return err; } +static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) +{ + struct net_device *ndev = priv->netdev; + int max_list_size; + int list_size; + u16 *vlans; + int vlan; + int err; + int i; + + list_size = 0; + for_each_set_bit(vlan, priv->vlan.active_vlans, VLAN_N_VID) + list_size++; + + max_list_size = 1 << MLX5_CAP_GEN(priv->mdev, log_max_vlan_list); + + if (list_size > max_list_size) { + netdev_warn(ndev, + "netdev vlans list size (%d) > (%d) max vport list size, some vlans will be dropped\n", + list_size, max_list_size); + list_size = max_list_size; + } + + vlans = kcalloc(list_size, sizeof(*vlans), GFP_KERNEL); + if (!vlans) + return -ENOMEM; + + i = 0; + for_each_set_bit(vlan, priv->vlan.active_vlans, VLAN_N_VID) { + if (i >= list_size) + break; + vlans[i++] = vlan; + } + + err = mlx5_modify_nic_vport_vlans(priv->mdev, vlans, list_size); + if (err) + netdev_err(ndev, "Failed to modify vport vlans list err(%d)\n", + err); + + kfree(vlans); + return err; +} + enum mlx5e_vlan_rule_type { MLX5E_VLAN_RULE_TYPE_UNTAGGED, MLX5E_VLAN_RULE_TYPE_ANY_VID, @@ -552,6 +595,10 @@ static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv, 1); break; default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */ + err = mlx5e_vport_context_update_vlans(priv); + if (err) + goto add_vlan_rule_out; + ft_ix = &priv->vlan.active_vlans_ft_ix[vid]; MLX5_SET(fte_match_param, match_value, outer_headers.vlan_tag, 1); @@ -588,6 +635,7 @@ static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, case MLX5E_VLAN_RULE_TYPE_MATCH_VID: mlx5_del_flow_table_entry(priv->ft.vlan, priv->vlan.active_vlans_ft_ix[vid]); + mlx5e_vport_context_update_vlans(priv); break; } } @@ -619,6 +667,8 @@ int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, { struct mlx5e_priv *priv = netdev_priv(dev); + set_bit(vid, priv->vlan.active_vlans); + return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); } @@ -627,6 +677,8 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, { struct mlx5e_priv *priv = netdev_priv(dev); + clear_bit(vid, priv->vlan.active_vlans); + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); return 0; @@ -671,6 +723,91 @@ static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv) netif_addr_unlock_bh(netdev); } +static void mlx5e_fill_addr_array(struct mlx5e_priv *priv, int list_type, + u8 addr_array[][ETH_ALEN], int size) +{ + bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC); + struct net_device *ndev = priv->netdev; + struct mlx5e_eth_addr_hash_node *hn; + struct hlist_head *addr_list; + struct hlist_node *tmp; + int i = 0; + int hi; + + addr_list = is_uc ? priv->eth_addr.netdev_uc : priv->eth_addr.netdev_mc; + + if (is_uc) /* Make sure our own address is pushed first */ + ether_addr_copy(addr_array[i++], ndev->dev_addr); + else if (priv->eth_addr.broadcast_enabled) + ether_addr_copy(addr_array[i++], ndev->broadcast); + + mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) { + if (ether_addr_equal(ndev->dev_addr, hn->ai.addr)) + continue; + if (i >= size) + break; + ether_addr_copy(addr_array[i++], hn->ai.addr); + } +} + +static void mlx5e_vport_context_update_addr_list(struct mlx5e_priv *priv, + int list_type) +{ + bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC); + struct mlx5e_eth_addr_hash_node *hn; + u8 (*addr_array)[ETH_ALEN] = NULL; + struct hlist_head *addr_list; + struct hlist_node *tmp; + int max_size; + int size; + int err; + int hi; + + size = is_uc ? 0 : (priv->eth_addr.broadcast_enabled ? 1 : 0); + max_size = is_uc ? + 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_mc_list); + + addr_list = is_uc ? priv->eth_addr.netdev_uc : priv->eth_addr.netdev_mc; + mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) + size++; + + if (size > max_size) { + netdev_warn(priv->netdev, + "netdev %s list size (%d) > (%d) max vport list size, some addresses will be dropped\n", + is_uc ? "UC" : "MC", size, max_size); + size = max_size; + } + + if (size) { + addr_array = kcalloc(size, ETH_ALEN, GFP_KERNEL); + if (!addr_array) { + err = -ENOMEM; + goto out; + } + mlx5e_fill_addr_array(priv, list_type, addr_array, size); + } + + err = mlx5_modify_nic_vport_mac_list(priv->mdev, list_type, addr_array, size); +out: + if (err) + netdev_err(priv->netdev, + "Failed to modify vport %s list err(%d)\n", + is_uc ? "UC" : "MC", err); + kfree(addr_array); +} + +static void mlx5e_vport_context_update(struct mlx5e_priv *priv) +{ + struct mlx5e_eth_addr_db *ea = &priv->eth_addr; + + mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_UC); + mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_MC); + mlx5_modify_nic_vport_promisc(priv->mdev, 0, + ea->allmulti_enabled, + ea->promisc_enabled); +} + static void mlx5e_apply_netdev_addr(struct mlx5e_priv *priv) { struct mlx5e_eth_addr_hash_node *hn; @@ -748,6 +885,8 @@ void mlx5e_set_rx_mode_work(struct work_struct *work) ea->promisc_enabled = promisc_enabled; ea->allmulti_enabled = allmulti_enabled; ea->broadcast_enabled = broadcast_enabled; + + mlx5e_vport_context_update(priv); } void mlx5e_init_eth_addr(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f6a8cc78760355..d67058afe87ec9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -32,6 +32,7 @@ #include <linux/mlx5/flow_table.h> #include "en.h" +#include "eswitch.h" struct mlx5e_rq_param { u32 rqc[MLX5_ST_SZ_DW(rqc)]; @@ -63,7 +64,7 @@ static void mlx5e_update_carrier(struct mlx5e_priv *priv) u8 port_state; port_state = mlx5_query_vport_state(mdev, - MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT); + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0); if (port_state == VPORT_STATE_UP) netif_carrier_on(priv->netdev); @@ -1931,6 +1932,79 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) return err; } +static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_mac(mdev->priv.eswitch, vf + 1, mac); +} + +static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_vlan(mdev->priv.eswitch, vf + 1, + vlan, qos); +} + +static int mlx5_vport_link2ifla(u8 esw_link) +{ + switch (esw_link) { + case MLX5_ESW_VPORT_ADMIN_STATE_DOWN: + return IFLA_VF_LINK_STATE_DISABLE; + case MLX5_ESW_VPORT_ADMIN_STATE_UP: + return IFLA_VF_LINK_STATE_ENABLE; + } + return IFLA_VF_LINK_STATE_AUTO; +} + +static int mlx5_ifla_link2vport(u8 ifla_link) +{ + switch (ifla_link) { + case IFLA_VF_LINK_STATE_DISABLE: + return MLX5_ESW_VPORT_ADMIN_STATE_DOWN; + case IFLA_VF_LINK_STATE_ENABLE: + return MLX5_ESW_VPORT_ADMIN_STATE_UP; + } + return MLX5_ESW_VPORT_ADMIN_STATE_AUTO; +} + +static int mlx5e_set_vf_link_state(struct net_device *dev, int vf, + int link_state) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_state(mdev->priv.eswitch, vf + 1, + mlx5_ifla_link2vport(link_state)); +} + +static int mlx5e_get_vf_config(struct net_device *dev, + int vf, struct ifla_vf_info *ivi) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + err = mlx5_eswitch_get_vport_config(mdev->priv.eswitch, vf + 1, ivi); + if (err) + return err; + ivi->linkstate = mlx5_vport_link2ifla(ivi->linkstate); + return 0; +} + +static int mlx5e_get_vf_stats(struct net_device *dev, + int vf, struct ifla_vf_stats *vf_stats) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1, + vf_stats); +} + static struct net_device_ops mlx5e_netdev_ops = { .ndo_open = mlx5e_open, .ndo_stop = mlx5e_close, @@ -1941,7 +2015,7 @@ static struct net_device_ops mlx5e_netdev_ops = { .ndo_vlan_rx_add_vid = mlx5e_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = mlx5e_vlan_rx_kill_vid, .ndo_set_features = mlx5e_set_features, - .ndo_change_mtu = mlx5e_change_mtu, + .ndo_change_mtu = mlx5e_change_mtu }; static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) @@ -2028,7 +2102,7 @@ static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - mlx5_query_nic_vport_mac_address(priv->mdev, netdev->dev_addr); + mlx5_query_nic_vport_mac_address(priv->mdev, 0, netdev->dev_addr); } static void mlx5e_build_netdev(struct net_device *netdev) @@ -2041,6 +2115,14 @@ static void mlx5e_build_netdev(struct net_device *netdev) if (priv->params.num_tc > 1) mlx5e_netdev_ops.ndo_select_queue = mlx5e_select_queue; + if (MLX5_CAP_GEN(mdev, vport_group_manager)) { + mlx5e_netdev_ops.ndo_set_vf_mac = mlx5e_set_vf_mac; + mlx5e_netdev_ops.ndo_set_vf_vlan = mlx5e_set_vf_vlan; + mlx5e_netdev_ops.ndo_get_vf_config = mlx5e_get_vf_config; + mlx5e_netdev_ops.ndo_set_vf_link_state = mlx5e_set_vf_link_state; + mlx5e_netdev_ops.ndo_get_vf_stats = mlx5e_get_vf_stats; + } + netdev->netdev_ops = &mlx5e_netdev_ops; netdev->watchdog_timeo = 15 * HZ; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 713ead583347f1..23c244a7e5d73d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -35,6 +35,9 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/cmd.h> #include "mlx5_core.h" +#ifdef CONFIG_MLX5_CORE_EN +#include "eswitch.h" +#endif enum { MLX5_EQE_SIZE = sizeof(struct mlx5_eqe), @@ -287,6 +290,11 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) break; #endif +#ifdef CONFIG_MLX5_CORE_EN + case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: + mlx5_eswitch_vport_event(dev->priv.eswitch, eqe); + break; +#endif default: mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n", eqe->type, eq->eqn); @@ -459,6 +467,11 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) if (MLX5_CAP_GEN(dev, pg)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_PAGE_FAULT); + if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH && + MLX5_CAP_GEN(dev, vport_group_manager) && + mlx5_core_is_pf(dev)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE); + err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD, MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD, "mlx5_cmd_eq", &dev->priv.uuari.uars[0]); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c new file mode 100644 index 00000000000000..d8939e597c5465 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -0,0 +1,1282 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/etherdevice.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/vport.h> +#include <linux/mlx5/flow_table.h> +#include "mlx5_core.h" +#include "eswitch.h" + +#define UPLINK_VPORT 0xFFFF + +#define MLX5_DEBUG_ESWITCH_MASK BIT(3) + +#define esw_info(dev, format, ...) \ + pr_info("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) + +#define esw_warn(dev, format, ...) \ + pr_warn("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) + +#define esw_debug(dev, format, ...) \ + mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) + +enum { + MLX5_ACTION_NONE = 0, + MLX5_ACTION_ADD = 1, + MLX5_ACTION_DEL = 2, +}; + +/* E-Switch UC L2 table hash node */ +struct esw_uc_addr { + struct l2addr_node node; + u32 table_index; + u32 vport; +}; + +/* E-Switch MC FDB table hash node */ +struct esw_mc_addr { /* SRIOV only */ + struct l2addr_node node; + struct mlx5_flow_rule *uplink_rule; /* Forward to uplink rule */ + u32 refcnt; +}; + +/* Vport UC/MC hash node */ +struct vport_addr { + struct l2addr_node node; + u8 action; + u32 vport; + struct mlx5_flow_rule *flow_rule; /* SRIOV only */ +}; + +enum { + UC_ADDR_CHANGE = BIT(0), + MC_ADDR_CHANGE = BIT(1), +}; + +/* Vport context events */ +#define SRIOV_VPORT_EVENTS (UC_ADDR_CHANGE | \ + MC_ADDR_CHANGE) + +static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, + u32 events_mask) +{ + int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)]; + int out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + void *nic_vport_ctx; + int err; + + memset(out, 0, sizeof(out)); + memset(in, 0, sizeof(in)); + + MLX5_SET(modify_nic_vport_context_in, in, + opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, field_select.change_event, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, arm_change_event, 1); + + if (events_mask & UC_ADDR_CHANGE) + MLX5_SET(nic_vport_context, nic_vport_ctx, + event_on_uc_address_change, 1); + if (events_mask & MC_ADDR_CHANGE) + MLX5_SET(nic_vport_context, nic_vport_ctx, + event_on_mc_address_change, 1); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + goto ex; + err = mlx5_cmd_status_to_err_v2(out); + if (err) + goto ex; + return 0; +ex: + return err; +} + +/* E-Switch vport context HW commands */ +static int query_esw_vport_context_cmd(struct mlx5_core_dev *mdev, u32 vport, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT); + + MLX5_SET(query_esw_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_esw_vport_context_in, in, other_vport, 1); + + return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); +} + +static int query_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, + u16 *vlan, u8 *qos) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)]; + int err; + bool cvlan_strip; + bool cvlan_insert; + + memset(out, 0, sizeof(out)); + + *vlan = 0; + *qos = 0; + + if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) || + !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist)) + return -ENOTSUPP; + + err = query_esw_vport_context_cmd(dev, vport, out, sizeof(out)); + if (err) + goto out; + + cvlan_strip = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.vport_cvlan_strip); + + cvlan_insert = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.vport_cvlan_insert); + + if (cvlan_strip || cvlan_insert) { + *vlan = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.cvlan_id); + *qos = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.cvlan_pcp); + } + + esw_debug(dev, "Query Vport[%d] cvlan: VLAN %d qos=%d\n", + vport, *vlan, *qos); +out: + return err; +} + +static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, + void *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_esw_vport_context_out)]; + + memset(out, 0, sizeof(out)); + + MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); + + MLX5_SET(modify_esw_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT); + + return mlx5_cmd_exec_check_status(dev, in, inlen, + out, sizeof(out)); +} + +static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, + u16 vlan, u8 qos, bool set) +{ + u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)]; + + memset(in, 0, sizeof(in)); + + if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) || + !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist)) + return -ENOTSUPP; + + esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%d\n", + vport, vlan, qos, set); + + if (set) { + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.vport_cvlan_strip, 1); + /* insert only if no vlan in packet */ + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.vport_cvlan_insert, 1); + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.cvlan_pcp, qos); + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.cvlan_id, vlan); + } + + MLX5_SET(modify_esw_vport_context_in, in, + field_select.vport_cvlan_strip, 1); + MLX5_SET(modify_esw_vport_context_in, in, + field_select.vport_cvlan_insert, 1); + + return modify_esw_vport_context_cmd(dev, vport, in, sizeof(in)); +} + +/* HW L2 Table (MPFS) management */ +static int set_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index, + u8 *mac, u8 vlan_valid, u16 vlan) +{ + u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)]; + u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)]; + u8 *in_mac_addr; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(set_l2_table_entry_in, in, opcode, + MLX5_CMD_OP_SET_L2_TABLE_ENTRY); + MLX5_SET(set_l2_table_entry_in, in, table_index, index); + MLX5_SET(set_l2_table_entry_in, in, vlan_valid, vlan_valid); + MLX5_SET(set_l2_table_entry_in, in, vlan, vlan); + + in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address); + ether_addr_copy(&in_mac_addr[2], mac); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), + out, sizeof(out)); +} + +static int del_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index) +{ + u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)]; + u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(delete_l2_table_entry_in, in, opcode, + MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY); + MLX5_SET(delete_l2_table_entry_in, in, table_index, index); + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), + out, sizeof(out)); +} + +static int alloc_l2_table_index(struct mlx5_l2_table *l2_table, u32 *ix) +{ + int err = 0; + + *ix = find_first_zero_bit(l2_table->bitmap, l2_table->size); + if (*ix >= l2_table->size) + err = -ENOSPC; + else + __set_bit(*ix, l2_table->bitmap); + + return err; +} + +static void free_l2_table_index(struct mlx5_l2_table *l2_table, u32 ix) +{ + __clear_bit(ix, l2_table->bitmap); +} + +static int set_l2_table_entry(struct mlx5_core_dev *dev, u8 *mac, + u8 vlan_valid, u16 vlan, + u32 *index) +{ + struct mlx5_l2_table *l2_table = &dev->priv.eswitch->l2_table; + int err; + + err = alloc_l2_table_index(l2_table, index); + if (err) + return err; + + err = set_l2_table_entry_cmd(dev, *index, mac, vlan_valid, vlan); + if (err) + free_l2_table_index(l2_table, *index); + + return err; +} + +static void del_l2_table_entry(struct mlx5_core_dev *dev, u32 index) +{ + struct mlx5_l2_table *l2_table = &dev->priv.eswitch->l2_table; + + del_l2_table_entry_cmd(dev, index); + free_l2_table_index(l2_table, index); +} + +/* E-Switch FDB flow steering */ +struct dest_node { + struct list_head list; + struct mlx5_flow_destination dest; +}; + +static int _mlx5_flow_rule_apply(struct mlx5_flow_rule *fr) +{ + bool was_valid = fr->valid; + struct dest_node *dest_n; + u32 dest_list_size = 0; + void *in_match_value; + u32 *flow_context; + u32 flow_index; + int err; + int i; + + if (list_empty(&fr->dest_list)) { + if (fr->valid) + mlx5_del_flow_table_entry(fr->ft, fr->fi); + fr->valid = false; + return 0; + } + + list_for_each_entry(dest_n, &fr->dest_list, list) + dest_list_size++; + + flow_context = mlx5_vzalloc(MLX5_ST_SZ_BYTES(flow_context) + + MLX5_ST_SZ_BYTES(dest_format_struct) * + dest_list_size); + if (!flow_context) + return -ENOMEM; + + MLX5_SET(flow_context, flow_context, flow_tag, fr->flow_tag); + MLX5_SET(flow_context, flow_context, action, fr->action); + MLX5_SET(flow_context, flow_context, destination_list_size, + dest_list_size); + + i = 0; + list_for_each_entry(dest_n, &fr->dest_list, list) { + void *dest_addr = MLX5_ADDR_OF(flow_context, flow_context, + destination[i++]); + + MLX5_SET(dest_format_struct, dest_addr, destination_type, + dest_n->dest.type); + MLX5_SET(dest_format_struct, dest_addr, destination_id, + dest_n->dest.vport_num); + } + + in_match_value = MLX5_ADDR_OF(flow_context, flow_context, match_value); + memcpy(in_match_value, fr->match_value, MLX5_ST_SZ_BYTES(fte_match_param)); + + err = mlx5_add_flow_table_entry(fr->ft, fr->match_criteria_enable, + fr->match_criteria, flow_context, + &flow_index); + if (!err) { + if (was_valid) + mlx5_del_flow_table_entry(fr->ft, fr->fi); + fr->fi = flow_index; + fr->valid = true; + } + kfree(flow_context); + return err; +} + +static int mlx5_flow_rule_add_dest(struct mlx5_flow_rule *fr, + struct mlx5_flow_destination *new_dest) +{ + struct dest_node *dest_n; + int err; + + dest_n = kzalloc(sizeof(*dest_n), GFP_KERNEL); + if (!dest_n) + return -ENOMEM; + + memcpy(&dest_n->dest, new_dest, sizeof(dest_n->dest)); + mutex_lock(&fr->mutex); + list_add(&dest_n->list, &fr->dest_list); + err = _mlx5_flow_rule_apply(fr); + if (err) { + list_del(&dest_n->list); + kfree(dest_n); + } + mutex_unlock(&fr->mutex); + return err; +} + +static int mlx5_flow_rule_del_dest(struct mlx5_flow_rule *fr, + struct mlx5_flow_destination *dest) +{ + struct dest_node *dest_n; + struct dest_node *n; + int err; + + mutex_lock(&fr->mutex); + list_for_each_entry_safe(dest_n, n, &fr->dest_list, list) { + if (dest->vport_num == dest_n->dest.vport_num) + goto found; + } + mutex_unlock(&fr->mutex); + return -ENOENT; + +found: + list_del(&dest_n->list); + err = _mlx5_flow_rule_apply(fr); + mutex_unlock(&fr->mutex); + kfree(dest_n); + + return err; +} + +static struct mlx5_flow_rule *find_fr(struct mlx5_eswitch *esw, + u8 match_criteria_enable, + u32 *match_value) +{ + struct hlist_head *hash = esw->mc_table; + struct esw_mc_addr *esw_mc; + u8 *dmac_v; + + dmac_v = MLX5_ADDR_OF(fte_match_param, match_value, + outer_headers.dmac_47_16); + + /* UNICAST FULL MATCH */ + if (!is_multicast_ether_addr(dmac_v)) + return NULL; + + /* MULTICAST FULL MATCH */ + esw_mc = l2addr_hash_find(hash, dmac_v, struct esw_mc_addr); + + return esw_mc ? esw_mc->uplink_rule : NULL; +} + +static struct mlx5_flow_rule *alloc_fr(void *ft, + u8 match_criteria_enable, + u32 *match_criteria, + u32 *match_value, + u32 action, + u32 flow_tag) +{ + struct mlx5_flow_rule *fr = kzalloc(sizeof(*fr), GFP_KERNEL); + + if (!fr) + return NULL; + + fr->match_criteria = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + fr->match_value = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!fr->match_criteria || !fr->match_value) { + kfree(fr->match_criteria); + kfree(fr->match_value); + kfree(fr); + return NULL; + } + + memcpy(fr->match_criteria, match_criteria, MLX5_ST_SZ_BYTES(fte_match_param)); + memcpy(fr->match_value, match_value, MLX5_ST_SZ_BYTES(fte_match_param)); + fr->match_criteria_enable = match_criteria_enable; + fr->flow_tag = flow_tag; + fr->action = action; + + mutex_init(&fr->mutex); + INIT_LIST_HEAD(&fr->dest_list); + atomic_set(&fr->refcount, 0); + fr->ft = ft; + return fr; +} + +static void deref_fr(struct mlx5_flow_rule *fr) +{ + if (!atomic_dec_and_test(&fr->refcount)) + return; + + kfree(fr->match_criteria); + kfree(fr->match_value); + kfree(fr); +} + +static struct mlx5_flow_rule * +mlx5_add_flow_rule(struct mlx5_eswitch *esw, + u8 match_criteria_enable, + u32 *match_criteria, + u32 *match_value, + u32 action, + u32 flow_tag, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_rule *fr; + int err; + + fr = find_fr(esw, match_criteria_enable, match_value); + fr = fr ? fr : alloc_fr(esw->fdb_table.fdb, match_criteria_enable, match_criteria, + match_value, action, flow_tag); + if (!fr) + return NULL; + + atomic_inc(&fr->refcount); + + err = mlx5_flow_rule_add_dest(fr, dest); + if (err) { + deref_fr(fr); + return NULL; + } + + return fr; +} + +static void mlx5_del_flow_rule(struct mlx5_flow_rule *fr, u32 vport) +{ + struct mlx5_flow_destination dest; + + dest.vport_num = vport; + mlx5_flow_rule_del_dest(fr, &dest); + deref_fr(fr); +} + +/* E-Switch FDB */ +static struct mlx5_flow_rule * +esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) +{ + int match_header = MLX5_MATCH_OUTER_HEADERS; + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *flow_rule = NULL; + u32 *match_v; + u32 *match_c; + u8 *dmac_v; + u8 *dmac_c; + + match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!match_v || !match_c) { + pr_warn("FDB: Failed to alloc match parameters\n"); + goto out; + } + dmac_v = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers.dmac_47_16); + dmac_c = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers.dmac_47_16); + + ether_addr_copy(dmac_v, mac); + /* Match criteria mask */ + memset(dmac_c, 0xff, 6); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport_num = vport; + + esw_debug(esw->dev, + "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n", + dmac_v, dmac_c, vport); + flow_rule = + mlx5_add_flow_rule(esw, + match_header, + match_c, + match_v, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + 0, &dest); + if (IS_ERR_OR_NULL(flow_rule)) { + pr_warn( + "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n", + dmac_v, dmac_c, vport, PTR_ERR(flow_rule)); + flow_rule = NULL; + } +out: + kfree(match_v); + kfree(match_c); + return flow_rule; +} + +static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) +{ + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_table_group g; + struct mlx5_flow_table *fdb; + u8 *dmac; + + esw_debug(dev, "Create FDB log_max_size(%d)\n", + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + + memset(&g, 0, sizeof(g)); + /* UC MC Full match rules*/ + g.log_sz = MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size); + g.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac = MLX5_ADDR_OF(fte_match_param, g.match_criteria, + outer_headers.dmac_47_16); + /* Match criteria mask */ + memset(dmac, 0xff, 6); + + fdb = mlx5_create_flow_table(dev, 0, + MLX5_FLOW_TABLE_TYPE_ESWITCH, + 1, &g); + if (fdb) + esw_debug(dev, "ESW: FDB Table created fdb->id %d\n", mlx5_get_flow_table_id(fdb)); + else + esw_warn(dev, "ESW: Failed to create FDB Table\n"); + + esw->fdb_table.fdb = fdb; + return fdb ? 0 : -ENOMEM; +} + +static void esw_destroy_fdb_table(struct mlx5_eswitch *esw) +{ + if (!esw->fdb_table.fdb) + return; + + esw_debug(esw->dev, "Destroy FDB Table fdb(%d)\n", + mlx5_get_flow_table_id(esw->fdb_table.fdb)); + mlx5_destroy_flow_table(esw->fdb_table.fdb); + esw->fdb_table.fdb = NULL; +} + +/* E-Switch vport UC/MC lists management */ +typedef int (*vport_addr_action)(struct mlx5_eswitch *esw, + struct vport_addr *vaddr); + +static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->l2_table.l2_hash; + struct esw_uc_addr *esw_uc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + int err; + + esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr); + if (esw_uc) { + esw_warn(esw->dev, + "Failed to set L2 mac(%pM) for vport(%d), mac is already in use by vport(%d)\n", + mac, vport, esw_uc->vport); + return -EEXIST; + } + + esw_uc = l2addr_hash_add(hash, mac, struct esw_uc_addr, GFP_KERNEL); + if (!esw_uc) + return -ENOMEM; + esw_uc->vport = vport; + + err = set_l2_table_entry(esw->dev, mac, 0, 0, &esw_uc->table_index); + if (err) + goto abort; + + if (esw->fdb_table.fdb) /* SRIOV is enabled: Forward UC MAC to vport */ + vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); + + esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM index:%d fr(%p)\n", + vport, mac, esw_uc->table_index, vaddr->flow_rule); + return err; +abort: + l2addr_hash_del(esw_uc); + return err; +} + +static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->l2_table.l2_hash; + struct esw_uc_addr *esw_uc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + + esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr); + if (!esw_uc || esw_uc->vport != vport) { + esw_debug(esw->dev, + "MAC(%pM) doesn't belong to vport (%d)\n", + mac, vport); + return -EINVAL; + } + esw_debug(esw->dev, "\tDELETE UC MAC: vport[%d] %pM index:%d fr(%p)\n", + vport, mac, esw_uc->table_index, vaddr->flow_rule); + + del_l2_table_entry(esw->dev, esw_uc->table_index); + + if (vaddr->flow_rule) + mlx5_del_flow_rule(vaddr->flow_rule, vport); + vaddr->flow_rule = NULL; + + l2addr_hash_del(esw_uc); + return 0; +} + +static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->mc_table; + struct esw_mc_addr *esw_mc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + + if (!esw->fdb_table.fdb) + return 0; + + esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr); + if (esw_mc) + goto add; + + esw_mc = l2addr_hash_add(hash, mac, struct esw_mc_addr, GFP_KERNEL); + if (!esw_mc) + return -ENOMEM; + + esw_mc->uplink_rule = /* Forward MC MAC to Uplink */ + esw_fdb_set_vport_rule(esw, mac, UPLINK_VPORT); +add: + esw_mc->refcnt++; + /* Forward MC MAC to vport */ + vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); + esw_debug(esw->dev, + "\tADDED MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n", + vport, mac, vaddr->flow_rule, + esw_mc->refcnt, esw_mc->uplink_rule); + return 0; +} + +static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->mc_table; + struct esw_mc_addr *esw_mc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + + if (!esw->fdb_table.fdb) + return 0; + + esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr); + if (!esw_mc) { + esw_warn(esw->dev, + "Failed to find eswitch MC addr for MAC(%pM) vport(%d)", + mac, vport); + return -EINVAL; + } + esw_debug(esw->dev, + "\tDELETE MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n", + vport, mac, vaddr->flow_rule, esw_mc->refcnt, + esw_mc->uplink_rule); + + if (vaddr->flow_rule) + mlx5_del_flow_rule(vaddr->flow_rule, vport); + vaddr->flow_rule = NULL; + + if (--esw_mc->refcnt) + return 0; + + if (esw_mc->uplink_rule) + mlx5_del_flow_rule(esw_mc->uplink_rule, UPLINK_VPORT); + + l2addr_hash_del(esw_mc); + return 0; +} + +/* Apply vport UC/MC list to HW l2 table and FDB table */ +static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw, + u32 vport_num, int list_type) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; + vport_addr_action vport_addr_add; + vport_addr_action vport_addr_del; + struct vport_addr *addr; + struct l2addr_node *node; + struct hlist_head *hash; + struct hlist_node *tmp; + int hi; + + vport_addr_add = is_uc ? esw_add_uc_addr : + esw_add_mc_addr; + vport_addr_del = is_uc ? esw_del_uc_addr : + esw_del_mc_addr; + + hash = is_uc ? vport->uc_list : vport->mc_list; + for_each_l2hash_node(node, tmp, hash, hi) { + addr = container_of(node, struct vport_addr, node); + switch (addr->action) { + case MLX5_ACTION_ADD: + vport_addr_add(esw, addr); + addr->action = MLX5_ACTION_NONE; + break; + case MLX5_ACTION_DEL: + vport_addr_del(esw, addr); + l2addr_hash_del(addr); + break; + } + } +} + +/* Sync vport UC/MC list from vport context */ +static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, + u32 vport_num, int list_type) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; + u8 (*mac_list)[ETH_ALEN]; + struct l2addr_node *node; + struct vport_addr *addr; + struct hlist_head *hash; + struct hlist_node *tmp; + int size; + int err; + int hi; + int i; + + size = is_uc ? MLX5_MAX_UC_PER_VPORT(esw->dev) : + MLX5_MAX_MC_PER_VPORT(esw->dev); + + mac_list = kcalloc(size, ETH_ALEN, GFP_KERNEL); + if (!mac_list) + return; + + hash = is_uc ? vport->uc_list : vport->mc_list; + + for_each_l2hash_node(node, tmp, hash, hi) { + addr = container_of(node, struct vport_addr, node); + addr->action = MLX5_ACTION_DEL; + } + + err = mlx5_query_nic_vport_mac_list(esw->dev, vport_num, list_type, + mac_list, &size); + if (err) + return; + esw_debug(esw->dev, "vport[%d] context update %s list size (%d)\n", + vport_num, is_uc ? "UC" : "MC", size); + + for (i = 0; i < size; i++) { + if (is_uc && !is_valid_ether_addr(mac_list[i])) + continue; + + if (!is_uc && !is_multicast_ether_addr(mac_list[i])) + continue; + + addr = l2addr_hash_find(hash, mac_list[i], struct vport_addr); + if (addr) { + addr->action = MLX5_ACTION_NONE; + continue; + } + + addr = l2addr_hash_add(hash, mac_list[i], struct vport_addr, + GFP_KERNEL); + if (!addr) { + esw_warn(esw->dev, + "Failed to add MAC(%pM) to vport[%d] DB\n", + mac_list[i], vport_num); + continue; + } + addr->vport = vport_num; + addr->action = MLX5_ACTION_ADD; + } + kfree(mac_list); +} + +static void esw_vport_change_handler(struct work_struct *work) +{ + struct mlx5_vport *vport = + container_of(work, struct mlx5_vport, vport_change_handler); + struct mlx5_core_dev *dev = vport->dev; + struct mlx5_eswitch *esw = dev->priv.eswitch; + u8 mac[ETH_ALEN]; + + mlx5_query_nic_vport_mac_address(dev, vport->vport, mac); + esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n", + vport->vport, mac); + + if (vport->enabled_events & UC_ADDR_CHANGE) { + esw_update_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_UC); + esw_apply_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_UC); + } + + if (vport->enabled_events & MC_ADDR_CHANGE) { + esw_update_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_MC); + esw_apply_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_MC); + } + + esw_debug(esw->dev, "vport[%d] Context Changed: Done\n", vport->vport); + if (vport->enabled) + arm_vport_context_events_cmd(dev, vport->vport, + vport->enabled_events); +} + +static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, + int enable_events) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + unsigned long flags; + + WARN_ON(vport->enabled); + + esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); + mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport_num, + MLX5_ESW_VPORT_ADMIN_STATE_AUTO); + + /* Sync with current vport context */ + vport->enabled_events = enable_events; + esw_vport_change_handler(&vport->vport_change_handler); + + spin_lock_irqsave(&vport->lock, flags); + vport->enabled = true; + spin_unlock_irqrestore(&vport->lock, flags); + + arm_vport_context_events_cmd(esw->dev, vport_num, enable_events); + + esw->enabled_vports++; + esw_debug(esw->dev, "Enabled VPORT(%d)\n", vport_num); +} + +static void esw_cleanup_vport(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + struct l2addr_node *node; + struct vport_addr *addr; + struct hlist_node *tmp; + int hi; + + for_each_l2hash_node(node, tmp, vport->uc_list, hi) { + addr = container_of(node, struct vport_addr, node); + addr->action = MLX5_ACTION_DEL; + } + esw_apply_vport_addr_list(esw, vport_num, MLX5_NVPRT_LIST_TYPE_UC); + + for_each_l2hash_node(node, tmp, vport->mc_list, hi) { + addr = container_of(node, struct vport_addr, node); + addr->action = MLX5_ACTION_DEL; + } + esw_apply_vport_addr_list(esw, vport_num, MLX5_NVPRT_LIST_TYPE_MC); +} + +static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + unsigned long flags; + + if (!vport->enabled) + return; + + esw_debug(esw->dev, "Disabling vport(%d)\n", vport_num); + /* Mark this vport as disabled to discard new events */ + spin_lock_irqsave(&vport->lock, flags); + vport->enabled = false; + vport->enabled_events = 0; + spin_unlock_irqrestore(&vport->lock, flags); + + mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport_num, + MLX5_ESW_VPORT_ADMIN_STATE_DOWN); + /* Wait for current already scheduled events to complete */ + flush_workqueue(esw->work_queue); + /* Disable events from this vport */ + arm_vport_context_events_cmd(esw->dev, vport->vport, 0); + /* We don't assume VFs will cleanup after themselves */ + esw_cleanup_vport(esw, vport_num); + esw->enabled_vports--; +} + +/* Public E-Switch API */ +int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs) +{ + int err; + int i; + + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return 0; + + if (!MLX5_CAP_GEN(esw->dev, eswitch_flow_table) || + !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { + esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n"); + return -ENOTSUPP; + } + + esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d)\n", nvfs); + + esw_disable_vport(esw, 0); + + err = esw_create_fdb_table(esw, nvfs + 1); + if (err) + goto abort; + + for (i = 0; i <= nvfs; i++) + esw_enable_vport(esw, i, SRIOV_VPORT_EVENTS); + + esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n", + esw->enabled_vports); + return 0; + +abort: + esw_enable_vport(esw, 0, UC_ADDR_CHANGE); + return err; +} + +void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) +{ + int i; + + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return; + + esw_info(esw->dev, "disable SRIOV: active vports(%d)\n", + esw->enabled_vports); + + for (i = 0; i < esw->total_vports; i++) + esw_disable_vport(esw, i); + + esw_destroy_fdb_table(esw); + + /* VPORT 0 (PF) must be enabled back with non-sriov configuration */ + esw_enable_vport(esw, 0, UC_ADDR_CHANGE); +} + +int mlx5_eswitch_init(struct mlx5_core_dev *dev) +{ + int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); + int total_vports = 1 + pci_sriov_get_totalvfs(dev->pdev); + struct mlx5_eswitch *esw; + int vport_num; + int err; + + if (!MLX5_CAP_GEN(dev, vport_group_manager) || + MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return 0; + + esw_info(dev, + "Total vports %d, l2 table size(%d), per vport: max uc(%d) max mc(%d)\n", + total_vports, l2_table_size, + MLX5_MAX_UC_PER_VPORT(dev), + MLX5_MAX_MC_PER_VPORT(dev)); + + esw = kzalloc(sizeof(*esw), GFP_KERNEL); + if (!esw) + return -ENOMEM; + + esw->dev = dev; + + esw->l2_table.bitmap = kcalloc(BITS_TO_LONGS(l2_table_size), + sizeof(uintptr_t), GFP_KERNEL); + if (!esw->l2_table.bitmap) { + err = -ENOMEM; + goto abort; + } + esw->l2_table.size = l2_table_size; + + esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); + if (!esw->work_queue) { + err = -ENOMEM; + goto abort; + } + + esw->vports = kcalloc(total_vports, sizeof(struct mlx5_vport), + GFP_KERNEL); + if (!esw->vports) { + err = -ENOMEM; + goto abort; + } + + for (vport_num = 0; vport_num < total_vports; vport_num++) { + struct mlx5_vport *vport = &esw->vports[vport_num]; + + vport->vport = vport_num; + vport->dev = dev; + INIT_WORK(&vport->vport_change_handler, + esw_vport_change_handler); + spin_lock_init(&vport->lock); + } + + esw->total_vports = total_vports; + esw->enabled_vports = 0; + + dev->priv.eswitch = esw; + esw_enable_vport(esw, 0, UC_ADDR_CHANGE); + /* VF Vports will be enabled when SRIOV is enabled */ + return 0; +abort: + if (esw->work_queue) + destroy_workqueue(esw->work_queue); + kfree(esw->l2_table.bitmap); + kfree(esw->vports); + kfree(esw); + return err; +} + +void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) +{ + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return; + + esw_info(esw->dev, "cleanup\n"); + esw_disable_vport(esw, 0); + + esw->dev->priv.eswitch = NULL; + destroy_workqueue(esw->work_queue); + kfree(esw->l2_table.bitmap); + kfree(esw->vports); + kfree(esw); +} + +void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) +{ + struct mlx5_eqe_vport_change *vc_eqe = &eqe->data.vport_change; + u16 vport_num = be16_to_cpu(vc_eqe->vport_num); + struct mlx5_vport *vport; + + if (!esw) { + pr_warn("MLX5 E-Switch: vport %d got an event while eswitch is not initialized\n", + vport_num); + return; + } + + vport = &esw->vports[vport_num]; + spin_lock(&vport->lock); + if (vport->enabled) + queue_work(esw->work_queue, &vport->vport_change_handler); + spin_unlock(&vport->lock); +} + +/* Vport Administration */ +#define ESW_ALLOWED(esw) \ + (esw && MLX5_CAP_GEN(esw->dev, vport_group_manager) && mlx5_core_is_pf(esw->dev)) +#define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports) + +int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, + int vport, u8 mac[ETH_ALEN]) +{ + int err = 0; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac); + if (err) { + mlx5_core_warn(esw->dev, + "Failed to mlx5_modify_nic_vport_mac vport(%d) err=(%d)\n", + vport, err); + return err; + } + + return err; +} + +int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, + int vport, int link_state) +{ + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + return mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport, link_state); +} + +int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, + int vport, struct ifla_vf_info *ivi) +{ + u16 vlan; + u8 qos; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + memset(ivi, 0, sizeof(*ivi)); + ivi->vf = vport - 1; + + mlx5_query_nic_vport_mac_address(esw->dev, vport, ivi->mac); + ivi->linkstate = mlx5_query_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport); + query_esw_vport_cvlan(esw->dev, vport, &vlan, &qos); + ivi->vlan = vlan; + ivi->qos = qos; + ivi->spoofchk = 0; + + return 0; +} + +int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos) +{ + int set = 0; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport) || (vlan > 4095) || (qos > 7)) + return -EINVAL; + + if (vlan || qos) + set = 1; + + return modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set); +} + +int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, + int vport, + struct ifla_vf_stats *vf_stats) +{ + int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); + u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)]; + int err = 0; + u32 *out; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + out = mlx5_vzalloc(outlen); + if (!out) + return -ENOMEM; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_vport_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_VPORT_COUNTER); + MLX5_SET(query_vport_counter_in, in, op_mod, 0); + MLX5_SET(query_vport_counter_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_vport_counter_in, in, other_vport, 1); + + memset(out, 0, outlen); + err = mlx5_cmd_exec(esw->dev, in, sizeof(in), out, outlen); + if (err) + goto free_out; + + #define MLX5_GET_CTR(p, x) \ + MLX5_GET64(query_vport_counter_out, p, x) + + memset(vf_stats, 0, sizeof(*vf_stats)); + vf_stats->rx_packets = + MLX5_GET_CTR(out, received_eth_unicast.packets) + + MLX5_GET_CTR(out, received_eth_multicast.packets) + + MLX5_GET_CTR(out, received_eth_broadcast.packets); + + vf_stats->rx_bytes = + MLX5_GET_CTR(out, received_eth_unicast.octets) + + MLX5_GET_CTR(out, received_eth_multicast.octets) + + MLX5_GET_CTR(out, received_eth_broadcast.octets); + + vf_stats->tx_packets = + MLX5_GET_CTR(out, transmitted_eth_unicast.packets) + + MLX5_GET_CTR(out, transmitted_eth_multicast.packets) + + MLX5_GET_CTR(out, transmitted_eth_broadcast.packets); + + vf_stats->tx_bytes = + MLX5_GET_CTR(out, transmitted_eth_unicast.octets) + + MLX5_GET_CTR(out, transmitted_eth_multicast.octets) + + MLX5_GET_CTR(out, transmitted_eth_broadcast.octets); + + vf_stats->multicast = + MLX5_GET_CTR(out, received_eth_multicast.packets); + + vf_stats->broadcast = + MLX5_GET_CTR(out, received_eth_broadcast.packets); + +free_out: + kvfree(out); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h new file mode 100644 index 00000000000000..02ff3eade02605 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_ESWITCH_H__ +#define __MLX5_ESWITCH_H__ + +#include <linux/if_ether.h> +#include <linux/if_link.h> +#include <linux/mlx5/device.h> + +#define MLX5_MAX_UC_PER_VPORT(dev) \ + (1 << MLX5_CAP_GEN(dev, log_max_current_uc_list)) + +#define MLX5_MAX_MC_PER_VPORT(dev) \ + (1 << MLX5_CAP_GEN(dev, log_max_current_mc_list)) + +#define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE)) +#define MLX5_L2_ADDR_HASH(addr) (addr[5]) + +/* L2 -mac address based- hash helpers */ +struct l2addr_node { + struct hlist_node hlist; + u8 addr[ETH_ALEN]; +}; + +#define for_each_l2hash_node(hn, tmp, hash, i) \ + for (i = 0; i < MLX5_L2_ADDR_HASH_SIZE; i++) \ + hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist) + +#define l2addr_hash_find(hash, mac, type) ({ \ + int ix = MLX5_L2_ADDR_HASH(mac); \ + bool found = false; \ + type *ptr = NULL; \ + \ + hlist_for_each_entry(ptr, &hash[ix], node.hlist) \ + if (ether_addr_equal(ptr->node.addr, mac)) {\ + found = true; \ + break; \ + } \ + if (!found) \ + ptr = NULL; \ + ptr; \ +}) + +#define l2addr_hash_add(hash, mac, type, gfp) ({ \ + int ix = MLX5_L2_ADDR_HASH(mac); \ + type *ptr = NULL; \ + \ + ptr = kzalloc(sizeof(type), gfp); \ + if (ptr) { \ + ether_addr_copy(ptr->node.addr, mac); \ + hlist_add_head(&ptr->node.hlist, &hash[ix]);\ + } \ + ptr; \ +}) + +#define l2addr_hash_del(ptr) ({ \ + hlist_del(&ptr->node.hlist); \ + kfree(ptr); \ +}) + +struct mlx5_flow_rule { + void *ft; + u32 fi; + u8 match_criteria_enable; + u32 *match_criteria; + u32 *match_value; + u32 action; + u32 flow_tag; + bool valid; + atomic_t refcount; + struct mutex mutex; /* protect flow rule updates */ + struct list_head dest_list; +}; + +struct mlx5_vport { + struct mlx5_core_dev *dev; + int vport; + struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE]; + struct hlist_head mc_list[MLX5_L2_ADDR_HASH_SIZE]; + struct work_struct vport_change_handler; + + /* This spinlock protects access to vport data, between + * "esw_vport_disable" and ongoing interrupt "mlx5_eswitch_vport_event" + * once vport marked as disabled new interrupts are discarded. + */ + spinlock_t lock; /* vport events sync */ + bool enabled; + u16 enabled_events; +}; + +struct mlx5_l2_table { + struct hlist_head l2_hash[MLX5_L2_ADDR_HASH_SIZE]; + u32 size; + unsigned long *bitmap; +}; + +struct mlx5_eswitch_fdb { + void *fdb; +}; + +struct mlx5_eswitch { + struct mlx5_core_dev *dev; + struct mlx5_l2_table l2_table; + struct mlx5_eswitch_fdb fdb_table; + struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; + struct workqueue_struct *work_queue; + struct mlx5_vport *vports; + int total_vports; + int enabled_vports; +}; + +/* E-Switch API */ +int mlx5_eswitch_init(struct mlx5_core_dev *dev); +void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); +void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe); +int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs); +void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); +int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, + int vport, u8 mac[ETH_ALEN]); +int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, + int vport, int link_state); +int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos); +int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, + int vport, struct ifla_vf_info *ivi); +int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, + int vport, + struct ifla_vf_stats *vf_stats); + +#endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 9335e5ae18ccee..1c9f9a54a87339 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -160,6 +160,30 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) if (err) return err; } + + if (MLX5_CAP_GEN(dev, vport_group_manager) && + MLX5_CAP_GEN(dev, eswitch_flow_table)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE, + HCA_CAP_OPMOD_GET_MAX); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, vport_group_manager)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH, + HCA_CAP_OPMOD_GET_MAX); + if (err) + return err; + } + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 4ac8d4cc49737d..c6de3240f76f21 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -49,6 +49,9 @@ #include <linux/delay.h> #include <linux/mlx5/mlx5_ifc.h> #include "mlx5_core.h" +#ifdef CONFIG_MLX5_CORE_EN +#include "eswitch.h" +#endif MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver"); @@ -454,6 +457,9 @@ static int set_hca_ctrl(struct mlx5_core_dev *dev) struct mlx5_reg_host_endianess he_out; int err; + if (!mlx5_core_is_pf(dev)) + return 0; + memset(&he_in, 0, sizeof(he_in)); he_in.he = MLX5_SET_HOST_ENDIANNESS; err = mlx5_core_access_reg(dev, &he_in, sizeof(he_in), @@ -462,42 +468,39 @@ static int set_hca_ctrl(struct mlx5_core_dev *dev) return err; } -static int mlx5_core_enable_hca(struct mlx5_core_dev *dev) +int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id) { + u32 out[MLX5_ST_SZ_DW(enable_hca_out)]; + u32 in[MLX5_ST_SZ_DW(enable_hca_in)]; int err; - struct mlx5_enable_hca_mbox_in in; - struct mlx5_enable_hca_mbox_out out; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ENABLE_HCA); + memset(in, 0, sizeof(in)); + MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); + MLX5_SET(enable_hca_in, in, function_id, func_id); + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); if (err) return err; - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - - return 0; + return mlx5_cmd_status_to_err_v2(out); } -static int mlx5_core_disable_hca(struct mlx5_core_dev *dev) +int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id) { + u32 out[MLX5_ST_SZ_DW(disable_hca_out)]; + u32 in[MLX5_ST_SZ_DW(disable_hca_in)]; int err; - struct mlx5_disable_hca_mbox_in in; - struct mlx5_disable_hca_mbox_out out; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DISABLE_HCA); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + memset(in, 0, sizeof(in)); + MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); + MLX5_SET(disable_hca_in, in, function_id, func_id); + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) return err; - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - - return 0; + return mlx5_cmd_status_to_err_v2(out); } static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) @@ -942,7 +945,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_pagealloc_init(dev); - err = mlx5_core_enable_hca(dev); + err = mlx5_core_enable_hca(dev, 0); if (err) { dev_err(&pdev->dev, "enable hca failed\n"); goto err_pagealloc_cleanup; @@ -1052,6 +1055,20 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_init_srq_table(dev); mlx5_init_mr_table(dev); +#ifdef CONFIG_MLX5_CORE_EN + err = mlx5_eswitch_init(dev); + if (err) { + dev_err(&pdev->dev, "eswitch init failed %d\n", err); + goto err_reg_dev; + } +#endif + + err = mlx5_sriov_init(dev); + if (err) { + dev_err(&pdev->dev, "sriov init failed %d\n", err); + goto err_sriov; + } + err = mlx5_register_device(dev); if (err) { dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err); @@ -1068,6 +1085,13 @@ out: return 0; +err_sriov: + if (mlx5_sriov_cleanup(dev)) + dev_err(&dev->pdev->dev, "sriov cleanup failed\n"); + +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_cleanup(dev->priv.eswitch); +#endif err_reg_dev: mlx5_cleanup_mr_table(dev); mlx5_cleanup_srq_table(dev); @@ -1106,7 +1130,7 @@ reclaim_boot_pages: mlx5_reclaim_startup_pages(dev); err_disable_hca: - mlx5_core_disable_hca(dev); + mlx5_core_disable_hca(dev, 0); err_pagealloc_cleanup: mlx5_pagealloc_cleanup(dev); @@ -1123,6 +1147,13 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) { int err = 0; + err = mlx5_sriov_cleanup(dev); + if (err) { + dev_warn(&dev->pdev->dev, "%s: sriov cleanup failed - abort\n", + __func__); + return err; + } + mutex_lock(&dev->intf_state_mutex); if (dev->interface_state == MLX5_INTERFACE_STATE_DOWN) { dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", @@ -1130,6 +1161,10 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto out; } mlx5_unregister_device(dev); +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_cleanup(dev->priv.eswitch); +#endif + mlx5_cleanup_mr_table(dev); mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); @@ -1149,7 +1184,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) } mlx5_pagealloc_stop(dev); mlx5_reclaim_startup_pages(dev); - mlx5_core_disable_hca(dev); + mlx5_core_disable_hca(dev, 0); mlx5_pagealloc_cleanup(dev); mlx5_cmd_cleanup(dev); @@ -1195,6 +1230,7 @@ static int init_one(struct pci_dev *pdev, return -ENOMEM; } priv = &dev->priv; + priv->pci_dev_data = id->driver_data; pci_set_drvdata(pdev, dev); @@ -1365,12 +1401,12 @@ static const struct pci_error_handlers mlx5_err_handler = { }; static const struct pci_device_id mlx5_core_pci_table[] = { - { PCI_VDEVICE(MELLANOX, 0x1011) }, /* Connect-IB */ - { PCI_VDEVICE(MELLANOX, 0x1012) }, /* Connect-IB VF */ - { PCI_VDEVICE(MELLANOX, 0x1013) }, /* ConnectX-4 */ - { PCI_VDEVICE(MELLANOX, 0x1014) }, /* ConnectX-4 VF */ - { PCI_VDEVICE(MELLANOX, 0x1015) }, /* ConnectX-4LX */ - { PCI_VDEVICE(MELLANOX, 0x1016) }, /* ConnectX-4LX VF */ + { PCI_VDEVICE(MELLANOX, 0x1011) }, /* Connect-IB */ + { PCI_VDEVICE(MELLANOX, 0x1012), MLX5_PCI_DEV_IS_VF}, /* Connect-IB VF */ + { PCI_VDEVICE(MELLANOX, 0x1013) }, /* ConnectX-4 */ + { PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4 VF */ + { PCI_VDEVICE(MELLANOX, 0x1015) }, /* ConnectX-4LX */ + { PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4LX VF */ { 0, } }; @@ -1381,7 +1417,8 @@ static struct pci_driver mlx5_core_driver = { .id_table = mlx5_core_pci_table, .probe = init_one, .remove = remove_one, - .err_handler = &mlx5_err_handler + .err_handler = &mlx5_err_handler, + .sriov_configure = mlx5_core_sriov_configure, }; static int __init init(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index cee5b7a839bc33..bee7da822dfe20 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -36,6 +36,7 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> +#include <linux/if_link.h> #define DRIVER_NAME "mlx5_core" #define DRIVER_VERSION "3.0-1" @@ -90,6 +91,10 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); void mlx5_enter_error_state(struct mlx5_core_dev *dev); void mlx5_disable_device(struct mlx5_core_dev *dev); +int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs); +int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); +int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); +int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); void mlx5e_init(void); void mlx5e_cleanup(void); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 4d3377b1265787..9eeee0545f1cf2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -33,6 +33,7 @@ #include <linux/highmem.h> #include <linux/kernel.h> #include <linux/module.h> +#include <linux/delay.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/cmd.h> #include "mlx5_core.h" @@ -95,6 +96,7 @@ struct mlx5_manage_pages_outbox { enum { MAX_RECLAIM_TIME_MSECS = 5000, + MAX_RECLAIM_VFS_PAGES_TIME_MSECS = 2 * 1000 * 60, }; enum { @@ -352,6 +354,10 @@ retry: goto out_4k; } + dev->priv.fw_pages += npages; + if (func_id) + dev->priv.vfs_pages += npages; + mlx5_core_dbg(dev, "err %d\n", err); kvfree(in); @@ -405,6 +411,12 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, } num_claimed = be32_to_cpu(out->num_entries); + if (num_claimed > npages) { + mlx5_core_warn(dev, "fw returned %d, driver asked %d => corruption\n", + num_claimed, npages); + err = -EINVAL; + goto out_free; + } if (nclaimed) *nclaimed = num_claimed; @@ -412,6 +424,9 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, addr = be64_to_cpu(out->pas[i]); free_4k(dev, addr); } + dev->priv.fw_pages -= num_claimed; + if (func_id) + dev->priv.vfs_pages -= num_claimed; out_free: kvfree(out); @@ -548,3 +563,26 @@ void mlx5_pagealloc_stop(struct mlx5_core_dev *dev) { destroy_workqueue(dev->priv.pg_wq); } + +int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev) +{ + unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); + int prev_vfs_pages = dev->priv.vfs_pages; + + mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages, + dev->priv.name); + while (dev->priv.vfs_pages) { + if (time_after(jiffies, end)) { + mlx5_core_warn(dev, "aborting while there are %d pending pages\n", dev->priv.vfs_pages); + return -ETIMEDOUT; + } + if (dev->priv.vfs_pages < prev_vfs_pages) { + end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); + prev_vfs_pages = dev->priv.vfs_pages; + } + msleep(50); + } + + mlx5_core_dbg(dev, "All pages received from %s\n", dev->priv.name); + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c new file mode 100644 index 00000000000000..7b24386794f994 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -0,0 +1,233 @@ +/* + * Copyright (c) 2014, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/pci.h> +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" +#ifdef CONFIG_MLX5_CORE_EN +#include "eswitch.h" +#endif + +static void enable_vfs(struct mlx5_core_dev *dev, int num_vfs) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err; + int vf; + + for (vf = 1; vf <= num_vfs; vf++) { + err = mlx5_core_enable_hca(dev, vf); + if (err) { + mlx5_core_warn(dev, "failed to enable VF %d\n", vf - 1); + } else { + sriov->vfs_ctx[vf - 1].enabled = 1; + mlx5_core_dbg(dev, "successfully enabled VF %d\n", vf - 1); + } + } +} + +static void disable_vfs(struct mlx5_core_dev *dev, int num_vfs) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int vf; + + for (vf = 1; vf <= num_vfs; vf++) { + if (sriov->vfs_ctx[vf - 1].enabled) { + if (mlx5_core_disable_hca(dev, vf)) + mlx5_core_warn(dev, "failed to disable VF %d\n", vf - 1); + else + sriov->vfs_ctx[vf - 1].enabled = 0; + } + } +} + +static int mlx5_core_create_vfs(struct pci_dev *pdev, int num_vfs) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + int err; + + if (pci_num_vf(pdev)) + pci_disable_sriov(pdev); + + enable_vfs(dev, num_vfs); + + err = pci_enable_sriov(pdev, num_vfs); + if (err) { + dev_warn(&pdev->dev, "enable sriov failed %d\n", err); + goto ex; + } + + return 0; + +ex: + disable_vfs(dev, num_vfs); + return err; +} + +static int mlx5_core_sriov_enable(struct pci_dev *pdev, int num_vfs) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err; + + kfree(sriov->vfs_ctx); + sriov->vfs_ctx = kcalloc(num_vfs, sizeof(*sriov->vfs_ctx), GFP_ATOMIC); + if (!sriov->vfs_ctx) + return -ENOMEM; + + sriov->enabled_vfs = num_vfs; + err = mlx5_core_create_vfs(pdev, num_vfs); + if (err) { + kfree(sriov->vfs_ctx); + sriov->vfs_ctx = NULL; + return err; + } + + return 0; +} + +static void mlx5_core_init_vfs(struct mlx5_core_dev *dev, int num_vfs) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + + sriov->num_vfs = num_vfs; +} + +static void mlx5_core_cleanup_vfs(struct mlx5_core_dev *dev) +{ + struct mlx5_core_sriov *sriov; + + sriov = &dev->priv.sriov; + disable_vfs(dev, sriov->num_vfs); + + if (mlx5_wait_for_vf_pages(dev)) + mlx5_core_warn(dev, "timeout claiming VFs pages\n"); + + sriov->num_vfs = 0; +} + +int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err; + + mlx5_core_dbg(dev, "requsted num_vfs %d\n", num_vfs); + if (!mlx5_core_is_pf(dev)) + return -EPERM; + + mlx5_core_cleanup_vfs(dev); + + if (!num_vfs) { +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_disable_sriov(dev->priv.eswitch); +#endif + kfree(sriov->vfs_ctx); + sriov->vfs_ctx = NULL; + if (!pci_vfs_assigned(pdev)) + pci_disable_sriov(pdev); + else + pr_info("unloading PF driver while leaving orphan VFs\n"); + return 0; + } + + err = mlx5_core_sriov_enable(pdev, num_vfs); + if (err) { + dev_warn(&pdev->dev, "mlx5_core_sriov_enable failed %d\n", err); + return err; + } + + mlx5_core_init_vfs(dev, num_vfs); +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs); +#endif + + return num_vfs; +} + +static int sync_required(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int cur_vfs = pci_num_vf(pdev); + + if (cur_vfs != sriov->num_vfs) { + pr_info("current VFs %d, registered %d - sync needed\n", cur_vfs, sriov->num_vfs); + return 1; + } + + return 0; +} + +int mlx5_sriov_init(struct mlx5_core_dev *dev) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + struct pci_dev *pdev = dev->pdev; + int cur_vfs; + + if (!mlx5_core_is_pf(dev)) + return 0; + + if (!sync_required(dev->pdev)) + return 0; + + cur_vfs = pci_num_vf(pdev); + sriov->vfs_ctx = kcalloc(cur_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL); + if (!sriov->vfs_ctx) + return -ENOMEM; + + sriov->enabled_vfs = cur_vfs; + + mlx5_core_init_vfs(dev, cur_vfs); +#ifdef CONFIG_MLX5_CORE_EN + if (cur_vfs) + mlx5_eswitch_enable_sriov(dev->priv.eswitch, cur_vfs); +#endif + + enable_vfs(dev, cur_vfs); + + return 0; +} + +int mlx5_sriov_cleanup(struct mlx5_core_dev *dev) +{ + struct pci_dev *pdev = dev->pdev; + int err; + + if (!mlx5_core_is_pf(dev)) + return 0; + + err = mlx5_core_sriov_configure(pdev, 0); + if (err) + return err; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index b94177ebcf3a31..076197efea9b07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -36,54 +36,399 @@ #include <linux/mlx5/vport.h> #include "mlx5_core.h" -u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod) +static int _mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, + u16 vport, u32 *out, int outlen) { - u32 in[MLX5_ST_SZ_DW(query_vport_state_in)]; - u32 out[MLX5_ST_SZ_DW(query_vport_state_out)]; int err; + u32 in[MLX5_ST_SZ_DW(query_vport_state_in)]; memset(in, 0, sizeof(in)); MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE); MLX5_SET(query_vport_state_in, in, op_mod, opmod); + MLX5_SET(query_vport_state_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_vport_state_in, in, other_vport, 1); - err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, - sizeof(out)); + err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); if (err) mlx5_core_warn(mdev, "MLX5_CMD_OP_QUERY_VPORT_STATE failed\n"); + return err; +} + +u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) +{ + u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {0}; + + _mlx5_query_vport_state(mdev, opmod, vport, out, sizeof(out)); + return MLX5_GET(query_vport_state_out, out, state); } -EXPORT_SYMBOL(mlx5_query_vport_state); +EXPORT_SYMBOL_GPL(mlx5_query_vport_state); + +u8 mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) +{ + u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {0}; + + _mlx5_query_vport_state(mdev, opmod, vport, out, sizeof(out)); + + return MLX5_GET(query_vport_state_out, out, admin_state); +} +EXPORT_SYMBOL(mlx5_query_vport_admin_state); -void mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr) +int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, + u16 vport, u8 state) +{ + u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)]; + u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)]; + int err; + + memset(in, 0, sizeof(in)); + + MLX5_SET(modify_vport_state_in, in, opcode, + MLX5_CMD_OP_MODIFY_VPORT_STATE); + MLX5_SET(modify_vport_state_in, in, op_mod, opmod); + MLX5_SET(modify_vport_state_in, in, vport_number, vport); + + if (vport) + MLX5_SET(modify_vport_state_in, in, other_vport, 1); + + MLX5_SET(modify_vport_state_in, in, admin_state, state); + + err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, + sizeof(out)); + if (err) + mlx5_core_warn(mdev, "MLX5_CMD_OP_MODIFY_VPORT_STATE failed\n"); + + return err; +} +EXPORT_SYMBOL(mlx5_modify_vport_admin_state); + +static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + + return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); +} + +static int mlx5_modify_nic_vport_context(struct mlx5_core_dev *mdev, void *in, + int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + + memset(out, 0, sizeof(out)); + return mlx5_cmd_exec_check_status(mdev, in, inlen, out, sizeof(out)); +} + +int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, + u16 vport, u8 *addr) { - u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; u32 *out; int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); u8 *out_addr; + int err; out = mlx5_vzalloc(outlen); if (!out) - return; + return -ENOMEM; out_addr = MLX5_ADDR_OF(query_nic_vport_context_out, out, nic_vport_context.permanent_address); + err = mlx5_query_nic_vport_context(mdev, vport, out, outlen); + if (err) + goto out; + + ether_addr_copy(addr, &out_addr[2]); + +out: + kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_address); + +int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev, + u16 vport, u8 *addr) +{ + void *in; + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + int err; + void *nic_vport_ctx; + u8 *perm_mac; + + in = mlx5_vzalloc(inlen); + if (!in) { + mlx5_core_warn(mdev, "failed to allocate inbox\n"); + return -ENOMEM; + } + + MLX5_SET(modify_nic_vport_context_in, in, + field_select.permanent_address, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + + if (vport) + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + perm_mac = MLX5_ADDR_OF(nic_vport_context, nic_vport_ctx, + permanent_address); + + ether_addr_copy(&perm_mac[2], addr); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + kvfree(in); + + return err; +} +EXPORT_SYMBOL(mlx5_modify_nic_vport_mac_address); + +int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, + u32 vport, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int *list_size) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; + void *nic_vport_ctx; + int max_list_size; + int req_list_size; + int out_sz; + void *out; + int err; + int i; + + req_list_size = *list_size; + + max_list_size = list_type == MLX5_NVPRT_LIST_TYPE_UC ? + 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list); + + if (req_list_size > max_list_size) { + mlx5_core_warn(dev, "Requested list size (%d) > (%d) max_list_size\n", + req_list_size, max_list_size); + req_list_size = max_list_size; + } + + out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout); + memset(in, 0, sizeof(in)); + out = kzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; MLX5_SET(query_nic_vport_context_in, in, opcode, MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, list_type); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); - memset(out, 0, outlen); - mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); + if (vport) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); - ether_addr_copy(addr, &out_addr[2]); + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_sz); + if (err) + goto out; - kvfree(out); + nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out, + nic_vport_context); + req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx, + allowed_list_size); + + *list_size = req_list_size; + for (i = 0; i < req_list_size; i++) { + u8 *mac_addr = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]) + 2; + ether_addr_copy(addr_list[i], mac_addr); + } +out: + kfree(out); + return err; } -EXPORT_SYMBOL(mlx5_query_nic_vport_mac_address); +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_list); + +int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int list_size) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + void *nic_vport_ctx; + int max_list_size; + int in_sz; + void *in; + int err; + int i; + + max_list_size = list_type == MLX5_NVPRT_LIST_TYPE_UC ? + 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list); + + if (list_size > max_list_size) + return -ENOSPC; + + in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + list_size * MLX5_ST_SZ_BYTES(mac_address_layout); + + memset(out, 0, sizeof(out)); + in = kzalloc(in_sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, + field_select.addresses_list, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, + nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_type, list_type); + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_size, list_size); + + for (i = 0; i < list_size; i++) { + u8 *curr_mac = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]) + 2; + ether_addr_copy(curr_mac, addr_list[i]); + } + + err = mlx5_cmd_exec_check_status(dev, in, in_sz, out, sizeof(out)); + kfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list); + +int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev, + u32 vport, + u16 vlans[], + int *size) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; + void *nic_vport_ctx; + int req_list_size; + int max_list_size; + int out_sz; + void *out; + int err; + int i; + + req_list_size = *size; + max_list_size = 1 << MLX5_CAP_GEN(dev, log_max_vlan_list); + if (req_list_size > max_list_size) { + mlx5_core_warn(dev, "Requested list size (%d) > (%d) max list size\n", + req_list_size, max_list_size); + req_list_size = max_list_size; + } + + out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + req_list_size * MLX5_ST_SZ_BYTES(vlan_layout); + + memset(in, 0, sizeof(in)); + out = kzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, + MLX5_NVPRT_LIST_TYPE_VLAN); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + + if (vport) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_sz); + if (err) + goto out; + + nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out, + nic_vport_context); + req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx, + allowed_list_size); + + *size = req_list_size; + for (i = 0; i < req_list_size; i++) { + void *vlan_addr = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]); + vlans[i] = MLX5_GET(vlan_layout, vlan_addr, vlan); + } +out: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_vlans); + +int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, + u16 vlans[], + int list_size) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + void *nic_vport_ctx; + int max_list_size; + int in_sz; + void *in; + int err; + int i; + + max_list_size = 1 << MLX5_CAP_GEN(dev, log_max_vlan_list); + + if (list_size > max_list_size) + return -ENOSPC; + + in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + list_size * MLX5_ST_SZ_BYTES(vlan_layout); + + memset(out, 0, sizeof(out)); + in = kzalloc(in_sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, + field_select.addresses_list, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, + nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_type, MLX5_NVPRT_LIST_TYPE_VLAN); + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_size, list_size); + + for (i = 0; i < list_size; i++) { + void *vlan_addr = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]); + MLX5_SET(vlan_layout, vlan_addr, vlan, vlans[i]); + } + + err = mlx5_cmd_exec_check_status(dev, in, in_sz, out, sizeof(out)); + kfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_vlans); int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, u8 port_num, u16 vf_num, u16 gid_index, @@ -343,3 +688,65 @@ int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, return err; } EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid); + +int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev, + u32 vport, + int *promisc_uc, + int *promisc_mc, + int *promisc_all) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_query_nic_vport_context(mdev, vport, out, outlen); + if (err) + goto out; + + *promisc_uc = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_uc); + *promisc_mc = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_mc); + *promisc_all = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_all); + +out: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_promisc); + +int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev, + int promisc_uc, + int promisc_mc, + int promisc_all) +{ + void *in; + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + int err; + + in = mlx5_vzalloc(inlen); + if (!in) { + mlx5_core_err(mdev, "failed to allocate inbox\n"); + return -ENOMEM; + } + + MLX5_SET(modify_nic_vport_context_in, in, field_select.promisc, 1); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_uc, promisc_uc); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_mc, promisc_mc); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_all, promisc_all); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + kvfree(in); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_promisc); diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index e36e12219c9be2..ec8caf8fedc63b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -10,6 +10,14 @@ config MLXSW_CORE To compile this driver as a module, choose M here: the module will be called mlxsw_core. +config MLXSW_CORE_HWMON + bool "HWMON support for Mellanox Technologies Switch ASICs" + depends on MLXSW_CORE && HWMON + depends on !(MLXSW_CORE=y && HWMON=m) + default y + ---help--- + Say Y here if you want to expose HWMON interface on mlxsw devices. + config MLXSW_PCI tristate "PCI bus implementation for Mellanox Technologies Switch ASICs" depends on PCI && HAS_DMA && HAS_IOMEM && MLXSW_CORE diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index af015818fd1903..584cac44485200 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -1,5 +1,6 @@ obj-$(CONFIG_MLXSW_CORE) += mlxsw_core.o mlxsw_core-objs := core.o +mlxsw_core-$(CONFIG_MLXSW_CORE_HWMON) += core_hwmon.o obj-$(CONFIG_MLXSW_PCI) += mlxsw_pci.o mlxsw_pci-objs := pci.o obj-$(CONFIG_MLXSW_SWITCHX2) += mlxsw_switchx2.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 97f0d93caf994e..af8a48b3b3ad6e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -105,6 +105,10 @@ struct mlxsw_core { struct debugfs_blob_wrapper vsd_blob; struct debugfs_blob_wrapper psid_blob; } dbg; + struct { + u8 *mapping; /* lag_id+port_index to local_port mapping */ + } lag; + struct mlxsw_hwmon *hwmon; unsigned long driver_priv[0]; /* driver_priv has to be always the last item */ }; @@ -814,6 +818,17 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, goto err_alloc_stats; } + if (mlxsw_driver->profile->used_max_lag && + mlxsw_driver->profile->used_max_port_per_lag) { + alloc_size = sizeof(u8) * mlxsw_driver->profile->max_lag * + mlxsw_driver->profile->max_port_per_lag; + mlxsw_core->lag.mapping = kzalloc(alloc_size, GFP_KERNEL); + if (!mlxsw_core->lag.mapping) { + err = -ENOMEM; + goto err_alloc_lag_mapping; + } + } + err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile); if (err) goto err_bus_init; @@ -822,6 +837,10 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, if (err) goto err_emad_init; + err = mlxsw_hwmon_init(mlxsw_core, mlxsw_bus_info, &mlxsw_core->hwmon); + if (err) + goto err_hwmon_init; + err = mlxsw_driver->init(mlxsw_core->driver_priv, mlxsw_core, mlxsw_bus_info); if (err) @@ -836,10 +855,14 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, err_debugfs_init: mlxsw_core->driver->fini(mlxsw_core->driver_priv); err_driver_init: + mlxsw_hwmon_fini(mlxsw_core->hwmon); +err_hwmon_init: mlxsw_emad_fini(mlxsw_core); err_emad_init: mlxsw_bus->fini(bus_priv); err_bus_init: + kfree(mlxsw_core->lag.mapping); +err_alloc_lag_mapping: free_percpu(mlxsw_core->pcpu_stats); err_alloc_stats: kfree(mlxsw_core); @@ -855,8 +878,10 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core) mlxsw_core_debugfs_fini(mlxsw_core); mlxsw_core->driver->fini(mlxsw_core->driver_priv); + mlxsw_hwmon_fini(mlxsw_core->hwmon); mlxsw_emad_fini(mlxsw_core); mlxsw_core->bus->fini(mlxsw_core->bus_priv); + kfree(mlxsw_core->lag.mapping); free_percpu(mlxsw_core->pcpu_stats); kfree(mlxsw_core); mlxsw_core_driver_put(device_kind); @@ -1188,11 +1213,25 @@ void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, struct mlxsw_rx_listener_item *rxl_item; const struct mlxsw_rx_listener *rxl; struct mlxsw_core_pcpu_stats *pcpu_stats; - u8 local_port = rx_info->sys_port; + u8 local_port; bool found = false; - dev_dbg_ratelimited(mlxsw_core->bus_info->dev, "%s: sys_port = %d, trap_id = 0x%x\n", - __func__, rx_info->sys_port, rx_info->trap_id); + if (rx_info->is_lag) { + dev_dbg_ratelimited(mlxsw_core->bus_info->dev, "%s: lag_id = %d, lag_port_index = 0x%x\n", + __func__, rx_info->u.lag_id, + rx_info->trap_id); + /* Upper layer does not care if the skb came from LAG or not, + * so just get the local_port for the lag port and push it up. + */ + local_port = mlxsw_core_lag_mapping_get(mlxsw_core, + rx_info->u.lag_id, + rx_info->lag_port_index); + } else { + local_port = rx_info->u.sys_port; + } + + dev_dbg_ratelimited(mlxsw_core->bus_info->dev, "%s: local_port = %d, trap_id = 0x%x\n", + __func__, local_port, rx_info->trap_id); if ((rx_info->trap_id >= MLXSW_TRAP_ID_MAX) || (local_port >= MLXSW_PORT_MAX_PORTS)) @@ -1236,6 +1275,48 @@ drop: } EXPORT_SYMBOL(mlxsw_core_skb_receive); +static int mlxsw_core_lag_mapping_index(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index) +{ + return mlxsw_core->driver->profile->max_port_per_lag * lag_id + + port_index; +} + +void mlxsw_core_lag_mapping_set(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index, u8 local_port) +{ + int index = mlxsw_core_lag_mapping_index(mlxsw_core, + lag_id, port_index); + + mlxsw_core->lag.mapping[index] = local_port; +} +EXPORT_SYMBOL(mlxsw_core_lag_mapping_set); + +u8 mlxsw_core_lag_mapping_get(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index) +{ + int index = mlxsw_core_lag_mapping_index(mlxsw_core, + lag_id, port_index); + + return mlxsw_core->lag.mapping[index]; +} +EXPORT_SYMBOL(mlxsw_core_lag_mapping_get); + +void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 local_port) +{ + int i; + + for (i = 0; i < mlxsw_core->driver->profile->max_port_per_lag; i++) { + int index = mlxsw_core_lag_mapping_index(mlxsw_core, + lag_id, i); + + if (mlxsw_core->lag.mapping[index] == local_port) + mlxsw_core->lag.mapping[index] = 0; + } +} +EXPORT_SYMBOL(mlxsw_core_lag_mapping_clear); + int mlxsw_cmd_exec(struct mlxsw_core *mlxsw_core, u16 opcode, u8 opcode_mod, u32 in_mod, bool out_mbox_direct, char *in_mbox, size_t in_mbox_size, diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 807827350a8990..4833fb33ce07a1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -112,13 +112,25 @@ int mlxsw_reg_write(struct mlxsw_core *mlxsw_core, const struct mlxsw_reg_info *reg, char *payload); struct mlxsw_rx_info { - u16 sys_port; + bool is_lag; + union { + u16 sys_port; + u16 lag_id; + } u; + u8 lag_port_index; int trap_id; }; void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, struct mlxsw_rx_info *rx_info); +void mlxsw_core_lag_mapping_set(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index, u8 local_port); +u8 mlxsw_core_lag_mapping_get(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index); +void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 local_port); + #define MLXSW_CONFIG_PROFILE_SWID_COUNT 8 struct mlxsw_swid_config { @@ -209,4 +221,28 @@ struct mlxsw_bus_info { u8 psid[MLXSW_CMD_BOARDINFO_PSID_LEN]; }; +struct mlxsw_hwmon; + +#ifdef CONFIG_MLXSW_CORE_HWMON + +int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info, + struct mlxsw_hwmon **p_hwmon); +void mlxsw_hwmon_fini(struct mlxsw_hwmon *mlxsw_hwmon); + +#else + +static inline int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info, + struct mlxsw_hwmon **p_hwmon) +{ + return 0; +} + +static inline void mlxsw_hwmon_fini(struct mlxsw_hwmon *mlxsw_hwmon) +{ +} + +#endif + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c new file mode 100644 index 00000000000000..4dad146b41ae5d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -0,0 +1,342 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/device.h> +#include <linux/sysfs.h> +#include <linux/hwmon.h> +#include <linux/err.h> + +#include "core.h" + +#define MLXSW_HWMON_TEMP_SENSOR_MAX_COUNT 127 +#define MLXSW_HWMON_ATTR_COUNT (MLXSW_HWMON_TEMP_SENSOR_MAX_COUNT * 4 + \ + MLXSW_MFCR_TACHOS_MAX + MLXSW_MFCR_PWMS_MAX) + +struct mlxsw_hwmon_attr { + struct device_attribute dev_attr; + struct mlxsw_hwmon *hwmon; + unsigned int type_index; + char name[16]; +}; + +struct mlxsw_hwmon { + struct mlxsw_core *core; + const struct mlxsw_bus_info *bus_info; + struct device *hwmon_dev; + struct attribute_group group; + const struct attribute_group *groups[2]; + struct attribute *attrs[MLXSW_HWMON_ATTR_COUNT + 1]; + struct mlxsw_hwmon_attr hwmon_attrs[MLXSW_HWMON_ATTR_COUNT]; + unsigned int attrs_count; +}; + +static ssize_t mlxsw_hwmon_temp_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + unsigned int temp; + int err; + + mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index, + false, false); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n"); + return err; + } + mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); + return sprintf(buf, "%u\n", temp); +} + +static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + unsigned int temp_max; + int err; + + mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index, + false, false); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n"); + return err; + } + mlxsw_reg_mtmp_unpack(mtmp_pl, NULL, &temp_max, NULL); + return sprintf(buf, "%u\n", temp_max); +} + +static ssize_t mlxsw_hwmon_fan_rpm_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char mfsm_pl[MLXSW_REG_MFSM_LEN]; + int err; + + mlxsw_reg_mfsm_pack(mfsm_pl, mlwsw_hwmon_attr->type_index); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mfsm), mfsm_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query fan\n"); + return err; + } + return sprintf(buf, "%u\n", mlxsw_reg_mfsm_rpm_get(mfsm_pl)); +} + +static ssize_t mlxsw_hwmon_pwm_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char mfsc_pl[MLXSW_REG_MFSC_LEN]; + int err; + + mlxsw_reg_mfsc_pack(mfsc_pl, mlwsw_hwmon_attr->type_index, 0); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mfsc), mfsc_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query PWM\n"); + return err; + } + return sprintf(buf, "%u\n", + mlxsw_reg_mfsc_pwm_duty_cycle_get(mfsc_pl)); +} + +static ssize_t mlxsw_hwmon_pwm_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char mfsc_pl[MLXSW_REG_MFSC_LEN]; + unsigned long val; + int err; + + err = kstrtoul(buf, 10, &val); + if (err) + return err; + if (val > 255) + return -EINVAL; + + mlxsw_reg_mfsc_pack(mfsc_pl, mlwsw_hwmon_attr->type_index, val); + err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mfsc), mfsc_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to write PWM\n"); + return err; + } + return err ? err : len; +} + +enum mlxsw_hwmon_attr_type { + MLXSW_HWMON_ATTR_TYPE_TEMP, + MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, + MLXSW_HWMON_ATTR_TYPE_FAN_RPM, + MLXSW_HWMON_ATTR_TYPE_PWM, +}; + +static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon, + enum mlxsw_hwmon_attr_type attr_type, + unsigned int type_index, unsigned int num) { + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr; + unsigned int attr_index; + + attr_index = mlxsw_hwmon->attrs_count; + mlxsw_hwmon_attr = &mlxsw_hwmon->hwmon_attrs[attr_index]; + + switch (attr_type) { + case MLXSW_HWMON_ATTR_TYPE_TEMP: + mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_temp_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = S_IRUGO; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_input", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_MAX: + mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_temp_max_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = S_IRUGO; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_highest", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_FAN_RPM: + mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_fan_rpm_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = S_IRUGO; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "fan%u_input", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_PWM: + mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_pwm_show; + mlxsw_hwmon_attr->dev_attr.store = mlxsw_hwmon_pwm_store; + mlxsw_hwmon_attr->dev_attr.attr.mode = S_IWUSR | S_IRUGO; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "pwm%u", num + 1); + break; + default: + WARN_ON(1); + } + + mlxsw_hwmon_attr->type_index = type_index; + mlxsw_hwmon_attr->hwmon = mlxsw_hwmon; + mlxsw_hwmon_attr->dev_attr.attr.name = mlxsw_hwmon_attr->name; + sysfs_attr_init(&mlxsw_hwmon_attr->dev_attr.attr); + + mlxsw_hwmon->attrs[attr_index] = &mlxsw_hwmon_attr->dev_attr.attr; + mlxsw_hwmon->attrs_count++; +} + +static int mlxsw_hwmon_temp_init(struct mlxsw_hwmon *mlxsw_hwmon) +{ + char mtcap_pl[MLXSW_REG_MTCAP_LEN]; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + u8 sensor_count; + int i; + int err; + + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtcap), mtcap_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to get number of temp sensors\n"); + return err; + } + sensor_count = mlxsw_reg_mtcap_sensor_count_get(mtcap_pl); + for (i = 0; i < sensor_count; i++) { + mlxsw_reg_mtmp_pack(mtmp_pl, 0, true, true); + err = mlxsw_reg_write(mlxsw_hwmon->core, + MLXSW_REG(mtmp), mtmp_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to setup temp sensor number %d\n", + i); + return err; + } + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP, i, i); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, i, i); + } + return 0; +} + +static int mlxsw_hwmon_fans_init(struct mlxsw_hwmon *mlxsw_hwmon) +{ + char mfcr_pl[MLXSW_REG_MFCR_LEN]; + enum mlxsw_reg_mfcr_pwm_frequency freq; + unsigned int type_index; + unsigned int num; + u16 tacho_active; + u8 pwm_active; + int err; + + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mfcr), mfcr_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to get to probe PWMs and Tachometers\n"); + return err; + } + mlxsw_reg_mfcr_unpack(mfcr_pl, &freq, &tacho_active, &pwm_active); + num = 0; + for (type_index = 0; type_index < MLXSW_MFCR_TACHOS_MAX; type_index++) { + if (tacho_active & BIT(type_index)) + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_FAN_RPM, + type_index, num++); + } + num = 0; + for (type_index = 0; type_index < MLXSW_MFCR_PWMS_MAX; type_index++) { + if (pwm_active & BIT(type_index)) + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_PWM, + type_index, num++); + } + return 0; +} + +int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info, + struct mlxsw_hwmon **p_hwmon) +{ + struct mlxsw_hwmon *mlxsw_hwmon; + struct device *hwmon_dev; + int err; + + mlxsw_hwmon = kzalloc(sizeof(*mlxsw_hwmon), GFP_KERNEL); + if (!mlxsw_hwmon) + return -ENOMEM; + mlxsw_hwmon->core = mlxsw_core; + mlxsw_hwmon->bus_info = mlxsw_bus_info; + + err = mlxsw_hwmon_temp_init(mlxsw_hwmon); + if (err) + goto err_temp_init; + + err = mlxsw_hwmon_fans_init(mlxsw_hwmon); + if (err) + goto err_fans_init; + + mlxsw_hwmon->groups[0] = &mlxsw_hwmon->group; + mlxsw_hwmon->group.attrs = mlxsw_hwmon->attrs; + + hwmon_dev = devm_hwmon_device_register_with_groups(mlxsw_bus_info->dev, + "mlxsw", + mlxsw_hwmon, + mlxsw_hwmon->groups); + if (IS_ERR(hwmon_dev)) { + err = PTR_ERR(hwmon_dev); + goto err_hwmon_register; + } + + mlxsw_hwmon->hwmon_dev = hwmon_dev; + *p_hwmon = mlxsw_hwmon; + return 0; + +err_hwmon_register: +err_fans_init: +err_temp_init: + kfree(mlxsw_hwmon); + return err; +} + +void mlxsw_hwmon_fini(struct mlxsw_hwmon *mlxsw_hwmon) +{ + kfree(mlxsw_hwmon); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index de69e719dc9d96..d2102e572b1d50 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -686,11 +686,15 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, if (q->consumer_counter++ != consumer_counter_limit) dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in RDQ\n"); - /* We do not support lag now */ - if (mlxsw_pci_cqe_lag_get(cqe)) - goto drop; + if (mlxsw_pci_cqe_lag_get(cqe)) { + rx_info.is_lag = true; + rx_info.u.lag_id = mlxsw_pci_cqe_lag_id_get(cqe); + rx_info.lag_port_index = mlxsw_pci_cqe_lag_port_index_get(cqe); + } else { + rx_info.is_lag = false; + rx_info.u.sys_port = mlxsw_pci_cqe_system_port_get(cqe); + } - rx_info.sys_port = mlxsw_pci_cqe_system_port_get(cqe); rx_info.trap_id = mlxsw_pci_cqe_trap_id_get(cqe); byte_count = mlxsw_pci_cqe_byte_count_get(cqe); @@ -699,7 +703,6 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, skb_put(skb, byte_count); mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info); -put_new_skb: memset(wqe, 0, q->elem_size); err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info); if (err) @@ -708,10 +711,6 @@ put_new_skb: q->producer_counter++; mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q); return; - -drop: - dev_kfree_skb_any(skb); - goto put_new_skb; } static char *mlxsw_pci_cq_sw_cqe_get(struct mlxsw_pci_queue *q) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.h b/drivers/net/ethernet/mellanox/mlxsw/pci.h index 142f33d978c5f9..912106054ff270 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.h @@ -129,13 +129,15 @@ MLXSW_ITEM64_INDEXED(pci, wqe, address, 0x08, 0, 64, 0x8, 0x0, false); */ MLXSW_ITEM32(pci, cqe, lag, 0x00, 23, 1); -/* pci_cqe_system_port +/* pci_cqe_system_port/lag_id * When lag=0: System port on which the packet was received * When lag=1: * bits [15:4] LAG ID on which the packet was received * bits [3:0] sub_port on which the packet was received */ MLXSW_ITEM32(pci, cqe, system_port, 0x00, 0, 16); +MLXSW_ITEM32(pci, cqe, lag_id, 0x00, 4, 12); +MLXSW_ITEM32(pci, cqe, lag_port_index, 0x00, 0, 4); /* pci_cqe_wqe_counter * WQE count of the WQEs completed on the associated dqn diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 236fb5d2ad69c9..4e4e4dcf054ff2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -286,6 +286,7 @@ MLXSW_ITEM32_INDEXED(reg, sfd, rec_swid, MLXSW_REG_SFD_BASE_LEN, 24, 8, enum mlxsw_reg_sfd_rec_type { MLXSW_REG_SFD_REC_TYPE_UNICAST = 0x0, + MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG = 0x1, }; /* reg_sfd_rec_type @@ -376,24 +377,34 @@ MLXSW_ITEM32_INDEXED(reg, sfd, uc_fid_vid, MLXSW_REG_SFD_BASE_LEN, 0, 16, MLXSW_ITEM32_INDEXED(reg, sfd, uc_system_port, MLXSW_REG_SFD_BASE_LEN, 0, 16, MLXSW_REG_SFD_REC_LEN, 0x0C, false); -static inline void mlxsw_reg_sfd_uc_pack(char *payload, int rec_index, - enum mlxsw_reg_sfd_rec_policy policy, - const char *mac, u16 vid, - enum mlxsw_reg_sfd_rec_action action, - u8 local_port) +static inline void mlxsw_reg_sfd_rec_pack(char *payload, int rec_index, + enum mlxsw_reg_sfd_rec_type rec_type, + enum mlxsw_reg_sfd_rec_policy policy, + const char *mac, + enum mlxsw_reg_sfd_rec_action action) { u8 num_rec = mlxsw_reg_sfd_num_rec_get(payload); if (rec_index >= num_rec) mlxsw_reg_sfd_num_rec_set(payload, rec_index + 1); mlxsw_reg_sfd_rec_swid_set(payload, rec_index, 0); - mlxsw_reg_sfd_rec_type_set(payload, rec_index, - MLXSW_REG_SFD_REC_TYPE_UNICAST); + mlxsw_reg_sfd_rec_type_set(payload, rec_index, rec_type); mlxsw_reg_sfd_rec_policy_set(payload, rec_index, policy); mlxsw_reg_sfd_rec_mac_memcpy_to(payload, rec_index, mac); + mlxsw_reg_sfd_rec_action_set(payload, rec_index, action); +} + +static inline void mlxsw_reg_sfd_uc_pack(char *payload, int rec_index, + enum mlxsw_reg_sfd_rec_policy policy, + const char *mac, u16 vid, + enum mlxsw_reg_sfd_rec_action action, + u8 local_port) +{ + mlxsw_reg_sfd_rec_pack(payload, rec_index, + MLXSW_REG_SFD_REC_TYPE_UNICAST, + policy, mac, action); mlxsw_reg_sfd_uc_sub_port_set(payload, rec_index, 0); mlxsw_reg_sfd_uc_fid_vid_set(payload, rec_index, vid); - mlxsw_reg_sfd_rec_action_set(payload, rec_index, action); mlxsw_reg_sfd_uc_system_port_set(payload, rec_index, local_port); } @@ -406,6 +417,58 @@ static inline void mlxsw_reg_sfd_uc_unpack(char *payload, int rec_index, *p_local_port = mlxsw_reg_sfd_uc_system_port_get(payload, rec_index); } +/* reg_sfd_uc_lag_sub_port + * LAG sub port. + * Must be 0 if multichannel VEPA is not enabled. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_sub_port, MLXSW_REG_SFD_BASE_LEN, 16, 8, + MLXSW_REG_SFD_REC_LEN, 0x08, false); + +/* reg_sfd_uc_lag_fid_vid + * Filtering ID or VLAN ID + * For SwitchX and SwitchX-2: + * - Dynamic entries (policy 2,3) use FID + * - Static entries (policy 0) use VID + * - When independent learning is configured, VID=FID + * For Spectrum: use FID for both Dynamic and Static entries. + * VID should not be used. + * Access: Index + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_fid_vid, MLXSW_REG_SFD_BASE_LEN, 0, 16, + MLXSW_REG_SFD_REC_LEN, 0x08, false); + +/* reg_sfd_uc_lag_lag_id + * LAG Identifier - pointer into the LAG descriptor table. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_lag_id, MLXSW_REG_SFD_BASE_LEN, 0, 10, + MLXSW_REG_SFD_REC_LEN, 0x0C, false); + +static inline void +mlxsw_reg_sfd_uc_lag_pack(char *payload, int rec_index, + enum mlxsw_reg_sfd_rec_policy policy, + const char *mac, u16 vid, + enum mlxsw_reg_sfd_rec_action action, + u16 lag_id) +{ + mlxsw_reg_sfd_rec_pack(payload, rec_index, + MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG, + policy, mac, action); + mlxsw_reg_sfd_uc_lag_sub_port_set(payload, rec_index, 0); + mlxsw_reg_sfd_uc_lag_fid_vid_set(payload, rec_index, vid); + mlxsw_reg_sfd_uc_lag_lag_id_set(payload, rec_index, lag_id); +} + +static inline void mlxsw_reg_sfd_uc_lag_unpack(char *payload, int rec_index, + char *mac, u16 *p_vid, + u16 *p_lag_id) +{ + mlxsw_reg_sfd_rec_mac_memcpy_from(payload, rec_index, mac); + *p_vid = mlxsw_reg_sfd_uc_lag_fid_vid_get(payload, rec_index); + *p_lag_id = mlxsw_reg_sfd_uc_lag_lag_id_get(payload, rec_index); +} + /* SFN - Switch FDB Notification Register * ------------------------------------------- * The switch provides notifications on newly learned FDB entries and @@ -456,8 +519,12 @@ MLXSW_ITEM32_INDEXED(reg, sfn, rec_swid, MLXSW_REG_SFN_BASE_LEN, 24, 8, enum mlxsw_reg_sfn_rec_type { /* MAC addresses learned on a regular port. */ MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC = 0x5, - /* Aged-out MAC address on a regular port */ + /* MAC addresses learned on a LAG port. */ + MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC_LAG = 0x6, + /* Aged-out MAC address on a regular port. */ MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC = 0x7, + /* Aged-out MAC address on a LAG port. */ + MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC_LAG = 0x8, }; /* reg_sfn_rec_type @@ -505,6 +572,22 @@ static inline void mlxsw_reg_sfn_mac_unpack(char *payload, int rec_index, *p_local_port = mlxsw_reg_sfn_mac_system_port_get(payload, rec_index); } +/* reg_sfn_mac_lag_lag_id + * LAG ID (pointer into the LAG descriptor table). + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sfn, mac_lag_lag_id, MLXSW_REG_SFN_BASE_LEN, 0, 10, + MLXSW_REG_SFN_REC_LEN, 0x0C, false); + +static inline void mlxsw_reg_sfn_mac_lag_unpack(char *payload, int rec_index, + char *mac, u16 *p_vid, + u16 *p_lag_id) +{ + mlxsw_reg_sfn_rec_mac_memcpy_from(payload, rec_index, mac); + *p_vid = mlxsw_reg_sfn_mac_fid_get(payload, rec_index); + *p_lag_id = mlxsw_reg_sfn_mac_lag_lag_id_get(payload, rec_index); +} + /* SPMS - Switch Port MSTP/RSTP State Register * ------------------------------------------- * Configures the spanning tree state of a physical port. @@ -865,6 +948,293 @@ static inline void mlxsw_reg_sftr_pack(char *payload, mlxsw_reg_sftr_port_mask_set(payload, port, 1); } +/* SLDR - Switch LAG Descriptor Register + * ----------------------------------------- + * The switch LAG descriptor register is populated by LAG descriptors. + * Each LAG descriptor is indexed by lag_id. The LAG ID runs from 0 to + * max_lag-1. + */ +#define MLXSW_REG_SLDR_ID 0x2014 +#define MLXSW_REG_SLDR_LEN 0x0C /* counting in only one port in list */ + +static const struct mlxsw_reg_info mlxsw_reg_sldr = { + .id = MLXSW_REG_SLDR_ID, + .len = MLXSW_REG_SLDR_LEN, +}; + +enum mlxsw_reg_sldr_op { + /* Indicates a creation of a new LAG-ID, lag_id must be valid */ + MLXSW_REG_SLDR_OP_LAG_CREATE, + MLXSW_REG_SLDR_OP_LAG_DESTROY, + /* Ports that appear in the list have the Distributor enabled */ + MLXSW_REG_SLDR_OP_LAG_ADD_PORT_LIST, + /* Removes ports from the disributor list */ + MLXSW_REG_SLDR_OP_LAG_REMOVE_PORT_LIST, +}; + +/* reg_sldr_op + * Operation. + * Access: RW + */ +MLXSW_ITEM32(reg, sldr, op, 0x00, 29, 3); + +/* reg_sldr_lag_id + * LAG identifier. The lag_id is the index into the LAG descriptor table. + * Access: Index + */ +MLXSW_ITEM32(reg, sldr, lag_id, 0x00, 0, 10); + +static inline void mlxsw_reg_sldr_lag_create_pack(char *payload, u8 lag_id) +{ + MLXSW_REG_ZERO(sldr, payload); + mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_CREATE); + mlxsw_reg_sldr_lag_id_set(payload, lag_id); +} + +static inline void mlxsw_reg_sldr_lag_destroy_pack(char *payload, u8 lag_id) +{ + MLXSW_REG_ZERO(sldr, payload); + mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_DESTROY); + mlxsw_reg_sldr_lag_id_set(payload, lag_id); +} + +/* reg_sldr_num_ports + * The number of member ports of the LAG. + * Reserved for Create / Destroy operations + * For Add / Remove operations - indicates the number of ports in the list. + * Access: RW + */ +MLXSW_ITEM32(reg, sldr, num_ports, 0x04, 24, 8); + +/* reg_sldr_system_port + * System port. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sldr, system_port, 0x08, 0, 16, 4, 0, false); + +static inline void mlxsw_reg_sldr_lag_add_port_pack(char *payload, u8 lag_id, + u8 local_port) +{ + MLXSW_REG_ZERO(sldr, payload); + mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_ADD_PORT_LIST); + mlxsw_reg_sldr_lag_id_set(payload, lag_id); + mlxsw_reg_sldr_num_ports_set(payload, 1); + mlxsw_reg_sldr_system_port_set(payload, 0, local_port); +} + +static inline void mlxsw_reg_sldr_lag_remove_port_pack(char *payload, u8 lag_id, + u8 local_port) +{ + MLXSW_REG_ZERO(sldr, payload); + mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_REMOVE_PORT_LIST); + mlxsw_reg_sldr_lag_id_set(payload, lag_id); + mlxsw_reg_sldr_num_ports_set(payload, 1); + mlxsw_reg_sldr_system_port_set(payload, 0, local_port); +} + +/* SLCR - Switch LAG Configuration 2 Register + * ------------------------------------------- + * The Switch LAG Configuration register is used for configuring the + * LAG properties of the switch. + */ +#define MLXSW_REG_SLCR_ID 0x2015 +#define MLXSW_REG_SLCR_LEN 0x10 + +static const struct mlxsw_reg_info mlxsw_reg_slcr = { + .id = MLXSW_REG_SLCR_ID, + .len = MLXSW_REG_SLCR_LEN, +}; + +enum mlxsw_reg_slcr_pp { + /* Global Configuration (for all ports) */ + MLXSW_REG_SLCR_PP_GLOBAL, + /* Per port configuration, based on local_port field */ + MLXSW_REG_SLCR_PP_PER_PORT, +}; + +/* reg_slcr_pp + * Per Port Configuration + * Note: Reading at Global mode results in reading port 1 configuration. + * Access: Index + */ +MLXSW_ITEM32(reg, slcr, pp, 0x00, 24, 1); + +/* reg_slcr_local_port + * Local port number + * Supported from CPU port + * Not supported from router port + * Reserved when pp = Global Configuration + * Access: Index + */ +MLXSW_ITEM32(reg, slcr, local_port, 0x00, 16, 8); + +enum mlxsw_reg_slcr_type { + MLXSW_REG_SLCR_TYPE_CRC, /* default */ + MLXSW_REG_SLCR_TYPE_XOR, + MLXSW_REG_SLCR_TYPE_RANDOM, +}; + +/* reg_slcr_type + * Hash type + * Access: RW + */ +MLXSW_ITEM32(reg, slcr, type, 0x00, 0, 4); + +/* Ingress port */ +#define MLXSW_REG_SLCR_LAG_HASH_IN_PORT BIT(0) +/* SMAC - for IPv4 and IPv6 packets */ +#define MLXSW_REG_SLCR_LAG_HASH_SMAC_IP BIT(1) +/* SMAC - for non-IP packets */ +#define MLXSW_REG_SLCR_LAG_HASH_SMAC_NONIP BIT(2) +#define MLXSW_REG_SLCR_LAG_HASH_SMAC \ + (MLXSW_REG_SLCR_LAG_HASH_SMAC_IP | \ + MLXSW_REG_SLCR_LAG_HASH_SMAC_NONIP) +/* DMAC - for IPv4 and IPv6 packets */ +#define MLXSW_REG_SLCR_LAG_HASH_DMAC_IP BIT(3) +/* DMAC - for non-IP packets */ +#define MLXSW_REG_SLCR_LAG_HASH_DMAC_NONIP BIT(4) +#define MLXSW_REG_SLCR_LAG_HASH_DMAC \ + (MLXSW_REG_SLCR_LAG_HASH_DMAC_IP | \ + MLXSW_REG_SLCR_LAG_HASH_DMAC_NONIP) +/* Ethertype - for IPv4 and IPv6 packets */ +#define MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_IP BIT(5) +/* Ethertype - for non-IP packets */ +#define MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_NONIP BIT(6) +#define MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE \ + (MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_IP | \ + MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_NONIP) +/* VLAN ID - for IPv4 and IPv6 packets */ +#define MLXSW_REG_SLCR_LAG_HASH_VLANID_IP BIT(7) +/* VLAN ID - for non-IP packets */ +#define MLXSW_REG_SLCR_LAG_HASH_VLANID_NONIP BIT(8) +#define MLXSW_REG_SLCR_LAG_HASH_VLANID \ + (MLXSW_REG_SLCR_LAG_HASH_VLANID_IP | \ + MLXSW_REG_SLCR_LAG_HASH_VLANID_NONIP) +/* Source IP address (can be IPv4 or IPv6) */ +#define MLXSW_REG_SLCR_LAG_HASH_SIP BIT(9) +/* Destination IP address (can be IPv4 or IPv6) */ +#define MLXSW_REG_SLCR_LAG_HASH_DIP BIT(10) +/* TCP/UDP source port */ +#define MLXSW_REG_SLCR_LAG_HASH_SPORT BIT(11) +/* TCP/UDP destination port*/ +#define MLXSW_REG_SLCR_LAG_HASH_DPORT BIT(12) +/* IPv4 Protocol/IPv6 Next Header */ +#define MLXSW_REG_SLCR_LAG_HASH_IPPROTO BIT(13) +/* IPv6 Flow label */ +#define MLXSW_REG_SLCR_LAG_HASH_FLOWLABEL BIT(14) +/* SID - FCoE source ID */ +#define MLXSW_REG_SLCR_LAG_HASH_FCOE_SID BIT(15) +/* DID - FCoE destination ID */ +#define MLXSW_REG_SLCR_LAG_HASH_FCOE_DID BIT(16) +/* OXID - FCoE originator exchange ID */ +#define MLXSW_REG_SLCR_LAG_HASH_FCOE_OXID BIT(17) +/* Destination QP number - for RoCE packets */ +#define MLXSW_REG_SLCR_LAG_HASH_ROCE_DQP BIT(19) + +/* reg_slcr_lag_hash + * LAG hashing configuration. This is a bitmask, in which each set + * bit includes the corresponding item in the LAG hash calculation. + * The default lag_hash contains SMAC, DMAC, VLANID and + * Ethertype (for all packet types). + * Access: RW + */ +MLXSW_ITEM32(reg, slcr, lag_hash, 0x04, 0, 20); + +static inline void mlxsw_reg_slcr_pack(char *payload, u16 lag_hash) +{ + MLXSW_REG_ZERO(slcr, payload); + mlxsw_reg_slcr_pp_set(payload, MLXSW_REG_SLCR_PP_GLOBAL); + mlxsw_reg_slcr_type_set(payload, MLXSW_REG_SLCR_TYPE_XOR); + mlxsw_reg_slcr_lag_hash_set(payload, lag_hash); +} + +/* SLCOR - Switch LAG Collector Register + * ------------------------------------- + * The Switch LAG Collector register controls the Local Port membership + * in a LAG and enablement of the collector. + */ +#define MLXSW_REG_SLCOR_ID 0x2016 +#define MLXSW_REG_SLCOR_LEN 0x10 + +static const struct mlxsw_reg_info mlxsw_reg_slcor = { + .id = MLXSW_REG_SLCOR_ID, + .len = MLXSW_REG_SLCOR_LEN, +}; + +enum mlxsw_reg_slcor_col { + /* Port is added with collector disabled */ + MLXSW_REG_SLCOR_COL_LAG_ADD_PORT, + MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_ENABLED, + MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_DISABLED, + MLXSW_REG_SLCOR_COL_LAG_REMOVE_PORT, +}; + +/* reg_slcor_col + * Collector configuration + * Access: RW + */ +MLXSW_ITEM32(reg, slcor, col, 0x00, 30, 2); + +/* reg_slcor_local_port + * Local port number + * Not supported for CPU port + * Access: Index + */ +MLXSW_ITEM32(reg, slcor, local_port, 0x00, 16, 8); + +/* reg_slcor_lag_id + * LAG Identifier. Index into the LAG descriptor table. + * Access: Index + */ +MLXSW_ITEM32(reg, slcor, lag_id, 0x00, 0, 10); + +/* reg_slcor_port_index + * Port index in the LAG list. Only valid on Add Port to LAG col. + * Valid range is from 0 to cap_max_lag_members-1 + * Access: RW + */ +MLXSW_ITEM32(reg, slcor, port_index, 0x04, 0, 10); + +static inline void mlxsw_reg_slcor_pack(char *payload, + u8 local_port, u16 lag_id, + enum mlxsw_reg_slcor_col col) +{ + MLXSW_REG_ZERO(slcor, payload); + mlxsw_reg_slcor_col_set(payload, col); + mlxsw_reg_slcor_local_port_set(payload, local_port); + mlxsw_reg_slcor_lag_id_set(payload, lag_id); +} + +static inline void mlxsw_reg_slcor_port_add_pack(char *payload, + u8 local_port, u16 lag_id, + u8 port_index) +{ + mlxsw_reg_slcor_pack(payload, local_port, lag_id, + MLXSW_REG_SLCOR_COL_LAG_ADD_PORT); + mlxsw_reg_slcor_port_index_set(payload, port_index); +} + +static inline void mlxsw_reg_slcor_port_remove_pack(char *payload, + u8 local_port, u16 lag_id) +{ + mlxsw_reg_slcor_pack(payload, local_port, lag_id, + MLXSW_REG_SLCOR_COL_LAG_REMOVE_PORT); +} + +static inline void mlxsw_reg_slcor_col_enable_pack(char *payload, + u8 local_port, u16 lag_id) +{ + mlxsw_reg_slcor_pack(payload, local_port, lag_id, + MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_ENABLED); +} + +static inline void mlxsw_reg_slcor_col_disable_pack(char *payload, + u8 local_port, u16 lag_id) +{ + mlxsw_reg_slcor_pack(payload, local_port, lag_id, + MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_ENABLED); +} + /* SPMLR - Switch Port MAC Learning Register * ----------------------------------------- * Controls the Switch MAC learning policy per port. @@ -2087,6 +2457,284 @@ static inline void mlxsw_reg_hpkt_pack(char *payload, u8 action, u16 trap_id) mlxsw_reg_hpkt_ctrl_set(payload, MLXSW_REG_HPKT_CTRL_PACKET_DEFAULT); } +/* MFCR - Management Fan Control Register + * -------------------------------------- + * This register controls the settings of the Fan Speed PWM mechanism. + */ +#define MLXSW_REG_MFCR_ID 0x9001 +#define MLXSW_REG_MFCR_LEN 0x08 + +static const struct mlxsw_reg_info mlxsw_reg_mfcr = { + .id = MLXSW_REG_MFCR_ID, + .len = MLXSW_REG_MFCR_LEN, +}; + +enum mlxsw_reg_mfcr_pwm_frequency { + MLXSW_REG_MFCR_PWM_FEQ_11HZ = 0x00, + MLXSW_REG_MFCR_PWM_FEQ_14_7HZ = 0x01, + MLXSW_REG_MFCR_PWM_FEQ_22_1HZ = 0x02, + MLXSW_REG_MFCR_PWM_FEQ_1_4KHZ = 0x40, + MLXSW_REG_MFCR_PWM_FEQ_5KHZ = 0x41, + MLXSW_REG_MFCR_PWM_FEQ_20KHZ = 0x42, + MLXSW_REG_MFCR_PWM_FEQ_22_5KHZ = 0x43, + MLXSW_REG_MFCR_PWM_FEQ_25KHZ = 0x44, +}; + +/* reg_mfcr_pwm_frequency + * Controls the frequency of the PWM signal. + * Access: RW + */ +MLXSW_ITEM32(reg, mfcr, pwm_frequency, 0x00, 0, 6); + +#define MLXSW_MFCR_TACHOS_MAX 10 + +/* reg_mfcr_tacho_active + * Indicates which of the tachometer is active (bit per tachometer). + * Access: RO + */ +MLXSW_ITEM32(reg, mfcr, tacho_active, 0x04, 16, MLXSW_MFCR_TACHOS_MAX); + +#define MLXSW_MFCR_PWMS_MAX 5 + +/* reg_mfcr_pwm_active + * Indicates which of the PWM control is active (bit per PWM). + * Access: RO + */ +MLXSW_ITEM32(reg, mfcr, pwm_active, 0x04, 0, MLXSW_MFCR_PWMS_MAX); + +static inline void +mlxsw_reg_mfcr_pack(char *payload, + enum mlxsw_reg_mfcr_pwm_frequency pwm_frequency) +{ + MLXSW_REG_ZERO(mfcr, payload); + mlxsw_reg_mfcr_pwm_frequency_set(payload, pwm_frequency); +} + +static inline void +mlxsw_reg_mfcr_unpack(char *payload, + enum mlxsw_reg_mfcr_pwm_frequency *p_pwm_frequency, + u16 *p_tacho_active, u8 *p_pwm_active) +{ + *p_pwm_frequency = mlxsw_reg_mfcr_pwm_frequency_get(payload); + *p_tacho_active = mlxsw_reg_mfcr_tacho_active_get(payload); + *p_pwm_active = mlxsw_reg_mfcr_pwm_active_get(payload); +} + +/* MFSC - Management Fan Speed Control Register + * -------------------------------------------- + * This register controls the settings of the Fan Speed PWM mechanism. + */ +#define MLXSW_REG_MFSC_ID 0x9002 +#define MLXSW_REG_MFSC_LEN 0x08 + +static const struct mlxsw_reg_info mlxsw_reg_mfsc = { + .id = MLXSW_REG_MFSC_ID, + .len = MLXSW_REG_MFSC_LEN, +}; + +/* reg_mfsc_pwm + * Fan pwm to control / monitor. + * Access: Index + */ +MLXSW_ITEM32(reg, mfsc, pwm, 0x00, 24, 3); + +/* reg_mfsc_pwm_duty_cycle + * Controls the duty cycle of the PWM. Value range from 0..255 to + * represent duty cycle of 0%...100%. + * Access: RW + */ +MLXSW_ITEM32(reg, mfsc, pwm_duty_cycle, 0x04, 0, 8); + +static inline void mlxsw_reg_mfsc_pack(char *payload, u8 pwm, + u8 pwm_duty_cycle) +{ + MLXSW_REG_ZERO(mfsc, payload); + mlxsw_reg_mfsc_pwm_set(payload, pwm); + mlxsw_reg_mfsc_pwm_duty_cycle_set(payload, pwm_duty_cycle); +} + +/* MFSM - Management Fan Speed Measurement + * --------------------------------------- + * This register controls the settings of the Tacho measurements and + * enables reading the Tachometer measurements. + */ +#define MLXSW_REG_MFSM_ID 0x9003 +#define MLXSW_REG_MFSM_LEN 0x08 + +static const struct mlxsw_reg_info mlxsw_reg_mfsm = { + .id = MLXSW_REG_MFSM_ID, + .len = MLXSW_REG_MFSM_LEN, +}; + +/* reg_mfsm_tacho + * Fan tachometer index. + * Access: Index + */ +MLXSW_ITEM32(reg, mfsm, tacho, 0x00, 24, 4); + +/* reg_mfsm_rpm + * Fan speed (round per minute). + * Access: RO + */ +MLXSW_ITEM32(reg, mfsm, rpm, 0x04, 0, 16); + +static inline void mlxsw_reg_mfsm_pack(char *payload, u8 tacho) +{ + MLXSW_REG_ZERO(mfsm, payload); + mlxsw_reg_mfsm_tacho_set(payload, tacho); +} + +/* MTCAP - Management Temperature Capabilities + * ------------------------------------------- + * This register exposes the capabilities of the device and + * system temperature sensing. + */ +#define MLXSW_REG_MTCAP_ID 0x9009 +#define MLXSW_REG_MTCAP_LEN 0x08 + +static const struct mlxsw_reg_info mlxsw_reg_mtcap = { + .id = MLXSW_REG_MTCAP_ID, + .len = MLXSW_REG_MTCAP_LEN, +}; + +/* reg_mtcap_sensor_count + * Number of sensors supported by the device. + * This includes the QSFP module sensors (if exists in the QSFP module). + * Access: RO + */ +MLXSW_ITEM32(reg, mtcap, sensor_count, 0x00, 0, 7); + +/* MTMP - Management Temperature + * ----------------------------- + * This register controls the settings of the temperature measurements + * and enables reading the temperature measurements. Note that temperature + * is in 0.125 degrees Celsius. + */ +#define MLXSW_REG_MTMP_ID 0x900A +#define MLXSW_REG_MTMP_LEN 0x20 + +static const struct mlxsw_reg_info mlxsw_reg_mtmp = { + .id = MLXSW_REG_MTMP_ID, + .len = MLXSW_REG_MTMP_LEN, +}; + +/* reg_mtmp_sensor_index + * Sensors index to access. + * 64-127 of sensor_index are mapped to the SFP+/QSFP modules sequentially + * (module 0 is mapped to sensor_index 64). + * Access: Index + */ +MLXSW_ITEM32(reg, mtmp, sensor_index, 0x00, 0, 7); + +/* Convert to milli degrees Celsius */ +#define MLXSW_REG_MTMP_TEMP_TO_MC(val) (val * 125) + +/* reg_mtmp_temperature + * Temperature reading from the sensor. Reading is in 0.125 Celsius + * degrees units. + * Access: RO + */ +MLXSW_ITEM32(reg, mtmp, temperature, 0x04, 0, 16); + +/* reg_mtmp_mte + * Max Temperature Enable - enables measuring the max temperature on a sensor. + * Access: RW + */ +MLXSW_ITEM32(reg, mtmp, mte, 0x08, 31, 1); + +/* reg_mtmp_mtr + * Max Temperature Reset - clears the value of the max temperature register. + * Access: WO + */ +MLXSW_ITEM32(reg, mtmp, mtr, 0x08, 30, 1); + +/* reg_mtmp_max_temperature + * The highest measured temperature from the sensor. + * When the bit mte is cleared, the field max_temperature is reserved. + * Access: RO + */ +MLXSW_ITEM32(reg, mtmp, max_temperature, 0x08, 0, 16); + +#define MLXSW_REG_MTMP_SENSOR_NAME_SIZE 8 + +/* reg_mtmp_sensor_name + * Sensor Name + * Access: RO + */ +MLXSW_ITEM_BUF(reg, mtmp, sensor_name, 0x18, MLXSW_REG_MTMP_SENSOR_NAME_SIZE); + +static inline void mlxsw_reg_mtmp_pack(char *payload, u8 sensor_index, + bool max_temp_enable, + bool max_temp_reset) +{ + MLXSW_REG_ZERO(mtmp, payload); + mlxsw_reg_mtmp_sensor_index_set(payload, sensor_index); + mlxsw_reg_mtmp_mte_set(payload, max_temp_enable); + mlxsw_reg_mtmp_mtr_set(payload, max_temp_reset); +} + +static inline void mlxsw_reg_mtmp_unpack(char *payload, unsigned int *p_temp, + unsigned int *p_max_temp, + char *sensor_name) +{ + u16 temp; + + if (p_temp) { + temp = mlxsw_reg_mtmp_temperature_get(payload); + *p_temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp); + } + if (p_max_temp) { + temp = mlxsw_reg_mtmp_temperature_get(payload); + *p_max_temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp); + } + if (sensor_name) + mlxsw_reg_mtmp_sensor_name_memcpy_from(payload, sensor_name); +} + +/* MLCR - Management LED Control Register + * -------------------------------------- + * Controls the system LEDs. + */ +#define MLXSW_REG_MLCR_ID 0x902B +#define MLXSW_REG_MLCR_LEN 0x0C + +static const struct mlxsw_reg_info mlxsw_reg_mlcr = { + .id = MLXSW_REG_MLCR_ID, + .len = MLXSW_REG_MLCR_LEN, +}; + +/* reg_mlcr_local_port + * Local port number. + * Access: RW + */ +MLXSW_ITEM32(reg, mlcr, local_port, 0x00, 16, 8); + +#define MLXSW_REG_MLCR_DURATION_MAX 0xFFFF + +/* reg_mlcr_beacon_duration + * Duration of the beacon to be active, in seconds. + * 0x0 - Will turn off the beacon. + * 0xFFFF - Will turn on the beacon until explicitly turned off. + * Access: RW + */ +MLXSW_ITEM32(reg, mlcr, beacon_duration, 0x04, 0, 16); + +/* reg_mlcr_beacon_remain + * Remaining duration of the beacon, in seconds. + * 0xFFFF indicates an infinite amount of time. + * Access: RO + */ +MLXSW_ITEM32(reg, mlcr, beacon_remain, 0x08, 0, 16); + +static inline void mlxsw_reg_mlcr_pack(char *payload, u8 local_port, + bool active) +{ + MLXSW_REG_ZERO(mlcr, payload); + mlxsw_reg_mlcr_local_port_set(payload, local_port); + mlxsw_reg_mlcr_beacon_duration_set(payload, active ? + MLXSW_REG_MLCR_DURATION_MAX : 0); +} + /* SBPR - Shared Buffer Pools Register * ----------------------------------- * The SBPR configures and retrieves the shared buffer pools and configuration. @@ -2375,6 +3023,12 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id) return "SFGC"; case MLXSW_REG_SFTR_ID: return "SFTR"; + case MLXSW_REG_SLDR_ID: + return "SLDR"; + case MLXSW_REG_SLCR_ID: + return "SLCR"; + case MLXSW_REG_SLCOR_ID: + return "SLCOR"; case MLXSW_REG_SPMLR_ID: return "SPMLR"; case MLXSW_REG_SVFA_ID: @@ -2405,6 +3059,18 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id) return "HTGT"; case MLXSW_REG_HPKT_ID: return "HPKT"; + case MLXSW_REG_MFCR_ID: + return "MFCR"; + case MLXSW_REG_MFSC_ID: + return "MFSC"; + case MLXSW_REG_MFSM_ID: + return "MFSM"; + case MLXSW_REG_MTCAP_ID: + return "MTCAP"; + case MLXSW_REG_MTMP_ID: + return "MTMP"; + case MLXSW_REG_MLCR_ID: + return "MLCR"; case MLXSW_REG_SBPR_ID: return "SBPR"; case MLXSW_REG_SBCM_ID: diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 3be4a2355ead52..3ec07b9a458df1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -417,6 +417,10 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } +static void mlxsw_sp_set_rx_mode(struct net_device *dev) +{ +} + static int mlxsw_sp_port_set_mac_address(struct net_device *dev, void *p) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); @@ -725,6 +729,7 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_open = mlxsw_sp_port_open, .ndo_stop = mlxsw_sp_port_stop, .ndo_start_xmit = mlxsw_sp_port_xmit, + .ndo_set_rx_mode = mlxsw_sp_set_rx_mode, .ndo_set_mac_address = mlxsw_sp_port_set_mac_address, .ndo_change_mtu = mlxsw_sp_port_change_mtu, .ndo_get_stats64 = mlxsw_sp_port_get_stats64, @@ -859,6 +864,29 @@ static void mlxsw_sp_port_get_strings(struct net_device *dev, } } +static int mlxsw_sp_port_set_phys_id(struct net_device *dev, + enum ethtool_phys_id_state state) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char mlcr_pl[MLXSW_REG_MLCR_LEN]; + bool active; + + switch (state) { + case ETHTOOL_ID_ACTIVE: + active = true; + break; + case ETHTOOL_ID_INACTIVE: + active = false; + break; + default: + return -EOPNOTSUPP; + } + + mlxsw_reg_mlcr_pack(mlcr_pl, mlxsw_sp_port->local_port, active); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mlcr), mlcr_pl); +} + static void mlxsw_sp_port_get_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { @@ -1205,6 +1233,7 @@ static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { .get_drvinfo = mlxsw_sp_port_get_drvinfo, .get_link = ethtool_op_get_link, .get_strings = mlxsw_sp_port_get_strings, + .set_phys_id = mlxsw_sp_port_set_phys_id, .get_ethtool_stats = mlxsw_sp_port_get_stats, .get_sset_count = mlxsw_sp_port_get_sset_count, .get_settings = mlxsw_sp_port_get_settings, @@ -1683,6 +1712,22 @@ static int mlxsw_sp_flood_init(struct mlxsw_sp *mlxsw_sp) return 0; } +static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp) +{ + char slcr_pl[MLXSW_REG_SLCR_LEN]; + + mlxsw_reg_slcr_pack(slcr_pl, MLXSW_REG_SLCR_LAG_HASH_SMAC | + MLXSW_REG_SLCR_LAG_HASH_DMAC | + MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE | + MLXSW_REG_SLCR_LAG_HASH_VLANID | + MLXSW_REG_SLCR_LAG_HASH_SIP | + MLXSW_REG_SLCR_LAG_HASH_DIP | + MLXSW_REG_SLCR_LAG_HASH_SPORT | + MLXSW_REG_SLCR_LAG_HASH_DPORT | + MLXSW_REG_SLCR_LAG_HASH_IPPROTO); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcr), slcr_pl); +} + static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core, const struct mlxsw_bus_info *mlxsw_bus_info) { @@ -1728,6 +1773,12 @@ static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core, goto err_buffers_init; } + err = mlxsw_sp_lag_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize LAG\n"); + goto err_lag_init; + } + err = mlxsw_sp_switchdev_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize switchdev\n"); @@ -1737,6 +1788,7 @@ static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core, return 0; err_switchdev_init: +err_lag_init: err_buffers_init: err_flood_init: mlxsw_sp_traps_fini(mlxsw_sp); @@ -1764,9 +1816,9 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = { .used_max_vepa_channels = 1, .max_vepa_channels = 0, .used_max_lag = 1, - .max_lag = 64, + .max_lag = MLXSW_SP_LAG_MAX, .used_max_port_per_lag = 1, - .max_port_per_lag = 16, + .max_port_per_lag = MLXSW_SP_PORT_PER_LAG_MAX, .used_max_mid = 1, .max_mid = 7000, .used_max_pgt = 1, @@ -1865,19 +1917,245 @@ static void mlxsw_sp_master_bridge_dec(struct mlxsw_sp *mlxsw_sp, mlxsw_sp->master_bridge.dev = NULL; } -static int mlxsw_sp_netdevice_event(struct notifier_block *unused, - unsigned long event, void *ptr) +static int mlxsw_sp_lag_create(struct mlxsw_sp *mlxsw_sp, u16 lag_id) +{ + char sldr_pl[MLXSW_REG_SLDR_LEN]; + + mlxsw_reg_sldr_lag_create_pack(sldr_pl, lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl); +} + +static int mlxsw_sp_lag_destroy(struct mlxsw_sp *mlxsw_sp, u16 lag_id) +{ + char sldr_pl[MLXSW_REG_SLDR_LEN]; + + mlxsw_reg_sldr_lag_destroy_pack(sldr_pl, lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl); +} + +static int mlxsw_sp_lag_col_port_add(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id, u8 port_index) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char slcor_pl[MLXSW_REG_SLCOR_LEN]; + + mlxsw_reg_slcor_port_add_pack(slcor_pl, mlxsw_sp_port->local_port, + lag_id, port_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl); +} + +static int mlxsw_sp_lag_col_port_remove(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char slcor_pl[MLXSW_REG_SLCOR_LEN]; + + mlxsw_reg_slcor_port_remove_pack(slcor_pl, mlxsw_sp_port->local_port, + lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl); +} + +static int mlxsw_sp_lag_col_port_enable(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char slcor_pl[MLXSW_REG_SLCOR_LEN]; + + mlxsw_reg_slcor_col_enable_pack(slcor_pl, mlxsw_sp_port->local_port, + lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl); +} + +static int mlxsw_sp_lag_col_port_disable(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char slcor_pl[MLXSW_REG_SLCOR_LEN]; + + mlxsw_reg_slcor_col_disable_pack(slcor_pl, mlxsw_sp_port->local_port, + lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl); +} + +static int mlxsw_sp_lag_index_get(struct mlxsw_sp *mlxsw_sp, + struct net_device *lag_dev, + u16 *p_lag_id) +{ + struct mlxsw_sp_upper *lag; + int free_lag_id = -1; + int i; + + for (i = 0; i < MLXSW_SP_LAG_MAX; i++) { + lag = mlxsw_sp_lag_get(mlxsw_sp, i); + if (lag->ref_count) { + if (lag->dev == lag_dev) { + *p_lag_id = i; + return 0; + } + } else if (free_lag_id < 0) { + free_lag_id = i; + } + } + if (free_lag_id < 0) + return -EBUSY; + *p_lag_id = free_lag_id; + return 0; +} + +static bool +mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp, + struct net_device *lag_dev, + struct netdev_lag_upper_info *lag_upper_info) +{ + u16 lag_id; + + if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0) + return false; + if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) + return false; + return true; +} + +static int mlxsw_sp_port_lag_index_get(struct mlxsw_sp *mlxsw_sp, + u16 lag_id, u8 *p_port_index) +{ + int i; + + for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) { + if (!mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i)) { + *p_port_index = i; + return 0; + } + } + return -EBUSY; +} + +static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_upper *lag; + u16 lag_id; + u8 port_index; + int err; + + err = mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id); + if (err) + return err; + lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id); + if (!lag->ref_count) { + err = mlxsw_sp_lag_create(mlxsw_sp, lag_id); + if (err) + return err; + lag->dev = lag_dev; + } + + err = mlxsw_sp_port_lag_index_get(mlxsw_sp, lag_id, &port_index); + if (err) + return err; + err = mlxsw_sp_lag_col_port_add(mlxsw_sp_port, lag_id, port_index); + if (err) + goto err_col_port_add; + err = mlxsw_sp_lag_col_port_enable(mlxsw_sp_port, lag_id); + if (err) + goto err_col_port_enable; + + mlxsw_core_lag_mapping_set(mlxsw_sp->core, lag_id, port_index, + mlxsw_sp_port->local_port); + mlxsw_sp_port->lag_id = lag_id; + mlxsw_sp_port->lagged = 1; + lag->ref_count++; + return 0; + +err_col_port_add: + if (!lag->ref_count) + mlxsw_sp_lag_destroy(mlxsw_sp, lag_id); +err_col_port_enable: + mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); + return err; +} + +static int mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_upper *lag; + u16 lag_id = mlxsw_sp_port->lag_id; + int err; + + if (!mlxsw_sp_port->lagged) + return 0; + lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id); + WARN_ON(lag->ref_count == 0); + + err = mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, lag_id); + if (err) + return err; + mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); + if (err) + return err; + + if (lag->ref_count == 1) { + err = mlxsw_sp_lag_destroy(mlxsw_sp, lag_id); + if (err) + return err; + } + + mlxsw_core_lag_mapping_clear(mlxsw_sp->core, lag_id, + mlxsw_sp_port->local_port); + mlxsw_sp_port->lagged = 0; + lag->ref_count--; + return 0; +} + +static int mlxsw_sp_lag_dist_port_add(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char sldr_pl[MLXSW_REG_SLDR_LEN]; + + mlxsw_reg_sldr_lag_add_port_pack(sldr_pl, lag_id, + mlxsw_sp_port->local_port); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl); +} + +static int mlxsw_sp_lag_dist_port_remove(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char sldr_pl[MLXSW_REG_SLDR_LEN]; + + mlxsw_reg_sldr_lag_remove_port_pack(sldr_pl, lag_id, + mlxsw_sp_port->local_port); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl); +} + +static int mlxsw_sp_port_lag_tx_en_set(struct mlxsw_sp_port *mlxsw_sp_port, + bool lag_tx_enabled) +{ + if (lag_tx_enabled) + return mlxsw_sp_lag_dist_port_add(mlxsw_sp_port, + mlxsw_sp_port->lag_id); + else + return mlxsw_sp_lag_dist_port_remove(mlxsw_sp_port, + mlxsw_sp_port->lag_id); +} + +static int mlxsw_sp_port_lag_changed(struct mlxsw_sp_port *mlxsw_sp_port, + struct netdev_lag_lower_state_info *info) +{ + return mlxsw_sp_port_lag_tx_en_set(mlxsw_sp_port, info->tx_enabled); +} + +static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, + unsigned long event, void *ptr) { - struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netdev_notifier_changeupper_info *info; struct mlxsw_sp_port *mlxsw_sp_port; struct net_device *upper_dev; struct mlxsw_sp *mlxsw_sp; int err; - if (!mlxsw_sp_port_dev_check(dev)) - return NOTIFY_DONE; - mlxsw_sp_port = netdev_priv(dev); mlxsw_sp = mlxsw_sp_port->mlxsw_sp; info = ptr; @@ -1885,16 +2163,22 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused, switch (event) { case NETDEV_PRECHANGEUPPER: upper_dev = info->upper_dev; + if (!info->master || !info->linking) + break; /* HW limitation forbids to put ports to multiple bridges. */ - if (info->master && info->linking && - netif_is_bridge_master(upper_dev) && + if (netif_is_bridge_master(upper_dev) && !mlxsw_sp_master_bridge_check(mlxsw_sp, upper_dev)) return NOTIFY_BAD; + if (netif_is_lag_master(upper_dev) && + !mlxsw_sp_master_lag_check(mlxsw_sp, upper_dev, + info->upper_info)) + return NOTIFY_BAD; break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; - if (info->master && - netif_is_bridge_master(upper_dev)) { + if (!info->master) + break; + if (netif_is_bridge_master(upper_dev)) { if (info->linking) { err = mlxsw_sp_port_bridge_join(mlxsw_sp_port); if (err) @@ -1908,6 +2192,46 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused, mlxsw_sp_port->bridged = 0; mlxsw_sp_master_bridge_dec(mlxsw_sp, upper_dev); } + } else if (netif_is_lag_master(upper_dev)) { + if (info->linking) { + err = mlxsw_sp_port_lag_join(mlxsw_sp_port, + upper_dev); + if (err) { + netdev_err(dev, "Failed to join link aggregation\n"); + return NOTIFY_BAD; + } + } else { + err = mlxsw_sp_port_lag_leave(mlxsw_sp_port, + upper_dev); + if (err) { + netdev_err(dev, "Failed to leave link aggregation\n"); + return NOTIFY_BAD; + } + } + } + break; + } + + return NOTIFY_DONE; +} + +static int mlxsw_sp_netdevice_port_lower_event(struct net_device *dev, + unsigned long event, void *ptr) +{ + struct netdev_notifier_changelowerstate_info *info; + struct mlxsw_sp_port *mlxsw_sp_port; + int err; + + mlxsw_sp_port = netdev_priv(dev); + info = ptr; + + switch (event) { + case NETDEV_CHANGELOWERSTATE: + if (netif_is_lag_port(dev) && mlxsw_sp_port->lagged) { + err = mlxsw_sp_port_lag_changed(mlxsw_sp_port, + info->lower_state_info); + if (err) + netdev_err(dev, "Failed to reflect link aggregation lower state change\n"); } break; } @@ -1915,6 +2239,52 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused, return NOTIFY_DONE; } +static int mlxsw_sp_netdevice_port_event(struct net_device *dev, + unsigned long event, void *ptr) +{ + switch (event) { + case NETDEV_PRECHANGEUPPER: + case NETDEV_CHANGEUPPER: + return mlxsw_sp_netdevice_port_upper_event(dev, event, ptr); + case NETDEV_CHANGELOWERSTATE: + return mlxsw_sp_netdevice_port_lower_event(dev, event, ptr); + } + + return NOTIFY_DONE; +} + +static int mlxsw_sp_netdevice_lag_event(struct net_device *lag_dev, + unsigned long event, void *ptr) +{ + struct net_device *dev; + struct list_head *iter; + int ret; + + netdev_for_each_lower_dev(lag_dev, dev, iter) { + if (mlxsw_sp_port_dev_check(dev)) { + ret = mlxsw_sp_netdevice_port_event(dev, event, ptr); + if (ret == NOTIFY_BAD) + return ret; + } + } + + return NOTIFY_DONE; +} + +static int mlxsw_sp_netdevice_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + if (mlxsw_sp_port_dev_check(dev)) + return mlxsw_sp_netdevice_port_event(dev, event, ptr); + + if (netif_is_lag_master(dev)) + return mlxsw_sp_netdevice_lag_event(dev, event, ptr); + + return NOTIFY_DONE; +} + static struct notifier_block mlxsw_sp_netdevice_nb __read_mostly = { .notifier_call = mlxsw_sp_netdevice_event, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 4365c8bccc6d29..48be5a63b9b5b2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -46,9 +46,16 @@ #include "core.h" #define MLXSW_SP_VFID_BASE VLAN_N_VID +#define MLXSW_SP_LAG_MAX 64 +#define MLXSW_SP_PORT_PER_LAG_MAX 16 struct mlxsw_sp_port; +struct mlxsw_sp_upper { + struct net_device *dev; + unsigned int ref_count; +}; + struct mlxsw_sp { unsigned long active_vfids[BITS_TO_LONGS(VLAN_N_VID)]; unsigned long active_fids[BITS_TO_LONGS(VLAN_N_VID)]; @@ -63,12 +70,16 @@ struct mlxsw_sp { } fdb_notify; #define MLXSW_SP_DEFAULT_AGEING_TIME 300 u32 ageing_time; - struct { - struct net_device *dev; - unsigned int ref_count; - } master_bridge; + struct mlxsw_sp_upper master_bridge; + struct mlxsw_sp_upper lags[MLXSW_SP_LAG_MAX]; }; +static inline struct mlxsw_sp_upper * +mlxsw_sp_lag_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id) +{ + return &mlxsw_sp->lags[lag_id]; +} + struct mlxsw_sp_port_pcpu_stats { u64 rx_packets; u64 rx_bytes; @@ -87,8 +98,10 @@ struct mlxsw_sp_port { u8 learning:1, learning_sync:1, uc_flood:1, - bridged:1; + bridged:1, + lagged:1; u16 pvid; + u16 lag_id; /* 802.1Q bridge VLANs */ unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; /* VLAN interfaces */ @@ -96,6 +109,18 @@ struct mlxsw_sp_port { u16 nr_vfids; }; +static inline struct mlxsw_sp_port * +mlxsw_sp_port_lagged_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id, u8 port_index) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + u8 local_port; + + local_port = mlxsw_core_lag_mapping_get(mlxsw_sp->core, + lag_id, port_index); + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + return mlxsw_sp_port && mlxsw_sp_port->lagged ? mlxsw_sp_port : NULL; +} + enum mlxsw_sp_flood_table { MLXSW_SP_FLOOD_TABLE_UC, MLXSW_SP_FLOOD_TABLE_BM, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index f21e23983a1a0e..406dab2f6b17ce 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -490,32 +490,56 @@ static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, untagged_flag, pvid_flag); } -static int mlxsw_sp_port_fdb_op(struct mlxsw_sp_port *mlxsw_sp_port, - const char *mac, u16 vid, bool adding, - bool dynamic) +static enum mlxsw_reg_sfd_rec_policy mlxsw_sp_sfd_rec_policy(bool dynamic) { - enum mlxsw_reg_sfd_rec_policy policy; - enum mlxsw_reg_sfd_op op; + return dynamic ? MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_INGRESS : + MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY; +} + +static enum mlxsw_reg_sfd_op mlxsw_sp_sfd_op(bool adding) +{ + return adding ? MLXSW_REG_SFD_OP_WRITE_EDIT : + MLXSW_REG_SFD_OP_WRITE_REMOVE; +} + +static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp_port *mlxsw_sp_port, + const char *mac, u16 vid, bool adding, + bool dynamic) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char *sfd_pl; int err; - if (!vid) - vid = mlxsw_sp_port->pvid; - sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL); if (!sfd_pl) return -ENOMEM; - policy = dynamic ? MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_INGRESS : - MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY; - op = adding ? MLXSW_REG_SFD_OP_WRITE_EDIT : - MLXSW_REG_SFD_OP_WRITE_REMOVE; - mlxsw_reg_sfd_pack(sfd_pl, op, 0); - mlxsw_reg_sfd_uc_pack(sfd_pl, 0, policy, + mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); + mlxsw_reg_sfd_uc_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic), mac, vid, MLXSW_REG_SFD_REC_ACTION_NOP, mlxsw_sp_port->local_port); - err = mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(sfd), - sfd_pl); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); + kfree(sfd_pl); + + return err; +} + +static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id, + const char *mac, u16 vid, bool adding, + bool dynamic) +{ + char *sfd_pl; + int err; + + sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL); + if (!sfd_pl) + return -ENOMEM; + + mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); + mlxsw_reg_sfd_uc_lag_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic), + mac, vid, MLXSW_REG_SFD_REC_ACTION_NOP, + lag_id); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); kfree(sfd_pl); return err; @@ -526,11 +550,21 @@ mlxsw_sp_port_fdb_static_add(struct mlxsw_sp_port *mlxsw_sp_port, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans) { + u16 vid = fdb->vid; + if (switchdev_trans_ph_prepare(trans)) return 0; - return mlxsw_sp_port_fdb_op(mlxsw_sp_port, fdb->addr, fdb->vid, - true, false); + if (!vid) + vid = mlxsw_sp_port->pvid; + + if (!mlxsw_sp_port->lagged) + return mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port, + fdb->addr, vid, true, false); + else + return mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_port->lag_id, + fdb->addr, vid, true, false); } static int mlxsw_sp_port_obj_add(struct net_device *dev, @@ -645,8 +679,15 @@ static int mlxsw_sp_port_fdb_static_del(struct mlxsw_sp_port *mlxsw_sp_port, const struct switchdev_obj_port_fdb *fdb) { - return mlxsw_sp_port_fdb_op(mlxsw_sp_port, fdb->addr, fdb->vid, - false, false); + if (!mlxsw_sp_port->lagged) + return mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port, + fdb->addr, fdb->vid, + false, false); + else + return mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_port->lag_id, + fdb->addr, fdb->vid, + false, false); } static int mlxsw_sp_port_obj_del(struct net_device *dev, @@ -672,14 +713,30 @@ static int mlxsw_sp_port_obj_del(struct net_device *dev, return err; } +static struct mlxsw_sp_port *mlxsw_sp_lag_rep_port(struct mlxsw_sp *mlxsw_sp, + u16 lag_id) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + int i; + + for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) { + mlxsw_sp_port = mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i); + if (mlxsw_sp_port) + return mlxsw_sp_port; + } + return NULL; +} + static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, struct switchdev_obj_port_fdb *fdb, switchdev_obj_dump_cb_t *cb) { + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char *sfd_pl; char mac[ETH_ALEN]; u16 vid; u8 local_port; + u16 lag_id; u8 num_rec; int stored_err = 0; int i; @@ -692,8 +749,7 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_reg_sfd_pack(sfd_pl, MLXSW_REG_SFD_OP_QUERY_DUMP, 0); do { mlxsw_reg_sfd_num_rec_set(sfd_pl, MLXSW_REG_SFD_REC_MAX_COUNT); - err = mlxsw_reg_query(mlxsw_sp_port->mlxsw_sp->core, - MLXSW_REG(sfd), sfd_pl); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); if (err) goto out; @@ -718,6 +774,20 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, if (err) stored_err = err; } + break; + case MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG: + mlxsw_reg_sfd_uc_lag_unpack(sfd_pl, i, + mac, &vid, &lag_id); + if (mlxsw_sp_port == + mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id)) { + ether_addr_copy(fdb->addr, mac); + fdb->ndm_state = NUD_REACHABLE; + fdb->vid = vid; + err = cb(&fdb->obj); + if (err) + stored_err = err; + } + break; } } } while (num_rec == MLXSW_REG_SFD_REC_MAX_COUNT); @@ -779,6 +849,21 @@ static const struct switchdev_ops mlxsw_sp_port_switchdev_ops = { .switchdev_port_obj_dump = mlxsw_sp_port_obj_dump, }; +static void mlxsw_sp_fdb_call_notifiers(bool learning, bool learning_sync, + bool adding, char *mac, u16 vid, + struct net_device *dev) +{ + struct switchdev_notifier_fdb_info info; + unsigned long notifier_type; + + if (learning && learning_sync) { + info.addr = mac; + info.vid = vid; + notifier_type = adding ? SWITCHDEV_FDB_ADD : SWITCHDEV_FDB_DEL; + call_switchdev_notifiers(notifier_type, dev, &info.info); + } +} + static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, char *sfn_pl, int rec_index, bool adding) @@ -796,24 +881,49 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, return; } - err = mlxsw_sp_port_fdb_op(mlxsw_sp_port, mac, vid, - adding && mlxsw_sp_port->learning, true); + err = mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port, mac, vid, + adding && mlxsw_sp_port->learning, true); if (err) { if (net_ratelimit()) netdev_err(mlxsw_sp_port->dev, "Failed to set FDB entry\n"); return; } - if (mlxsw_sp_port->learning && mlxsw_sp_port->learning_sync) { - struct switchdev_notifier_fdb_info info; - unsigned long notifier_type; + mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning, + mlxsw_sp_port->learning_sync, + adding, mac, vid, mlxsw_sp_port->dev); +} - info.addr = mac; - info.vid = vid; - notifier_type = adding ? SWITCHDEV_FDB_ADD : SWITCHDEV_FDB_DEL; - call_switchdev_notifiers(notifier_type, mlxsw_sp_port->dev, - &info.info); +static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp, + char *sfn_pl, int rec_index, + bool adding) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + char mac[ETH_ALEN]; + u16 lag_id; + u16 vid; + int err; + + mlxsw_reg_sfn_mac_lag_unpack(sfn_pl, rec_index, mac, &vid, &lag_id); + mlxsw_sp_port = mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id); + if (!mlxsw_sp_port) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Cannot find port representor for LAG\n"); + return; } + + err = mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp, lag_id, mac, vid, + adding && mlxsw_sp_port->learning, + true); + if (err) { + if (net_ratelimit()) + netdev_err(mlxsw_sp_port->dev, "Failed to set FDB entry\n"); + return; + } + + mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning, + mlxsw_sp_port->learning_sync, + adding, mac, vid, + mlxsw_sp_lag_get(mlxsw_sp, lag_id)->dev); } static void mlxsw_sp_fdb_notify_rec_process(struct mlxsw_sp *mlxsw_sp, @@ -828,6 +938,14 @@ static void mlxsw_sp_fdb_notify_rec_process(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_fdb_notify_mac_process(mlxsw_sp, sfn_pl, rec_index, false); break; + case MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC_LAG: + mlxsw_sp_fdb_notify_mac_lag_process(mlxsw_sp, sfn_pl, + rec_index, true); + break; + case MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC_LAG: + mlxsw_sp_fdb_notify_mac_lag_process(mlxsw_sp, sfn_pl, + rec_index, false); + break; } } diff --git a/drivers/net/ethernet/netronome/Kconfig b/drivers/net/ethernet/netronome/Kconfig new file mode 100644 index 00000000000000..9508ad782c3062 --- /dev/null +++ b/drivers/net/ethernet/netronome/Kconfig @@ -0,0 +1,36 @@ +# +# Netronome device configuration +# + +config NET_VENDOR_NETRONOME + bool "Netronome(R) devices" + default y + ---help--- + If you have a Netronome(R) network (Ethernet) card or device, say Y. + + Note that the answer to this question doesn't directly affect the + kernel: saying N will just cause the configurator to skip all + the questions about Netronome(R) cards. If you say Y, you will be + asked for your specific card in the following questions. + +if NET_VENDOR_NETRONOME + +config NFP_NETVF + tristate "Netronome(R) NFP4000/NFP6000 VF NIC driver" + depends on PCI && PCI_MSI + depends on VXLAN || VXLAN=n + ---help--- + This driver supports SR-IOV virtual functions of + the Netronome(R) NFP4000/NFP6000 cards working as + a advanced Ethernet NIC. + +config NFP_NET_DEBUG + bool "Debug support for Netronome(R) NFP3200/NFP6000 NIC drivers" + depends on NFP_NET || NFP_NETVF + ---help--- + Enable extra sanity checks and debugfs support in + Netronome(R) NFP3200/NFP6000 NIC PF and VF drivers. + Note: selecting this option may adversely impact + performance. + +endif diff --git a/drivers/net/ethernet/netronome/Makefile b/drivers/net/ethernet/netronome/Makefile new file mode 100644 index 00000000000000..dcb7b383f63481 --- /dev/null +++ b/drivers/net/ethernet/netronome/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the Netronome network device drivers +# + +obj-$(CONFIG_NFP_NETVF) += nfp/ diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile new file mode 100644 index 00000000000000..68178819ff12e2 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/Makefile @@ -0,0 +1,8 @@ +obj-$(CONFIG_NFP_NETVF) += nfp_netvf.o + +nfp_netvf-objs := \ + nfp_net_common.o \ + nfp_net_ethtool.o \ + nfp_netvf_main.o + +nfp_netvf-$(CONFIG_NFP_NET_DEBUG) += nfp_net_debugfs.o diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h new file mode 100644 index 00000000000000..ab264e1bccd0f2 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -0,0 +1,748 @@ +/* + * Copyright (C) 2015 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * nfp_net.h + * Declarations for Netronome network device driver. + * Authors: Jakub Kicinski <jakub.kicinski@netronome.com> + * Jason McMullan <jason.mcmullan@netronome.com> + * Rolf Neugebauer <rolf.neugebauer@netronome.com> + */ + +#ifndef _NFP_NET_H_ +#define _NFP_NET_H_ + +#include <linux/interrupt.h> +#include <linux/netdevice.h> +#include <linux/pci.h> +#include <asm-generic/io-64-nonatomic-hi-lo.h> + +#include "nfp_net_ctrl.h" + +#define nn_err(nn, fmt, args...) netdev_err((nn)->netdev, fmt, ## args) +#define nn_warn(nn, fmt, args...) netdev_warn((nn)->netdev, fmt, ## args) +#define nn_info(nn, fmt, args...) netdev_info((nn)->netdev, fmt, ## args) +#define nn_dbg(nn, fmt, args...) netdev_dbg((nn)->netdev, fmt, ## args) +#define nn_warn_ratelimit(nn, fmt, args...) \ + do { \ + if (unlikely(net_ratelimit())) \ + netdev_warn((nn)->netdev, fmt, ## args); \ + } while (0) + +/* Max time to wait for NFP to respond on updates (in ms) */ +#define NFP_NET_POLL_TIMEOUT 5000 + +/* Bar allocation */ +#define NFP_NET_CRTL_BAR 0 +#define NFP_NET_Q0_BAR 2 +#define NFP_NET_Q1_BAR 4 /* OBSOLETE */ + +/* Max bits in DMA address */ +#define NFP_NET_MAX_DMA_BITS 40 + +/* Default size for MTU and freelist buffer sizes */ +#define NFP_NET_DEFAULT_MTU 1500 +#define NFP_NET_DEFAULT_RX_BUFSZ 2048 + +/* Maximum number of bytes prepended to a packet */ +#define NFP_NET_MAX_PREPEND 64 + +/* Interrupt definitions */ +#define NFP_NET_NON_Q_VECTORS 2 +#define NFP_NET_IRQ_LSC_IDX 0 +#define NFP_NET_IRQ_EXN_IDX 1 + +/* Queue/Ring definitions */ +#define NFP_NET_MAX_TX_RINGS 64 /* Max. # of Tx rings per device */ +#define NFP_NET_MAX_RX_RINGS 64 /* Max. # of Rx rings per device */ + +#define NFP_NET_MIN_TX_DESCS 256 /* Min. # of Tx descs per ring */ +#define NFP_NET_MIN_RX_DESCS 256 /* Min. # of Rx descs per ring */ +#define NFP_NET_MAX_TX_DESCS (256 * 1024) /* Max. # of Tx descs per ring */ +#define NFP_NET_MAX_RX_DESCS (256 * 1024) /* Max. # of Rx descs per ring */ + +#define NFP_NET_TX_DESCS_DEFAULT 4096 /* Default # of Tx descs per ring */ +#define NFP_NET_RX_DESCS_DEFAULT 4096 /* Default # of Rx descs per ring */ + +#define NFP_NET_FL_BATCH 16 /* Add freelist in this Batch size */ + +/* Offload definitions */ +#define NFP_NET_N_VXLAN_PORTS (NFP_NET_CFG_VXLAN_SZ / sizeof(__be16)) + +/* Forward declarations */ +struct nfp_net; +struct nfp_net_r_vector; + +/* Convenience macro for writing dma address into RX/TX descriptors */ +#define nfp_desc_set_dma_addr(desc, dma_addr) \ + do { \ + __typeof(desc) __d = (desc); \ + dma_addr_t __addr = (dma_addr); \ + \ + __d->dma_addr_lo = cpu_to_le32(lower_32_bits(__addr)); \ + __d->dma_addr_hi = upper_32_bits(__addr) & 0xff; \ + } while (0) + +/* TX descriptor format */ + +#define PCIE_DESC_TX_EOP BIT(7) +#define PCIE_DESC_TX_OFFSET_MASK GENMASK(6, 0) +#define PCIE_DESC_TX_MSS_MASK GENMASK(13, 0) + +/* Flags in the host TX descriptor */ +#define PCIE_DESC_TX_CSUM BIT(7) +#define PCIE_DESC_TX_IP4_CSUM BIT(6) +#define PCIE_DESC_TX_TCP_CSUM BIT(5) +#define PCIE_DESC_TX_UDP_CSUM BIT(4) +#define PCIE_DESC_TX_VLAN BIT(3) +#define PCIE_DESC_TX_LSO BIT(2) +#define PCIE_DESC_TX_ENCAP BIT(1) +#define PCIE_DESC_TX_O_IP4_CSUM BIT(0) + +struct nfp_net_tx_desc { + union { + struct { + u8 dma_addr_hi; /* High bits of host buf address */ + __le16 dma_len; /* Length to DMA for this desc */ + u8 offset_eop; /* Offset in buf where pkt starts + + * highest bit is eop flag. + */ + __le32 dma_addr_lo; /* Low 32bit of host buf addr */ + + __le16 mss; /* MSS to be used for LSO */ + u8 l4_offset; /* LSO, where the L4 data starts */ + u8 flags; /* TX Flags, see @PCIE_DESC_TX_* */ + + __le16 vlan; /* VLAN tag to add if indicated */ + __le16 data_len; /* Length of frame + meta data */ + } __packed; + __le32 vals[4]; + }; +}; + +/** + * struct nfp_net_tx_buf - software TX buffer descriptor + * @skb: sk_buff associated with this buffer + * @dma_addr: DMA mapping address of the buffer + * @fidx: Fragment index (-1 for the head and [0..nr_frags-1] for frags) + * @pkt_cnt: Number of packets to be produced out of the skb associated + * with this buffer (valid only on the head's buffer). + * Will be 1 for all non-TSO packets. + * @real_len: Number of bytes which to be produced out of the skb (valid only + * on the head's buffer). Equal to skb->len for non-TSO packets. + */ +struct nfp_net_tx_buf { + struct sk_buff *skb; + dma_addr_t dma_addr; + short int fidx; + u16 pkt_cnt; + u32 real_len; +}; + +/** + * struct nfp_net_tx_ring - TX ring structure + * @r_vec: Back pointer to ring vector structure + * @idx: Ring index from Linux's perspective + * @qcidx: Queue Controller Peripheral (QCP) queue index for the TX queue + * @qcp_q: Pointer to base of the QCP TX queue + * @cnt: Size of the queue in number of descriptors + * @wr_p: TX ring write pointer (free running) + * @rd_p: TX ring read pointer (free running) + * @qcp_rd_p: Local copy of QCP TX queue read pointer + * @wr_ptr_add: Accumulated number of buffers to add to QCP write pointer + * (used for .xmit_more delayed kick) + * @txbufs: Array of transmitted TX buffers, to free on transmit + * @txds: Virtual address of TX ring in host memory + * @dma: DMA address of the TX ring + * @size: Size, in bytes, of the TX ring (needed to free) + */ +struct nfp_net_tx_ring { + struct nfp_net_r_vector *r_vec; + + u32 idx; + int qcidx; + u8 __iomem *qcp_q; + + u32 cnt; + u32 wr_p; + u32 rd_p; + u32 qcp_rd_p; + + u32 wr_ptr_add; + + struct nfp_net_tx_buf *txbufs; + struct nfp_net_tx_desc *txds; + + dma_addr_t dma; + unsigned int size; +} ____cacheline_aligned; + +/* RX and freelist descriptor format */ + +#define PCIE_DESC_RX_DD BIT(7) +#define PCIE_DESC_RX_META_LEN_MASK GENMASK(6, 0) + +/* Flags in the RX descriptor */ +#define PCIE_DESC_RX_RSS cpu_to_le16(BIT(15)) +#define PCIE_DESC_RX_I_IP4_CSUM cpu_to_le16(BIT(14)) +#define PCIE_DESC_RX_I_IP4_CSUM_OK cpu_to_le16(BIT(13)) +#define PCIE_DESC_RX_I_TCP_CSUM cpu_to_le16(BIT(12)) +#define PCIE_DESC_RX_I_TCP_CSUM_OK cpu_to_le16(BIT(11)) +#define PCIE_DESC_RX_I_UDP_CSUM cpu_to_le16(BIT(10)) +#define PCIE_DESC_RX_I_UDP_CSUM_OK cpu_to_le16(BIT(9)) +#define PCIE_DESC_RX_SPARE cpu_to_le16(BIT(8)) +#define PCIE_DESC_RX_EOP cpu_to_le16(BIT(7)) +#define PCIE_DESC_RX_IP4_CSUM cpu_to_le16(BIT(6)) +#define PCIE_DESC_RX_IP4_CSUM_OK cpu_to_le16(BIT(5)) +#define PCIE_DESC_RX_TCP_CSUM cpu_to_le16(BIT(4)) +#define PCIE_DESC_RX_TCP_CSUM_OK cpu_to_le16(BIT(3)) +#define PCIE_DESC_RX_UDP_CSUM cpu_to_le16(BIT(2)) +#define PCIE_DESC_RX_UDP_CSUM_OK cpu_to_le16(BIT(1)) +#define PCIE_DESC_RX_VLAN cpu_to_le16(BIT(0)) + +#define PCIE_DESC_RX_CSUM_ALL (PCIE_DESC_RX_IP4_CSUM | \ + PCIE_DESC_RX_TCP_CSUM | \ + PCIE_DESC_RX_UDP_CSUM | \ + PCIE_DESC_RX_I_IP4_CSUM | \ + PCIE_DESC_RX_I_TCP_CSUM | \ + PCIE_DESC_RX_I_UDP_CSUM) +#define PCIE_DESC_RX_CSUM_OK_SHIFT 1 +#define __PCIE_DESC_RX_CSUM_ALL le16_to_cpu(PCIE_DESC_RX_CSUM_ALL) +#define __PCIE_DESC_RX_CSUM_ALL_OK (__PCIE_DESC_RX_CSUM_ALL >> \ + PCIE_DESC_RX_CSUM_OK_SHIFT) + +struct nfp_net_rx_desc { + union { + struct { + u8 dma_addr_hi; /* High bits of the buf address */ + __le16 reserved; /* Must be zero */ + u8 meta_len_dd; /* Must be zero */ + + __le32 dma_addr_lo; /* Low bits of the buffer address */ + } __packed fld; + + struct { + __le16 data_len; /* Length of the frame + meta data */ + u8 reserved; + u8 meta_len_dd; /* Length of meta data prepended + + * descriptor done flag. + */ + + __le16 flags; /* RX flags. See @PCIE_DESC_RX_* */ + __le16 vlan; /* VLAN if stripped */ + } __packed rxd; + + __le32 vals[2]; + }; +}; + +struct nfp_net_rx_hash { + __be32 hash_type; + __be32 hash; +}; + +/** + * struct nfp_net_rx_buf - software RX buffer descriptor + * @skb: sk_buff associated with this buffer + * @dma_addr: DMA mapping address of the buffer + */ +struct nfp_net_rx_buf { + struct sk_buff *skb; + dma_addr_t dma_addr; +}; + +/** + * struct nfp_net_rx_ring - RX ring structure + * @r_vec: Back pointer to ring vector structure + * @cnt: Size of the queue in number of descriptors + * @wr_p: FL/RX ring write pointer (free running) + * @rd_p: FL/RX ring read pointer (free running) + * @idx: Ring index from Linux's perspective + * @fl_qcidx: Queue Controller Peripheral (QCP) queue index for the freelist + * @rx_qcidx: Queue Controller Peripheral (QCP) queue index for the RX queue + * @qcp_fl: Pointer to base of the QCP freelist queue + * @qcp_rx: Pointer to base of the QCP RX queue + * @wr_ptr_add: Accumulated number of buffers to add to QCP write pointer + * (used for free list batching) + * @rxbufs: Array of transmitted FL/RX buffers + * @rxds: Virtual address of FL/RX ring in host memory + * @dma: DMA address of the FL/RX ring + * @size: Size, in bytes, of the FL/RX ring (needed to free) + */ +struct nfp_net_rx_ring { + struct nfp_net_r_vector *r_vec; + + u32 cnt; + u32 wr_p; + u32 rd_p; + + u16 idx; + u16 wr_ptr_add; + + int fl_qcidx; + int rx_qcidx; + u8 __iomem *qcp_fl; + u8 __iomem *qcp_rx; + + struct nfp_net_rx_buf *rxbufs; + struct nfp_net_rx_desc *rxds; + + dma_addr_t dma; + unsigned int size; +} ____cacheline_aligned; + +/** + * struct nfp_net_r_vector - Per ring interrupt vector configuration + * @nfp_net: Backpointer to nfp_net structure + * @napi: NAPI structure for this ring vec + * @tx_ring: Pointer to TX ring + * @rx_ring: Pointer to RX ring + * @irq_idx: Index into MSI-X table + * @rx_sync: Seqlock for atomic updates of RX stats + * @rx_pkts: Number of received packets + * @rx_bytes: Number of received bytes + * @rx_drops: Number of packets dropped on RX due to lack of resources + * @hw_csum_rx_ok: Counter of packets where the HW checksum was OK + * @hw_csum_rx_inner_ok: Counter of packets where the inner HW checksum was OK + * @hw_csum_rx_error: Counter of packets with bad checksums + * @tx_sync: Seqlock for atomic updates of TX stats + * @tx_pkts: Number of Transmitted packets + * @tx_bytes: Number of Transmitted bytes + * @hw_csum_tx: Counter of packets with TX checksum offload requested + * @hw_csum_tx_inner: Counter of inner TX checksum offload requests + * @tx_gather: Counter of packets with Gather DMA + * @tx_lso: Counter of LSO packets sent + * @tx_errors: How many TX errors were encountered + * @tx_busy: How often was TX busy (no space)? + * @handler: Interrupt handler for this ring vector + * @name: Name of the interrupt vector + * @affinity_mask: SMP affinity mask for this vector + * + * This structure ties RX and TX rings to interrupt vectors and a NAPI + * context. This currently only supports one RX and TX ring per + * interrupt vector but might be extended in the future to allow + * association of multiple rings per vector. + */ +struct nfp_net_r_vector { + struct nfp_net *nfp_net; + struct napi_struct napi; + + struct nfp_net_tx_ring *tx_ring; + struct nfp_net_rx_ring *rx_ring; + + int irq_idx; + + struct u64_stats_sync rx_sync; + u64 rx_pkts; + u64 rx_bytes; + u64 rx_drops; + u64 hw_csum_rx_ok; + u64 hw_csum_rx_inner_ok; + u64 hw_csum_rx_error; + + struct u64_stats_sync tx_sync; + u64 tx_pkts; + u64 tx_bytes; + u64 hw_csum_tx; + u64 hw_csum_tx_inner; + u64 tx_gather; + u64 tx_lso; + u64 tx_errors; + u64 tx_busy; + + irq_handler_t handler; + char name[IFNAMSIZ + 8]; + cpumask_t affinity_mask; +} ____cacheline_aligned; + +/* Firmware version as it is written in the 32bit value in the BAR */ +struct nfp_net_fw_version { + u8 minor; + u8 major; + u8 class; + u8 resv; +} __packed; + +static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver, + u8 resv, u8 class, u8 major, u8 minor) +{ + return fw_ver->resv == resv && + fw_ver->class == class && + fw_ver->major == major && + fw_ver->minor == minor; +} + +/** + * struct nfp_net - NFP network device structure + * @pdev: Backpointer to PCI device + * @netdev: Backpointer to net_device structure + * @nfp_fallback: Is the driver used in fallback mode? + * @is_vf: Is the driver attached to a VF? + * @is_nfp3200: Is the driver for a NFP-3200 card? + * @fw_loaded: Is the firmware loaded? + * @ctrl: Local copy of the control register/word. + * @fl_bufsz: Currently configured size of the freelist buffers + * @rx_offset: Offset in the RX buffers where packet data starts + * @cpp: Pointer to the CPP handle + * @nfp_dev_cpp: Pointer to the NFP Device handle + * @ctrl_area: Pointer to the CPP area for the control BAR + * @tx_area: Pointer to the CPP area for the TX queues + * @rx_area: Pointer to the CPP area for the FL/RX queues + * @fw_ver: Firmware version + * @cap: Capabilities advertised by the Firmware + * @max_mtu: Maximum support MTU advertised by the Firmware + * @rss_cfg: RSS configuration + * @rss_key: RSS secret key + * @rss_itbl: RSS indirection table + * @max_tx_rings: Maximum number of TX rings supported by the Firmware + * @max_rx_rings: Maximum number of RX rings supported by the Firmware + * @num_tx_rings: Currently configured number of TX rings + * @num_rx_rings: Currently configured number of RX rings + * @txd_cnt: Size of the TX ring in number of descriptors + * @rxd_cnt: Size of the RX ring in number of descriptors + * @tx_rings: Array of pre-allocated TX ring structures + * @rx_rings: Array of pre-allocated RX ring structures + * @num_irqs: Number of allocated interrupt vectors + * @num_r_vecs: Number of used ring vectors + * @r_vecs: Pre-allocated array of ring vectors + * @irq_entries: Pre-allocated array of MSI-X entries + * @lsc_handler: Handler for Link State Change interrupt + * @lsc_name: Name for Link State Change interrupt + * @exn_handler: Handler for Exception interrupt + * @exn_name: Name for Exception interrupt + * @shared_handler: Handler for shared interrupts + * @shared_name: Name for shared interrupt + * @me_freq_mhz: ME clock_freq (MHz) + * @reconfig_lock: Protects HW reconfiguration request regs/machinery + * @link_up: Is the link up? + * @link_status_lock: Protects @link_up and ensures atomicity with BAR reading + * @rx_coalesce_usecs: RX interrupt moderation usecs delay parameter + * @rx_coalesce_max_frames: RX interrupt moderation frame count parameter + * @tx_coalesce_usecs: TX interrupt moderation usecs delay parameter + * @tx_coalesce_max_frames: TX interrupt moderation frame count parameter + * @vxlan_ports: VXLAN ports for RX inner csum offload communicated to HW + * @vxlan_usecnt: IPv4/IPv6 VXLAN port use counts + * @qcp_cfg: Pointer to QCP queue used for configuration notification + * @ctrl_bar: Pointer to mapped control BAR + * @tx_bar: Pointer to mapped TX queues + * @rx_bar: Pointer to mapped FL/RX queues + * @debugfs_dir: Device directory in debugfs + */ +struct nfp_net { + struct pci_dev *pdev; + struct net_device *netdev; + + unsigned nfp_fallback:1; + unsigned is_vf:1; + unsigned is_nfp3200:1; + unsigned fw_loaded:1; + + u32 ctrl; + u32 fl_bufsz; + + u32 rx_offset; + +#ifdef CONFIG_PCI_IOV + unsigned int num_vfs; + struct vf_data_storage *vfinfo; + int vf_rate_link_speed; +#endif + + struct nfp_cpp *cpp; + struct platform_device *nfp_dev_cpp; + struct nfp_cpp_area *ctrl_area; + struct nfp_cpp_area *tx_area; + struct nfp_cpp_area *rx_area; + + struct nfp_net_fw_version fw_ver; + u32 cap; + u32 max_mtu; + + u32 rss_cfg; + u8 rss_key[NFP_NET_CFG_RSS_KEY_SZ]; + u8 rss_itbl[NFP_NET_CFG_RSS_ITBL_SZ]; + + int max_tx_rings; + int max_rx_rings; + + int num_tx_rings; + int num_rx_rings; + + int stride_tx; + int stride_rx; + + int txd_cnt; + int rxd_cnt; + + struct nfp_net_tx_ring tx_rings[NFP_NET_MAX_TX_RINGS]; + struct nfp_net_rx_ring rx_rings[NFP_NET_MAX_RX_RINGS]; + + u8 num_irqs; + u8 num_r_vecs; + struct nfp_net_r_vector r_vecs[NFP_NET_MAX_TX_RINGS]; + struct msix_entry irq_entries[NFP_NET_NON_Q_VECTORS + + NFP_NET_MAX_TX_RINGS]; + + irq_handler_t lsc_handler; + char lsc_name[IFNAMSIZ + 8]; + + irq_handler_t exn_handler; + char exn_name[IFNAMSIZ + 8]; + + irq_handler_t shared_handler; + char shared_name[IFNAMSIZ + 8]; + + u32 me_freq_mhz; + + bool link_up; + spinlock_t link_status_lock; + + spinlock_t reconfig_lock; + + u32 rx_coalesce_usecs; + u32 rx_coalesce_max_frames; + u32 tx_coalesce_usecs; + u32 tx_coalesce_max_frames; + + __be16 vxlan_ports[NFP_NET_N_VXLAN_PORTS]; + u8 vxlan_usecnt[NFP_NET_N_VXLAN_PORTS]; + + u8 __iomem *qcp_cfg; + + u8 __iomem *ctrl_bar; + u8 __iomem *q_bar; + u8 __iomem *tx_bar; + u8 __iomem *rx_bar; + + struct dentry *debugfs_dir; +}; + +/* Functions to read/write from/to a BAR + * Performs any endian conversion necessary. + */ +static inline void nn_writeb(struct nfp_net *nn, int off, u8 val) +{ + writeb(val, nn->ctrl_bar + off); +} + +/* NFP-3200 can't handle 16-bit accesses too well - hence no readw/writew */ + +static inline u32 nn_readl(struct nfp_net *nn, int off) +{ + return readl(nn->ctrl_bar + off); +} + +static inline void nn_writel(struct nfp_net *nn, int off, u32 val) +{ + writel(val, nn->ctrl_bar + off); +} + +static inline u64 nn_readq(struct nfp_net *nn, int off) +{ + return readq(nn->ctrl_bar + off); +} + +static inline void nn_writeq(struct nfp_net *nn, int off, u64 val) +{ + writeq(val, nn->ctrl_bar + off); +} + +/* Flush posted PCI writes by reading something without side effects */ +static inline void nn_pci_flush(struct nfp_net *nn) +{ + nn_readl(nn, NFP_NET_CFG_VERSION); +} + +/* Queue Controller Peripheral access functions and definitions. + * + * Some of the BARs of the NFP are mapped to portions of the Queue + * Controller Peripheral (QCP) address space on the NFP. A QCP queue + * has a read and a write pointer (as well as a size and flags, + * indicating overflow etc). The QCP offers a number of different + * operation on queue pointers, but here we only offer function to + * either add to a pointer or to read the pointer value. + */ +#define NFP_QCP_QUEUE_ADDR_SZ 0x800 +#define NFP_QCP_QUEUE_OFF(_x) ((_x) * NFP_QCP_QUEUE_ADDR_SZ) +#define NFP_QCP_QUEUE_ADD_RPTR 0x0000 +#define NFP_QCP_QUEUE_ADD_WPTR 0x0004 +#define NFP_QCP_QUEUE_STS_LO 0x0008 +#define NFP_QCP_QUEUE_STS_LO_READPTR_mask 0x3ffff +#define NFP_QCP_QUEUE_STS_HI 0x000c +#define NFP_QCP_QUEUE_STS_HI_WRITEPTR_mask 0x3ffff + +/* The offset of a QCP queues in the PCIe Target (same on NFP3200 and NFP6000 */ +#define NFP_PCIE_QUEUE(_q) (0x80000 + (NFP_QCP_QUEUE_ADDR_SZ * ((_q) & 0xff))) + +/* nfp_qcp_ptr - Read or Write Pointer of a queue */ +enum nfp_qcp_ptr { + NFP_QCP_READ_PTR = 0, + NFP_QCP_WRITE_PTR +}; + +/* There appear to be an *undocumented* upper limit on the value which + * one can add to a queue and that value is either 0x3f or 0x7f. We + * go with 0x3f as a conservative measure. + */ +#define NFP_QCP_MAX_ADD 0x3f + +static inline void _nfp_qcp_ptr_add(u8 __iomem *q, + enum nfp_qcp_ptr ptr, u32 val) +{ + u32 off; + + if (ptr == NFP_QCP_READ_PTR) + off = NFP_QCP_QUEUE_ADD_RPTR; + else + off = NFP_QCP_QUEUE_ADD_WPTR; + + while (val > NFP_QCP_MAX_ADD) { + writel(NFP_QCP_MAX_ADD, q + off); + val -= NFP_QCP_MAX_ADD; + } + + writel(val, q + off); +} + +/** + * nfp_qcp_rd_ptr_add() - Add the value to the read pointer of a queue + * + * @q: Base address for queue structure + * @val: Value to add to the queue pointer + * + * If @val is greater than @NFP_QCP_MAX_ADD multiple writes are performed. + */ +static inline void nfp_qcp_rd_ptr_add(u8 __iomem *q, u32 val) +{ + _nfp_qcp_ptr_add(q, NFP_QCP_READ_PTR, val); +} + +/** + * nfp_qcp_wr_ptr_add() - Add the value to the write pointer of a queue + * + * @q: Base address for queue structure + * @val: Value to add to the queue pointer + * + * If @val is greater than @NFP_QCP_MAX_ADD multiple writes are performed. + */ +static inline void nfp_qcp_wr_ptr_add(u8 __iomem *q, u32 val) +{ + _nfp_qcp_ptr_add(q, NFP_QCP_WRITE_PTR, val); +} + +static inline u32 _nfp_qcp_read(u8 __iomem *q, enum nfp_qcp_ptr ptr) +{ + u32 off; + u32 val; + + if (ptr == NFP_QCP_READ_PTR) + off = NFP_QCP_QUEUE_STS_LO; + else + off = NFP_QCP_QUEUE_STS_HI; + + val = readl(q + off); + + if (ptr == NFP_QCP_READ_PTR) + return val & NFP_QCP_QUEUE_STS_LO_READPTR_mask; + else + return val & NFP_QCP_QUEUE_STS_HI_WRITEPTR_mask; +} + +/** + * nfp_qcp_rd_ptr_read() - Read the current read pointer value for a queue + * @q: Base address for queue structure + * + * Return: Value read. + */ +static inline u32 nfp_qcp_rd_ptr_read(u8 __iomem *q) +{ + return _nfp_qcp_read(q, NFP_QCP_READ_PTR); +} + +/** + * nfp_qcp_wr_ptr_read() - Read the current write pointer value for a queue + * @q: Base address for queue structure + * + * Return: Value read. + */ +static inline u32 nfp_qcp_wr_ptr_read(u8 __iomem *q) +{ + return _nfp_qcp_read(q, NFP_QCP_WRITE_PTR); +} + +/* Globals */ +extern const char nfp_net_driver_name[]; +extern const char nfp_net_driver_version[]; + +/* Prototypes */ +void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver, + void __iomem *ctrl_bar); + +struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev, + int max_tx_rings, int max_rx_rings); +void nfp_net_netdev_free(struct nfp_net *nn); +int nfp_net_netdev_init(struct net_device *netdev); +void nfp_net_netdev_clean(struct net_device *netdev); +void nfp_net_set_ethtool_ops(struct net_device *netdev); +void nfp_net_info(struct nfp_net *nn); +int nfp_net_reconfig(struct nfp_net *nn, u32 update); +void nfp_net_rss_write_itbl(struct nfp_net *nn); +void nfp_net_rss_write_key(struct nfp_net *nn); +void nfp_net_coalesce_write_cfg(struct nfp_net *nn); +int nfp_net_irqs_alloc(struct nfp_net *nn); +void nfp_net_irqs_disable(struct nfp_net *nn); + +#ifdef CONFIG_NFP_NET_DEBUG +void nfp_net_debugfs_create(void); +void nfp_net_debugfs_destroy(void); +void nfp_net_debugfs_adapter_add(struct nfp_net *nn); +void nfp_net_debugfs_adapter_del(struct nfp_net *nn); +#else +static inline void nfp_net_debugfs_create(void) +{ +} + +static inline void nfp_net_debugfs_destroy(void) +{ +} + +static inline void nfp_net_debugfs_adapter_add(struct nfp_net *nn) +{ +} + +static inline void nfp_net_debugfs_adapter_del(struct nfp_net *nn) +{ +} +#endif /* CONFIG_NFP_NET_DEBUG */ + +#endif /* _NFP_NET_H_ */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c new file mode 100644 index 00000000000000..038ac6b14a6039 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -0,0 +1,2432 @@ +/* + * Copyright (C) 2015 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * nfp_net_common.c + * Netronome network device driver: Common functions between PF and VF + * Authors: Jakub Kicinski <jakub.kicinski@netronome.com> + * Jason McMullan <jason.mcmullan@netronome.com> + * Rolf Neugebauer <rolf.neugebauer@netronome.com> + * Brad Petrus <brad.petrus@netronome.com> + * Chris Telfer <chris.telfer@netronome.com> + */ + +#include <linux/version.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/interrupt.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/pci.h> +#include <linux/pci_regs.h> +#include <linux/msi.h> +#include <linux/ethtool.h> +#include <linux/log2.h> +#include <linux/if_vlan.h> +#include <linux/random.h> + +#include <linux/ktime.h> + +#include <net/vxlan.h> + +#include "nfp_net_ctrl.h" +#include "nfp_net.h" + +/** + * nfp_net_get_fw_version() - Read and parse the FW version + * @fw_ver: Output fw_version structure to read to + * @ctrl_bar: Mapped address of the control BAR + */ +void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver, + void __iomem *ctrl_bar) +{ + u32 reg; + + reg = readl(ctrl_bar + NFP_NET_CFG_VERSION); + put_unaligned_le32(reg, fw_ver); +} + +/** + * nfp_net_reconfig() - Reconfigure the firmware + * @nn: NFP Net device to reconfigure + * @update: The value for the update field in the BAR config + * + * Write the update word to the BAR and ping the reconfig queue. The + * poll until the firmware has acknowledged the update by zeroing the + * update word. + * + * Return: Negative errno on error, 0 on success + */ +int nfp_net_reconfig(struct nfp_net *nn, u32 update) +{ + int cnt, ret = 0; + u32 new; + + spin_lock_bh(&nn->reconfig_lock); + + nn_writel(nn, NFP_NET_CFG_UPDATE, update); + /* ensure update is written before pinging HW */ + nn_pci_flush(nn); + nfp_qcp_wr_ptr_add(nn->qcp_cfg, 1); + + /* Poll update field, waiting for NFP to ack the config */ + for (cnt = 0; ; cnt++) { + new = nn_readl(nn, NFP_NET_CFG_UPDATE); + if (new == 0) + break; + if (new & NFP_NET_CFG_UPDATE_ERR) { + nn_err(nn, "Reconfig error: 0x%08x\n", new); + ret = -EIO; + break; + } else if (cnt >= NFP_NET_POLL_TIMEOUT) { + nn_err(nn, "Reconfig timeout for 0x%08x after %dms\n", + update, cnt); + ret = -EIO; + break; + } + mdelay(1); + } + + spin_unlock_bh(&nn->reconfig_lock); + return ret; +} + +/* Interrupt configuration and handling + */ + +/** + * nfp_net_irq_unmask_msix() - Unmask MSI-X after automasking + * @nn: NFP Network structure + * @entry_nr: MSI-X table entry + * + * Clear the MSI-X table mask bit for the given entry bypassing Linux irq + * handling subsystem. Use *only* to reenable automasked vectors. + */ +static void nfp_net_irq_unmask_msix(struct nfp_net *nn, unsigned int entry_nr) +{ + struct list_head *msi_head = &nn->pdev->dev.msi_list; + struct msi_desc *entry; + u32 off; + + /* All MSI-Xs have the same mask_base */ + entry = list_first_entry(msi_head, struct msi_desc, list); + + off = (PCI_MSIX_ENTRY_SIZE * entry_nr) + + PCI_MSIX_ENTRY_VECTOR_CTRL; + writel(0, entry->mask_base + off); + readl(entry->mask_base); +} + +/** + * nfp_net_irq_unmask() - Unmask automasked interrupt + * @nn: NFP Network structure + * @entry_nr: MSI-X table entry + * + * If MSI-X auto-masking is enabled clear the mask bit, otherwise + * clear the ICR for the entry. + */ +static void nfp_net_irq_unmask(struct nfp_net *nn, unsigned int entry_nr) +{ + if (nn->ctrl & NFP_NET_CFG_CTRL_MSIXAUTO) { + nfp_net_irq_unmask_msix(nn, entry_nr); + return; + } + + nn_writeb(nn, NFP_NET_CFG_ICR(entry_nr), NFP_NET_CFG_ICR_UNMASKED); + nn_pci_flush(nn); +} + +/** + * nfp_net_msix_alloc() - Try to allocate MSI-X irqs + * @nn: NFP Network structure + * @nr_vecs: Number of MSI-X vectors to allocate + * + * For MSI-X we want at least NFP_NET_NON_Q_VECTORS + 1 vectors. + * + * Return: Number of MSI-X vectors obtained or 0 on error. + */ +static int nfp_net_msix_alloc(struct nfp_net *nn, int nr_vecs) +{ + struct pci_dev *pdev = nn->pdev; + int nvecs; + int i; + + for (i = 0; i < nr_vecs; i++) + nn->irq_entries[i].entry = i; + + nvecs = pci_enable_msix_range(pdev, nn->irq_entries, + NFP_NET_NON_Q_VECTORS + 1, nr_vecs); + if (nvecs < 0) { + nn_warn(nn, "Failed to enable MSI-X. Wanted %d-%d (err=%d)\n", + NFP_NET_NON_Q_VECTORS + 1, nr_vecs, nvecs); + return 0; + } + + return nvecs; +} + +/** + * nfp_net_irqs_wanted() - Work out how many interrupt vectors we want + * @nn: NFP Network structure + * + * We want a vector per CPU (or ring), whatever is smaller plus + * NFP_NET_NON_Q_VECTORS for LSC etc. + * + * Return: Number of interrupts wanted + */ +static int nfp_net_irqs_wanted(struct nfp_net *nn) +{ + int ncpus; + int vecs; + + ncpus = num_online_cpus(); + + vecs = max_t(int, nn->num_tx_rings, nn->num_rx_rings); + vecs = min_t(int, vecs, ncpus); + + return vecs + NFP_NET_NON_Q_VECTORS; +} + +/** + * nfp_net_irqs_alloc() - allocates MSI-X irqs + * @nn: NFP Network structure + * + * Return: Number of irqs obtained or 0 on error. + */ +int nfp_net_irqs_alloc(struct nfp_net *nn) +{ + int wanted_irqs; + + wanted_irqs = nfp_net_irqs_wanted(nn); + + nn->num_irqs = nfp_net_msix_alloc(nn, wanted_irqs); + if (nn->num_irqs == 0) { + nn_err(nn, "Failed to allocate MSI-X IRQs\n"); + return 0; + } + + nn->num_r_vecs = nn->num_irqs - NFP_NET_NON_Q_VECTORS; + + if (nn->num_irqs < wanted_irqs) + nn_warn(nn, "Unable to allocate %d vectors. Got %d instead\n", + wanted_irqs, nn->num_irqs); + + return nn->num_irqs; +} + +/** + * nfp_net_irqs_disable() - Disable interrupts + * @nn: NFP Network structure + * + * Undoes what @nfp_net_irqs_alloc() does. + */ +void nfp_net_irqs_disable(struct nfp_net *nn) +{ + pci_disable_msix(nn->pdev); +} + +/** + * nfp_net_irq_rxtx() - Interrupt service routine for RX/TX rings. + * @irq: Interrupt + * @data: Opaque data structure + * + * Return: Indicate if the interrupt has been handled. + */ +static irqreturn_t nfp_net_irq_rxtx(int irq, void *data) +{ + struct nfp_net_r_vector *r_vec = data; + + napi_schedule_irqoff(&r_vec->napi); + + /* The FW auto-masks any interrupt, either via the MASK bit in + * the MSI-X table or via the per entry ICR field. So there + * is no need to disable interrupts here. + */ + return IRQ_HANDLED; +} + +/** + * nfp_net_read_link_status() - Reread link status from control BAR + * @nn: NFP Network structure + */ +static void nfp_net_read_link_status(struct nfp_net *nn) +{ + unsigned long flags; + bool link_up; + u32 sts; + + spin_lock_irqsave(&nn->link_status_lock, flags); + + sts = nn_readl(nn, NFP_NET_CFG_STS); + link_up = !!(sts & NFP_NET_CFG_STS_LINK); + + if (nn->link_up == link_up) + goto out; + + nn->link_up = link_up; + + if (nn->link_up) { + netif_carrier_on(nn->netdev); + netdev_info(nn->netdev, "NIC Link is Up\n"); + } else { + netif_carrier_off(nn->netdev); + netdev_info(nn->netdev, "NIC Link is Down\n"); + } +out: + spin_unlock_irqrestore(&nn->link_status_lock, flags); +} + +/** + * nfp_net_irq_lsc() - Interrupt service routine for link state changes + * @irq: Interrupt + * @data: Opaque data structure + * + * Return: Indicate if the interrupt has been handled. + */ +static irqreturn_t nfp_net_irq_lsc(int irq, void *data) +{ + struct nfp_net *nn = data; + + nfp_net_read_link_status(nn); + + nfp_net_irq_unmask(nn, NFP_NET_IRQ_LSC_IDX); + + return IRQ_HANDLED; +} + +/** + * nfp_net_irq_exn() - Interrupt service routine for exceptions + * @irq: Interrupt + * @data: Opaque data structure + * + * Return: Indicate if the interrupt has been handled. + */ +static irqreturn_t nfp_net_irq_exn(int irq, void *data) +{ + struct nfp_net *nn = data; + + nn_err(nn, "%s: UNIMPLEMENTED.\n", __func__); + /* XXX TO BE IMPLEMENTED */ + return IRQ_HANDLED; +} + +/** + * nfp_net_tx_ring_init() - Fill in the boilerplate for a TX ring + * @tx_ring: TX ring structure + */ +static void nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net *nn = r_vec->nfp_net; + + tx_ring->qcidx = tx_ring->idx * nn->stride_tx; + tx_ring->qcp_q = nn->tx_bar + NFP_QCP_QUEUE_OFF(tx_ring->qcidx); +} + +/** + * nfp_net_rx_ring_init() - Fill in the boilerplate for a RX ring + * @rx_ring: RX ring structure + */ +static void nfp_net_rx_ring_init(struct nfp_net_rx_ring *rx_ring) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct nfp_net *nn = r_vec->nfp_net; + + rx_ring->fl_qcidx = rx_ring->idx * nn->stride_rx; + rx_ring->rx_qcidx = rx_ring->fl_qcidx + (nn->stride_rx - 1); + + rx_ring->qcp_fl = nn->rx_bar + NFP_QCP_QUEUE_OFF(rx_ring->fl_qcidx); + rx_ring->qcp_rx = nn->rx_bar + NFP_QCP_QUEUE_OFF(rx_ring->rx_qcidx); +} + +/** + * nfp_net_irqs_assign() - Assign IRQs and setup rvecs. + * @netdev: netdev structure + */ +static void nfp_net_irqs_assign(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + struct nfp_net_r_vector *r_vec; + int r; + + /* Assumes nn->num_tx_rings == nn->num_rx_rings */ + if (nn->num_tx_rings > nn->num_r_vecs) { + nn_warn(nn, "More rings (%d) than vectors (%d).\n", + nn->num_tx_rings, nn->num_r_vecs); + nn->num_tx_rings = nn->num_r_vecs; + nn->num_rx_rings = nn->num_r_vecs; + } + + nn->lsc_handler = nfp_net_irq_lsc; + nn->exn_handler = nfp_net_irq_exn; + + for (r = 0; r < nn->num_r_vecs; r++) { + r_vec = &nn->r_vecs[r]; + r_vec->nfp_net = nn; + r_vec->handler = nfp_net_irq_rxtx; + r_vec->irq_idx = NFP_NET_NON_Q_VECTORS + r; + + cpumask_set_cpu(r, &r_vec->affinity_mask); + + r_vec->tx_ring = &nn->tx_rings[r]; + nn->tx_rings[r].idx = r; + nn->tx_rings[r].r_vec = r_vec; + nfp_net_tx_ring_init(r_vec->tx_ring); + + r_vec->rx_ring = &nn->rx_rings[r]; + nn->rx_rings[r].idx = r; + nn->rx_rings[r].r_vec = r_vec; + nfp_net_rx_ring_init(r_vec->rx_ring); + } +} + +/** + * nfp_net_aux_irq_request() - Request an auxiliary interrupt (LSC or EXN) + * @nn: NFP Network structure + * @ctrl_offset: Control BAR offset where IRQ configuration should be written + * @format: printf-style format to construct the interrupt name + * @name: Pointer to allocated space for interrupt name + * @name_sz: Size of space for interrupt name + * @vector_idx: Index of MSI-X vector used for this interrupt + * @handler: IRQ handler to register for this interrupt + */ +static int +nfp_net_aux_irq_request(struct nfp_net *nn, u32 ctrl_offset, + const char *format, char *name, size_t name_sz, + unsigned int vector_idx, irq_handler_t handler) +{ + struct msix_entry *entry; + int err; + + entry = &nn->irq_entries[vector_idx]; + + snprintf(name, name_sz, format, netdev_name(nn->netdev)); + err = request_irq(entry->vector, handler, 0, name, nn); + if (err) { + nn_err(nn, "Failed to request IRQ %d (err=%d).\n", + entry->vector, err); + return err; + } + nn_writeb(nn, ctrl_offset, vector_idx); + + return 0; +} + +/** + * nfp_net_aux_irq_free() - Free an auxiliary interrupt (LSC or EXN) + * @nn: NFP Network structure + * @ctrl_offset: Control BAR offset where IRQ configuration should be written + * @vector_idx: Index of MSI-X vector used for this interrupt + */ +static void nfp_net_aux_irq_free(struct nfp_net *nn, u32 ctrl_offset, + unsigned int vector_idx) +{ + nn_writeb(nn, ctrl_offset, 0xff); + free_irq(nn->irq_entries[vector_idx].vector, nn); +} + +/* Transmit + * + * One queue controller peripheral queue is used for transmit. The + * driver en-queues packets for transmit by advancing the write + * pointer. The device indicates that packets have transmitted by + * advancing the read pointer. The driver maintains a local copy of + * the read and write pointer in @struct nfp_net_tx_ring. The driver + * keeps @wr_p in sync with the queue controller write pointer and can + * determine how many packets have been transmitted by comparing its + * copy of the read pointer @rd_p with the read pointer maintained by + * the queue controller peripheral. + */ + +/** + * nfp_net_tx_full() - Check if the TX ring is full + * @tx_ring: TX ring to check + * @dcnt: Number of descriptors that need to be enqueued (must be >= 1) + * + * This function checks, based on the *host copy* of read/write + * pointer if a given TX ring is full. The real TX queue may have + * some newly made available slots. + * + * Return: True if the ring is full. + */ +static inline int nfp_net_tx_full(struct nfp_net_tx_ring *tx_ring, int dcnt) +{ + return (tx_ring->wr_p - tx_ring->rd_p) >= (tx_ring->cnt - dcnt); +} + +/* Wrappers for deciding when to stop and restart TX queues */ +static int nfp_net_tx_ring_should_wake(struct nfp_net_tx_ring *tx_ring) +{ + return !nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS * 4); +} + +static int nfp_net_tx_ring_should_stop(struct nfp_net_tx_ring *tx_ring) +{ + return nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS + 1); +} + +/** + * nfp_net_tx_ring_stop() - stop tx ring + * @nd_q: netdev queue + * @tx_ring: driver tx queue structure + * + * Safely stop TX ring. Remember that while we are running .start_xmit() + * someone else may be cleaning the TX ring completions so we need to be + * extra careful here. + */ +static void nfp_net_tx_ring_stop(struct netdev_queue *nd_q, + struct nfp_net_tx_ring *tx_ring) +{ + netif_tx_stop_queue(nd_q); + + /* We can race with the TX completion out of NAPI so recheck */ + smp_mb(); + if (unlikely(nfp_net_tx_ring_should_wake(tx_ring))) + netif_tx_start_queue(nd_q); +} + +/** + * nfp_net_tx_tso() - Set up Tx descriptor for LSO + * @nn: NFP Net device + * @r_vec: per-ring structure + * @txbuf: Pointer to driver soft TX descriptor + * @txd: Pointer to HW TX descriptor + * @skb: Pointer to SKB + * + * Set up Tx descriptor for LSO, do nothing for non-LSO skbs. + * Return error on packet header greater than maximum supported LSO header size. + */ +static void nfp_net_tx_tso(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, + struct nfp_net_tx_buf *txbuf, + struct nfp_net_tx_desc *txd, struct sk_buff *skb) +{ + u32 hdrlen; + u16 mss; + + if (!skb_is_gso(skb)) + return; + + if (!skb->encapsulation) + hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb); + else + hdrlen = skb_inner_transport_header(skb) - skb->data + + inner_tcp_hdrlen(skb); + + txbuf->pkt_cnt = skb_shinfo(skb)->gso_segs; + txbuf->real_len += hdrlen * (txbuf->pkt_cnt - 1); + + mss = skb_shinfo(skb)->gso_size & PCIE_DESC_TX_MSS_MASK; + txd->l4_offset = hdrlen; + txd->mss = cpu_to_le16(mss); + txd->flags |= PCIE_DESC_TX_LSO; + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_lso++; + u64_stats_update_end(&r_vec->tx_sync); +} + +/** + * nfp_net_tx_csum() - Set TX CSUM offload flags in TX descriptor + * @nn: NFP Net device + * @r_vec: per-ring structure + * @txbuf: Pointer to driver soft TX descriptor + * @txd: Pointer to TX descriptor + * @skb: Pointer to SKB + * + * This function sets the TX checksum flags in the TX descriptor based + * on the configuration and the protocol of the packet to be transmitted. + */ +static void nfp_net_tx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, + struct nfp_net_tx_buf *txbuf, + struct nfp_net_tx_desc *txd, struct sk_buff *skb) +{ + struct ipv6hdr *ipv6h; + struct iphdr *iph; + u8 l4_hdr; + + if (!(nn->ctrl & NFP_NET_CFG_CTRL_TXCSUM)) + return; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return; + + txd->flags |= PCIE_DESC_TX_CSUM; + if (skb->encapsulation) + txd->flags |= PCIE_DESC_TX_ENCAP; + + iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb); + ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb); + + if (iph->version == 4) { + txd->flags |= PCIE_DESC_TX_IP4_CSUM; + l4_hdr = iph->protocol; + } else if (ipv6h->version == 6) { + l4_hdr = ipv6h->nexthdr; + } else { + nn_warn_ratelimit(nn, "partial checksum but ipv=%x!\n", + iph->version); + return; + } + + switch (l4_hdr) { + case IPPROTO_TCP: + txd->flags |= PCIE_DESC_TX_TCP_CSUM; + break; + case IPPROTO_UDP: + txd->flags |= PCIE_DESC_TX_UDP_CSUM; + break; + default: + nn_warn_ratelimit(nn, "partial checksum but l4 proto=%x!\n", + l4_hdr); + return; + } + + u64_stats_update_begin(&r_vec->tx_sync); + if (skb->encapsulation) + r_vec->hw_csum_tx_inner += txbuf->pkt_cnt; + else + r_vec->hw_csum_tx += txbuf->pkt_cnt; + u64_stats_update_end(&r_vec->tx_sync); +} + +/** + * nfp_net_tx() - Main transmit entry point + * @skb: SKB to transmit + * @netdev: netdev structure + * + * Return: NETDEV_TX_OK on success. + */ +static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + const struct skb_frag_struct *frag; + struct nfp_net_r_vector *r_vec; + struct nfp_net_tx_desc *txd, txdg; + struct nfp_net_tx_buf *txbuf; + struct nfp_net_tx_ring *tx_ring; + struct netdev_queue *nd_q; + dma_addr_t dma_addr; + unsigned int fsize; + int f, nr_frags; + int wr_idx; + u16 qidx; + + qidx = skb_get_queue_mapping(skb); + tx_ring = &nn->tx_rings[qidx]; + r_vec = tx_ring->r_vec; + nd_q = netdev_get_tx_queue(nn->netdev, qidx); + + nr_frags = skb_shinfo(skb)->nr_frags; + + if (unlikely(nfp_net_tx_full(tx_ring, nr_frags + 1))) { + nn_warn_ratelimit(nn, "TX ring %d busy. wrp=%u rdp=%u\n", + qidx, tx_ring->wr_p, tx_ring->rd_p); + netif_tx_stop_queue(nd_q); + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_busy++; + u64_stats_update_end(&r_vec->tx_sync); + return NETDEV_TX_BUSY; + } + + /* Start with the head skbuf */ + dma_addr = dma_map_single(&nn->pdev->dev, skb->data, skb_headlen(skb), + DMA_TO_DEVICE); + if (dma_mapping_error(&nn->pdev->dev, dma_addr)) + goto err_free; + + wr_idx = tx_ring->wr_p % tx_ring->cnt; + + /* Stash the soft descriptor of the head then initialize it */ + txbuf = &tx_ring->txbufs[wr_idx]; + txbuf->skb = skb; + txbuf->dma_addr = dma_addr; + txbuf->fidx = -1; + txbuf->pkt_cnt = 1; + txbuf->real_len = skb->len; + + /* Build TX descriptor */ + txd = &tx_ring->txds[wr_idx]; + txd->offset_eop = (nr_frags == 0) ? PCIE_DESC_TX_EOP : 0; + txd->dma_len = cpu_to_le16(skb_headlen(skb)); + nfp_desc_set_dma_addr(txd, dma_addr); + txd->data_len = cpu_to_le16(skb->len); + + txd->flags = 0; + txd->mss = 0; + txd->l4_offset = 0; + + nfp_net_tx_tso(nn, r_vec, txbuf, txd, skb); + + nfp_net_tx_csum(nn, r_vec, txbuf, txd, skb); + + if (skb_vlan_tag_present(skb) && nn->ctrl & NFP_NET_CFG_CTRL_TXVLAN) { + txd->flags |= PCIE_DESC_TX_VLAN; + txd->vlan = cpu_to_le16(skb_vlan_tag_get(skb)); + } + + /* Gather DMA */ + if (nr_frags > 0) { + /* all descs must match except for in addr, length and eop */ + txdg = *txd; + + for (f = 0; f < nr_frags; f++) { + frag = &skb_shinfo(skb)->frags[f]; + fsize = skb_frag_size(frag); + + dma_addr = skb_frag_dma_map(&nn->pdev->dev, frag, 0, + fsize, DMA_TO_DEVICE); + if (dma_mapping_error(&nn->pdev->dev, dma_addr)) + goto err_unmap; + + wr_idx = (wr_idx + 1) % tx_ring->cnt; + tx_ring->txbufs[wr_idx].skb = skb; + tx_ring->txbufs[wr_idx].dma_addr = dma_addr; + tx_ring->txbufs[wr_idx].fidx = f; + + txd = &tx_ring->txds[wr_idx]; + *txd = txdg; + txd->dma_len = cpu_to_le16(fsize); + nfp_desc_set_dma_addr(txd, dma_addr); + txd->offset_eop = + (f == nr_frags - 1) ? PCIE_DESC_TX_EOP : 0; + } + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_gather++; + u64_stats_update_end(&r_vec->tx_sync); + } + + netdev_tx_sent_queue(nd_q, txbuf->real_len); + + tx_ring->wr_p += nr_frags + 1; + if (nfp_net_tx_ring_should_stop(tx_ring)) + nfp_net_tx_ring_stop(nd_q, tx_ring); + + tx_ring->wr_ptr_add += nr_frags + 1; + if (!skb->xmit_more || netif_xmit_stopped(nd_q)) { + /* force memory write before we let HW know */ + wmb(); + nfp_qcp_wr_ptr_add(tx_ring->qcp_q, tx_ring->wr_ptr_add); + tx_ring->wr_ptr_add = 0; + } + + skb_tx_timestamp(skb); + + return NETDEV_TX_OK; + +err_unmap: + --f; + while (f >= 0) { + frag = &skb_shinfo(skb)->frags[f]; + dma_unmap_page(&nn->pdev->dev, + tx_ring->txbufs[wr_idx].dma_addr, + skb_frag_size(frag), DMA_TO_DEVICE); + tx_ring->txbufs[wr_idx].skb = NULL; + tx_ring->txbufs[wr_idx].dma_addr = 0; + tx_ring->txbufs[wr_idx].fidx = -2; + wr_idx = wr_idx - 1; + if (wr_idx < 0) + wr_idx += tx_ring->cnt; + } + dma_unmap_single(&nn->pdev->dev, tx_ring->txbufs[wr_idx].dma_addr, + skb_headlen(skb), DMA_TO_DEVICE); + tx_ring->txbufs[wr_idx].skb = NULL; + tx_ring->txbufs[wr_idx].dma_addr = 0; + tx_ring->txbufs[wr_idx].fidx = -2; +err_free: + nn_warn_ratelimit(nn, "Failed to map DMA TX buffer\n"); + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_errors++; + u64_stats_update_end(&r_vec->tx_sync); + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} + +/** + * nfp_net_tx_complete() - Handled completed TX packets + * @tx_ring: TX ring structure + * + * Return: Number of completed TX descriptors + */ +static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net *nn = r_vec->nfp_net; + const struct skb_frag_struct *frag; + struct netdev_queue *nd_q; + u32 done_pkts = 0, done_bytes = 0; + struct sk_buff *skb; + int todo, nr_frags; + u32 qcp_rd_p; + int fidx; + int idx; + + /* Work out how many descriptors have been transmitted */ + qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q); + + if (qcp_rd_p == tx_ring->qcp_rd_p) + return; + + if (qcp_rd_p > tx_ring->qcp_rd_p) + todo = qcp_rd_p - tx_ring->qcp_rd_p; + else + todo = qcp_rd_p + tx_ring->cnt - tx_ring->qcp_rd_p; + + while (todo--) { + idx = tx_ring->rd_p % tx_ring->cnt; + tx_ring->rd_p++; + + skb = tx_ring->txbufs[idx].skb; + if (!skb) + continue; + + nr_frags = skb_shinfo(skb)->nr_frags; + fidx = tx_ring->txbufs[idx].fidx; + + if (fidx == -1) { + /* unmap head */ + dma_unmap_single(&nn->pdev->dev, + tx_ring->txbufs[idx].dma_addr, + skb_headlen(skb), DMA_TO_DEVICE); + + done_pkts += tx_ring->txbufs[idx].pkt_cnt; + done_bytes += tx_ring->txbufs[idx].real_len; + } else { + /* unmap fragment */ + frag = &skb_shinfo(skb)->frags[fidx]; + dma_unmap_page(&nn->pdev->dev, + tx_ring->txbufs[idx].dma_addr, + skb_frag_size(frag), DMA_TO_DEVICE); + } + + /* check for last gather fragment */ + if (fidx == nr_frags - 1) + dev_kfree_skb_any(skb); + + tx_ring->txbufs[idx].dma_addr = 0; + tx_ring->txbufs[idx].skb = NULL; + tx_ring->txbufs[idx].fidx = -2; + } + + tx_ring->qcp_rd_p = qcp_rd_p; + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_bytes += done_bytes; + r_vec->tx_pkts += done_pkts; + u64_stats_update_end(&r_vec->tx_sync); + + nd_q = netdev_get_tx_queue(nn->netdev, tx_ring->idx); + netdev_tx_completed_queue(nd_q, done_pkts, done_bytes); + if (nfp_net_tx_ring_should_wake(tx_ring)) { + /* Make sure TX thread will see updated tx_ring->rd_p */ + smp_mb(); + + if (unlikely(netif_tx_queue_stopped(nd_q))) + netif_tx_wake_queue(nd_q); + } + + WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt, + "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n", + tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt); +} + +/** + * nfp_net_tx_flush() - Free any untransmitted buffers currently on the TX ring + * @tx_ring: TX ring structure + * + * Assumes that the device is stopped + */ +static void nfp_net_tx_flush(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net *nn = r_vec->nfp_net; + struct pci_dev *pdev = nn->pdev; + const struct skb_frag_struct *frag; + struct netdev_queue *nd_q; + struct sk_buff *skb; + int nr_frags; + int fidx; + int idx; + + while (tx_ring->rd_p != tx_ring->wr_p) { + idx = tx_ring->rd_p % tx_ring->cnt; + + skb = tx_ring->txbufs[idx].skb; + if (skb) { + nr_frags = skb_shinfo(skb)->nr_frags; + fidx = tx_ring->txbufs[idx].fidx; + + if (fidx == -1) { + /* unmap head */ + dma_unmap_single(&pdev->dev, + tx_ring->txbufs[idx].dma_addr, + skb_headlen(skb), + DMA_TO_DEVICE); + } else { + /* unmap fragment */ + frag = &skb_shinfo(skb)->frags[fidx]; + dma_unmap_page(&pdev->dev, + tx_ring->txbufs[idx].dma_addr, + skb_frag_size(frag), + DMA_TO_DEVICE); + } + + /* check for last gather fragment */ + if (fidx == nr_frags - 1) + dev_kfree_skb_any(skb); + + tx_ring->txbufs[idx].dma_addr = 0; + tx_ring->txbufs[idx].skb = NULL; + tx_ring->txbufs[idx].fidx = -2; + } + + memset(&tx_ring->txds[idx], 0, sizeof(tx_ring->txds[idx])); + + tx_ring->qcp_rd_p++; + tx_ring->rd_p++; + } + + nd_q = netdev_get_tx_queue(nn->netdev, tx_ring->idx); + netdev_tx_reset_queue(nd_q); +} + +static void nfp_net_tx_timeout(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + int i; + + for (i = 0; i < nn->num_tx_rings; i++) { + if (!netif_tx_queue_stopped(netdev_get_tx_queue(netdev, i))) + continue; + nn_warn(nn, "TX timeout on ring: %d\n", i); + } + nn_warn(nn, "TX watchdog timeout\n"); +} + +/* Receive processing + */ + +/** + * nfp_net_rx_space() - return the number of free slots on the RX ring + * @rx_ring: RX ring structure + * + * Make sure we leave at least one slot free. + * + * Return: True if there is space on the RX ring + */ +static inline int nfp_net_rx_space(struct nfp_net_rx_ring *rx_ring) +{ + return (rx_ring->cnt - 1) - (rx_ring->wr_p - rx_ring->rd_p); +} + +/** + * nfp_net_rx_alloc_one() - Allocate and map skb for RX + * @rx_ring: RX ring structure of the skb + * @dma_addr: Pointer to storage for DMA address (output param) + * + * This function will allcate a new skb, map it for DMA. + * + * Return: allocated skb or NULL on failure. + */ +static struct sk_buff * +nfp_net_rx_alloc_one(struct nfp_net_rx_ring *rx_ring, dma_addr_t *dma_addr) +{ + struct nfp_net *nn = rx_ring->r_vec->nfp_net; + struct sk_buff *skb; + + skb = netdev_alloc_skb(nn->netdev, nn->fl_bufsz); + if (!skb) { + nn_warn_ratelimit(nn, "Failed to alloc receive SKB\n"); + return NULL; + } + + *dma_addr = dma_map_single(&nn->pdev->dev, skb->data, + nn->fl_bufsz, DMA_FROM_DEVICE); + if (dma_mapping_error(&nn->pdev->dev, *dma_addr)) { + dev_kfree_skb_any(skb); + nn_warn_ratelimit(nn, "Failed to map DMA RX buffer\n"); + return NULL; + } + + return skb; +} + +/** + * nfp_net_rx_give_one() - Put mapped skb on the software and hardware rings + * @rx_ring: RX ring structure + * @skb: Skb to put on rings + * @dma_addr: DMA address of skb mapping + */ +static void nfp_net_rx_give_one(struct nfp_net_rx_ring *rx_ring, + struct sk_buff *skb, dma_addr_t dma_addr) +{ + unsigned int wr_idx; + + wr_idx = rx_ring->wr_p % rx_ring->cnt; + + /* Stash SKB and DMA address away */ + rx_ring->rxbufs[wr_idx].skb = skb; + rx_ring->rxbufs[wr_idx].dma_addr = dma_addr; + + /* Fill freelist descriptor */ + rx_ring->rxds[wr_idx].fld.reserved = 0; + rx_ring->rxds[wr_idx].fld.meta_len_dd = 0; + nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld, dma_addr); + + rx_ring->wr_p++; + rx_ring->wr_ptr_add++; + if (rx_ring->wr_ptr_add >= NFP_NET_FL_BATCH) { + /* Update write pointer of the freelist queue. Make + * sure all writes are flushed before telling the hardware. + */ + wmb(); + nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, rx_ring->wr_ptr_add); + rx_ring->wr_ptr_add = 0; + } +} + +/** + * nfp_net_rx_flush() - Free any buffers currently on the RX ring + * @rx_ring: RX ring to remove buffers from + * + * Assumes that the device is stopped + */ +static void nfp_net_rx_flush(struct nfp_net_rx_ring *rx_ring) +{ + struct nfp_net *nn = rx_ring->r_vec->nfp_net; + struct pci_dev *pdev = nn->pdev; + int idx; + + while (rx_ring->rd_p != rx_ring->wr_p) { + idx = rx_ring->rd_p % rx_ring->cnt; + + if (rx_ring->rxbufs[idx].skb) { + dma_unmap_single(&pdev->dev, + rx_ring->rxbufs[idx].dma_addr, + nn->fl_bufsz, DMA_FROM_DEVICE); + dev_kfree_skb_any(rx_ring->rxbufs[idx].skb); + rx_ring->rxbufs[idx].dma_addr = 0; + rx_ring->rxbufs[idx].skb = NULL; + } + + memset(&rx_ring->rxds[idx], 0, sizeof(rx_ring->rxds[idx])); + + rx_ring->rd_p++; + } +} + +/** + * nfp_net_rx_fill_freelist() - Attempt filling freelist with RX buffers + * @rx_ring: RX ring to fill + * + * Try to fill as many buffers as possible into freelist. Return + * number of buffers added. + * + * Return: Number of freelist buffers added. + */ +static int nfp_net_rx_fill_freelist(struct nfp_net_rx_ring *rx_ring) +{ + struct sk_buff *skb; + dma_addr_t dma_addr; + + while (nfp_net_rx_space(rx_ring)) { + skb = nfp_net_rx_alloc_one(rx_ring, &dma_addr); + if (!skb) { + nfp_net_rx_flush(rx_ring); + return -ENOMEM; + } + nfp_net_rx_give_one(rx_ring, skb, dma_addr); + } + + return 0; +} + +/** + * nfp_net_rx_csum_has_errors() - group check if rxd has any csum errors + * @flags: RX descriptor flags field in CPU byte order + */ +static int nfp_net_rx_csum_has_errors(u16 flags) +{ + u16 csum_all_checked, csum_all_ok; + + csum_all_checked = flags & __PCIE_DESC_RX_CSUM_ALL; + csum_all_ok = flags & __PCIE_DESC_RX_CSUM_ALL_OK; + + return csum_all_checked != (csum_all_ok << PCIE_DESC_RX_CSUM_OK_SHIFT); +} + +/** + * nfp_net_rx_csum() - set SKB checksum field based on RX descriptor flags + * @nn: NFP Net device + * @r_vec: per-ring structure + * @rxd: Pointer to RX descriptor + * @skb: Pointer to SKB + */ +static void nfp_net_rx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, + struct nfp_net_rx_desc *rxd, struct sk_buff *skb) +{ + skb_checksum_none_assert(skb); + + if (!(nn->netdev->features & NETIF_F_RXCSUM)) + return; + + if (nfp_net_rx_csum_has_errors(le16_to_cpu(rxd->rxd.flags))) { + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->hw_csum_rx_error++; + u64_stats_update_end(&r_vec->rx_sync); + return; + } + + /* Assume that the firmware will never report inner CSUM_OK unless outer + * L4 headers were successfully parsed. FW will always report zero UDP + * checksum as CSUM_OK. + */ + if (rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM_OK || + rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM_OK) { + __skb_incr_checksum_unnecessary(skb); + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->hw_csum_rx_ok++; + u64_stats_update_end(&r_vec->rx_sync); + } + + if (rxd->rxd.flags & PCIE_DESC_RX_I_TCP_CSUM_OK || + rxd->rxd.flags & PCIE_DESC_RX_I_UDP_CSUM_OK) { + __skb_incr_checksum_unnecessary(skb); + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->hw_csum_rx_inner_ok++; + u64_stats_update_end(&r_vec->rx_sync); + } +} + +/** + * nfp_net_set_hash() - Set SKB hash data + * @netdev: adapter's net_device structure + * @skb: SKB to set the hash data on + * @rxd: RX descriptor + * + * The RSS hash and hash-type are pre-pended to the packet data. + * Extract and decode it and set the skb fields. + */ +static void nfp_net_set_hash(struct net_device *netdev, struct sk_buff *skb, + struct nfp_net_rx_desc *rxd) +{ + struct nfp_net_rx_hash *rx_hash; + + if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS) || + !(netdev->features & NETIF_F_RXHASH)) + return; + + rx_hash = (struct nfp_net_rx_hash *)(skb->data - sizeof(*rx_hash)); + + switch (be32_to_cpu(rx_hash->hash_type)) { + case NFP_NET_RSS_IPV4: + case NFP_NET_RSS_IPV6: + case NFP_NET_RSS_IPV6_EX: + skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L3); + break; + default: + skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L4); + break; + } +} + +/** + * nfp_net_rx() - receive up to @budget packets on @rx_ring + * @rx_ring: RX ring to receive from + * @budget: NAPI budget + * + * Note, this function is separated out from the napi poll function to + * more cleanly separate packet receive code from other bookkeeping + * functions performed in the napi poll function. + * + * There are differences between the NFP-3200 firmware and the + * NFP-6000 firmware. The NFP-3200 firmware uses a dedicated RX queue + * to indicate that new packets have arrived. The NFP-6000 does not + * have this queue and uses the DD bit in the RX descriptor. This + * method cannot be used on the NFP-3200 as it causes a race + * condition: The RX ring write pointer on the NFP-3200 is updated + * after packets (and descriptors) have been DMAed. If the DD bit is + * used and subsequently the read pointer is updated this may lead to + * the RX queue to underflow (if the firmware has not yet update the + * write pointer). Therefore we use slightly ugly conditional code + * below to handle the differences. We may, in the future update the + * NFP-3200 firmware to behave the same as the firmware on the + * NFP-6000. + * + * Return: Number of packets received. + */ +static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct nfp_net *nn = r_vec->nfp_net; + unsigned int data_len, meta_len; + int avail = 0, pkts_polled = 0; + struct sk_buff *skb, *new_skb; + struct nfp_net_rx_desc *rxd; + dma_addr_t new_dma_addr; + u32 qcp_wr_p; + int idx; + + if (nn->is_nfp3200) { + /* Work out how many packets arrived */ + qcp_wr_p = nfp_qcp_wr_ptr_read(rx_ring->qcp_rx); + idx = rx_ring->rd_p % rx_ring->cnt; + + if (qcp_wr_p == idx) + /* No new packets */ + return 0; + + if (qcp_wr_p > idx) + avail = qcp_wr_p - idx; + else + avail = qcp_wr_p + rx_ring->cnt - idx; + } else { + avail = budget + 1; + } + + while (avail > 0 && pkts_polled < budget) { + idx = rx_ring->rd_p % rx_ring->cnt; + + rxd = &rx_ring->rxds[idx]; + if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) { + if (nn->is_nfp3200) + nn_dbg(nn, "RX descriptor not valid (DD)%d:%u rxd[0]=%#x rxd[1]=%#x\n", + rx_ring->idx, idx, + rxd->vals[0], rxd->vals[1]); + break; + } + /* Memory barrier to ensure that we won't do other reads + * before the DD bit. + */ + dma_rmb(); + + rx_ring->rd_p++; + pkts_polled++; + avail--; + + skb = rx_ring->rxbufs[idx].skb; + + new_skb = nfp_net_rx_alloc_one(rx_ring, &new_dma_addr); + if (!new_skb) { + nfp_net_rx_give_one(rx_ring, rx_ring->rxbufs[idx].skb, + rx_ring->rxbufs[idx].dma_addr); + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_drops++; + u64_stats_update_end(&r_vec->rx_sync); + continue; + } + + dma_unmap_single(&nn->pdev->dev, + rx_ring->rxbufs[idx].dma_addr, + nn->fl_bufsz, DMA_FROM_DEVICE); + + nfp_net_rx_give_one(rx_ring, new_skb, new_dma_addr); + + meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK; + data_len = le16_to_cpu(rxd->rxd.data_len); + + if (WARN_ON_ONCE(data_len > nn->fl_bufsz)) { + dev_kfree_skb_any(skb); + continue; + } + + if (nn->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) { + /* The packet data starts after the metadata */ + skb_reserve(skb, meta_len); + } else { + /* The packet data starts at a fixed offset */ + skb_reserve(skb, nn->rx_offset); + } + + /* Adjust the SKB for the dynamic meta data pre-pended */ + skb_put(skb, data_len - meta_len); + + nfp_net_set_hash(nn->netdev, skb, rxd); + + /* Pad small frames to minimum */ + if (skb_put_padto(skb, 60)) + break; + + /* Stats update */ + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_pkts++; + r_vec->rx_bytes += skb->len; + u64_stats_update_end(&r_vec->rx_sync); + + skb_record_rx_queue(skb, rx_ring->idx); + skb->protocol = eth_type_trans(skb, nn->netdev); + + nfp_net_rx_csum(nn, r_vec, rxd, skb); + + if (rxd->rxd.flags & PCIE_DESC_RX_VLAN) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + le16_to_cpu(rxd->rxd.vlan)); + + napi_gro_receive(&rx_ring->r_vec->napi, skb); + } + + if (nn->is_nfp3200) + nfp_qcp_rd_ptr_add(rx_ring->qcp_rx, pkts_polled); + + return pkts_polled; +} + +/** + * nfp_net_poll() - napi poll function + * @napi: NAPI structure + * @budget: NAPI budget + * + * Return: number of packets polled. + */ +static int nfp_net_poll(struct napi_struct *napi, int budget) +{ + struct nfp_net_r_vector *r_vec = + container_of(napi, struct nfp_net_r_vector, napi); + struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring; + struct nfp_net_tx_ring *tx_ring = r_vec->tx_ring; + struct nfp_net *nn = r_vec->nfp_net; + struct netdev_queue *txq; + unsigned int pkts_polled; + + tx_ring = &nn->tx_rings[rx_ring->idx]; + txq = netdev_get_tx_queue(nn->netdev, tx_ring->idx); + nfp_net_tx_complete(tx_ring); + + pkts_polled = nfp_net_rx(rx_ring, budget); + + if (pkts_polled < budget) { + napi_complete_done(napi, pkts_polled); + nfp_net_irq_unmask(nn, r_vec->irq_idx); + } + + return pkts_polled; +} + +/* Setup and Configuration + */ + +/** + * nfp_net_tx_ring_free() - Free resources allocated to a TX ring + * @tx_ring: TX ring to free + */ +static void nfp_net_tx_ring_free(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net *nn = r_vec->nfp_net; + struct pci_dev *pdev = nn->pdev; + + nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(tx_ring->idx), 0); + nn_writeb(nn, NFP_NET_CFG_TXR_SZ(tx_ring->idx), 0); + nn_writeb(nn, NFP_NET_CFG_TXR_VEC(tx_ring->idx), 0); + + kfree(tx_ring->txbufs); + + if (tx_ring->txds) + dma_free_coherent(&pdev->dev, tx_ring->size, + tx_ring->txds, tx_ring->dma); + + tx_ring->cnt = 0; + tx_ring->wr_p = 0; + tx_ring->rd_p = 0; + tx_ring->qcp_rd_p = 0; + + tx_ring->txbufs = NULL; + tx_ring->txds = NULL; + tx_ring->dma = 0; + tx_ring->size = 0; +} + +/** + * nfp_net_tx_ring_alloc() - Allocate resource for a TX ring + * @tx_ring: TX Ring structure to allocate + * + * Return: 0 on success, negative errno otherwise. + */ +static int nfp_net_tx_ring_alloc(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net *nn = r_vec->nfp_net; + struct pci_dev *pdev = nn->pdev; + int sz; + + tx_ring->cnt = nn->txd_cnt; + + tx_ring->size = sizeof(*tx_ring->txds) * tx_ring->cnt; + tx_ring->txds = dma_zalloc_coherent(&pdev->dev, tx_ring->size, + &tx_ring->dma, GFP_KERNEL); + if (!tx_ring->txds) + goto err_alloc; + + sz = sizeof(*tx_ring->txbufs) * tx_ring->cnt; + tx_ring->txbufs = kzalloc(sz, GFP_KERNEL); + if (!tx_ring->txbufs) + goto err_alloc; + + /* Write the DMA address, size and MSI-X info to the device */ + nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(tx_ring->idx), tx_ring->dma); + nn_writeb(nn, NFP_NET_CFG_TXR_SZ(tx_ring->idx), ilog2(tx_ring->cnt)); + nn_writeb(nn, NFP_NET_CFG_TXR_VEC(tx_ring->idx), r_vec->irq_idx); + + netif_set_xps_queue(nn->netdev, &r_vec->affinity_mask, tx_ring->idx); + + nn_dbg(nn, "TxQ%02d: QCidx=%02d cnt=%d dma=%#llx host=%p\n", + tx_ring->idx, tx_ring->qcidx, + tx_ring->cnt, (unsigned long long)tx_ring->dma, tx_ring->txds); + + return 0; + +err_alloc: + nfp_net_tx_ring_free(tx_ring); + return -ENOMEM; +} + +/** + * nfp_net_rx_ring_free() - Free resources allocated to a RX ring + * @rx_ring: RX ring to free + */ +static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct nfp_net *nn = r_vec->nfp_net; + struct pci_dev *pdev = nn->pdev; + + nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(rx_ring->idx), 0); + nn_writeb(nn, NFP_NET_CFG_RXR_SZ(rx_ring->idx), 0); + nn_writeb(nn, NFP_NET_CFG_RXR_VEC(rx_ring->idx), 0); + + kfree(rx_ring->rxbufs); + + if (rx_ring->rxds) + dma_free_coherent(&pdev->dev, rx_ring->size, + rx_ring->rxds, rx_ring->dma); + + rx_ring->cnt = 0; + rx_ring->wr_p = 0; + rx_ring->rd_p = 0; + + rx_ring->rxbufs = NULL; + rx_ring->rxds = NULL; + rx_ring->dma = 0; + rx_ring->size = 0; +} + +/** + * nfp_net_rx_ring_alloc() - Allocate resource for a RX ring + * @rx_ring: RX ring to allocate + * + * Return: 0 on success, negative errno otherwise. + */ +static int nfp_net_rx_ring_alloc(struct nfp_net_rx_ring *rx_ring) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct nfp_net *nn = r_vec->nfp_net; + struct pci_dev *pdev = nn->pdev; + int sz; + + rx_ring->cnt = nn->rxd_cnt; + + rx_ring->size = sizeof(*rx_ring->rxds) * rx_ring->cnt; + rx_ring->rxds = dma_zalloc_coherent(&pdev->dev, rx_ring->size, + &rx_ring->dma, GFP_KERNEL); + if (!rx_ring->rxds) + goto err_alloc; + + sz = sizeof(*rx_ring->rxbufs) * rx_ring->cnt; + rx_ring->rxbufs = kzalloc(sz, GFP_KERNEL); + if (!rx_ring->rxbufs) + goto err_alloc; + + /* Write the DMA address, size and MSI-X info to the device */ + nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(rx_ring->idx), rx_ring->dma); + nn_writeb(nn, NFP_NET_CFG_RXR_SZ(rx_ring->idx), ilog2(rx_ring->cnt)); + nn_writeb(nn, NFP_NET_CFG_RXR_VEC(rx_ring->idx), r_vec->irq_idx); + + nn_dbg(nn, "RxQ%02d: FlQCidx=%02d RxQCidx=%02d cnt=%d dma=%#llx host=%p\n", + rx_ring->idx, rx_ring->fl_qcidx, rx_ring->rx_qcidx, + rx_ring->cnt, (unsigned long long)rx_ring->dma, rx_ring->rxds); + + return 0; + +err_alloc: + nfp_net_rx_ring_free(rx_ring); + return -ENOMEM; +} + +static void __nfp_net_free_rings(struct nfp_net *nn, unsigned int n_free) +{ + struct nfp_net_r_vector *r_vec; + struct msix_entry *entry; + + while (n_free--) { + r_vec = &nn->r_vecs[n_free]; + entry = &nn->irq_entries[r_vec->irq_idx]; + + nfp_net_rx_ring_free(r_vec->rx_ring); + nfp_net_tx_ring_free(r_vec->tx_ring); + + irq_set_affinity_hint(entry->vector, NULL); + free_irq(entry->vector, r_vec); + + netif_napi_del(&r_vec->napi); + } +} + +/** + * nfp_net_free_rings() - Free all ring resources + * @nn: NFP Net device to reconfigure + */ +static void nfp_net_free_rings(struct nfp_net *nn) +{ + __nfp_net_free_rings(nn, nn->num_r_vecs); +} + +/** + * nfp_net_alloc_rings() - Allocate resources for RX and TX rings + * @nn: NFP Net device to reconfigure + * + * Return: 0 on success or negative errno on error. + */ +static int nfp_net_alloc_rings(struct nfp_net *nn) +{ + struct nfp_net_r_vector *r_vec; + struct msix_entry *entry; + int err; + int r; + + for (r = 0; r < nn->num_r_vecs; r++) { + r_vec = &nn->r_vecs[r]; + entry = &nn->irq_entries[r_vec->irq_idx]; + + /* Setup NAPI */ + netif_napi_add(nn->netdev, &r_vec->napi, + nfp_net_poll, NAPI_POLL_WEIGHT); + + snprintf(r_vec->name, sizeof(r_vec->name), + "%s-rxtx-%d", nn->netdev->name, r); + err = request_irq(entry->vector, r_vec->handler, 0, + r_vec->name, r_vec); + if (err) { + nn_dbg(nn, "Error requesting IRQ %d\n", entry->vector); + goto err_napi_del; + } + + irq_set_affinity_hint(entry->vector, &r_vec->affinity_mask); + + nn_dbg(nn, "RV%02d: irq=%03d/%03d\n", + r, entry->vector, entry->entry); + + /* Allocate TX ring resources */ + err = nfp_net_tx_ring_alloc(r_vec->tx_ring); + if (err) + goto err_free_irq; + + /* Allocate RX ring resources */ + err = nfp_net_rx_ring_alloc(r_vec->rx_ring); + if (err) + goto err_free_tx; + } + + return 0; + +err_free_tx: + nfp_net_tx_ring_free(r_vec->tx_ring); +err_free_irq: + irq_set_affinity_hint(entry->vector, NULL); + free_irq(entry->vector, r_vec); +err_napi_del: + netif_napi_del(&r_vec->napi); + __nfp_net_free_rings(nn, r); + return err; +} + +/** + * nfp_net_rss_write_itbl() - Write RSS indirection table to device + * @nn: NFP Net device to reconfigure + */ +void nfp_net_rss_write_itbl(struct nfp_net *nn) +{ + int i; + + for (i = 0; i < NFP_NET_CFG_RSS_ITBL_SZ; i += 4) + nn_writel(nn, NFP_NET_CFG_RSS_ITBL + i, + get_unaligned_le32(nn->rss_itbl + i)); +} + +/** + * nfp_net_rss_write_key() - Write RSS hash key to device + * @nn: NFP Net device to reconfigure + */ +void nfp_net_rss_write_key(struct nfp_net *nn) +{ + int i; + + for (i = 0; i < NFP_NET_CFG_RSS_KEY_SZ; i += 4) + nn_writel(nn, NFP_NET_CFG_RSS_KEY + i, + get_unaligned_le32(nn->rss_key + i)); +} + +/** + * nfp_net_coalesce_write_cfg() - Write irq coalescence configuration to HW + * @nn: NFP Net device to reconfigure + */ +void nfp_net_coalesce_write_cfg(struct nfp_net *nn) +{ + u8 i; + u32 factor; + u32 value; + + /* Compute factor used to convert coalesce '_usecs' parameters to + * ME timestamp ticks. There are 16 ME clock cycles for each timestamp + * count. + */ + factor = nn->me_freq_mhz / 16; + + /* copy RX interrupt coalesce parameters */ + value = (nn->rx_coalesce_max_frames << 16) | + (factor * nn->rx_coalesce_usecs); + for (i = 0; i < nn->num_r_vecs; i++) + nn_writel(nn, NFP_NET_CFG_RXR_IRQ_MOD(i), value); + + /* copy TX interrupt coalesce parameters */ + value = (nn->tx_coalesce_max_frames << 16) | + (factor * nn->tx_coalesce_usecs); + for (i = 0; i < nn->num_r_vecs; i++) + nn_writel(nn, NFP_NET_CFG_TXR_IRQ_MOD(i), value); +} + +/** + * nfp_net_write_mac_addr() - Write mac address to device registers + * @nn: NFP Net device to reconfigure + * @mac: Six-byte MAC address to be written + * + * We do a bit of byte swapping dance because firmware is LE. + */ +static void nfp_net_write_mac_addr(struct nfp_net *nn, const u8 *mac) +{ + nn_writel(nn, NFP_NET_CFG_MACADDR + 0, + get_unaligned_be32(nn->netdev->dev_addr)); + /* We can't do writew for NFP-3200 compatibility */ + nn_writel(nn, NFP_NET_CFG_MACADDR + 4, + get_unaligned_be16(nn->netdev->dev_addr + 4) << 16); +} + +/** + * nfp_net_clear_config_and_disable() - Clear control BAR and disable NFP + * @nn: NFP Net device to reconfigure + */ +static void nfp_net_clear_config_and_disable(struct nfp_net *nn) +{ + u32 new_ctrl, update; + int err; + + new_ctrl = nn->ctrl; + new_ctrl &= ~NFP_NET_CFG_CTRL_ENABLE; + update = NFP_NET_CFG_UPDATE_GEN; + update |= NFP_NET_CFG_UPDATE_MSIX; + update |= NFP_NET_CFG_UPDATE_RING; + + if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG) + new_ctrl &= ~NFP_NET_CFG_CTRL_RINGCFG; + + nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0); + nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0); + + nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl); + err = nfp_net_reconfig(nn, update); + if (err) { + nn_err(nn, "Could not disable device: %d\n", err); + return; + } + + nn->ctrl = new_ctrl; +} + +/** + * nfp_net_start_vec() - Start ring vector + * @nn: NFP Net device structure + * @r_vec: Ring vector to be started + */ +static int nfp_net_start_vec(struct nfp_net *nn, struct nfp_net_r_vector *r_vec) +{ + unsigned int irq_vec; + int err = 0; + + irq_vec = nn->irq_entries[r_vec->irq_idx].vector; + + disable_irq(irq_vec); + + err = nfp_net_rx_fill_freelist(r_vec->rx_ring); + if (err) { + nn_err(nn, "RV%02d: couldn't allocate enough buffers\n", + r_vec->irq_idx); + goto out; + } + + napi_enable(&r_vec->napi); +out: + enable_irq(irq_vec); + + return err; +} + +static int nfp_net_netdev_open(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + int err, r; + u32 update = 0; + u32 new_ctrl; + + if (nn->ctrl & NFP_NET_CFG_CTRL_ENABLE) { + nn_err(nn, "Dev is already enabled: 0x%08x\n", nn->ctrl); + return -EBUSY; + } + + new_ctrl = nn->ctrl; + + /* Step 1: Allocate resources for rings and the like + * - Request interrupts + * - Allocate RX and TX ring resources + * - Setup initial RSS table + */ + err = nfp_net_aux_irq_request(nn, NFP_NET_CFG_EXN, "%s-exn", + nn->exn_name, sizeof(nn->exn_name), + NFP_NET_IRQ_EXN_IDX, nn->exn_handler); + if (err) + return err; + + err = nfp_net_alloc_rings(nn); + if (err) + goto err_free_exn; + + err = netif_set_real_num_tx_queues(netdev, nn->num_tx_rings); + if (err) + goto err_free_rings; + + err = netif_set_real_num_rx_queues(netdev, nn->num_rx_rings); + if (err) + goto err_free_rings; + + if (nn->cap & NFP_NET_CFG_CTRL_RSS) { + nfp_net_rss_write_key(nn); + nfp_net_rss_write_itbl(nn); + nn_writel(nn, NFP_NET_CFG_RSS_CTRL, nn->rss_cfg); + update |= NFP_NET_CFG_UPDATE_RSS; + } + + if (nn->cap & NFP_NET_CFG_CTRL_IRQMOD) { + nfp_net_coalesce_write_cfg(nn); + + new_ctrl |= NFP_NET_CFG_CTRL_IRQMOD; + update |= NFP_NET_CFG_UPDATE_IRQMOD; + } + + /* Step 2: Configure the NFP + * - Enable rings from 0 to tx_rings/rx_rings - 1. + * - Write MAC address (in case it changed) + * - Set the MTU + * - Set the Freelist buffer size + * - Enable the FW + */ + nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, nn->num_tx_rings == 64 ? + 0xffffffffffffffffULL : ((u64)1 << nn->num_tx_rings) - 1); + + nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, nn->num_rx_rings == 64 ? + 0xffffffffffffffffULL : ((u64)1 << nn->num_rx_rings) - 1); + + nfp_net_write_mac_addr(nn, netdev->dev_addr); + + nn_writel(nn, NFP_NET_CFG_MTU, netdev->mtu); + nn_writel(nn, NFP_NET_CFG_FLBUFSZ, nn->fl_bufsz); + + /* Enable device */ + new_ctrl |= NFP_NET_CFG_CTRL_ENABLE; + update |= NFP_NET_CFG_UPDATE_GEN; + update |= NFP_NET_CFG_UPDATE_MSIX; + update |= NFP_NET_CFG_UPDATE_RING; + if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG) + new_ctrl |= NFP_NET_CFG_CTRL_RINGCFG; + + nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl); + err = nfp_net_reconfig(nn, update); + if (err) + goto err_clear_config; + + nn->ctrl = new_ctrl; + + /* Since reconfiguration requests while NFP is down are ignored we + * have to wipe the entire VXLAN configuration and reinitialize it. + */ + if (nn->ctrl & NFP_NET_CFG_CTRL_VXLAN) { + memset(&nn->vxlan_ports, 0, sizeof(nn->vxlan_ports)); + memset(&nn->vxlan_usecnt, 0, sizeof(nn->vxlan_usecnt)); + vxlan_get_rx_port(netdev); + } + + /* Step 3: Enable for kernel + * - put some freelist descriptors on each RX ring + * - enable NAPI on each ring + * - enable all TX queues + * - set link state + */ + for (r = 0; r < nn->num_r_vecs; r++) { + err = nfp_net_start_vec(nn, &nn->r_vecs[r]); + if (err) + goto err_disable_napi; + } + + netif_tx_wake_all_queues(netdev); + + err = nfp_net_aux_irq_request(nn, NFP_NET_CFG_LSC, "%s-lsc", + nn->lsc_name, sizeof(nn->lsc_name), + NFP_NET_IRQ_LSC_IDX, nn->lsc_handler); + if (err) + goto err_stop_tx; + nfp_net_read_link_status(nn); + + return 0; + +err_stop_tx: + netif_tx_disable(netdev); + for (r = 0; r < nn->num_r_vecs; r++) + nfp_net_tx_flush(nn->r_vecs[r].tx_ring); +err_disable_napi: + while (r--) { + napi_disable(&nn->r_vecs[r].napi); + nfp_net_rx_flush(nn->r_vecs[r].rx_ring); + } +err_clear_config: + nfp_net_clear_config_and_disable(nn); +err_free_rings: + nfp_net_free_rings(nn); +err_free_exn: + nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX); + return err; +} + +/** + * nfp_net_netdev_close() - Called when the device is downed + * @netdev: netdev structure + */ +static int nfp_net_netdev_close(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + int r; + + if (!(nn->ctrl & NFP_NET_CFG_CTRL_ENABLE)) { + nn_err(nn, "Dev is not up: 0x%08x\n", nn->ctrl); + return 0; + } + + /* Step 1: Disable RX and TX rings from the Linux kernel perspective + */ + nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX); + netif_carrier_off(netdev); + nn->link_up = false; + + for (r = 0; r < nn->num_r_vecs; r++) + napi_disable(&nn->r_vecs[r].napi); + + netif_tx_disable(netdev); + + /* Step 2: Tell NFP + */ + nfp_net_clear_config_and_disable(nn); + + /* Step 3: Free resources + */ + for (r = 0; r < nn->num_r_vecs; r++) { + nfp_net_rx_flush(nn->r_vecs[r].rx_ring); + nfp_net_tx_flush(nn->r_vecs[r].tx_ring); + } + + nfp_net_free_rings(nn); + nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX); + + nn_dbg(nn, "%s down", netdev->name); + return 0; +} + +static void nfp_net_set_rx_mode(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + u32 new_ctrl; + + new_ctrl = nn->ctrl; + + if (netdev->flags & IFF_PROMISC) { + if (nn->cap & NFP_NET_CFG_CTRL_PROMISC) + new_ctrl |= NFP_NET_CFG_CTRL_PROMISC; + else + nn_warn(nn, "FW does not support promiscuous mode\n"); + } else { + new_ctrl &= ~NFP_NET_CFG_CTRL_PROMISC; + } + + if (new_ctrl == nn->ctrl) + return; + + nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl); + if (nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN)) + return; + + nn->ctrl = new_ctrl; +} + +static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct nfp_net *nn = netdev_priv(netdev); + u32 tmp; + + nn_dbg(nn, "New MTU = %d\n", new_mtu); + + if (new_mtu < 68 || new_mtu > nn->max_mtu) { + nn_err(nn, "New MTU (%d) is not valid\n", new_mtu); + return -EINVAL; + } + + netdev->mtu = new_mtu; + + /* Freelist buffer size rounded up to the nearest 1K */ + tmp = new_mtu + ETH_HLEN + VLAN_HLEN + NFP_NET_MAX_PREPEND; + nn->fl_bufsz = roundup(tmp, 1024); + + /* restart if running */ + if (netif_running(netdev)) { + nfp_net_netdev_close(netdev); + nfp_net_netdev_open(netdev); + } + + return 0; +} + +static struct rtnl_link_stats64 *nfp_net_stat64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) +{ + struct nfp_net *nn = netdev_priv(netdev); + int r; + + for (r = 0; r < nn->num_r_vecs; r++) { + struct nfp_net_r_vector *r_vec = &nn->r_vecs[r]; + u64 data[3]; + unsigned int start; + + do { + start = u64_stats_fetch_begin(&r_vec->rx_sync); + data[0] = r_vec->rx_pkts; + data[1] = r_vec->rx_bytes; + data[2] = r_vec->rx_drops; + } while (u64_stats_fetch_retry(&r_vec->rx_sync, start)); + stats->rx_packets += data[0]; + stats->rx_bytes += data[1]; + stats->rx_dropped += data[2]; + + do { + start = u64_stats_fetch_begin(&r_vec->tx_sync); + data[0] = r_vec->tx_pkts; + data[1] = r_vec->tx_bytes; + data[2] = r_vec->tx_errors; + } while (u64_stats_fetch_retry(&r_vec->tx_sync, start)); + stats->tx_packets += data[0]; + stats->tx_bytes += data[1]; + stats->tx_errors += data[2]; + } + + return stats; +} + +static int nfp_net_set_features(struct net_device *netdev, + netdev_features_t features) +{ + netdev_features_t changed = netdev->features ^ features; + struct nfp_net *nn = netdev_priv(netdev); + u32 new_ctrl; + int err; + + /* Assume this is not called with features we have not advertised */ + + new_ctrl = nn->ctrl; + + if (changed & NETIF_F_RXCSUM) { + if (features & NETIF_F_RXCSUM) + new_ctrl |= NFP_NET_CFG_CTRL_RXCSUM; + else + new_ctrl &= ~NFP_NET_CFG_CTRL_RXCSUM; + } + + if (changed & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) { + if (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) + new_ctrl |= NFP_NET_CFG_CTRL_TXCSUM; + else + new_ctrl &= ~NFP_NET_CFG_CTRL_TXCSUM; + } + + if (changed & (NETIF_F_TSO | NETIF_F_TSO6)) { + if (features & (NETIF_F_TSO | NETIF_F_TSO6)) + new_ctrl |= NFP_NET_CFG_CTRL_LSO; + else + new_ctrl &= ~NFP_NET_CFG_CTRL_LSO; + } + + if (changed & NETIF_F_HW_VLAN_CTAG_RX) { + if (features & NETIF_F_HW_VLAN_CTAG_RX) + new_ctrl |= NFP_NET_CFG_CTRL_RXVLAN; + else + new_ctrl &= ~NFP_NET_CFG_CTRL_RXVLAN; + } + + if (changed & NETIF_F_HW_VLAN_CTAG_TX) { + if (features & NETIF_F_HW_VLAN_CTAG_TX) + new_ctrl |= NFP_NET_CFG_CTRL_TXVLAN; + else + new_ctrl &= ~NFP_NET_CFG_CTRL_TXVLAN; + } + + if (changed & NETIF_F_SG) { + if (features & NETIF_F_SG) + new_ctrl |= NFP_NET_CFG_CTRL_GATHER; + else + new_ctrl &= ~NFP_NET_CFG_CTRL_GATHER; + } + + nn_dbg(nn, "Feature change 0x%llx -> 0x%llx (changed=0x%llx)\n", + netdev->features, features, changed); + + if (new_ctrl == nn->ctrl) + return 0; + + nn_dbg(nn, "NIC ctrl: 0x%x -> 0x%x\n", nn->ctrl, new_ctrl); + nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl); + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); + if (err) + return err; + + nn->ctrl = new_ctrl; + + return 0; +} + +static netdev_features_t +nfp_net_features_check(struct sk_buff *skb, struct net_device *dev, + netdev_features_t features) +{ + u8 l4_hdr; + + /* We can't do TSO over double tagged packets (802.1AD) */ + features &= vlan_features_check(skb, features); + + if (!skb->encapsulation) + return features; + + /* Ensure that inner L4 header offset fits into TX descriptor field */ + if (skb_is_gso(skb)) { + u32 hdrlen; + + hdrlen = skb_inner_transport_header(skb) - skb->data + + inner_tcp_hdrlen(skb); + + if (unlikely(hdrlen > NFP_NET_LSO_MAX_HDR_SZ)) + features &= ~NETIF_F_GSO_MASK; + } + + /* VXLAN/GRE check */ + switch (vlan_get_protocol(skb)) { + case htons(ETH_P_IP): + l4_hdr = ip_hdr(skb)->protocol; + break; + case htons(ETH_P_IPV6): + l4_hdr = ipv6_hdr(skb)->nexthdr; + break; + default: + return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + } + + if (skb->inner_protocol_type != ENCAP_TYPE_ETHER || + skb->inner_protocol != htons(ETH_P_TEB) || + (l4_hdr != IPPROTO_UDP && l4_hdr != IPPROTO_GRE) || + (l4_hdr == IPPROTO_UDP && + (skb_inner_mac_header(skb) - skb_transport_header(skb) != + sizeof(struct udphdr) + sizeof(struct vxlanhdr)))) + return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + + return features; +} + +/** + * nfp_net_set_vxlan_port() - set vxlan port in SW and reconfigure HW + * @nn: NFP Net device to reconfigure + * @idx: Index into the port table where new port should be written + * @port: UDP port to configure (pass zero to remove VXLAN port) + */ +static void nfp_net_set_vxlan_port(struct nfp_net *nn, int idx, __be16 port) +{ + int i; + + nn->vxlan_ports[idx] = port; + + if (!(nn->ctrl & NFP_NET_CFG_CTRL_VXLAN)) + return; + + BUILD_BUG_ON(NFP_NET_N_VXLAN_PORTS & 1); + for (i = 0; i < NFP_NET_N_VXLAN_PORTS; i += 2) + nn_writel(nn, NFP_NET_CFG_VXLAN_PORT + i * sizeof(port), + be16_to_cpu(nn->vxlan_ports[i + 1]) << 16 | + be16_to_cpu(nn->vxlan_ports[i])); + + nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_VXLAN); +} + +/** + * nfp_net_find_vxlan_idx() - find table entry of the port or a free one + * @nn: NFP Network structure + * @port: UDP port to look for + * + * Return: if the port is already in the table -- it's position; + * if the port is not in the table -- free position to use; + * if the table is full -- -ENOSPC. + */ +static int nfp_net_find_vxlan_idx(struct nfp_net *nn, __be16 port) +{ + int i, free_idx = -ENOSPC; + + for (i = 0; i < NFP_NET_N_VXLAN_PORTS; i++) { + if (nn->vxlan_ports[i] == port) + return i; + if (!nn->vxlan_usecnt[i]) + free_idx = i; + } + + return free_idx; +} + +static void nfp_net_add_vxlan_port(struct net_device *netdev, + sa_family_t sa_family, __be16 port) +{ + struct nfp_net *nn = netdev_priv(netdev); + int idx; + + idx = nfp_net_find_vxlan_idx(nn, port); + if (idx == -ENOSPC) + return; + + if (!nn->vxlan_usecnt[idx]++) + nfp_net_set_vxlan_port(nn, idx, port); +} + +static void nfp_net_del_vxlan_port(struct net_device *netdev, + sa_family_t sa_family, __be16 port) +{ + struct nfp_net *nn = netdev_priv(netdev); + int idx; + + idx = nfp_net_find_vxlan_idx(nn, port); + if (!nn->vxlan_usecnt[idx] || idx == -ENOSPC) + return; + + if (!--nn->vxlan_usecnt[idx]) + nfp_net_set_vxlan_port(nn, idx, 0); +} + +static const struct net_device_ops nfp_net_netdev_ops = { + .ndo_open = nfp_net_netdev_open, + .ndo_stop = nfp_net_netdev_close, + .ndo_start_xmit = nfp_net_tx, + .ndo_get_stats64 = nfp_net_stat64, + .ndo_tx_timeout = nfp_net_tx_timeout, + .ndo_set_rx_mode = nfp_net_set_rx_mode, + .ndo_change_mtu = nfp_net_change_mtu, + .ndo_set_mac_address = eth_mac_addr, + .ndo_set_features = nfp_net_set_features, + .ndo_features_check = nfp_net_features_check, + .ndo_add_vxlan_port = nfp_net_add_vxlan_port, + .ndo_del_vxlan_port = nfp_net_del_vxlan_port, +}; + +/** + * nfp_net_info() - Print general info about the NIC + * @nn: NFP Net device to reconfigure + */ +void nfp_net_info(struct nfp_net *nn) +{ + nn_info(nn, "Netronome %s %sNetdev: TxQs=%d/%d RxQs=%d/%d\n", + nn->is_nfp3200 ? "NFP-32xx" : "NFP-6xxx", + nn->is_vf ? "VF " : "", + nn->num_tx_rings, nn->max_tx_rings, + nn->num_rx_rings, nn->max_rx_rings); + nn_info(nn, "VER: %d.%d.%d.%d, Maximum supported MTU: %d\n", + nn->fw_ver.resv, nn->fw_ver.class, + nn->fw_ver.major, nn->fw_ver.minor, + nn->max_mtu); + nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + nn->cap, + nn->cap & NFP_NET_CFG_CTRL_PROMISC ? "PROMISC " : "", + nn->cap & NFP_NET_CFG_CTRL_L2BC ? "L2BCFILT " : "", + nn->cap & NFP_NET_CFG_CTRL_L2MC ? "L2MCFILT " : "", + nn->cap & NFP_NET_CFG_CTRL_RXCSUM ? "RXCSUM " : "", + nn->cap & NFP_NET_CFG_CTRL_TXCSUM ? "TXCSUM " : "", + nn->cap & NFP_NET_CFG_CTRL_RXVLAN ? "RXVLAN " : "", + nn->cap & NFP_NET_CFG_CTRL_TXVLAN ? "TXVLAN " : "", + nn->cap & NFP_NET_CFG_CTRL_SCATTER ? "SCATTER " : "", + nn->cap & NFP_NET_CFG_CTRL_GATHER ? "GATHER " : "", + nn->cap & NFP_NET_CFG_CTRL_LSO ? "TSO " : "", + nn->cap & NFP_NET_CFG_CTRL_RSS ? "RSS " : "", + nn->cap & NFP_NET_CFG_CTRL_L2SWITCH ? "L2SWITCH " : "", + nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "", + nn->cap & NFP_NET_CFG_CTRL_IRQMOD ? "IRQMOD " : "", + nn->cap & NFP_NET_CFG_CTRL_VXLAN ? "VXLAN " : "", + nn->cap & NFP_NET_CFG_CTRL_NVGRE ? "NVGRE " : ""); +} + +/** + * nfp_net_netdev_alloc() - Allocate netdev and related structure + * @pdev: PCI device + * @max_tx_rings: Maximum number of TX rings supported by device + * @max_rx_rings: Maximum number of RX rings supported by device + * + * This function allocates a netdev device and fills in the initial + * part of the @struct nfp_net structure. + * + * Return: NFP Net device structure, or ERR_PTR on error. + */ +struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev, + int max_tx_rings, int max_rx_rings) +{ + struct net_device *netdev; + struct nfp_net *nn; + int nqs; + + netdev = alloc_etherdev_mqs(sizeof(struct nfp_net), + max_tx_rings, max_rx_rings); + if (!netdev) + return ERR_PTR(-ENOMEM); + + SET_NETDEV_DEV(netdev, &pdev->dev); + nn = netdev_priv(netdev); + + nn->netdev = netdev; + nn->pdev = pdev; + + nn->max_tx_rings = max_tx_rings; + nn->max_rx_rings = max_rx_rings; + + nqs = netif_get_num_default_rss_queues(); + nn->num_tx_rings = min_t(int, nqs, max_tx_rings); + nn->num_rx_rings = min_t(int, nqs, max_rx_rings); + + nn->txd_cnt = NFP_NET_TX_DESCS_DEFAULT; + nn->rxd_cnt = NFP_NET_RX_DESCS_DEFAULT; + + spin_lock_init(&nn->reconfig_lock); + spin_lock_init(&nn->link_status_lock); + + return nn; +} + +/** + * nfp_net_netdev_free() - Undo what @nfp_net_netdev_alloc() did + * @nn: NFP Net device to reconfigure + */ +void nfp_net_netdev_free(struct nfp_net *nn) +{ + free_netdev(nn->netdev); +} + +/** + * nfp_net_rss_init() - Set the initial RSS parameters + * @nn: NFP Net device to reconfigure + */ +static void nfp_net_rss_init(struct nfp_net *nn) +{ + int i; + + netdev_rss_key_fill(nn->rss_key, NFP_NET_CFG_RSS_KEY_SZ); + + for (i = 0; i < sizeof(nn->rss_itbl); i++) + nn->rss_itbl[i] = + ethtool_rxfh_indir_default(i, nn->num_rx_rings); + + /* Enable IPv4/IPv6 TCP by default */ + nn->rss_cfg = NFP_NET_CFG_RSS_IPV4_TCP | + NFP_NET_CFG_RSS_IPV6_TCP | + NFP_NET_CFG_RSS_TOEPLITZ | + NFP_NET_CFG_RSS_MASK; +} + +/** + * nfp_net_irqmod_init() - Set the initial IRQ moderation parameters + * @nn: NFP Net device to reconfigure + */ +static void nfp_net_irqmod_init(struct nfp_net *nn) +{ + nn->rx_coalesce_usecs = 50; + nn->rx_coalesce_max_frames = 64; + nn->tx_coalesce_usecs = 50; + nn->tx_coalesce_max_frames = 64; +} + +/** + * nfp_net_netdev_init() - Initialise/finalise the netdev structure + * @netdev: netdev structure + * + * Return: 0 on success or negative errno on error. + */ +int nfp_net_netdev_init(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + int err; + + /* Get some of the read-only fields from the BAR */ + nn->cap = nn_readl(nn, NFP_NET_CFG_CAP); + nn->max_mtu = nn_readl(nn, NFP_NET_CFG_MAX_MTU); + + nfp_net_write_mac_addr(nn, nn->netdev->dev_addr); + + /* Set default MTU and Freelist buffer size */ + if (nn->max_mtu < NFP_NET_DEFAULT_MTU) + netdev->mtu = nn->max_mtu; + else + netdev->mtu = NFP_NET_DEFAULT_MTU; + nn->fl_bufsz = NFP_NET_DEFAULT_RX_BUFSZ; + + /* Advertise/enable offloads based on capabilities + * + * Note: netdev->features show the currently enabled features + * and netdev->hw_features advertises which features are + * supported. By default we enable most features. + */ + netdev->hw_features = NETIF_F_HIGHDMA; + if (nn->cap & NFP_NET_CFG_CTRL_RXCSUM) { + netdev->hw_features |= NETIF_F_RXCSUM; + nn->ctrl |= NFP_NET_CFG_CTRL_RXCSUM; + } + if (nn->cap & NFP_NET_CFG_CTRL_TXCSUM) { + netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + nn->ctrl |= NFP_NET_CFG_CTRL_TXCSUM; + } + if (nn->cap & NFP_NET_CFG_CTRL_GATHER) { + netdev->hw_features |= NETIF_F_SG; + nn->ctrl |= NFP_NET_CFG_CTRL_GATHER; + } + if ((nn->cap & NFP_NET_CFG_CTRL_LSO) && nn->fw_ver.major > 2) { + netdev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6; + nn->ctrl |= NFP_NET_CFG_CTRL_LSO; + } + if (nn->cap & NFP_NET_CFG_CTRL_RSS) { + netdev->hw_features |= NETIF_F_RXHASH; + nfp_net_rss_init(nn); + nn->ctrl |= NFP_NET_CFG_CTRL_RSS; + } + if (nn->cap & NFP_NET_CFG_CTRL_VXLAN && + nn->cap & NFP_NET_CFG_CTRL_NVGRE) { + if (nn->cap & NFP_NET_CFG_CTRL_LSO) + netdev->hw_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_UDP_TUNNEL; + nn->ctrl |= NFP_NET_CFG_CTRL_VXLAN | NFP_NET_CFG_CTRL_NVGRE; + + netdev->hw_enc_features = netdev->hw_features; + } + + netdev->vlan_features = netdev->hw_features; + + if (nn->cap & NFP_NET_CFG_CTRL_RXVLAN) { + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; + nn->ctrl |= NFP_NET_CFG_CTRL_RXVLAN; + } + if (nn->cap & NFP_NET_CFG_CTRL_TXVLAN) { + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; + nn->ctrl |= NFP_NET_CFG_CTRL_TXVLAN; + } + + netdev->features = netdev->hw_features; + + /* Advertise but disable TSO by default. */ + netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6); + + /* Allow L2 Broadcast and Multicast through by default, if supported */ + if (nn->cap & NFP_NET_CFG_CTRL_L2BC) + nn->ctrl |= NFP_NET_CFG_CTRL_L2BC; + if (nn->cap & NFP_NET_CFG_CTRL_L2MC) + nn->ctrl |= NFP_NET_CFG_CTRL_L2MC; + + /* Allow IRQ moderation, if supported */ + if (nn->cap & NFP_NET_CFG_CTRL_IRQMOD) { + nfp_net_irqmod_init(nn); + nn->ctrl |= NFP_NET_CFG_CTRL_IRQMOD; + } + + /* On NFP-3200 enable MSI-X auto-masking, if supported and the + * interrupts are not shared. + */ + if (nn->is_nfp3200 && nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO) + nn->ctrl |= NFP_NET_CFG_CTRL_MSIXAUTO; + + /* On NFP4000/NFP6000, determine RX packet/metadata boundary offset */ + if (nn->fw_ver.major >= 2) + nn->rx_offset = nn_readl(nn, NFP_NET_CFG_RX_OFFSET); + else + nn->rx_offset = NFP_NET_RX_OFFSET; + + /* Stash the re-configuration queue away. First odd queue in TX Bar */ + nn->qcp_cfg = nn->tx_bar + NFP_QCP_QUEUE_ADDR_SZ; + + /* Make sure the FW knows the netdev is supposed to be disabled here */ + nn_writel(nn, NFP_NET_CFG_CTRL, 0); + nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0); + nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0); + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_RING | + NFP_NET_CFG_UPDATE_GEN); + if (err) + return err; + + /* Finalise the netdev setup */ + ether_setup(netdev); + netdev->netdev_ops = &nfp_net_netdev_ops; + netdev->watchdog_timeo = msecs_to_jiffies(5 * 1000); + + nfp_net_set_ethtool_ops(netdev); + nfp_net_irqs_assign(netdev); + + return register_netdev(netdev); +} + +/** + * nfp_net_netdev_clean() - Undo what nfp_net_netdev_init() did. + * @netdev: netdev structure + */ +void nfp_net_netdev_clean(struct net_device *netdev) +{ + unregister_netdev(netdev); +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h new file mode 100644 index 00000000000000..8692003aeed886 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -0,0 +1,323 @@ +/* + * Copyright (C) 2015 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * nfp_net_ctrl.h + * Netronome network device driver: Control BAR layout + * Authors: Jakub Kicinski <jakub.kicinski@netronome.com> + * Jason McMullan <jason.mcmullan@netronome.com> + * Rolf Neugebauer <rolf.neugebauer@netronome.com> + * Brad Petrus <brad.petrus@netronome.com> + */ + +#ifndef _NFP_NET_CTRL_H_ +#define _NFP_NET_CTRL_H_ + +/* IMPORTANT: This header file is shared with the FW, + * no OS specific constructs, please! + */ + +/** + * Configuration BAR size. + * + * The configuration BAR is 8K in size, but on the NFP6000, due to + * THB-350, 32k needs to be reserved. + */ +#define NFP_NET_CFG_BAR_SZ (32 * 1024) + +/** + * Offset in Freelist buffer where packet starts on RX + */ +#define NFP_NET_RX_OFFSET 32 + +/** + * Maximum header size supported for LSO frames + */ +#define NFP_NET_LSO_MAX_HDR_SZ 255 + +/** + * Hash type pre-pended when a RSS hash was computed + */ +#define NFP_NET_RSS_NONE 0 +#define NFP_NET_RSS_IPV4 1 +#define NFP_NET_RSS_IPV6 2 +#define NFP_NET_RSS_IPV6_EX 3 +#define NFP_NET_RSS_IPV4_TCP 4 +#define NFP_NET_RSS_IPV6_TCP 5 +#define NFP_NET_RSS_IPV6_EX_TCP 6 +#define NFP_NET_RSS_IPV4_UDP 7 +#define NFP_NET_RSS_IPV6_UDP 8 +#define NFP_NET_RSS_IPV6_EX_UDP 9 + +/** + * @NFP_NET_TXR_MAX: Maximum number of TX rings + * @NFP_NET_TXR_MASK: Mask for TX rings + * @NFP_NET_RXR_MAX: Maximum number of RX rings + * @NFP_NET_RXR_MASK: Mask for RX rings + */ +#define NFP_NET_TXR_MAX 64 +#define NFP_NET_TXR_MASK (NFP_NET_TXR_MAX - 1) +#define NFP_NET_RXR_MAX 64 +#define NFP_NET_RXR_MASK (NFP_NET_RXR_MAX - 1) + +/** + * Read/Write config words (0x0000 - 0x002c) + * @NFP_NET_CFG_CTRL: Global control + * @NFP_NET_CFG_UPDATE: Indicate which fields are updated + * @NFP_NET_CFG_TXRS_ENABLE: Bitmask of enabled TX rings + * @NFP_NET_CFG_RXRS_ENABLE: Bitmask of enabled RX rings + * @NFP_NET_CFG_MTU: Set MTU size + * @NFP_NET_CFG_FLBUFSZ: Set freelist buffer size (must be larger than MTU) + * @NFP_NET_CFG_EXN: MSI-X table entry for exceptions + * @NFP_NET_CFG_LSC: MSI-X table entry for link state changes + * @NFP_NET_CFG_MACADDR: MAC address + * + * TODO: + * - define Error details in UPDATE + */ +#define NFP_NET_CFG_CTRL 0x0000 +#define NFP_NET_CFG_CTRL_ENABLE (0x1 << 0) /* Global enable */ +#define NFP_NET_CFG_CTRL_PROMISC (0x1 << 1) /* Enable Promisc mode */ +#define NFP_NET_CFG_CTRL_L2BC (0x1 << 2) /* Allow L2 Broadcast */ +#define NFP_NET_CFG_CTRL_L2MC (0x1 << 3) /* Allow L2 Multicast */ +#define NFP_NET_CFG_CTRL_RXCSUM (0x1 << 4) /* Enable RX Checksum */ +#define NFP_NET_CFG_CTRL_TXCSUM (0x1 << 5) /* Enable TX Checksum */ +#define NFP_NET_CFG_CTRL_RXVLAN (0x1 << 6) /* Enable VLAN strip */ +#define NFP_NET_CFG_CTRL_TXVLAN (0x1 << 7) /* Enable VLAN insert */ +#define NFP_NET_CFG_CTRL_SCATTER (0x1 << 8) /* Scatter DMA */ +#define NFP_NET_CFG_CTRL_GATHER (0x1 << 9) /* Gather DMA */ +#define NFP_NET_CFG_CTRL_LSO (0x1 << 10) /* LSO/TSO */ +#define NFP_NET_CFG_CTRL_RINGCFG (0x1 << 16) /* Ring runtime changes */ +#define NFP_NET_CFG_CTRL_RSS (0x1 << 17) /* RSS */ +#define NFP_NET_CFG_CTRL_IRQMOD (0x1 << 18) /* Interrupt moderation */ +#define NFP_NET_CFG_CTRL_RINGPRIO (0x1 << 19) /* Ring priorities */ +#define NFP_NET_CFG_CTRL_MSIXAUTO (0x1 << 20) /* MSI-X auto-masking */ +#define NFP_NET_CFG_CTRL_TXRWB (0x1 << 21) /* Write-back of TX ring*/ +#define NFP_NET_CFG_CTRL_L2SWITCH (0x1 << 22) /* L2 Switch */ +#define NFP_NET_CFG_CTRL_L2SWITCH_LOCAL (0x1 << 23) /* Switch to local */ +#define NFP_NET_CFG_CTRL_VXLAN (0x1 << 24) /* VXLAN tunnel support */ +#define NFP_NET_CFG_CTRL_NVGRE (0x1 << 25) /* NVGRE tunnel support */ +#define NFP_NET_CFG_UPDATE 0x0004 +#define NFP_NET_CFG_UPDATE_GEN (0x1 << 0) /* General update */ +#define NFP_NET_CFG_UPDATE_RING (0x1 << 1) /* Ring config change */ +#define NFP_NET_CFG_UPDATE_RSS (0x1 << 2) /* RSS config change */ +#define NFP_NET_CFG_UPDATE_TXRPRIO (0x1 << 3) /* TX Ring prio change */ +#define NFP_NET_CFG_UPDATE_RXRPRIO (0x1 << 4) /* RX Ring prio change */ +#define NFP_NET_CFG_UPDATE_MSIX (0x1 << 5) /* MSI-X change */ +#define NFP_NET_CFG_UPDATE_L2SWITCH (0x1 << 6) /* Switch changes */ +#define NFP_NET_CFG_UPDATE_RESET (0x1 << 7) /* Update due to FLR */ +#define NFP_NET_CFG_UPDATE_IRQMOD (0x1 << 8) /* IRQ mod change */ +#define NFP_NET_CFG_UPDATE_VXLAN (0x1 << 9) /* VXLAN port change */ +#define NFP_NET_CFG_UPDATE_ERR (0x1 << 31) /* A error occurred */ +#define NFP_NET_CFG_TXRS_ENABLE 0x0008 +#define NFP_NET_CFG_RXRS_ENABLE 0x0010 +#define NFP_NET_CFG_MTU 0x0018 +#define NFP_NET_CFG_FLBUFSZ 0x001c +#define NFP_NET_CFG_EXN 0x001f +#define NFP_NET_CFG_LSC 0x0020 +#define NFP_NET_CFG_MACADDR 0x0024 + +/** + * Read-only words (0x0030 - 0x0050): + * @NFP_NET_CFG_VERSION: Firmware version number + * @NFP_NET_CFG_STS: Status + * @NFP_NET_CFG_CAP: Capabilities (same bits as @NFP_NET_CFG_CTRL) + * @NFP_NET_MAX_TXRINGS: Maximum number of TX rings + * @NFP_NET_MAX_RXRINGS: Maximum number of RX rings + * @NFP_NET_MAX_MTU: Maximum support MTU + * @NFP_NET_CFG_START_TXQ: Start Queue Control Queue to use for TX (PF only) + * @NFP_NET_CFG_START_RXQ: Start Queue Control Queue to use for RX (PF only) + * + * TODO: + * - define more STS bits + */ +#define NFP_NET_CFG_VERSION 0x0030 +#define NFP_NET_CFG_VERSION_RESERVED_MASK (0xff << 24) +#define NFP_NET_CFG_VERSION_CLASS_MASK (0xff << 16) +#define NFP_NET_CFG_VERSION_CLASS(x) (((x) & 0xff) << 16) +#define NFP_NET_CFG_VERSION_CLASS_GENERIC 0 +#define NFP_NET_CFG_VERSION_MAJOR_MASK (0xff << 8) +#define NFP_NET_CFG_VERSION_MAJOR(x) (((x) & 0xff) << 8) +#define NFP_NET_CFG_VERSION_MINOR_MASK (0xff << 0) +#define NFP_NET_CFG_VERSION_MINOR(x) (((x) & 0xff) << 0) +#define NFP_NET_CFG_STS 0x0034 +#define NFP_NET_CFG_STS_LINK (0x1 << 0) /* Link up or down */ +#define NFP_NET_CFG_CAP 0x0038 +#define NFP_NET_CFG_MAX_TXRINGS 0x003c +#define NFP_NET_CFG_MAX_RXRINGS 0x0040 +#define NFP_NET_CFG_MAX_MTU 0x0044 +/* Next two words are being used by VFs for solving THB350 issue */ +#define NFP_NET_CFG_START_TXQ 0x0048 +#define NFP_NET_CFG_START_RXQ 0x004c + +/** + * NFP-3200 workaround (0x0050 - 0x0058) + * @NFP_NET_CFG_SPARE_ADDR: DMA address for ME code to use (e.g. YDS-155 fix) + */ +#define NFP_NET_CFG_SPARE_ADDR 0x0050 +/** + * NFP6000/NFP4000 - Prepend configuration + */ +#define NFP_NET_CFG_RX_OFFSET 0x0050 +#define NFP_NET_CFG_RX_OFFSET_DYNAMIC 0 /* Prepend mode */ + +/** + * NFP6000/NFP4000 - VXLAN/UDP encap configuration + * @NFP_NET_CFG_VXLAN_PORT: Base address of table of tunnels' UDP dst ports + * @NFP_NET_CFG_VXLAN_SZ: Size of the UDP port table in bytes + */ +#define NFP_NET_CFG_VXLAN_PORT 0x0060 +#define NFP_NET_CFG_VXLAN_SZ 0x0008 + +/** + * 64B reserved for future use (0x0080 - 0x00c0) + */ +#define NFP_NET_CFG_RESERVED 0x0080 +#define NFP_NET_CFG_RESERVED_SZ 0x0040 + +/** + * RSS configuration (0x0100 - 0x01ac): + * Used only when NFP_NET_CFG_CTRL_RSS is enabled + * @NFP_NET_CFG_RSS_CFG: RSS configuration word + * @NFP_NET_CFG_RSS_KEY: RSS "secret" key + * @NFP_NET_CFG_RSS_ITBL: RSS indirection table + */ +#define NFP_NET_CFG_RSS_BASE 0x0100 +#define NFP_NET_CFG_RSS_CTRL NFP_NET_CFG_RSS_BASE +#define NFP_NET_CFG_RSS_MASK (0x7f) +#define NFP_NET_CFG_RSS_MASK_of(_x) ((_x) & 0x7f) +#define NFP_NET_CFG_RSS_IPV4 (1 << 8) /* RSS for IPv4 */ +#define NFP_NET_CFG_RSS_IPV6 (1 << 9) /* RSS for IPv6 */ +#define NFP_NET_CFG_RSS_IPV4_TCP (1 << 10) /* RSS for IPv4/TCP */ +#define NFP_NET_CFG_RSS_IPV4_UDP (1 << 11) /* RSS for IPv4/UDP */ +#define NFP_NET_CFG_RSS_IPV6_TCP (1 << 12) /* RSS for IPv6/TCP */ +#define NFP_NET_CFG_RSS_IPV6_UDP (1 << 13) /* RSS for IPv6/UDP */ +#define NFP_NET_CFG_RSS_TOEPLITZ (1 << 24) /* Use Toeplitz hash */ +#define NFP_NET_CFG_RSS_KEY (NFP_NET_CFG_RSS_BASE + 0x4) +#define NFP_NET_CFG_RSS_KEY_SZ 0x28 +#define NFP_NET_CFG_RSS_ITBL (NFP_NET_CFG_RSS_BASE + 0x4 + \ + NFP_NET_CFG_RSS_KEY_SZ) +#define NFP_NET_CFG_RSS_ITBL_SZ 0x80 + +/** + * TX ring configuration (0x200 - 0x800) + * @NFP_NET_CFG_TXR_BASE: Base offset for TX ring configuration + * @NFP_NET_CFG_TXR_ADDR: Per TX ring DMA address (8B entries) + * @NFP_NET_CFG_TXR_WB_ADDR: Per TX ring write back DMA address (8B entries) + * @NFP_NET_CFG_TXR_SZ: Per TX ring ring size (1B entries) + * @NFP_NET_CFG_TXR_VEC: Per TX ring MSI-X table entry (1B entries) + * @NFP_NET_CFG_TXR_PRIO: Per TX ring priority (1B entries) + * @NFP_NET_CFG_TXR_IRQ_MOD: Per TX ring interrupt moderation packet + */ +#define NFP_NET_CFG_TXR_BASE 0x0200 +#define NFP_NET_CFG_TXR_ADDR(_x) (NFP_NET_CFG_TXR_BASE + ((_x) * 0x8)) +#define NFP_NET_CFG_TXR_WB_ADDR(_x) (NFP_NET_CFG_TXR_BASE + 0x200 + \ + ((_x) * 0x8)) +#define NFP_NET_CFG_TXR_SZ(_x) (NFP_NET_CFG_TXR_BASE + 0x400 + (_x)) +#define NFP_NET_CFG_TXR_VEC(_x) (NFP_NET_CFG_TXR_BASE + 0x440 + (_x)) +#define NFP_NET_CFG_TXR_PRIO(_x) (NFP_NET_CFG_TXR_BASE + 0x480 + (_x)) +#define NFP_NET_CFG_TXR_IRQ_MOD(_x) (NFP_NET_CFG_TXR_BASE + 0x500 + \ + ((_x) * 0x4)) + +/** + * RX ring configuration (0x0800 - 0x0c00) + * @NFP_NET_CFG_RXR_BASE: Base offset for RX ring configuration + * @NFP_NET_CFG_RXR_ADDR: Per RX ring DMA address (8B entries) + * @NFP_NET_CFG_RXR_SZ: Per RX ring ring size (1B entries) + * @NFP_NET_CFG_RXR_VEC: Per RX ring MSI-X table entry (1B entries) + * @NFP_NET_CFG_RXR_PRIO: Per RX ring priority (1B entries) + * @NFP_NET_CFG_RXR_IRQ_MOD: Per RX ring interrupt moderation (4B entries) + */ +#define NFP_NET_CFG_RXR_BASE 0x0800 +#define NFP_NET_CFG_RXR_ADDR(_x) (NFP_NET_CFG_RXR_BASE + ((_x) * 0x8)) +#define NFP_NET_CFG_RXR_SZ(_x) (NFP_NET_CFG_RXR_BASE + 0x200 + (_x)) +#define NFP_NET_CFG_RXR_VEC(_x) (NFP_NET_CFG_RXR_BASE + 0x240 + (_x)) +#define NFP_NET_CFG_RXR_PRIO(_x) (NFP_NET_CFG_RXR_BASE + 0x280 + (_x)) +#define NFP_NET_CFG_RXR_IRQ_MOD(_x) (NFP_NET_CFG_RXR_BASE + 0x300 + \ + ((_x) * 0x4)) + +/** + * Interrupt Control/Cause registers (0x0c00 - 0x0d00) + * These registers are only used when MSI-X auto-masking is not + * enabled (@NFP_NET_CFG_CTRL_MSIXAUTO not set). The array is index + * by MSI-X entry and are 1B in size. If an entry is zero, the + * corresponding entry is enabled. If the FW generates an interrupt, + * it writes a cause into the corresponding field. This also masks + * the MSI-X entry and the host driver must clear the register to + * re-enable the interrupt. + */ +#define NFP_NET_CFG_ICR_BASE 0x0c00 +#define NFP_NET_CFG_ICR(_x) (NFP_NET_CFG_ICR_BASE + (_x)) +#define NFP_NET_CFG_ICR_UNMASKED 0x0 +#define NFP_NET_CFG_ICR_RXTX 0x1 +#define NFP_NET_CFG_ICR_LSC 0x2 + +/** + * General device stats (0x0d00 - 0x0d90) + * all counters are 64bit. + */ +#define NFP_NET_CFG_STATS_BASE 0x0d00 +#define NFP_NET_CFG_STATS_RX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x00) +#define NFP_NET_CFG_STATS_RX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x08) +#define NFP_NET_CFG_STATS_RX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x10) +#define NFP_NET_CFG_STATS_RX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x18) +#define NFP_NET_CFG_STATS_RX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x20) +#define NFP_NET_CFG_STATS_RX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x28) +#define NFP_NET_CFG_STATS_RX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x30) +#define NFP_NET_CFG_STATS_RX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x38) +#define NFP_NET_CFG_STATS_RX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x40) + +#define NFP_NET_CFG_STATS_TX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x48) +#define NFP_NET_CFG_STATS_TX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x50) +#define NFP_NET_CFG_STATS_TX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x58) +#define NFP_NET_CFG_STATS_TX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x60) +#define NFP_NET_CFG_STATS_TX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x68) +#define NFP_NET_CFG_STATS_TX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x70) +#define NFP_NET_CFG_STATS_TX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x78) +#define NFP_NET_CFG_STATS_TX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x80) +#define NFP_NET_CFG_STATS_TX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x88) + +/** + * Per ring stats (0x1000 - 0x1800) + * options, 64bit per entry + * @NFP_NET_CFG_TXR_STATS: TX ring statistics (Packet and Byte count) + * @NFP_NET_CFG_RXR_STATS: RX ring statistics (Packet and Byte count) + */ +#define NFP_NET_CFG_TXR_STATS_BASE 0x1000 +#define NFP_NET_CFG_TXR_STATS(_x) (NFP_NET_CFG_TXR_STATS_BASE + \ + ((_x) * 0x10)) +#define NFP_NET_CFG_RXR_STATS_BASE 0x1400 +#define NFP_NET_CFG_RXR_STATS(_x) (NFP_NET_CFG_RXR_STATS_BASE + \ + ((_x) * 0x10)) + +#endif /* _NFP_NET_CTRL_H_ */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c new file mode 100644 index 00000000000000..4c97c713121cdb --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c @@ -0,0 +1,235 @@ +/* + * Copyright (C) 2015 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/debugfs.h> +#include <linux/module.h> +#include <linux/rtnetlink.h> + +#include "nfp_net.h" + +static struct dentry *nfp_dir; + +static int nfp_net_debugfs_rx_q_read(struct seq_file *file, void *data) +{ + struct nfp_net_rx_ring *rx_ring = file->private; + int fl_rd_p, fl_wr_p, rx_rd_p, rx_wr_p, rxd_cnt; + struct nfp_net_rx_desc *rxd; + struct sk_buff *skb; + struct nfp_net *nn; + int i; + + rtnl_lock(); + + if (!rx_ring->r_vec || !rx_ring->r_vec->nfp_net) + goto out; + nn = rx_ring->r_vec->nfp_net; + if (!netif_running(nn->netdev)) + goto out; + + rxd_cnt = rx_ring->cnt; + + fl_rd_p = nfp_qcp_rd_ptr_read(rx_ring->qcp_fl); + fl_wr_p = nfp_qcp_wr_ptr_read(rx_ring->qcp_fl); + rx_rd_p = nfp_qcp_rd_ptr_read(rx_ring->qcp_rx); + rx_wr_p = nfp_qcp_wr_ptr_read(rx_ring->qcp_rx); + + seq_printf(file, "RX[%02d]: H_RD=%d H_WR=%d FL_RD=%d FL_WR=%d RX_RD=%d RX_WR=%d\n", + rx_ring->idx, rx_ring->rd_p, rx_ring->wr_p, + fl_rd_p, fl_wr_p, rx_rd_p, rx_wr_p); + + for (i = 0; i < rxd_cnt; i++) { + rxd = &rx_ring->rxds[i]; + seq_printf(file, "%04d: 0x%08x 0x%08x", i, + rxd->vals[0], rxd->vals[1]); + + skb = READ_ONCE(rx_ring->rxbufs[i].skb); + if (skb) + seq_printf(file, " skb->head=%p skb->data=%p", + skb->head, skb->data); + + if (rx_ring->rxbufs[i].dma_addr) + seq_printf(file, " dma_addr=%pad", + &rx_ring->rxbufs[i].dma_addr); + + if (i == rx_ring->rd_p % rxd_cnt) + seq_puts(file, " H_RD "); + if (i == rx_ring->wr_p % rxd_cnt) + seq_puts(file, " H_WR "); + if (i == fl_rd_p % rxd_cnt) + seq_puts(file, " FL_RD"); + if (i == fl_wr_p % rxd_cnt) + seq_puts(file, " FL_WR"); + if (i == rx_rd_p % rxd_cnt) + seq_puts(file, " RX_RD"); + if (i == rx_wr_p % rxd_cnt) + seq_puts(file, " RX_WR"); + + seq_putc(file, '\n'); + } +out: + rtnl_unlock(); + return 0; +} + +static int nfp_net_debugfs_rx_q_open(struct inode *inode, struct file *f) +{ + return single_open(f, nfp_net_debugfs_rx_q_read, inode->i_private); +} + +static const struct file_operations nfp_rx_q_fops = { + .owner = THIS_MODULE, + .open = nfp_net_debugfs_rx_q_open, + .release = single_release, + .read = seq_read, + .llseek = seq_lseek +}; + +static int nfp_net_debugfs_tx_q_read(struct seq_file *file, void *data) +{ + struct nfp_net_tx_ring *tx_ring = file->private; + struct nfp_net_tx_desc *txd; + int d_rd_p, d_wr_p, txd_cnt; + struct sk_buff *skb; + struct nfp_net *nn; + int i; + + rtnl_lock(); + + if (!tx_ring->r_vec || !tx_ring->r_vec->nfp_net) + goto out; + nn = tx_ring->r_vec->nfp_net; + if (!netif_running(nn->netdev)) + goto out; + + txd_cnt = tx_ring->cnt; + + d_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q); + d_wr_p = nfp_qcp_wr_ptr_read(tx_ring->qcp_q); + + seq_printf(file, "TX[%02d]: H_RD=%d H_WR=%d D_RD=%d D_WR=%d\n", + tx_ring->idx, tx_ring->rd_p, tx_ring->wr_p, d_rd_p, d_wr_p); + + for (i = 0; i < txd_cnt; i++) { + txd = &tx_ring->txds[i]; + seq_printf(file, "%04d: 0x%08x 0x%08x 0x%08x 0x%08x", i, + txd->vals[0], txd->vals[1], + txd->vals[2], txd->vals[3]); + + skb = READ_ONCE(tx_ring->txbufs[i].skb); + if (skb) + seq_printf(file, " skb->head=%p skb->data=%p", + skb->head, skb->data); + if (tx_ring->txbufs[i].dma_addr) + seq_printf(file, " dma_addr=%pad", + &tx_ring->txbufs[i].dma_addr); + + if (i == tx_ring->rd_p % txd_cnt) + seq_puts(file, " H_RD"); + if (i == tx_ring->wr_p % txd_cnt) + seq_puts(file, " H_WR"); + if (i == d_rd_p % txd_cnt) + seq_puts(file, " D_RD"); + if (i == d_wr_p % txd_cnt) + seq_puts(file, " D_WR"); + + seq_putc(file, '\n'); + } +out: + rtnl_unlock(); + return 0; +} + +static int nfp_net_debugfs_tx_q_open(struct inode *inode, struct file *f) +{ + return single_open(f, nfp_net_debugfs_tx_q_read, inode->i_private); +} + +static const struct file_operations nfp_tx_q_fops = { + .owner = THIS_MODULE, + .open = nfp_net_debugfs_tx_q_open, + .release = single_release, + .read = seq_read, + .llseek = seq_lseek +}; + +void nfp_net_debugfs_adapter_add(struct nfp_net *nn) +{ + static struct dentry *queues, *tx, *rx; + char int_name[16]; + int i; + + if (IS_ERR_OR_NULL(nfp_dir)) + return; + + nn->debugfs_dir = debugfs_create_dir(pci_name(nn->pdev), nfp_dir); + if (IS_ERR_OR_NULL(nn->debugfs_dir)) + return; + + /* Create queue debugging sub-tree */ + queues = debugfs_create_dir("queue", nn->debugfs_dir); + if (IS_ERR_OR_NULL(nn->debugfs_dir)) + return; + + rx = debugfs_create_dir("rx", queues); + tx = debugfs_create_dir("tx", queues); + if (IS_ERR_OR_NULL(rx) || IS_ERR_OR_NULL(tx)) + return; + + for (i = 0; i < nn->num_rx_rings; i++) { + sprintf(int_name, "%d", i); + debugfs_create_file(int_name, S_IRUSR, rx, + &nn->rx_rings[i], &nfp_rx_q_fops); + } + + for (i = 0; i < nn->num_tx_rings; i++) { + sprintf(int_name, "%d", i); + debugfs_create_file(int_name, S_IRUSR, tx, + &nn->tx_rings[i], &nfp_tx_q_fops); + } +} + +void nfp_net_debugfs_adapter_del(struct nfp_net *nn) +{ + debugfs_remove_recursive(nn->debugfs_dir); + nn->debugfs_dir = NULL; +} + +void nfp_net_debugfs_create(void) +{ + nfp_dir = debugfs_create_dir("nfp_net", NULL); +} + +void nfp_net_debugfs_destroy(void) +{ + debugfs_remove_recursive(nfp_dir); + nfp_dir = NULL; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c new file mode 100644 index 00000000000000..9a4084a68db5c2 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -0,0 +1,640 @@ +/* + * Copyright (C) 2015 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * nfp_net_ethtool.c + * Netronome network device driver: ethtool support + * Authors: Jakub Kicinski <jakub.kicinski@netronome.com> + * Jason McMullan <jason.mcmullan@netronome.com> + * Rolf Neugebauer <rolf.neugebauer@netronome.com> + * Brad Petrus <brad.petrus@netronome.com> + */ + +#include <linux/version.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/ethtool.h> + +#include "nfp_net_ctrl.h" +#include "nfp_net.h" + +/* Support for stats. Returns netdev, driver, and device stats */ +enum { NETDEV_ET_STATS, NFP_NET_DRV_ET_STATS, NFP_NET_DEV_ET_STATS }; +struct _nfp_net_et_stats { + char name[ETH_GSTRING_LEN]; + int type; + int sz; + int off; +}; + +#define NN_ET_NETDEV_STAT(m) NETDEV_ET_STATS, \ + FIELD_SIZEOF(struct net_device_stats, m), \ + offsetof(struct net_device_stats, m) +/* For stats in the control BAR (other than Q stats) */ +#define NN_ET_DEV_STAT(m) NFP_NET_DEV_ET_STATS, \ + sizeof(u64), \ + (m) +static const struct _nfp_net_et_stats nfp_net_et_stats[] = { + /* netdev stats */ + {"rx_packets", NN_ET_NETDEV_STAT(rx_packets)}, + {"tx_packets", NN_ET_NETDEV_STAT(tx_packets)}, + {"rx_bytes", NN_ET_NETDEV_STAT(rx_bytes)}, + {"tx_bytes", NN_ET_NETDEV_STAT(tx_bytes)}, + {"rx_errors", NN_ET_NETDEV_STAT(rx_errors)}, + {"tx_errors", NN_ET_NETDEV_STAT(tx_errors)}, + {"rx_dropped", NN_ET_NETDEV_STAT(rx_dropped)}, + {"tx_dropped", NN_ET_NETDEV_STAT(tx_dropped)}, + {"multicast", NN_ET_NETDEV_STAT(multicast)}, + {"collisions", NN_ET_NETDEV_STAT(collisions)}, + {"rx_over_errors", NN_ET_NETDEV_STAT(rx_over_errors)}, + {"rx_crc_errors", NN_ET_NETDEV_STAT(rx_crc_errors)}, + {"rx_frame_errors", NN_ET_NETDEV_STAT(rx_frame_errors)}, + {"rx_fifo_errors", NN_ET_NETDEV_STAT(rx_fifo_errors)}, + {"rx_missed_errors", NN_ET_NETDEV_STAT(rx_missed_errors)}, + {"tx_aborted_errors", NN_ET_NETDEV_STAT(tx_aborted_errors)}, + {"tx_carrier_errors", NN_ET_NETDEV_STAT(tx_carrier_errors)}, + {"tx_fifo_errors", NN_ET_NETDEV_STAT(tx_fifo_errors)}, + /* Stats from the device */ + {"dev_rx_discards", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_DISCARDS)}, + {"dev_rx_errors", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_ERRORS)}, + {"dev_rx_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_OCTETS)}, + {"dev_rx_uc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_UC_OCTETS)}, + {"dev_rx_mc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_MC_OCTETS)}, + {"dev_rx_bc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_BC_OCTETS)}, + {"dev_rx_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_FRAMES)}, + {"dev_rx_mc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_MC_FRAMES)}, + {"dev_rx_bc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_BC_FRAMES)}, + + {"dev_tx_discards", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_DISCARDS)}, + {"dev_tx_errors", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_ERRORS)}, + {"dev_tx_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_OCTETS)}, + {"dev_tx_uc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_UC_OCTETS)}, + {"dev_tx_mc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_MC_OCTETS)}, + {"dev_tx_bc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_BC_OCTETS)}, + {"dev_tx_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_FRAMES)}, + {"dev_tx_mc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_MC_FRAMES)}, + {"dev_tx_bc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_BC_FRAMES)}, +}; + +#define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats) +#define NN_ET_RVEC_STATS_LEN (nn->num_r_vecs * 3) +#define NN_ET_RVEC_GATHER_STATS 7 +#define NN_ET_QUEUE_STATS_LEN ((nn->num_tx_rings + nn->num_rx_rings) * 2) +#define NN_ET_STATS_LEN (NN_ET_GLOBAL_STATS_LEN + NN_ET_RVEC_GATHER_STATS + \ + NN_ET_RVEC_STATS_LEN + NN_ET_QUEUE_STATS_LEN) + +static void nfp_net_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *drvinfo) +{ + struct nfp_net *nn = netdev_priv(netdev); + + strlcpy(drvinfo->driver, nfp_net_driver_name, sizeof(drvinfo->driver)); + strlcpy(drvinfo->version, nfp_net_driver_version, + sizeof(drvinfo->version)); + + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%d.%d", + nn->fw_ver.resv, nn->fw_ver.class, + nn->fw_ver.major, nn->fw_ver.minor); + strlcpy(drvinfo->bus_info, pci_name(nn->pdev), + sizeof(drvinfo->bus_info)); + + drvinfo->n_stats = NN_ET_STATS_LEN; + drvinfo->regdump_len = NFP_NET_CFG_BAR_SZ; +} + +static void nfp_net_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct nfp_net *nn = netdev_priv(netdev); + + ring->rx_max_pending = NFP_NET_MAX_RX_DESCS; + ring->tx_max_pending = NFP_NET_MAX_TX_DESCS; + ring->rx_pending = nn->rxd_cnt; + ring->tx_pending = nn->txd_cnt; +} + +static int nfp_net_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct nfp_net *nn = netdev_priv(netdev); + u32 rxd_cnt, txd_cnt; + + if (netif_running(netdev)) { + /* Some NIC drivers allow reconfiguration on the fly, + * some down the interface, change and then up it + * again. For now we don't allow changes when the + * device is up. + */ + nn_warn(nn, "Can't change rings while device is up\n"); + return -EBUSY; + } + + /* We don't have separate queues/rings for small/large frames. */ + if (ring->rx_mini_pending || ring->rx_jumbo_pending) + return -EINVAL; + + /* Round up to supported values */ + rxd_cnt = roundup_pow_of_two(ring->rx_pending); + rxd_cnt = max_t(u32, rxd_cnt, NFP_NET_MIN_RX_DESCS); + rxd_cnt = min_t(u32, rxd_cnt, NFP_NET_MAX_RX_DESCS); + + txd_cnt = roundup_pow_of_two(ring->tx_pending); + txd_cnt = max_t(u32, txd_cnt, NFP_NET_MIN_TX_DESCS); + txd_cnt = min_t(u32, txd_cnt, NFP_NET_MAX_TX_DESCS); + + if (nn->rxd_cnt != rxd_cnt || nn->txd_cnt != txd_cnt) + nn_dbg(nn, "Change ring size: RxQ %u->%u, TxQ %u->%u\n", + nn->rxd_cnt, rxd_cnt, nn->txd_cnt, txd_cnt); + + nn->rxd_cnt = rxd_cnt; + nn->txd_cnt = txd_cnt; + + return 0; +} + +static void nfp_net_get_strings(struct net_device *netdev, + u32 stringset, u8 *data) +{ + struct nfp_net *nn = netdev_priv(netdev); + u8 *p = data; + int i; + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < NN_ET_GLOBAL_STATS_LEN; i++) { + memcpy(p, nfp_net_et_stats[i].name, ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + for (i = 0; i < nn->num_r_vecs; i++) { + sprintf(p, "rvec_%u_rx_pkts", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rvec_%u_tx_pkts", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rvec_%u_tx_busy", i); + p += ETH_GSTRING_LEN; + } + strncpy(p, "hw_rx_csum_ok", ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + strncpy(p, "hw_rx_csum_inner_ok", ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + strncpy(p, "hw_rx_csum_err", ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + strncpy(p, "hw_tx_csum", ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + strncpy(p, "hw_tx_inner_csum", ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + strncpy(p, "tx_gather", ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + strncpy(p, "tx_lso", ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + for (i = 0; i < nn->num_tx_rings; i++) { + sprintf(p, "txq_%u_pkts", i); + p += ETH_GSTRING_LEN; + sprintf(p, "txq_%u_bytes", i); + p += ETH_GSTRING_LEN; + } + for (i = 0; i < nn->num_rx_rings; i++) { + sprintf(p, "rxq_%u_pkts", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rxq_%u_bytes", i); + p += ETH_GSTRING_LEN; + } + break; + } +} + +static void nfp_net_get_stats(struct net_device *netdev, + struct ethtool_stats *stats, u64 *data) +{ + u64 gathered_stats[NN_ET_RVEC_GATHER_STATS] = {}; + struct nfp_net *nn = netdev_priv(netdev); + struct rtnl_link_stats64 *netdev_stats; + struct rtnl_link_stats64 temp = {}; + u64 tmp[NN_ET_RVEC_GATHER_STATS]; + u8 __iomem *io_p; + int i, j, k; + u8 *p; + + netdev_stats = dev_get_stats(netdev, &temp); + + for (i = 0; i < NN_ET_GLOBAL_STATS_LEN; i++) { + switch (nfp_net_et_stats[i].type) { + case NETDEV_ET_STATS: + p = (char *)netdev_stats + nfp_net_et_stats[i].off; + data[i] = nfp_net_et_stats[i].sz == sizeof(u64) ? + *(u64 *)p : *(u32 *)p; + break; + + case NFP_NET_DEV_ET_STATS: + io_p = nn->ctrl_bar + nfp_net_et_stats[i].off; + data[i] = readq(io_p); + break; + } + } + for (j = 0; j < nn->num_r_vecs; j++) { + unsigned int start; + + do { + start = u64_stats_fetch_begin(&nn->r_vecs[j].rx_sync); + data[i++] = nn->r_vecs[j].rx_pkts; + tmp[0] = nn->r_vecs[j].hw_csum_rx_ok; + tmp[1] = nn->r_vecs[j].hw_csum_rx_inner_ok; + tmp[2] = nn->r_vecs[j].hw_csum_rx_error; + } while (u64_stats_fetch_retry(&nn->r_vecs[j].rx_sync, start)); + + do { + start = u64_stats_fetch_begin(&nn->r_vecs[j].tx_sync); + data[i++] = nn->r_vecs[j].tx_pkts; + data[i++] = nn->r_vecs[j].tx_busy; + tmp[3] = nn->r_vecs[j].hw_csum_tx; + tmp[4] = nn->r_vecs[j].hw_csum_tx_inner; + tmp[5] = nn->r_vecs[j].tx_gather; + tmp[6] = nn->r_vecs[j].tx_lso; + } while (u64_stats_fetch_retry(&nn->r_vecs[j].tx_sync, start)); + + for (k = 0; k < NN_ET_RVEC_GATHER_STATS; k++) + gathered_stats[k] += tmp[k]; + } + for (j = 0; j < NN_ET_RVEC_GATHER_STATS; j++) + data[i++] = gathered_stats[j]; + for (j = 0; j < nn->num_tx_rings; j++) { + io_p = nn->ctrl_bar + NFP_NET_CFG_TXR_STATS(j); + data[i++] = readq(io_p); + io_p = nn->ctrl_bar + NFP_NET_CFG_TXR_STATS(j) + 8; + data[i++] = readq(io_p); + } + for (j = 0; j < nn->num_rx_rings; j++) { + io_p = nn->ctrl_bar + NFP_NET_CFG_RXR_STATS(j); + data[i++] = readq(io_p); + io_p = nn->ctrl_bar + NFP_NET_CFG_RXR_STATS(j) + 8; + data[i++] = readq(io_p); + } +} + +static int nfp_net_get_sset_count(struct net_device *netdev, int sset) +{ + struct nfp_net *nn = netdev_priv(netdev); + + switch (sset) { + case ETH_SS_STATS: + return NN_ET_STATS_LEN; + default: + return -EOPNOTSUPP; + } +} + +/* RX network flow classification (RSS, filters, etc) + */ +static u32 ethtool_flow_to_nfp_flag(u32 flow_type) +{ + static const u32 xlate_ethtool_to_nfp[IPV6_FLOW + 1] = { + [TCP_V4_FLOW] = NFP_NET_CFG_RSS_IPV4_TCP, + [TCP_V6_FLOW] = NFP_NET_CFG_RSS_IPV6_TCP, + [UDP_V4_FLOW] = NFP_NET_CFG_RSS_IPV4_UDP, + [UDP_V6_FLOW] = NFP_NET_CFG_RSS_IPV6_UDP, + [IPV4_FLOW] = NFP_NET_CFG_RSS_IPV4, + [IPV6_FLOW] = NFP_NET_CFG_RSS_IPV6, + }; + + if (flow_type >= ARRAY_SIZE(xlate_ethtool_to_nfp)) + return 0; + + return xlate_ethtool_to_nfp[flow_type]; +} + +static int nfp_net_get_rss_hash_opts(struct nfp_net *nn, + struct ethtool_rxnfc *cmd) +{ + u32 nfp_rss_flag; + + cmd->data = 0; + + if (!(nn->cap & NFP_NET_CFG_CTRL_RSS)) + return -EOPNOTSUPP; + + nfp_rss_flag = ethtool_flow_to_nfp_flag(cmd->flow_type); + if (!nfp_rss_flag) + return -EINVAL; + + cmd->data |= RXH_IP_SRC | RXH_IP_DST; + if (nn->rss_cfg & nfp_rss_flag) + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + + return 0; +} + +static int nfp_net_get_rxnfc(struct net_device *netdev, + struct ethtool_rxnfc *cmd, u32 *rule_locs) +{ + struct nfp_net *nn = netdev_priv(netdev); + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = nn->num_rx_rings; + return 0; + case ETHTOOL_GRXFH: + return nfp_net_get_rss_hash_opts(nn, cmd); + default: + return -EOPNOTSUPP; + } +} + +static int nfp_net_set_rss_hash_opt(struct nfp_net *nn, + struct ethtool_rxnfc *nfc) +{ + u32 new_rss_cfg = nn->rss_cfg; + u32 nfp_rss_flag; + int err; + + if (!(nn->cap & NFP_NET_CFG_CTRL_RSS)) + return -EOPNOTSUPP; + + /* RSS only supports IP SA/DA and L4 src/dst ports */ + if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3)) + return -EINVAL; + + /* We need at least the IP SA/DA fields for hashing */ + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST)) + return -EINVAL; + + nfp_rss_flag = ethtool_flow_to_nfp_flag(nfc->flow_type); + if (!nfp_rss_flag) + return -EINVAL; + + switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + case 0: + new_rss_cfg &= ~nfp_rss_flag; + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + new_rss_cfg |= nfp_rss_flag; + break; + default: + return -EINVAL; + } + + new_rss_cfg |= NFP_NET_CFG_RSS_TOEPLITZ; + new_rss_cfg |= NFP_NET_CFG_RSS_MASK; + + if (new_rss_cfg == nn->rss_cfg) + return 0; + + writel(new_rss_cfg, nn->ctrl_bar + NFP_NET_CFG_RSS_CTRL); + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_RSS); + if (err) + return err; + + nn->rss_cfg = new_rss_cfg; + + nn_dbg(nn, "Changed RSS config to 0x%x\n", nn->rss_cfg); + return 0; +} + +static int nfp_net_set_rxnfc(struct net_device *netdev, + struct ethtool_rxnfc *cmd) +{ + struct nfp_net *nn = netdev_priv(netdev); + + switch (cmd->cmd) { + case ETHTOOL_SRXFH: + return nfp_net_set_rss_hash_opt(nn, cmd); + default: + return -EOPNOTSUPP; + } +} + +static u32 nfp_net_get_rxfh_indir_size(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + + if (!(nn->cap & NFP_NET_CFG_CTRL_RSS)) + return 0; + + return ARRAY_SIZE(nn->rss_itbl); +} + +static u32 nfp_net_get_rxfh_key_size(struct net_device *netdev) +{ + return NFP_NET_CFG_RSS_KEY_SZ; +} + +static int nfp_net_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) +{ + struct nfp_net *nn = netdev_priv(netdev); + int i; + + if (!(nn->cap & NFP_NET_CFG_CTRL_RSS)) + return -EOPNOTSUPP; + + if (indir) + for (i = 0; i < ARRAY_SIZE(nn->rss_itbl); i++) + indir[i] = nn->rss_itbl[i]; + if (key) + memcpy(key, nn->rss_key, NFP_NET_CFG_RSS_KEY_SZ); + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + + return 0; +} + +static int nfp_net_set_rxfh(struct net_device *netdev, + const u32 *indir, const u8 *key, + const u8 hfunc) +{ + struct nfp_net *nn = netdev_priv(netdev); + int i; + + if (!(nn->cap & NFP_NET_CFG_CTRL_RSS) || + !(hfunc == ETH_RSS_HASH_NO_CHANGE || hfunc == ETH_RSS_HASH_TOP)) + return -EOPNOTSUPP; + + if (!key && !indir) + return 0; + + if (key) { + memcpy(nn->rss_key, key, NFP_NET_CFG_RSS_KEY_SZ); + nfp_net_rss_write_key(nn); + } + if (indir) { + for (i = 0; i < ARRAY_SIZE(nn->rss_itbl); i++) + nn->rss_itbl[i] = indir[i]; + + nfp_net_rss_write_itbl(nn); + } + + return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_RSS); +} + +/* Dump BAR registers + */ +static int nfp_net_get_regs_len(struct net_device *netdev) +{ + return NFP_NET_CFG_BAR_SZ; +} + +static void nfp_net_get_regs(struct net_device *netdev, + struct ethtool_regs *regs, void *p) +{ + struct nfp_net *nn = netdev_priv(netdev); + u32 *regs_buf = p; + int i; + + regs->version = nn_readl(nn, NFP_NET_CFG_VERSION); + + for (i = 0; i < NFP_NET_CFG_BAR_SZ / sizeof(u32); i++) + regs_buf[i] = readl(nn->ctrl_bar + (i * sizeof(u32))); +} + +static int nfp_net_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + struct nfp_net *nn = netdev_priv(netdev); + + if (!(nn->cap & NFP_NET_CFG_CTRL_IRQMOD)) + return -EINVAL; + + ec->rx_coalesce_usecs = nn->rx_coalesce_usecs; + ec->rx_max_coalesced_frames = nn->rx_coalesce_max_frames; + ec->tx_coalesce_usecs = nn->tx_coalesce_usecs; + ec->tx_max_coalesced_frames = nn->tx_coalesce_max_frames; + + return 0; +} + +static int nfp_net_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + struct nfp_net *nn = netdev_priv(netdev); + unsigned int factor; + + if (ec->rx_coalesce_usecs_irq || + ec->rx_max_coalesced_frames_irq || + ec->tx_coalesce_usecs_irq || + ec->tx_max_coalesced_frames_irq || + ec->stats_block_coalesce_usecs || + ec->use_adaptive_rx_coalesce || + ec->use_adaptive_tx_coalesce || + ec->pkt_rate_low || + ec->rx_coalesce_usecs_low || + ec->rx_max_coalesced_frames_low || + ec->tx_coalesce_usecs_low || + ec->tx_max_coalesced_frames_low || + ec->pkt_rate_high || + ec->rx_coalesce_usecs_high || + ec->rx_max_coalesced_frames_high || + ec->tx_coalesce_usecs_high || + ec->tx_max_coalesced_frames_high || + ec->rate_sample_interval) + return -ENOTSUPP; + + /* Compute factor used to convert coalesce '_usecs' parameters to + * ME timestamp ticks. There are 16 ME clock cycles for each timestamp + * count. + */ + factor = nn->me_freq_mhz / 16; + + /* Each pair of (usecs, max_frames) fields specifies that interrupts + * should be coalesced until + * (usecs > 0 && time_since_first_completion >= usecs) || + * (max_frames > 0 && completed_frames >= max_frames) + * + * It is illegal to set both usecs and max_frames to zero as this would + * cause interrupts to never be generated. To disable coalescing, set + * usecs = 0 and max_frames = 1. + * + * Some implementations ignore the value of max_frames and use the + * condition time_since_first_completion >= usecs + */ + + if (!(nn->cap & NFP_NET_CFG_CTRL_IRQMOD)) + return -EINVAL; + + /* ensure valid configuration */ + if (!ec->rx_coalesce_usecs && !ec->rx_max_coalesced_frames) + return -EINVAL; + + if (!ec->tx_coalesce_usecs && !ec->tx_max_coalesced_frames) + return -EINVAL; + + if (ec->rx_coalesce_usecs * factor >= ((1 << 16) - 1)) + return -EINVAL; + + if (ec->tx_coalesce_usecs * factor >= ((1 << 16) - 1)) + return -EINVAL; + + if (ec->rx_max_coalesced_frames >= ((1 << 16) - 1)) + return -EINVAL; + + if (ec->tx_max_coalesced_frames >= ((1 << 16) - 1)) + return -EINVAL; + + /* configuration is valid */ + nn->rx_coalesce_usecs = ec->rx_coalesce_usecs; + nn->rx_coalesce_max_frames = ec->rx_max_coalesced_frames; + nn->tx_coalesce_usecs = ec->tx_coalesce_usecs; + nn->tx_coalesce_max_frames = ec->tx_max_coalesced_frames; + + /* write configuration to device */ + nfp_net_coalesce_write_cfg(nn); + return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_IRQMOD); +} + +static const struct ethtool_ops nfp_net_ethtool_ops = { + .get_drvinfo = nfp_net_get_drvinfo, + .get_ringparam = nfp_net_get_ringparam, + .set_ringparam = nfp_net_set_ringparam, + .get_strings = nfp_net_get_strings, + .get_ethtool_stats = nfp_net_get_stats, + .get_sset_count = nfp_net_get_sset_count, + .get_rxnfc = nfp_net_get_rxnfc, + .set_rxnfc = nfp_net_set_rxnfc, + .get_rxfh_indir_size = nfp_net_get_rxfh_indir_size, + .get_rxfh_key_size = nfp_net_get_rxfh_key_size, + .get_rxfh = nfp_net_get_rxfh, + .set_rxfh = nfp_net_set_rxfh, + .get_regs_len = nfp_net_get_regs_len, + .get_regs = nfp_net_get_regs, + .get_coalesce = nfp_net_get_coalesce, + .set_coalesce = nfp_net_set_coalesce, +}; + +void nfp_net_set_ethtool_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &nfp_net_ethtool_ops; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c new file mode 100644 index 00000000000000..e2b22b8a20f134 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c @@ -0,0 +1,385 @@ +/* + * Copyright (C) 2015 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * nfp_netvf_main.c + * Netronome virtual function network device driver: Main entry point + * Author: Jason McMullan <jason.mcmullan@netronome.com> + * Rolf Neugebauer <rolf.neugebauer@netronome.com> + */ + +#include <linux/version.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/etherdevice.h> + +#include "nfp_net_ctrl.h" +#include "nfp_net.h" + +const char nfp_net_driver_name[] = "nfp_netvf"; +const char nfp_net_driver_version[] = "0.1"; +#define PCI_DEVICE_NFP6000VF 0x6003 +static const struct pci_device_id nfp_netvf_pci_device_ids[] = { + { PCI_VENDOR_ID_NETRONOME, PCI_DEVICE_NFP6000VF, + PCI_VENDOR_ID_NETRONOME, PCI_ANY_ID, + PCI_ANY_ID, 0, + }, + { 0, } /* Required last entry. */ +}; +MODULE_DEVICE_TABLE(pci, nfp_netvf_pci_device_ids); + +static void nfp_netvf_get_mac_addr(struct nfp_net *nn) +{ + u8 mac_addr[ETH_ALEN]; + + put_unaligned_be32(nn_readl(nn, NFP_NET_CFG_MACADDR + 0), &mac_addr[0]); + /* We can't do readw for NFP-3200 compatibility */ + put_unaligned_be16(nn_readl(nn, NFP_NET_CFG_MACADDR + 4) >> 16, + &mac_addr[4]); + + if (!is_valid_ether_addr(mac_addr)) { + eth_hw_addr_random(nn->netdev); + return; + } + + ether_addr_copy(nn->netdev->dev_addr, mac_addr); + ether_addr_copy(nn->netdev->perm_addr, mac_addr); +} + +static int nfp_netvf_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *pci_id) +{ + struct nfp_net_fw_version fw_ver; + int max_tx_rings, max_rx_rings; + u32 tx_bar_off, rx_bar_off; + u32 tx_bar_sz, rx_bar_sz; + int tx_bar_no, rx_bar_no; + u8 __iomem *ctrl_bar; + struct nfp_net *nn; + int is_nfp3200; + u32 startq; + int stride; + int err; + + err = pci_enable_device_mem(pdev); + if (err) + return err; + + err = pci_request_regions(pdev, nfp_net_driver_name); + if (err) { + dev_err(&pdev->dev, "Unable to allocate device memory.\n"); + goto err_pci_disable; + } + + switch (pdev->device) { + case PCI_DEVICE_NFP6000VF: + is_nfp3200 = 0; + break; + default: + err = -ENODEV; + goto err_pci_regions; + } + + pci_set_master(pdev); + + err = dma_set_mask_and_coherent(&pdev->dev, + DMA_BIT_MASK(NFP_NET_MAX_DMA_BITS)); + if (err) + goto err_pci_regions; + + /* Map the Control BAR. + * + * Irrespective of the advertised BAR size we only map the + * first NFP_NET_CFG_BAR_SZ of the BAR. This keeps the code + * the identical for PF and VF drivers. + */ + ctrl_bar = ioremap_nocache(pci_resource_start(pdev, NFP_NET_CRTL_BAR), + NFP_NET_CFG_BAR_SZ); + if (!ctrl_bar) { + dev_err(&pdev->dev, + "Failed to map resource %d\n", NFP_NET_CRTL_BAR); + err = -EIO; + goto err_pci_regions; + } + + nfp_net_get_fw_version(&fw_ver, ctrl_bar); + if (fw_ver.class != NFP_NET_CFG_VERSION_CLASS_GENERIC) { + dev_err(&pdev->dev, "Unknown Firmware ABI %d.%d.%d.%d\n", + fw_ver.resv, fw_ver.class, fw_ver.major, fw_ver.minor); + err = -EINVAL; + goto err_ctrl_unmap; + } + + /* Determine stride */ + if (nfp_net_fw_ver_eq(&fw_ver, 0, 0, 0, 0) || + nfp_net_fw_ver_eq(&fw_ver, 0, 0, 0, 1) || + nfp_net_fw_ver_eq(&fw_ver, 0, 0, 0x12, 0x48)) { + stride = 2; + tx_bar_no = NFP_NET_Q0_BAR; + rx_bar_no = NFP_NET_Q1_BAR; + dev_warn(&pdev->dev, "OBSOLETE Firmware detected - VF isolation not available\n"); + } else { + switch (fw_ver.major) { + case 1 ... 3: + if (is_nfp3200) { + stride = 2; + tx_bar_no = NFP_NET_Q0_BAR; + rx_bar_no = NFP_NET_Q1_BAR; + } else { + stride = 4; + tx_bar_no = NFP_NET_Q0_BAR; + rx_bar_no = tx_bar_no; + } + break; + default: + dev_err(&pdev->dev, "Unsupported Firmware ABI %d.%d.%d.%d\n", + fw_ver.resv, fw_ver.class, + fw_ver.major, fw_ver.minor); + err = -EINVAL; + goto err_ctrl_unmap; + } + } + + /* Find out how many rings are supported */ + max_tx_rings = readl(ctrl_bar + NFP_NET_CFG_MAX_TXRINGS); + max_rx_rings = readl(ctrl_bar + NFP_NET_CFG_MAX_RXRINGS); + + tx_bar_sz = NFP_QCP_QUEUE_ADDR_SZ * max_tx_rings * stride; + rx_bar_sz = NFP_QCP_QUEUE_ADDR_SZ * max_rx_rings * stride; + + /* Sanity checks */ + if (tx_bar_sz > pci_resource_len(pdev, tx_bar_no)) { + dev_err(&pdev->dev, + "TX BAR too small for number of TX rings. Adjusting\n"); + tx_bar_sz = pci_resource_len(pdev, tx_bar_no); + max_tx_rings = (tx_bar_sz / NFP_QCP_QUEUE_ADDR_SZ) / 2; + } + if (rx_bar_sz > pci_resource_len(pdev, rx_bar_no)) { + dev_err(&pdev->dev, + "RX BAR too small for number of RX rings. Adjusting\n"); + rx_bar_sz = pci_resource_len(pdev, rx_bar_no); + max_rx_rings = (rx_bar_sz / NFP_QCP_QUEUE_ADDR_SZ) / 2; + } + + /* XXX Implement a workaround for THB-350 here. Ideally, we + * have a different PCI ID for A rev VFs. + */ + switch (pdev->device) { + case PCI_DEVICE_NFP6000VF: + startq = readl(ctrl_bar + NFP_NET_CFG_START_TXQ); + tx_bar_off = NFP_PCIE_QUEUE(startq); + startq = readl(ctrl_bar + NFP_NET_CFG_START_RXQ); + rx_bar_off = NFP_PCIE_QUEUE(startq); + break; + default: + err = -ENODEV; + goto err_ctrl_unmap; + } + + /* Allocate and initialise the netdev */ + nn = nfp_net_netdev_alloc(pdev, max_tx_rings, max_rx_rings); + if (IS_ERR(nn)) { + err = PTR_ERR(nn); + goto err_ctrl_unmap; + } + + nn->fw_ver = fw_ver; + nn->ctrl_bar = ctrl_bar; + nn->is_vf = 1; + nn->is_nfp3200 = is_nfp3200; + nn->stride_tx = stride; + nn->stride_rx = stride; + + if (rx_bar_no == tx_bar_no) { + u32 bar_off, bar_sz; + resource_size_t map_addr; + + /* Make a single overlapping BAR mapping */ + if (tx_bar_off < rx_bar_off) + bar_off = tx_bar_off; + else + bar_off = rx_bar_off; + + if ((tx_bar_off + tx_bar_sz) > (rx_bar_off + rx_bar_sz)) + bar_sz = (tx_bar_off + tx_bar_sz) - bar_off; + else + bar_sz = (rx_bar_off + rx_bar_sz) - bar_off; + + map_addr = pci_resource_start(pdev, tx_bar_no) + bar_off; + nn->q_bar = ioremap_nocache(map_addr, bar_sz); + if (!nn->q_bar) { + nn_err(nn, "Failed to map resource %d\n", tx_bar_no); + err = -EIO; + goto err_netdev_free; + } + + /* TX queues */ + nn->tx_bar = nn->q_bar + (tx_bar_off - bar_off); + /* RX queues */ + nn->rx_bar = nn->q_bar + (rx_bar_off - bar_off); + } else { + resource_size_t map_addr; + + /* TX queues */ + map_addr = pci_resource_start(pdev, tx_bar_no) + tx_bar_off; + nn->tx_bar = ioremap_nocache(map_addr, tx_bar_sz); + if (!nn->tx_bar) { + nn_err(nn, "Failed to map resource %d\n", tx_bar_no); + err = -EIO; + goto err_netdev_free; + } + + /* RX queues */ + map_addr = pci_resource_start(pdev, rx_bar_no) + rx_bar_off; + nn->rx_bar = ioremap_nocache(map_addr, rx_bar_sz); + if (!nn->rx_bar) { + nn_err(nn, "Failed to map resource %d\n", rx_bar_no); + err = -EIO; + goto err_unmap_tx; + } + } + + nfp_netvf_get_mac_addr(nn); + + err = nfp_net_irqs_alloc(nn); + if (!err) { + nn_warn(nn, "Unable to allocate MSI-X Vectors. Exiting\n"); + err = -EIO; + goto err_unmap_rx; + } + + /* Get ME clock frequency from ctrl BAR + * XXX for now frequency is hardcoded until we figure out how + * to get the value from nfp-hwinfo into ctrl bar + */ + nn->me_freq_mhz = 1200; + + err = nfp_net_netdev_init(nn->netdev); + if (err) + goto err_irqs_disable; + + pci_set_drvdata(pdev, nn); + + nfp_net_info(nn); + nfp_net_debugfs_adapter_add(nn); + + return 0; + +err_irqs_disable: + nfp_net_irqs_disable(nn); +err_unmap_rx: + if (!nn->q_bar) + iounmap(nn->rx_bar); +err_unmap_tx: + if (!nn->q_bar) + iounmap(nn->tx_bar); + else + iounmap(nn->q_bar); +err_netdev_free: + pci_set_drvdata(pdev, NULL); + nfp_net_netdev_free(nn); +err_ctrl_unmap: + iounmap(ctrl_bar); +err_pci_regions: + pci_release_regions(pdev); +err_pci_disable: + pci_disable_device(pdev); + return err; +} + +static void nfp_netvf_pci_remove(struct pci_dev *pdev) +{ + struct nfp_net *nn = pci_get_drvdata(pdev); + + /* Note, the order is slightly different from above as we need + * to keep the nn pointer around till we have freed everything. + */ + nfp_net_debugfs_adapter_del(nn); + + nfp_net_netdev_clean(nn->netdev); + + nfp_net_irqs_disable(nn); + + if (!nn->q_bar) { + iounmap(nn->rx_bar); + iounmap(nn->tx_bar); + } else { + iounmap(nn->q_bar); + } + iounmap(nn->ctrl_bar); + + pci_set_drvdata(pdev, NULL); + + nfp_net_netdev_free(nn); + + pci_release_regions(pdev); + pci_disable_device(pdev); +} + +static struct pci_driver nfp_netvf_pci_driver = { + .name = nfp_net_driver_name, + .id_table = nfp_netvf_pci_device_ids, + .probe = nfp_netvf_pci_probe, + .remove = nfp_netvf_pci_remove, +}; + +static int __init nfp_netvf_init(void) +{ + int err; + + pr_info("%s: NFP VF Network driver, Copyright (C) 2014-2015 Netronome Systems\n", + nfp_net_driver_name); + + nfp_net_debugfs_create(); + err = pci_register_driver(&nfp_netvf_pci_driver); + if (err) { + nfp_net_debugfs_destroy(); + return err; + } + + return 0; +} + +static void __exit nfp_netvf_exit(void) +{ + pci_unregister_driver(&nfp_netvf_pci_driver); + nfp_net_debugfs_destroy(); +} + +module_init(nfp_netvf_init); +module_exit(nfp_netvf_exit); + +MODULE_AUTHOR("Netronome Systems <oss-drivers@netronome.com>"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("NFP VF network device driver"); diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index b2f8e854dfd1c4..264e954675d1f3 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -3993,6 +3993,8 @@ struct public_drv_mb { #define DRV_MSG_CODE_PHY_CORE_WRITE 0x000e0000 #define DRV_MSG_CODE_SET_VERSION 0x000f0000 +#define DRV_MSG_CODE_SET_LED_MODE 0x00200000 + #define DRV_MSG_SEQ_NUMBER_MASK 0x0000ffff u32 drv_mb_param; @@ -4044,6 +4046,10 @@ struct public_drv_mb { #define DRV_MB_PARAM_CFG_VF_MSIX_SB_NUM_SHIFT 8 #define DRV_MB_PARAM_CFG_VF_MSIX_SB_NUM_MASK 0x0000FF00 +#define DRV_MB_PARAM_SET_LED_MODE_OPER 0x0 +#define DRV_MB_PARAM_SET_LED_MODE_ON 0x1 +#define DRV_MB_PARAM_SET_LED_MODE_OFF 0x2 + u32 fw_mb_header; #define FW_MSG_CODE_MASK 0xffff0000 #define FW_MSG_CODE_DRV_LOAD_ENGINE 0x10100000 diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 947c7af72b25b3..6b02e113436016 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1135,6 +1135,23 @@ static int qed_drain(struct qed_dev *cdev) return 0; } +static int qed_set_led(struct qed_dev *cdev, enum qed_led_mode mode) +{ + struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); + struct qed_ptt *ptt; + int status = 0; + + ptt = qed_ptt_acquire(hwfn); + if (!ptt) + return -EAGAIN; + + status = qed_mcp_set_led(hwfn, ptt, mode); + + qed_ptt_release(hwfn, ptt); + + return status; +} + const struct qed_common_ops qed_common_ops_pass = { .probe = &qed_probe, .remove = &qed_remove, @@ -1155,6 +1172,7 @@ const struct qed_common_ops qed_common_ops_pass = { .update_msglvl = &qed_init_dp, .chain_alloc = &qed_chain_alloc, .chain_free = &qed_chain_free, + .set_led = &qed_set_led, }; u32 qed_get_protocol_version(enum qed_protocol protocol) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 20d048cdcb8821..ba1b1f1ef789b6 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -858,3 +858,30 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn, return 0; } + +int qed_mcp_set_led(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + enum qed_led_mode mode) +{ + u32 resp = 0, param = 0, drv_mb_param; + int rc; + + switch (mode) { + case QED_LED_MODE_ON: + drv_mb_param = DRV_MB_PARAM_SET_LED_MODE_ON; + break; + case QED_LED_MODE_OFF: + drv_mb_param = DRV_MB_PARAM_SET_LED_MODE_OFF; + break; + case QED_LED_MODE_RESTORE: + drv_mb_param = DRV_MB_PARAM_SET_LED_MODE_OPER; + break; + default: + DP_NOTICE(p_hwfn, "Invalid LED mode %d\n", mode); + return -EINVAL; + } + + rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_SET_LED_MODE, + drv_mb_param, &resp, ¶m); + + return rc; +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index dbaae586b4a728..506197d5c3dda1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -224,6 +224,19 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, struct qed_mcp_drv_version *p_ver); +/** + * @brief Set LED status + * + * @param p_hwfn + * @param p_ptt + * @param mode - LED mode + * + * @return int - 0 - operation was successful. + */ +int qed_mcp_set_led(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum qed_led_mode mode); + /* Using hwfn number (and not pf_num) is required since in CMT mode, * same pf_num may be used by two different hwfn * TODO - this shouldn't really be in .h file, but until all fields diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index ea00d5f3bab43a..7c6caf7f66122e 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -116,6 +116,7 @@ struct qede_dev { (edev)->dev_info.num_tc) struct qede_fastpath *fp_array; + u16 req_rss; u16 num_rss; u8 num_tc; #define QEDE_RSS_CNT(edev) ((edev)->num_rss) @@ -269,13 +270,13 @@ int qede_change_mtu(struct net_device *dev, int new_mtu); void qede_fill_by_demand_stats(struct qede_dev *edev); #define RX_RING_SIZE_POW 13 -#define RX_RING_SIZE BIT(RX_RING_SIZE_POW) +#define RX_RING_SIZE ((u16)BIT(RX_RING_SIZE_POW)) #define NUM_RX_BDS_MAX (RX_RING_SIZE - 1) #define NUM_RX_BDS_MIN 128 #define NUM_RX_BDS_DEF NUM_RX_BDS_MAX #define TX_RING_SIZE_POW 13 -#define TX_RING_SIZE BIT(TX_RING_SIZE_POW) +#define TX_RING_SIZE ((u16)BIT(TX_RING_SIZE_POW)) #define NUM_TX_BDS_MAX (TX_RING_SIZE - 1) #define NUM_TX_BDS_MIN 128 #define NUM_TX_BDS_DEF NUM_TX_BDS_MAX diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 3a362476a22c66..e442b85c9a5e9c 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -322,6 +322,30 @@ static void qede_set_msglevel(struct net_device *ndev, u32 level) dp_module, dp_level); } +static int qede_nway_reset(struct net_device *dev) +{ + struct qede_dev *edev = netdev_priv(dev); + struct qed_link_output current_link; + struct qed_link_params link_params; + + if (!netif_running(dev)) + return 0; + + memset(¤t_link, 0, sizeof(current_link)); + edev->ops->common->get_link(edev->cdev, ¤t_link); + if (!current_link.link_up) + return 0; + + /* Toggle the link */ + memset(&link_params, 0, sizeof(link_params)); + link_params.link_up = false; + edev->ops->common->set_link(edev->cdev, &link_params); + link_params.link_up = true; + edev->ops->common->set_link(edev->cdev, &link_params); + + return 0; +} + static u32 qede_get_link(struct net_device *dev) { struct qede_dev *edev = netdev_priv(dev); @@ -333,6 +357,106 @@ static u32 qede_get_link(struct net_device *dev) return current_link.link_up; } +static void qede_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *ering) +{ + struct qede_dev *edev = netdev_priv(dev); + + ering->rx_max_pending = NUM_RX_BDS_MAX; + ering->rx_pending = edev->q_num_rx_buffers; + ering->tx_max_pending = NUM_TX_BDS_MAX; + ering->tx_pending = edev->q_num_tx_buffers; +} + +static int qede_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *ering) +{ + struct qede_dev *edev = netdev_priv(dev); + + DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), + "Set ring params command parameters: rx_pending = %d, tx_pending = %d\n", + ering->rx_pending, ering->tx_pending); + + /* Validate legality of configuration */ + if (ering->rx_pending > NUM_RX_BDS_MAX || + ering->rx_pending < NUM_RX_BDS_MIN || + ering->tx_pending > NUM_TX_BDS_MAX || + ering->tx_pending < NUM_TX_BDS_MIN) { + DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), + "Can only support Rx Buffer size [0%08x,...,0x%08x] and Tx Buffer size [0x%08x,...,0x%08x]\n", + NUM_RX_BDS_MIN, NUM_RX_BDS_MAX, + NUM_TX_BDS_MIN, NUM_TX_BDS_MAX); + return -EINVAL; + } + + /* Change ring size and re-load */ + edev->q_num_rx_buffers = ering->rx_pending; + edev->q_num_tx_buffers = ering->tx_pending; + + if (netif_running(edev->ndev)) + qede_reload(edev, NULL, NULL); + + return 0; +} + +static void qede_get_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *epause) +{ + struct qede_dev *edev = netdev_priv(dev); + struct qed_link_output current_link; + + memset(¤t_link, 0, sizeof(current_link)); + edev->ops->common->get_link(edev->cdev, ¤t_link); + + if (current_link.pause_config & QED_LINK_PAUSE_AUTONEG_ENABLE) + epause->autoneg = true; + if (current_link.pause_config & QED_LINK_PAUSE_RX_ENABLE) + epause->rx_pause = true; + if (current_link.pause_config & QED_LINK_PAUSE_TX_ENABLE) + epause->tx_pause = true; + + DP_VERBOSE(edev, QED_MSG_DEBUG, + "ethtool_pauseparam: cmd %d autoneg %d rx_pause %d tx_pause %d\n", + epause->cmd, epause->autoneg, epause->rx_pause, + epause->tx_pause); +} + +static int qede_set_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *epause) +{ + struct qede_dev *edev = netdev_priv(dev); + struct qed_link_params params; + struct qed_link_output current_link; + + if (!edev->dev_info.common.is_mf) { + DP_INFO(edev, + "Pause parameters can not be updated in non-default mode\n"); + return -EOPNOTSUPP; + } + + memset(¤t_link, 0, sizeof(current_link)); + edev->ops->common->get_link(edev->cdev, ¤t_link); + + memset(¶ms, 0, sizeof(params)); + params.override_flags |= QED_LINK_OVERRIDE_PAUSE_CONFIG; + if (epause->autoneg) { + if (!(current_link.supported_caps & SUPPORTED_Autoneg)) { + DP_INFO(edev, "autoneg not supported\n"); + return -EINVAL; + } + params.pause_config |= QED_LINK_PAUSE_AUTONEG_ENABLE; + } + if (epause->rx_pause) + params.pause_config |= QED_LINK_PAUSE_RX_ENABLE; + if (epause->tx_pause) + params.pause_config |= QED_LINK_PAUSE_TX_ENABLE; + + params.link_up = true; + edev->ops->common->set_link(edev->cdev, ¶ms); + + return 0; +} + static void qede_update_mtu(struct qede_dev *edev, union qede_reload_args *args) { edev->ndev->mtu = args->mtu; @@ -366,17 +490,104 @@ int qede_change_mtu(struct net_device *ndev, int new_mtu) return 0; } +static void qede_get_channels(struct net_device *dev, + struct ethtool_channels *channels) +{ + struct qede_dev *edev = netdev_priv(dev); + + channels->max_combined = QEDE_MAX_RSS_CNT(edev); + channels->combined_count = QEDE_RSS_CNT(edev); +} + +static int qede_set_channels(struct net_device *dev, + struct ethtool_channels *channels) +{ + struct qede_dev *edev = netdev_priv(dev); + + DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), + "set-channels command parameters: rx = %d, tx = %d, other = %d, combined = %d\n", + channels->rx_count, channels->tx_count, + channels->other_count, channels->combined_count); + + /* We don't support separate rx / tx, nor `other' channels. */ + if (channels->rx_count || channels->tx_count || + channels->other_count || (channels->combined_count == 0) || + (channels->combined_count > QEDE_MAX_RSS_CNT(edev))) { + DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), + "command parameters not supported\n"); + return -EINVAL; + } + + /* Check if there was a change in the active parameters */ + if (channels->combined_count == QEDE_RSS_CNT(edev)) { + DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), + "No change in active parameters\n"); + return 0; + } + + /* We need the number of queues to be divisible between the hwfns */ + if (channels->combined_count % edev->dev_info.common.num_hwfns) { + DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), + "Number of channels must be divisable by %04x\n", + edev->dev_info.common.num_hwfns); + return -EINVAL; + } + + /* Set number of queues and reload if necessary */ + edev->req_rss = channels->combined_count; + if (netif_running(dev)) + qede_reload(edev, NULL, NULL); + + return 0; +} + +static int qede_set_phys_id(struct net_device *dev, + enum ethtool_phys_id_state state) +{ + struct qede_dev *edev = netdev_priv(dev); + u8 led_state = 0; + + switch (state) { + case ETHTOOL_ID_ACTIVE: + return 1; /* cycle on/off once per second */ + + case ETHTOOL_ID_ON: + led_state = QED_LED_MODE_ON; + break; + + case ETHTOOL_ID_OFF: + led_state = QED_LED_MODE_OFF; + break; + + case ETHTOOL_ID_INACTIVE: + led_state = QED_LED_MODE_RESTORE; + break; + } + + edev->ops->common->set_led(edev->cdev, led_state); + + return 0; +} + static const struct ethtool_ops qede_ethtool_ops = { .get_settings = qede_get_settings, .set_settings = qede_set_settings, .get_drvinfo = qede_get_drvinfo, .get_msglevel = qede_get_msglevel, .set_msglevel = qede_set_msglevel, + .nway_reset = qede_nway_reset, .get_link = qede_get_link, + .get_ringparam = qede_get_ringparam, + .set_ringparam = qede_set_ringparam, + .get_pauseparam = qede_get_pauseparam, + .set_pauseparam = qede_set_pauseparam, .get_strings = qede_get_strings, + .set_phys_id = qede_set_phys_id, .get_ethtool_stats = qede_get_ethtool_stats, .get_sset_count = qede_get_sset_count, + .get_channels = qede_get_channels, + .set_channels = qede_set_channels, }; void qede_set_ethtool_ops(struct net_device *dev) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index f4657a2e730aa4..6237f10b5119fa 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -1502,8 +1502,11 @@ static int qede_set_num_queues(struct qede_dev *edev) u16 rss_num; /* Setup queues according to possible resources*/ - rss_num = netif_get_num_default_rss_queues() * - edev->dev_info.common.num_hwfns; + if (edev->req_rss) + rss_num = edev->req_rss; + else + rss_num = netif_get_num_default_rss_queues() * + edev->dev_info.common.num_hwfns; rss_num = min_t(u16, QEDE_MAX_RSS_CNT(edev), rss_num); diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index f9dee7436e8185..9fbe92ac225b00 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -206,6 +206,7 @@ enum CCC_BIT { CCC_OPC_RESET = 0x00000000, CCC_OPC_CONFIG = 0x00000001, CCC_OPC_OPERATION = 0x00000002, + CCC_GAC = 0x00000080, CCC_DTSR = 0x00000100, CCC_CSEL = 0x00030000, CCC_CSEL_HPB = 0x00010000, diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 16992d61d4d89d..1cf12264861cf6 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -115,12 +115,15 @@ static void ravb_read_mac_address(struct net_device *ndev, const u8 *mac) if (mac) { ether_addr_copy(ndev->dev_addr, mac); } else { - ndev->dev_addr[0] = (ravb_read(ndev, MAHR) >> 24); - ndev->dev_addr[1] = (ravb_read(ndev, MAHR) >> 16) & 0xFF; - ndev->dev_addr[2] = (ravb_read(ndev, MAHR) >> 8) & 0xFF; - ndev->dev_addr[3] = (ravb_read(ndev, MAHR) >> 0) & 0xFF; - ndev->dev_addr[4] = (ravb_read(ndev, MALR) >> 8) & 0xFF; - ndev->dev_addr[5] = (ravb_read(ndev, MALR) >> 0) & 0xFF; + u32 mahr = ravb_read(ndev, MAHR); + u32 malr = ravb_read(ndev, MALR); + + ndev->dev_addr[0] = (mahr >> 24) & 0xFF; + ndev->dev_addr[1] = (mahr >> 16) & 0xFF; + ndev->dev_addr[2] = (mahr >> 8) & 0xFF; + ndev->dev_addr[3] = (mahr >> 0) & 0xFF; + ndev->dev_addr[4] = (malr >> 8) & 0xFF; + ndev->dev_addr[5] = (malr >> 0) & 0xFF; } } @@ -1231,7 +1234,8 @@ static int ravb_open(struct net_device *ndev) ravb_emac_init(ndev); /* Initialise PTP Clock driver */ - ravb_ptp_init(ndev, priv->pdev); + if (priv->chip_id == RCAR_GEN2) + ravb_ptp_init(ndev, priv->pdev); netif_tx_start_all_queues(ndev); @@ -1244,7 +1248,8 @@ static int ravb_open(struct net_device *ndev) out_ptp_stop: /* Stop PTP Clock driver */ - ravb_ptp_stop(ndev); + if (priv->chip_id == RCAR_GEN2) + ravb_ptp_stop(ndev); out_free_irq2: if (priv->chip_id == RCAR_GEN3) free_irq(priv->emac_irq, ndev); @@ -1478,7 +1483,8 @@ static int ravb_close(struct net_device *ndev) ravb_write(ndev, 0, TIC); /* Stop PTP Clock driver */ - ravb_ptp_stop(ndev); + if (priv->chip_id == RCAR_GEN2) + ravb_ptp_stop(ndev); /* Set the config mode to stop the AVB-DMAC's processes */ if (ravb_stop_dma(ndev) < 0) @@ -1658,7 +1664,9 @@ static int ravb_mdio_release(struct ravb_private *priv) static const struct of_device_id ravb_match_table[] = { { .compatible = "renesas,etheravb-r8a7790", .data = (void *)RCAR_GEN2 }, { .compatible = "renesas,etheravb-r8a7794", .data = (void *)RCAR_GEN2 }, + { .compatible = "renesas,etheravb-rcar-gen2", .data = (void *)RCAR_GEN2 }, { .compatible = "renesas,etheravb-r8a7795", .data = (void *)RCAR_GEN3 }, + { .compatible = "renesas,etheravb-rcar-gen3", .data = (void *)RCAR_GEN3 }, { } }; MODULE_DEVICE_TABLE(of, ravb_match_table); @@ -1783,8 +1791,16 @@ static int ravb_probe(struct platform_device *pdev) ndev->ethtool_ops = &ravb_ethtool_ops; /* Set AVB config mode */ - ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_OPC) | CCC_OPC_CONFIG, - CCC); + if (chip_id == RCAR_GEN2) { + ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_OPC) | + CCC_OPC_CONFIG, CCC); + /* Set CSEL value */ + ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_CSEL) | + CCC_CSEL_HPB, CCC); + } else { + ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_OPC) | + CCC_OPC_CONFIG | CCC_GAC | CCC_CSEL_HPB, CCC); + } /* Set CSEL value */ ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_CSEL) | CCC_CSEL_HPB, @@ -1816,6 +1832,10 @@ static int ravb_probe(struct platform_device *pdev) /* Initialise HW timestamp list */ INIT_LIST_HEAD(&priv->ts_skb_list); + /* Initialise PTP Clock driver */ + if (chip_id != RCAR_GEN2) + ravb_ptp_init(ndev, pdev); + /* Debug message level */ priv->msg_enable = RAVB_DEF_MSG_ENABLE; @@ -1857,6 +1877,10 @@ out_napi_del: out_dma_free: dma_free_coherent(ndev->dev.parent, priv->desc_bat_size, priv->desc_bat, priv->desc_bat_dma); + + /* Stop PTP Clock driver */ + if (chip_id != RCAR_GEN2) + ravb_ptp_stop(ndev); out_release: if (ndev) free_netdev(ndev); @@ -1871,6 +1895,10 @@ static int ravb_remove(struct platform_device *pdev) struct net_device *ndev = platform_get_drvdata(pdev); struct ravb_private *priv = netdev_priv(ndev); + /* Stop PTP Clock driver */ + if (priv->chip_id != RCAR_GEN2) + ravb_ptp_stop(ndev); + dma_free_coherent(ndev->dev.parent, priv->desc_bat_size, priv->desc_bat, priv->desc_bat_dma); /* Set reset mode */ diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 7f3c6109d45ed8..67cd24312c1153 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -989,12 +989,15 @@ static void read_mac_address(struct net_device *ndev, unsigned char *mac) if (mac[0] || mac[1] || mac[2] || mac[3] || mac[4] || mac[5]) { memcpy(ndev->dev_addr, mac, ETH_ALEN); } else { - ndev->dev_addr[0] = (sh_eth_read(ndev, MAHR) >> 24); - ndev->dev_addr[1] = (sh_eth_read(ndev, MAHR) >> 16) & 0xFF; - ndev->dev_addr[2] = (sh_eth_read(ndev, MAHR) >> 8) & 0xFF; - ndev->dev_addr[3] = (sh_eth_read(ndev, MAHR) & 0xFF); - ndev->dev_addr[4] = (sh_eth_read(ndev, MALR) >> 8) & 0xFF; - ndev->dev_addr[5] = (sh_eth_read(ndev, MALR) & 0xFF); + u32 mahr = sh_eth_read(ndev, MAHR); + u32 malr = sh_eth_read(ndev, MALR); + + ndev->dev_addr[0] = (mahr >> 24) & 0xFF; + ndev->dev_addr[1] = (mahr >> 16) & 0xFF; + ndev->dev_addr[2] = (mahr >> 8) & 0xFF; + ndev->dev_addr[3] = (mahr >> 0) & 0xFF; + ndev->dev_addr[4] = (malr >> 8) & 0xFF; + ndev->dev_addr[5] = (malr >> 0) & 0xFF; } } @@ -1010,56 +1013,34 @@ struct bb_info { void (*set_gate)(void *addr); struct mdiobb_ctrl ctrl; void *addr; - u32 mmd_msk;/* MMD */ - u32 mdo_msk; - u32 mdi_msk; - u32 mdc_msk; }; -/* PHY bit set */ -static void bb_set(void *addr, u32 msk) +static void sh_mdio_ctrl(struct mdiobb_ctrl *ctrl, u32 mask, int set) { - iowrite32(ioread32(addr) | msk, addr); -} + struct bb_info *bitbang = container_of(ctrl, struct bb_info, ctrl); + u32 pir; -/* PHY bit clear */ -static void bb_clr(void *addr, u32 msk) -{ - iowrite32((ioread32(addr) & ~msk), addr); -} + if (bitbang->set_gate) + bitbang->set_gate(bitbang->addr); -/* PHY bit read */ -static int bb_read(void *addr, u32 msk) -{ - return (ioread32(addr) & msk) != 0; + pir = ioread32(bitbang->addr); + if (set) + pir |= mask; + else + pir &= ~mask; + iowrite32(pir, bitbang->addr); } /* Data I/O pin control */ static void sh_mmd_ctrl(struct mdiobb_ctrl *ctrl, int bit) { - struct bb_info *bitbang = container_of(ctrl, struct bb_info, ctrl); - - if (bitbang->set_gate) - bitbang->set_gate(bitbang->addr); - - if (bit) - bb_set(bitbang->addr, bitbang->mmd_msk); - else - bb_clr(bitbang->addr, bitbang->mmd_msk); + sh_mdio_ctrl(ctrl, PIR_MMD, bit); } /* Set bit data*/ static void sh_set_mdio(struct mdiobb_ctrl *ctrl, int bit) { - struct bb_info *bitbang = container_of(ctrl, struct bb_info, ctrl); - - if (bitbang->set_gate) - bitbang->set_gate(bitbang->addr); - - if (bit) - bb_set(bitbang->addr, bitbang->mdo_msk); - else - bb_clr(bitbang->addr, bitbang->mdo_msk); + sh_mdio_ctrl(ctrl, PIR_MDO, bit); } /* Get bit data*/ @@ -1070,21 +1051,13 @@ static int sh_get_mdio(struct mdiobb_ctrl *ctrl) if (bitbang->set_gate) bitbang->set_gate(bitbang->addr); - return bb_read(bitbang->addr, bitbang->mdi_msk); + return (ioread32(bitbang->addr) & PIR_MDI) != 0; } /* MDC pin control */ static void sh_mdc_ctrl(struct mdiobb_ctrl *ctrl, int bit) { - struct bb_info *bitbang = container_of(ctrl, struct bb_info, ctrl); - - if (bitbang->set_gate) - bitbang->set_gate(bitbang->addr); - - if (bit) - bb_set(bitbang->addr, bitbang->mdc_msk); - else - bb_clr(bitbang->addr, bitbang->mdc_msk); + sh_mdio_ctrl(ctrl, PIR_MDC, bit); } /* mdio bus control struct */ @@ -2896,10 +2869,6 @@ static int sh_mdio_init(struct sh_eth_private *mdp, /* bitbang init */ bitbang->addr = mdp->addr + mdp->reg_offset[PIR]; bitbang->set_gate = pd->set_mdio_gate; - bitbang->mdi_msk = PIR_MDI; - bitbang->mdo_msk = PIR_MDO; - bitbang->mmd_msk = PIR_MMD; - bitbang->mdc_msk = PIR_MDC; bitbang->ctrl.ops = &bb_ops; /* MII controller setting */ diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index bc6d21b471be4f..c4a0e8a967dd7e 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -181,13 +181,6 @@ static int efx_ef10_init_datapath_caps(struct efx_nic *efx) MCDI_WORD(outbuf, GET_CAPABILITIES_OUT_TX_DPCPU_FW_ID); if (!(nic_data->datapath_caps & - (1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN))) { - netif_err(efx, drv, efx->net_dev, - "current firmware does not support TSO\n"); - return -ENODEV; - } - - if (!(nic_data->datapath_caps & (1 << MC_CMD_GET_CAPABILITIES_OUT_RX_PREFIX_LEN_14_LBN))) { netif_err(efx, probe, efx->net_dev, "current firmware does not support an RX prefix\n"); @@ -1797,6 +1790,12 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) ESF_DZ_TX_OPTION_UDP_TCP_CSUM, csum_offload, ESF_DZ_TX_OPTION_IP_CSUM, csum_offload); tx_queue->write_count = 1; + + if (nic_data->datapath_caps & + (1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN)) { + tx_queue->tso_version = 1; + } + wmb(); efx_ef10_push_tx_desc(tx_queue, txd); @@ -2375,8 +2374,19 @@ static int efx_ef10_ev_init(struct efx_channel *channel) 1 << MC_CMD_WORKAROUND_EXT_OUT_FLR_DONE_LBN) { netif_info(efx, drv, efx->net_dev, "other functions on NIC have been reset\n"); - /* MC's boot count has incremented */ - ++nic_data->warm_boot_count; + + /* With MCFW v4.6.x and earlier, the + * boot count will have incremented, + * so re-read the warm_boot_count + * value now to ensure this function + * doesn't think it has changed next + * time it checks. + */ + rc = efx_ef10_get_warm_boot_count(efx); + if (rc >= 0) { + nic_data->warm_boot_count = rc; + rc = 0; + } } nic_data->workaround_26807 = true; } else if (rc == -EPERM) { diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 4e82bcfbe3e09f..b405349a570c28 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -2784,6 +2784,12 @@ static const struct pci_device_id efx_pci_table[] = { .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type}, {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0923), /* SFC9140 PF */ .driver_data = (unsigned long) &efx_hunt_a0_nic_type}, + {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1923), /* SFC9140 VF */ + .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type}, + {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0a03), /* SFC9220 PF */ + .driver_data = (unsigned long) &efx_hunt_a0_nic_type}, + {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1a03), /* SFC9220 VF */ + .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type}, {0} /* end of list */ }; diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index a8ddd122f68586..38c422321cdafe 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -182,6 +182,7 @@ struct efx_tx_buffer { * * @efx: The associated Efx NIC * @queue: DMA queue number + * @tso_version: Version of TSO in use for this queue. * @channel: The associated channel * @core_txq: The networking core TX queue structure * @buffer: The software buffer ring @@ -228,6 +229,7 @@ struct efx_tx_queue { /* Members which don't change on the fast path */ struct efx_nic *efx ____cacheline_aligned_in_smp; unsigned queue; + unsigned int tso_version; struct efx_channel *channel; struct netdev_queue *core_txq; struct efx_tx_buffer *buffer; @@ -1502,8 +1504,9 @@ static inline struct efx_rx_buffer *efx_rx_buffer(struct efx_rx_queue *rx_queue, * same cycle, the XMAC can miss the IPG altogether. We work around * this by adding a further 16 bytes. */ +#define EFX_FRAME_PAD 16 #define EFX_MAX_FRAME_LEN(mtu) \ - ((((mtu) + ETH_HLEN + VLAN_HLEN + 4/* FCS */ + 7) & ~7) + 16) + (ALIGN(((mtu) + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN + EFX_FRAME_PAD), 8)) static inline bool efx_xmit_with_hwtstamp(struct sk_buff *skb) { diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 67f6afaa022f4e..f7a0ec1bca97d9 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -1010,13 +1010,17 @@ static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue, /* Parse the SKB header and initialise state. */ static int tso_start(struct tso_state *st, struct efx_nic *efx, + struct efx_tx_queue *tx_queue, const struct sk_buff *skb) { - bool use_opt_desc = efx_nic_rev(efx) >= EFX_REV_HUNT_A0; struct device *dma_dev = &efx->pci_dev->dev; unsigned int header_len, in_len; + bool use_opt_desc = false; dma_addr_t dma_addr; + if (tx_queue->tso_version == 1) + use_opt_desc = true; + st->ip_off = skb_network_header(skb) - skb->data; st->tcp_off = skb_transport_header(skb) - skb->data; header_len = st->tcp_off + (tcp_hdr(skb)->doff << 2u); @@ -1271,7 +1275,7 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, /* Find the packet protocol and sanity-check it */ state.protocol = efx_tso_check_protocol(skb); - rc = tso_start(&state, efx, skb); + rc = tso_start(&state, efx, tx_queue, skb); if (rc) goto mem_err; diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 623c6ed8764ac3..f4518bc2cd288b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -137,6 +137,31 @@ struct stmmac_extra_stats { unsigned long pcs_link; unsigned long pcs_duplex; unsigned long pcs_speed; + /* debug register */ + unsigned long mtl_tx_status_fifo_full; + unsigned long mtl_tx_fifo_not_empty; + unsigned long mmtl_fifo_ctrl; + unsigned long mtl_tx_fifo_read_ctrl_write; + unsigned long mtl_tx_fifo_read_ctrl_wait; + unsigned long mtl_tx_fifo_read_ctrl_read; + unsigned long mtl_tx_fifo_read_ctrl_idle; + unsigned long mac_tx_in_pause; + unsigned long mac_tx_frame_ctrl_xfer; + unsigned long mac_tx_frame_ctrl_idle; + unsigned long mac_tx_frame_ctrl_wait; + unsigned long mac_tx_frame_ctrl_pause; + unsigned long mac_gmii_tx_proto_engine; + unsigned long mtl_rx_fifo_fill_level_full; + unsigned long mtl_rx_fifo_fill_above_thresh; + unsigned long mtl_rx_fifo_fill_below_thresh; + unsigned long mtl_rx_fifo_fill_level_empty; + unsigned long mtl_rx_fifo_read_ctrl_flush; + unsigned long mtl_rx_fifo_read_ctrl_read_data; + unsigned long mtl_rx_fifo_read_ctrl_status; + unsigned long mtl_rx_fifo_read_ctrl_idle; + unsigned long mtl_rx_fifo_ctrl_active; + unsigned long mac_rx_frame_ctrl_fifo; + unsigned long mac_gmii_rx_proto_engine; }; /* CSR Frequency Access Defines*/ @@ -408,6 +433,7 @@ struct stmmac_ops { void (*set_eee_pls)(struct mac_device_info *hw, int link); void (*ctrl_ane)(struct mac_device_info *hw, bool restart); void (*get_adv)(struct mac_device_info *hw, struct rgmii_adv *adv); + void (*debug)(void __iomem *ioaddr, struct stmmac_extra_stats *x); }; /* PTP and HW Timer helpers */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c index 82de68b1a45277..36d3355f2fb00e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c @@ -198,19 +198,19 @@ static int ipq806x_gmac_set_speed(struct ipq806x_gmac *gmac, unsigned int speed) return 0; } -static void *ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac) +static int ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac) { struct device *dev = &gmac->pdev->dev; gmac->phy_mode = of_get_phy_mode(dev->of_node); if (gmac->phy_mode < 0) { dev_err(dev, "missing phy mode property\n"); - return ERR_PTR(-EINVAL); + return -EINVAL; } if (of_property_read_u32(dev->of_node, "qcom,id", &gmac->id) < 0) { dev_err(dev, "missing qcom id property\n"); - return ERR_PTR(-EINVAL); + return -EINVAL; } /* The GMACs are called 1 to 4 in the documentation, but to simplify the @@ -219,13 +219,13 @@ static void *ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac) */ if (gmac->id < 0 || gmac->id > 3) { dev_err(dev, "invalid gmac id\n"); - return ERR_PTR(-EINVAL); + return -EINVAL; } gmac->core_clk = devm_clk_get(dev, "stmmaceth"); if (IS_ERR(gmac->core_clk)) { dev_err(dev, "missing stmmaceth clk property\n"); - return gmac->core_clk; + return PTR_ERR(gmac->core_clk); } clk_set_rate(gmac->core_clk, 266000000); @@ -234,18 +234,16 @@ static void *ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac) "qcom,nss-common"); if (IS_ERR(gmac->nss_common)) { dev_err(dev, "missing nss-common node\n"); - return gmac->nss_common; + return PTR_ERR(gmac->nss_common); } /* Setup the register map for the qsgmii csr registers */ gmac->qsgmii_csr = syscon_regmap_lookup_by_phandle(dev->of_node, "qcom,qsgmii-csr"); - if (IS_ERR(gmac->qsgmii_csr)) { + if (IS_ERR(gmac->qsgmii_csr)) dev_err(dev, "missing qsgmii-csr node\n"); - return gmac->qsgmii_csr; - } - return NULL; + return PTR_ERR_OR_ZERO(gmac->qsgmii_csr); } static void ipq806x_gmac_fix_mac_speed(void *priv, unsigned int speed) @@ -262,7 +260,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct ipq806x_gmac *gmac; int val; - void *err; + int err; val = stmmac_get_platform_resources(pdev, &stmmac_res); if (val) @@ -279,9 +277,9 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) gmac->pdev = pdev; err = ipq806x_gmac_of_parse(gmac); - if (IS_ERR(err)) { + if (err) { dev_err(dev, "device tree parsing error\n"); - return PTR_ERR(err); + return err; } regmap_write(gmac->qsgmii_csr, QSGMII_PCS_CAL_LCKDT_CTL, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h index b3fe0575ff6b7a..8831a053ac1354 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h @@ -34,6 +34,7 @@ #define GMAC_FLOW_CTRL 0x00000018 /* Flow Control */ #define GMAC_VLAN_TAG 0x0000001c /* VLAN Tag */ #define GMAC_VERSION 0x00000020 /* GMAC CORE Version */ +#define GMAC_DEBUG 0x00000024 /* GMAC debug register */ #define GMAC_WAKEUP_FILTER 0x00000028 /* Wake-up Frame Filter */ #define GMAC_INT_STATUS 0x00000038 /* interrupt status register */ @@ -177,6 +178,47 @@ enum inter_frame_gap { #define GMAC_FLOW_CTRL_TFE 0x00000002 /* Tx Flow Control Enable */ #define GMAC_FLOW_CTRL_FCB_BPA 0x00000001 /* Flow Control Busy ... */ +/* DEBUG Register defines */ +/* MTL TxStatus FIFO */ +#define GMAC_DEBUG_TXSTSFSTS BIT(25) /* MTL TxStatus FIFO Full Status */ +#define GMAC_DEBUG_TXFSTS BIT(24) /* MTL Tx FIFO Not Empty Status */ +#define GMAC_DEBUG_TWCSTS BIT(22) /* MTL Tx FIFO Write Controller */ +/* MTL Tx FIFO Read Controller Status */ +#define GMAC_DEBUG_TRCSTS_MASK GENMASK(21, 20) +#define GMAC_DEBUG_TRCSTS_SHIFT 20 +#define GMAC_DEBUG_TRCSTS_IDLE 0 +#define GMAC_DEBUG_TRCSTS_READ 1 +#define GMAC_DEBUG_TRCSTS_TXW 2 +#define GMAC_DEBUG_TRCSTS_WRITE 3 +#define GMAC_DEBUG_TXPAUSED BIT(19) /* MAC Transmitter in PAUSE */ +/* MAC Transmit Frame Controller Status */ +#define GMAC_DEBUG_TFCSTS_MASK GENMASK(18, 17) +#define GMAC_DEBUG_TFCSTS_SHIFT 17 +#define GMAC_DEBUG_TFCSTS_IDLE 0 +#define GMAC_DEBUG_TFCSTS_WAIT 1 +#define GMAC_DEBUG_TFCSTS_GEN_PAUSE 2 +#define GMAC_DEBUG_TFCSTS_XFER 3 +/* MAC GMII or MII Transmit Protocol Engine Status */ +#define GMAC_DEBUG_TPESTS BIT(16) +#define GMAC_DEBUG_RXFSTS_MASK GENMASK(9, 8) /* MTL Rx FIFO Fill-level */ +#define GMAC_DEBUG_RXFSTS_SHIFT 8 +#define GMAC_DEBUG_RXFSTS_EMPTY 0 +#define GMAC_DEBUG_RXFSTS_BT 1 +#define GMAC_DEBUG_RXFSTS_AT 2 +#define GMAC_DEBUG_RXFSTS_FULL 3 +#define GMAC_DEBUG_RRCSTS_MASK GENMASK(6, 5) /* MTL Rx FIFO Read Controller */ +#define GMAC_DEBUG_RRCSTS_SHIFT 5 +#define GMAC_DEBUG_RRCSTS_IDLE 0 +#define GMAC_DEBUG_RRCSTS_RDATA 1 +#define GMAC_DEBUG_RRCSTS_RSTAT 2 +#define GMAC_DEBUG_RRCSTS_FLUSH 3 +#define GMAC_DEBUG_RWCSTS BIT(4) /* MTL Rx FIFO Write Controller Active */ +/* MAC Receive Frame Controller FIFO Status */ +#define GMAC_DEBUG_RFCFCSTS_MASK GENMASK(2, 1) +#define GMAC_DEBUG_RFCFCSTS_SHIFT 1 +/* MAC GMII or MII Receive Protocol Engine Status */ +#define GMAC_DEBUG_RPESTS BIT(0) + /*--- DMA BLOCK defines ---*/ /* DMA Bus Mode register defines */ #define DMA_BUS_MODE_SFT_RESET 0x00000001 /* Software Reset */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index 371a669d69fd51..c2941172f6d13e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -397,6 +397,80 @@ static void dwmac1000_get_adv(struct mac_device_info *hw, struct rgmii_adv *adv) adv->lp_pause = (value & GMAC_ANE_PSE) >> GMAC_ANE_PSE_SHIFT; } +static void dwmac1000_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x) +{ + u32 value = readl(ioaddr + GMAC_DEBUG); + + if (value & GMAC_DEBUG_TXSTSFSTS) + x->mtl_tx_status_fifo_full++; + if (value & GMAC_DEBUG_TXFSTS) + x->mtl_tx_fifo_not_empty++; + if (value & GMAC_DEBUG_TWCSTS) + x->mmtl_fifo_ctrl++; + if (value & GMAC_DEBUG_TRCSTS_MASK) { + u32 trcsts = (value & GMAC_DEBUG_TRCSTS_MASK) + >> GMAC_DEBUG_TRCSTS_SHIFT; + if (trcsts == GMAC_DEBUG_TRCSTS_WRITE) + x->mtl_tx_fifo_read_ctrl_write++; + else if (trcsts == GMAC_DEBUG_TRCSTS_TXW) + x->mtl_tx_fifo_read_ctrl_wait++; + else if (trcsts == GMAC_DEBUG_TRCSTS_READ) + x->mtl_tx_fifo_read_ctrl_read++; + else + x->mtl_tx_fifo_read_ctrl_idle++; + } + if (value & GMAC_DEBUG_TXPAUSED) + x->mac_tx_in_pause++; + if (value & GMAC_DEBUG_TFCSTS_MASK) { + u32 tfcsts = (value & GMAC_DEBUG_TFCSTS_MASK) + >> GMAC_DEBUG_TFCSTS_SHIFT; + + if (tfcsts == GMAC_DEBUG_TFCSTS_XFER) + x->mac_tx_frame_ctrl_xfer++; + else if (tfcsts == GMAC_DEBUG_TFCSTS_GEN_PAUSE) + x->mac_tx_frame_ctrl_pause++; + else if (tfcsts == GMAC_DEBUG_TFCSTS_WAIT) + x->mac_tx_frame_ctrl_wait++; + else + x->mac_tx_frame_ctrl_idle++; + } + if (value & GMAC_DEBUG_TPESTS) + x->mac_gmii_tx_proto_engine++; + if (value & GMAC_DEBUG_RXFSTS_MASK) { + u32 rxfsts = (value & GMAC_DEBUG_RXFSTS_MASK) + >> GMAC_DEBUG_RRCSTS_SHIFT; + + if (rxfsts == GMAC_DEBUG_RXFSTS_FULL) + x->mtl_rx_fifo_fill_level_full++; + else if (rxfsts == GMAC_DEBUG_RXFSTS_AT) + x->mtl_rx_fifo_fill_above_thresh++; + else if (rxfsts == GMAC_DEBUG_RXFSTS_BT) + x->mtl_rx_fifo_fill_below_thresh++; + else + x->mtl_rx_fifo_fill_level_empty++; + } + if (value & GMAC_DEBUG_RRCSTS_MASK) { + u32 rrcsts = (value & GMAC_DEBUG_RRCSTS_MASK) >> + GMAC_DEBUG_RRCSTS_SHIFT; + + if (rrcsts == GMAC_DEBUG_RRCSTS_FLUSH) + x->mtl_rx_fifo_read_ctrl_flush++; + else if (rrcsts == GMAC_DEBUG_RRCSTS_RSTAT) + x->mtl_rx_fifo_read_ctrl_read_data++; + else if (rrcsts == GMAC_DEBUG_RRCSTS_RDATA) + x->mtl_rx_fifo_read_ctrl_status++; + else + x->mtl_rx_fifo_read_ctrl_idle++; + } + if (value & GMAC_DEBUG_RWCSTS) + x->mtl_rx_fifo_ctrl_active++; + if (value & GMAC_DEBUG_RFCFCSTS_MASK) + x->mac_rx_frame_ctrl_fifo = (value & GMAC_DEBUG_RFCFCSTS_MASK) + >> GMAC_DEBUG_RFCFCSTS_SHIFT; + if (value & GMAC_DEBUG_RPESTS) + x->mac_gmii_rx_proto_engine++; +} + static const struct stmmac_ops dwmac1000_ops = { .core_init = dwmac1000_core_init, .rx_ipc = dwmac1000_rx_ipc_enable, @@ -413,6 +487,7 @@ static const struct stmmac_ops dwmac1000_ops = { .set_eee_pls = dwmac1000_set_eee_pls, .ctrl_ane = dwmac1000_ctrl_ane, .get_adv = dwmac1000_get_adv, + .debug = dwmac1000_debug, }; struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr, int mcbins, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 2e51b816a7e819..4c6486cc80fbd1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -136,6 +136,31 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = { STMMAC_STAT(irq_pcs_ane_n), STMMAC_STAT(irq_pcs_link_n), STMMAC_STAT(irq_rgmii_n), + /* DEBUG */ + STMMAC_STAT(mtl_tx_status_fifo_full), + STMMAC_STAT(mtl_tx_fifo_not_empty), + STMMAC_STAT(mmtl_fifo_ctrl), + STMMAC_STAT(mtl_tx_fifo_read_ctrl_write), + STMMAC_STAT(mtl_tx_fifo_read_ctrl_wait), + STMMAC_STAT(mtl_tx_fifo_read_ctrl_read), + STMMAC_STAT(mtl_tx_fifo_read_ctrl_idle), + STMMAC_STAT(mac_tx_in_pause), + STMMAC_STAT(mac_tx_frame_ctrl_xfer), + STMMAC_STAT(mac_tx_frame_ctrl_idle), + STMMAC_STAT(mac_tx_frame_ctrl_wait), + STMMAC_STAT(mac_tx_frame_ctrl_pause), + STMMAC_STAT(mac_gmii_tx_proto_engine), + STMMAC_STAT(mtl_rx_fifo_fill_level_full), + STMMAC_STAT(mtl_rx_fifo_fill_above_thresh), + STMMAC_STAT(mtl_rx_fifo_fill_below_thresh), + STMMAC_STAT(mtl_rx_fifo_fill_level_empty), + STMMAC_STAT(mtl_rx_fifo_read_ctrl_flush), + STMMAC_STAT(mtl_rx_fifo_read_ctrl_read_data), + STMMAC_STAT(mtl_rx_fifo_read_ctrl_status), + STMMAC_STAT(mtl_rx_fifo_read_ctrl_idle), + STMMAC_STAT(mtl_rx_fifo_ctrl_active), + STMMAC_STAT(mac_rx_frame_ctrl_fifo), + STMMAC_STAT(mac_gmii_rx_proto_engine), }; #define STMMAC_STATS_LEN ARRAY_SIZE(stmmac_gstrings_stats) @@ -497,6 +522,11 @@ static void stmmac_get_ethtool_stats(struct net_device *dev, if (val) priv->xstats.phy_eee_wakeup_error_n = val; } + + if ((priv->hw->mac->debug) && + (priv->synopsys_id >= DWMAC_CORE_3_50)) + priv->hw->mac->debug(priv->ioaddr, + (void *)&priv->xstats); } for (i = 0; i < STMMAC_STATS_LEN; i++) { char *p = (char *)priv + stmmac_gstrings_stats[i].stat_offset; diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 5fa98f599b3dd4..f4130af09244fb 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -124,37 +124,22 @@ struct ndis_tcp_ip_checksum_info; /* * Represent netvsc packet which contains 1 RNDIS and 1 ethernet frame * within the RNDIS + * + * The size of this structure is less than 48 bytes and we can now + * place this structure in the skb->cb field. */ struct hv_netvsc_packet { /* Bookkeeping stuff */ - u32 status; - - bool is_data_pkt; - bool xmit_more; /* from skb */ - bool cp_partial; /* partial copy into send buffer */ + u8 cp_partial; /* partial copy into send buffer */ - u16 vlan_tci; + u8 rmsg_size; /* RNDIS header and PPI size */ + u8 rmsg_pgcnt; /* page count of RNDIS header and PPI */ + u8 page_buf_cnt; u16 q_idx; - struct vmbus_channel *channel; - - u64 send_completion_tid; - void *send_completion_ctx; - void (*send_completion)(void *context); - u32 send_buf_index; - /* This points to the memory after page_buf */ - struct rndis_message *rndis_msg; - - u32 rmsg_size; /* RNDIS header and PPI size */ - u32 rmsg_pgcnt; /* page count of RNDIS header and PPI */ - u32 total_data_buflen; - /* Points to the send/receive buffer where the ethernet frame is */ - void *data; - u32 page_buf_cnt; - struct hv_page_buffer *page_buf; }; struct netvsc_device_info { @@ -177,7 +162,6 @@ struct rndis_device { enum rndis_device_state state; bool link_state; - bool link_change; atomic_t new_req_id; spinlock_t request_lock; @@ -188,16 +172,22 @@ struct rndis_device { /* Interface */ +struct rndis_message; int netvsc_device_add(struct hv_device *device, void *additional_info); int netvsc_device_remove(struct hv_device *device); int netvsc_send(struct hv_device *device, - struct hv_netvsc_packet *packet); + struct hv_netvsc_packet *packet, + struct rndis_message *rndis_msg, + struct hv_page_buffer **page_buffer, + struct sk_buff *skb); void netvsc_linkstatus_callback(struct hv_device *device_obj, struct rndis_message *resp); -void netvsc_xmit_completion(void *context); int netvsc_recv_callback(struct hv_device *device_obj, struct hv_netvsc_packet *packet, - struct ndis_tcp_ip_checksum_info *csum_info); + void **data, + struct ndis_tcp_ip_checksum_info *csum_info, + struct vmbus_channel *channel, + u16 vlan_tci); void netvsc_channel_cb(void *context); int rndis_filter_open(struct hv_device *dev); int rndis_filter_close(struct hv_device *dev); @@ -205,12 +195,13 @@ int rndis_filter_device_add(struct hv_device *dev, void *additional_info); void rndis_filter_device_remove(struct hv_device *dev); int rndis_filter_receive(struct hv_device *dev, - struct hv_netvsc_packet *pkt); + struct hv_netvsc_packet *pkt, + void **data, + struct vmbus_channel *channel); int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter); int rndis_filter_set_device_mac(struct hv_device *hdev, char *mac); - #define NVSP_INVALID_PROTOCOL_VERSION ((u32)0xFFFFFFFF) #define NVSP_PROTOCOL_VERSION_1 2 @@ -633,7 +624,6 @@ struct nvsp_message { #define RNDIS_PKT_ALIGN_DEFAULT 8 struct multi_send_data { - spinlock_t lock; /* protect struct multi_send_data */ struct hv_netvsc_packet *pkt; /* netvsc pkt pending */ u32 count; /* counter of batched packets */ }; @@ -644,11 +634,24 @@ struct netvsc_stats { struct u64_stats_sync syncp; }; +struct netvsc_reconfig { + struct list_head list; + u32 event; +}; + /* The context of the netvsc device */ struct net_device_context { /* point back to our device context */ struct hv_device *device_ctx; + /* reconfigure work */ struct delayed_work dwork; + /* last reconfig time */ + unsigned long last_reconfig; + /* reconfig events */ + struct list_head reconfig_events; + /* list protection */ + spinlock_t lock; + struct work_struct work; u32 msg_enable; /* debug level */ @@ -1260,5 +1263,4 @@ struct rndis_message { #define TRANSPORT_INFO_IPV6_TCP ((INFO_IPV6 << 16) | INFO_TCP) #define TRANSPORT_INFO_IPV6_UDP ((INFO_IPV6 << 16) | INFO_UDP) - #endif /* _HYPERV_NET_H */ diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 51e4c0fd0a7480..02bab9a7c9ff6e 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -38,7 +38,6 @@ static struct netvsc_device *alloc_net_device(struct hv_device *device) { struct netvsc_device *net_device; struct net_device *ndev = hv_get_drvdata(device); - int i; net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL); if (!net_device) @@ -58,9 +57,6 @@ static struct netvsc_device *alloc_net_device(struct hv_device *device) net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT; net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT; - for (i = 0; i < num_online_cpus(); i++) - spin_lock_init(&net_device->msd[i].lock); - hv_set_drvdata(device, net_device); return net_device; } @@ -610,6 +606,7 @@ static inline void netvsc_free_send_slot(struct netvsc_device *net_device, } static void netvsc_send_completion(struct netvsc_device *net_device, + struct vmbus_channel *incoming_channel, struct hv_device *device, struct vmpacket_descriptor *packet) { @@ -617,6 +614,7 @@ static void netvsc_send_completion(struct netvsc_device *net_device, struct hv_netvsc_packet *nvsc_packet; struct net_device *ndev; u32 send_index; + struct sk_buff *skb; ndev = net_device->ndev; @@ -642,18 +640,17 @@ static void netvsc_send_completion(struct netvsc_device *net_device, int queue_sends; /* Get the send context */ - nvsc_packet = (struct hv_netvsc_packet *)(unsigned long) - packet->trans_id; + skb = (struct sk_buff *)(unsigned long)packet->trans_id; /* Notify the layer above us */ - if (nvsc_packet) { + if (skb) { + nvsc_packet = (struct hv_netvsc_packet *) skb->cb; send_index = nvsc_packet->send_buf_index; if (send_index != NETVSC_INVALID_INDEX) netvsc_free_send_slot(net_device, send_index); q_idx = nvsc_packet->q_idx; - channel = nvsc_packet->channel; - nvsc_packet->send_completion(nvsc_packet-> - send_completion_ctx); + channel = incoming_channel; + dev_kfree_skb_any(skb); } num_outstanding_sends = @@ -705,12 +702,17 @@ static u32 netvsc_get_next_send_section(struct netvsc_device *net_device) static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device, unsigned int section_index, u32 pend_size, - struct hv_netvsc_packet *packet) + struct hv_netvsc_packet *packet, + struct rndis_message *rndis_msg, + struct hv_page_buffer **pb, + struct sk_buff *skb) { char *start = net_device->send_buf; char *dest = start + (section_index * net_device->send_section_size) + pend_size; int i; + bool is_data_pkt = (skb != NULL) ? true : false; + bool xmit_more = (skb != NULL) ? skb->xmit_more : false; u32 msg_size = 0; u32 padding = 0; u32 remain = packet->total_data_buflen % net_device->pkt_align; @@ -718,17 +720,17 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device, packet->page_buf_cnt; /* Add padding */ - if (packet->is_data_pkt && packet->xmit_more && remain && + if (is_data_pkt && xmit_more && remain && !packet->cp_partial) { padding = net_device->pkt_align - remain; - packet->rndis_msg->msg_len += padding; + rndis_msg->msg_len += padding; packet->total_data_buflen += padding; } for (i = 0; i < page_count; i++) { - char *src = phys_to_virt(packet->page_buf[i].pfn << PAGE_SHIFT); - u32 offset = packet->page_buf[i].offset; - u32 len = packet->page_buf[i].len; + char *src = phys_to_virt((*pb)[i].pfn << PAGE_SHIFT); + u32 offset = (*pb)[i].offset; + u32 len = (*pb)[i].len; memcpy(dest, (src + offset), len); msg_size += len; @@ -745,19 +747,22 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device, static inline int netvsc_send_pkt( struct hv_netvsc_packet *packet, - struct netvsc_device *net_device) + struct netvsc_device *net_device, + struct hv_page_buffer **pb, + struct sk_buff *skb) { struct nvsp_message nvmsg; - struct vmbus_channel *out_channel = packet->channel; u16 q_idx = packet->q_idx; + struct vmbus_channel *out_channel = net_device->chn_table[q_idx]; struct net_device *ndev = net_device->ndev; u64 req_id; int ret; struct hv_page_buffer *pgbuf; u32 ring_avail = hv_ringbuf_avail_percent(&out_channel->outbound); + bool xmit_more = (skb != NULL) ? skb->xmit_more : false; nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT; - if (packet->is_data_pkt) { + if (skb != NULL) { /* 0 is RMC_DATA; */ nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 0; } else { @@ -773,10 +778,7 @@ static inline int netvsc_send_pkt( nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size = packet->total_data_buflen; - if (packet->send_completion) - req_id = (ulong)packet; - else - req_id = 0; + req_id = (ulong)skb; if (out_channel->rescind) return -ENODEV; @@ -789,11 +791,11 @@ static inline int netvsc_send_pkt( * unnecessarily. */ if (ring_avail < (RING_AVAIL_PERCENT_LOWATER + 1)) - packet->xmit_more = false; + xmit_more = false; if (packet->page_buf_cnt) { - pgbuf = packet->cp_partial ? packet->page_buf + - packet->rmsg_pgcnt : packet->page_buf; + pgbuf = packet->cp_partial ? (*pb) + + packet->rmsg_pgcnt : (*pb); ret = vmbus_sendpacket_pagebuffer_ctl(out_channel, pgbuf, packet->page_buf_cnt, @@ -801,14 +803,14 @@ static inline int netvsc_send_pkt( sizeof(struct nvsp_message), req_id, VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED, - !packet->xmit_more); + !xmit_more); } else { ret = vmbus_sendpacket_ctl(out_channel, &nvmsg, sizeof(struct nvsp_message), req_id, VM_PKT_DATA_INBAND, VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED, - !packet->xmit_more); + !xmit_more); } if (ret == 0) { @@ -840,7 +842,10 @@ static inline int netvsc_send_pkt( } int netvsc_send(struct hv_device *device, - struct hv_netvsc_packet *packet) + struct hv_netvsc_packet *packet, + struct rndis_message *rndis_msg, + struct hv_page_buffer **pb, + struct sk_buff *skb) { struct netvsc_device *net_device; int ret = 0, m_ret = 0; @@ -848,33 +853,27 @@ int netvsc_send(struct hv_device *device, u16 q_idx = packet->q_idx; u32 pktlen = packet->total_data_buflen, msd_len = 0; unsigned int section_index = NETVSC_INVALID_INDEX; - unsigned long flag; struct multi_send_data *msdp; struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL; bool try_batch; + bool xmit_more = (skb != NULL) ? skb->xmit_more : false; net_device = get_outbound_net_device(device); if (!net_device) return -ENODEV; out_channel = net_device->chn_table[q_idx]; - if (!out_channel) { - out_channel = device->channel; - q_idx = 0; - packet->q_idx = 0; - } - packet->channel = out_channel; + packet->send_buf_index = NETVSC_INVALID_INDEX; packet->cp_partial = false; msdp = &net_device->msd[q_idx]; /* batch packets in send buffer if possible */ - spin_lock_irqsave(&msdp->lock, flag); if (msdp->pkt) msd_len = msdp->pkt->total_data_buflen; - try_batch = packet->is_data_pkt && msd_len > 0 && msdp->count < + try_batch = (skb != NULL) && msd_len > 0 && msdp->count < net_device->max_pkt; if (try_batch && msd_len + pktlen + net_device->pkt_align < @@ -886,7 +885,7 @@ int netvsc_send(struct hv_device *device, section_index = msdp->pkt->send_buf_index; packet->cp_partial = true; - } else if (packet->is_data_pkt && pktlen + net_device->pkt_align < + } else if ((skb != NULL) && pktlen + net_device->pkt_align < net_device->send_section_size) { section_index = netvsc_get_next_send_section(net_device); if (section_index != NETVSC_INVALID_INDEX) { @@ -900,7 +899,7 @@ int netvsc_send(struct hv_device *device, if (section_index != NETVSC_INVALID_INDEX) { netvsc_copy_to_send_buf(net_device, section_index, msd_len, - packet); + packet, rndis_msg, pb, skb); packet->send_buf_index = section_index; @@ -913,9 +912,9 @@ int netvsc_send(struct hv_device *device, } if (msdp->pkt) - netvsc_xmit_completion(msdp->pkt); + dev_kfree_skb_any(skb); - if (packet->xmit_more && !packet->cp_partial) { + if (xmit_more && !packet->cp_partial) { msdp->pkt = packet; msdp->count++; } else { @@ -930,20 +929,18 @@ int netvsc_send(struct hv_device *device, cur_send = packet; } - spin_unlock_irqrestore(&msdp->lock, flag); - if (msd_send) { - m_ret = netvsc_send_pkt(msd_send, net_device); + m_ret = netvsc_send_pkt(msd_send, net_device, pb, skb); if (m_ret != 0) { netvsc_free_send_slot(net_device, msd_send->send_buf_index); - netvsc_xmit_completion(msd_send); + dev_kfree_skb_any(skb); } } if (cur_send) - ret = netvsc_send_pkt(cur_send, net_device); + ret = netvsc_send_pkt(cur_send, net_device, pb, skb); if (ret != 0 && section_index != NETVSC_INVALID_INDEX) netvsc_free_send_slot(net_device, section_index); @@ -1009,6 +1006,7 @@ static void netvsc_receive(struct netvsc_device *net_device, int i; int count = 0; struct net_device *ndev; + void *data; ndev = net_device->ndev; @@ -1043,22 +1041,19 @@ static void netvsc_receive(struct netvsc_device *net_device, } count = vmxferpage_packet->range_cnt; - netvsc_packet->channel = channel; /* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */ for (i = 0; i < count; i++) { /* Initialize the netvsc packet */ - netvsc_packet->status = NVSP_STAT_SUCCESS; - netvsc_packet->data = (void *)((unsigned long)net_device-> + data = (void *)((unsigned long)net_device-> recv_buf + vmxferpage_packet->ranges[i].byte_offset); netvsc_packet->total_data_buflen = vmxferpage_packet->ranges[i].byte_count; /* Pass it to the upper layer */ - rndis_filter_receive(device, netvsc_packet); + status = rndis_filter_receive(device, netvsc_packet, &data, + channel); - if (netvsc_packet->status != NVSP_STAT_SUCCESS) - status = NVSP_STAT_FAIL; } netvsc_send_recv_completion(device, channel, net_device, @@ -1150,6 +1145,7 @@ void netvsc_channel_cb(void *context) switch (desc->type) { case VM_PKT_COMP: netvsc_send_completion(net_device, + channel, device, desc); break; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 409b48e1e589df..1c8db9afdcda8c 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -42,6 +42,7 @@ #define RING_SIZE_MIN 64 +#define LINKCHANGE_INT (2 * HZ) static int ring_size = 128; module_param(ring_size, int, S_IRUGO); MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)"); @@ -272,17 +273,10 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, skb_set_hash(skb, hash, PKT_HASH_TYPE_L3); } - return q_idx; -} - -void netvsc_xmit_completion(void *context) -{ - struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context; - struct sk_buff *skb = (struct sk_buff *) - (unsigned long)packet->send_completion_tid; + if (!nvsc_dev->chn_table[q_idx]) + q_idx = 0; - if (skb) - dev_kfree_skb_any(skb); + return q_idx; } static u32 fill_pg_buf(struct page *page, u32 offset, u32 len, @@ -320,9 +314,10 @@ static u32 fill_pg_buf(struct page *page, u32 offset, u32 len, } static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb, - struct hv_netvsc_packet *packet) + struct hv_netvsc_packet *packet, + struct hv_page_buffer **page_buf) { - struct hv_page_buffer *pb = packet->page_buf; + struct hv_page_buffer *pb = *page_buf; u32 slots_used = 0; char *data = skb->data; int frags = skb_shinfo(skb)->nr_frags; @@ -432,8 +427,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) u32 net_trans_info; u32 hash; u32 skb_length; - u32 pkt_sz; struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT]; + struct hv_page_buffer *pb = page_buf; struct netvsc_stats *tx_stats = this_cpu_ptr(net_device_ctx->tx_stats); /* We will atmost need two pages to describe the rndis @@ -460,42 +455,34 @@ check_size: goto check_size; } - pkt_sz = sizeof(struct hv_netvsc_packet) + RNDIS_AND_PPI_SIZE; - - ret = skb_cow_head(skb, pkt_sz); + /* + * Place the rndis header in the skb head room and + * the skb->cb will be used for hv_netvsc_packet + * structure. + */ + ret = skb_cow_head(skb, RNDIS_AND_PPI_SIZE); if (ret) { netdev_err(net, "unable to alloc hv_netvsc_packet\n"); ret = -ENOMEM; goto drop; } - /* Use the headroom for building up the packet */ - packet = (struct hv_netvsc_packet *)skb->head; + /* Use the skb control buffer for building up the packet */ + BUILD_BUG_ON(sizeof(struct hv_netvsc_packet) > + FIELD_SIZEOF(struct sk_buff, cb)); + packet = (struct hv_netvsc_packet *)skb->cb; - packet->status = 0; - packet->xmit_more = skb->xmit_more; - - packet->vlan_tci = skb->vlan_tci; - packet->page_buf = page_buf; packet->q_idx = skb_get_queue_mapping(skb); - packet->is_data_pkt = true; packet->total_data_buflen = skb->len; - packet->rndis_msg = (struct rndis_message *)((unsigned long)packet + - sizeof(struct hv_netvsc_packet)); - - memset(packet->rndis_msg, 0, RNDIS_AND_PPI_SIZE); + rndis_msg = (struct rndis_message *)skb->head; - /* Set the completion routine */ - packet->send_completion = netvsc_xmit_completion; - packet->send_completion_ctx = packet; - packet->send_completion_tid = (unsigned long)skb; + memset(rndis_msg, 0, RNDIS_AND_PPI_SIZE); - isvlan = packet->vlan_tci & VLAN_TAG_PRESENT; + isvlan = skb->vlan_tci & VLAN_TAG_PRESENT; /* Add the rndis header */ - rndis_msg = packet->rndis_msg; rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET; rndis_msg->msg_len = packet->total_data_buflen; rndis_pkt = &rndis_msg->msg.pkt; @@ -521,8 +508,8 @@ check_size: IEEE_8021Q_INFO); vlan = (struct ndis_pkt_8021q_info *)((void *)ppi + ppi->ppi_offset); - vlan->vlanid = packet->vlan_tci & VLAN_VID_MASK; - vlan->pri = (packet->vlan_tci & VLAN_PRIO_MASK) >> + vlan->vlanid = skb->vlan_tci & VLAN_VID_MASK; + vlan->pri = (skb->vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; } @@ -617,9 +604,10 @@ do_send: rndis_msg->msg_len += rndis_msg_size; packet->total_data_buflen = rndis_msg->msg_len; packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size, - skb, packet); + skb, packet, &pb); - ret = netvsc_send(net_device_ctx->device_ctx, packet); + ret = netvsc_send(net_device_ctx->device_ctx, packet, + rndis_msg, &pb, skb); drop: if (ret == 0) { @@ -647,37 +635,33 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj, struct net_device *net; struct net_device_context *ndev_ctx; struct netvsc_device *net_device; - struct rndis_device *rdev; - - net_device = hv_get_drvdata(device_obj); - rdev = net_device->extension; + struct netvsc_reconfig *event; + unsigned long flags; - switch (indicate->status) { - case RNDIS_STATUS_MEDIA_CONNECT: - rdev->link_state = false; - break; - case RNDIS_STATUS_MEDIA_DISCONNECT: - rdev->link_state = true; - break; - case RNDIS_STATUS_NETWORK_CHANGE: - rdev->link_change = true; - break; - default: + /* Handle link change statuses only */ + if (indicate->status != RNDIS_STATUS_NETWORK_CHANGE && + indicate->status != RNDIS_STATUS_MEDIA_CONNECT && + indicate->status != RNDIS_STATUS_MEDIA_DISCONNECT) return; - } + net_device = hv_get_drvdata(device_obj); net = net_device->ndev; if (!net || net->reg_state != NETREG_REGISTERED) return; ndev_ctx = netdev_priv(net); - if (!rdev->link_state) { - schedule_delayed_work(&ndev_ctx->dwork, 0); - schedule_delayed_work(&ndev_ctx->dwork, msecs_to_jiffies(20)); - } else { - schedule_delayed_work(&ndev_ctx->dwork, 0); - } + + event = kzalloc(sizeof(*event), GFP_ATOMIC); + if (!event) + return; + event->event = indicate->status; + + spin_lock_irqsave(&ndev_ctx->lock, flags); + list_add_tail(&event->list, &ndev_ctx->reconfig_events); + spin_unlock_irqrestore(&ndev_ctx->lock, flags); + + schedule_delayed_work(&ndev_ctx->dwork, 0); } /* @@ -686,7 +670,10 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj, */ int netvsc_recv_callback(struct hv_device *device_obj, struct hv_netvsc_packet *packet, - struct ndis_tcp_ip_checksum_info *csum_info) + void **data, + struct ndis_tcp_ip_checksum_info *csum_info, + struct vmbus_channel *channel, + u16 vlan_tci) { struct net_device *net; struct net_device_context *net_device_ctx; @@ -695,8 +682,7 @@ int netvsc_recv_callback(struct hv_device *device_obj, net = ((struct netvsc_device *)hv_get_drvdata(device_obj))->ndev; if (!net || net->reg_state != NETREG_REGISTERED) { - packet->status = NVSP_STAT_FAIL; - return 0; + return NVSP_STAT_FAIL; } net_device_ctx = netdev_priv(net); rx_stats = this_cpu_ptr(net_device_ctx->rx_stats); @@ -705,15 +691,14 @@ int netvsc_recv_callback(struct hv_device *device_obj, skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen); if (unlikely(!skb)) { ++net->stats.rx_dropped; - packet->status = NVSP_STAT_FAIL; - return 0; + return NVSP_STAT_FAIL; } /* * Copy to skb. This copy is needed here since the memory pointed by * hv_netvsc_packet cannot be deallocated */ - memcpy(skb_put(skb, packet->total_data_buflen), packet->data, + memcpy(skb_put(skb, packet->total_data_buflen), *data, packet->total_data_buflen); skb->protocol = eth_type_trans(skb, net); @@ -728,11 +713,11 @@ int netvsc_recv_callback(struct hv_device *device_obj, skb->ip_summed = CHECKSUM_NONE; } - if (packet->vlan_tci & VLAN_TAG_PRESENT) + if (vlan_tci & VLAN_TAG_PRESENT) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), - packet->vlan_tci); + vlan_tci); - skb_record_rx_queue(skb, packet->channel-> + skb_record_rx_queue(skb, channel-> offermsg.offer.sub_channel_index); u64_stats_update_begin(&rx_stats->syncp); @@ -1009,12 +994,9 @@ static const struct net_device_ops device_ops = { }; /* - * Send GARP packet to network peers after migrations. - * After Quick Migration, the network is not immediately operational in the - * current context when receiving RNDIS_STATUS_MEDIA_CONNECT event. So, add - * another netif_notify_peers() into a delayed work, otherwise GARP packet - * will not be sent after quick migration, and cause network disconnection. - * Also, we update the carrier status here. + * Handle link status changes. For RNDIS_STATUS_NETWORK_CHANGE emulate link + * down/up sequence. In case of RNDIS_STATUS_MEDIA_CONNECT when carrier is + * present send GARP packet to network peers with netif_notify_peers(). */ static void netvsc_link_change(struct work_struct *w) { @@ -1022,36 +1004,89 @@ static void netvsc_link_change(struct work_struct *w) struct net_device *net; struct netvsc_device *net_device; struct rndis_device *rdev; - bool notify, refresh = false; - char *argv[] = { "/etc/init.d/network", "restart", NULL }; - char *envp[] = { "HOME=/", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; - - rtnl_lock(); + struct netvsc_reconfig *event = NULL; + bool notify = false, reschedule = false; + unsigned long flags, next_reconfig, delay; ndev_ctx = container_of(w, struct net_device_context, dwork.work); net_device = hv_get_drvdata(ndev_ctx->device_ctx); rdev = net_device->extension; net = net_device->ndev; - if (rdev->link_state) { - netif_carrier_off(net); - notify = false; - } else { - netif_carrier_on(net); - notify = true; - if (rdev->link_change) { - rdev->link_change = false; - refresh = true; + next_reconfig = ndev_ctx->last_reconfig + LINKCHANGE_INT; + if (time_is_after_jiffies(next_reconfig)) { + /* link_watch only sends one notification with current state + * per second, avoid doing reconfig more frequently. Handle + * wrap around. + */ + delay = next_reconfig - jiffies; + delay = delay < LINKCHANGE_INT ? delay : LINKCHANGE_INT; + schedule_delayed_work(&ndev_ctx->dwork, delay); + return; + } + ndev_ctx->last_reconfig = jiffies; + + spin_lock_irqsave(&ndev_ctx->lock, flags); + if (!list_empty(&ndev_ctx->reconfig_events)) { + event = list_first_entry(&ndev_ctx->reconfig_events, + struct netvsc_reconfig, list); + list_del(&event->list); + reschedule = !list_empty(&ndev_ctx->reconfig_events); + } + spin_unlock_irqrestore(&ndev_ctx->lock, flags); + + if (!event) + return; + + rtnl_lock(); + + switch (event->event) { + /* Only the following events are possible due to the check in + * netvsc_linkstatus_callback() + */ + case RNDIS_STATUS_MEDIA_CONNECT: + if (rdev->link_state) { + rdev->link_state = false; + netif_carrier_on(net); + netif_tx_wake_all_queues(net); + } else { + notify = true; + } + kfree(event); + break; + case RNDIS_STATUS_MEDIA_DISCONNECT: + if (!rdev->link_state) { + rdev->link_state = true; + netif_carrier_off(net); + netif_tx_stop_all_queues(net); + } + kfree(event); + break; + case RNDIS_STATUS_NETWORK_CHANGE: + /* Only makes sense if carrier is present */ + if (!rdev->link_state) { + rdev->link_state = true; + netif_carrier_off(net); + netif_tx_stop_all_queues(net); + event->event = RNDIS_STATUS_MEDIA_CONNECT; + spin_lock_irqsave(&ndev_ctx->lock, flags); + list_add_tail(&event->list, &ndev_ctx->reconfig_events); + spin_unlock_irqrestore(&ndev_ctx->lock, flags); + reschedule = true; } + break; } rtnl_unlock(); - if (refresh) - call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); - if (notify) netdev_notify_peers(net); + + /* link_watch only sends one notification with current state per + * second, handle next reconfig event in 2 seconds. + */ + if (reschedule) + schedule_delayed_work(&ndev_ctx->dwork, LINKCHANGE_INT); } static void netvsc_free_netdev(struct net_device *netdev) @@ -1071,16 +1106,12 @@ static int netvsc_probe(struct hv_device *dev, struct netvsc_device_info device_info; struct netvsc_device *nvdev; int ret; - u32 max_needed_headroom; net = alloc_etherdev_mq(sizeof(struct net_device_context), num_online_cpus()); if (!net) return -ENOMEM; - max_needed_headroom = sizeof(struct hv_netvsc_packet) + - RNDIS_AND_PPI_SIZE; - netif_carrier_off(net); net_device_ctx = netdev_priv(net); @@ -1106,6 +1137,9 @@ static int netvsc_probe(struct hv_device *dev, INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change); INIT_WORK(&net_device_ctx->work, do_set_multicast); + spin_lock_init(&net_device_ctx->lock); + INIT_LIST_HEAD(&net_device_ctx->reconfig_events); + net->netdev_ops = &device_ops; net->hw_features = NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_IP_CSUM | @@ -1116,13 +1150,6 @@ static int netvsc_probe(struct hv_device *dev, net->ethtool_ops = ðtool_ops; SET_NETDEV_DEV(net, &dev->device); - /* - * Request additional head room in the skb. - * We will use this space to build the rndis - * heaser and other state we need to maintain. - */ - net->needed_headroom = max_needed_headroom; - /* Notify the netvsc driver of the new device */ memset(&device_info, 0, sizeof(device_info)); device_info.ring_size = ring_size; @@ -1145,8 +1172,6 @@ static int netvsc_probe(struct hv_device *dev, pr_err("Unable to register netdev.\n"); rndis_filter_device_remove(dev); netvsc_free_netdev(net); - } else { - schedule_delayed_work(&net_device_ctx->dwork, 0); } return ret; diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 5931a799aa1786..a37bbda37ffa62 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -210,37 +210,33 @@ static int rndis_filter_send_request(struct rndis_device *dev, int ret; struct hv_netvsc_packet *packet; struct hv_page_buffer page_buf[2]; + struct hv_page_buffer *pb = page_buf; /* Setup the packet to send it */ packet = &req->pkt; - packet->is_data_pkt = false; packet->total_data_buflen = req->request_msg.msg_len; packet->page_buf_cnt = 1; - packet->page_buf = page_buf; - packet->page_buf[0].pfn = virt_to_phys(&req->request_msg) >> + pb[0].pfn = virt_to_phys(&req->request_msg) >> PAGE_SHIFT; - packet->page_buf[0].len = req->request_msg.msg_len; - packet->page_buf[0].offset = + pb[0].len = req->request_msg.msg_len; + pb[0].offset = (unsigned long)&req->request_msg & (PAGE_SIZE - 1); /* Add one page_buf when request_msg crossing page boundary */ - if (packet->page_buf[0].offset + packet->page_buf[0].len > PAGE_SIZE) { + if (pb[0].offset + pb[0].len > PAGE_SIZE) { packet->page_buf_cnt++; - packet->page_buf[0].len = PAGE_SIZE - - packet->page_buf[0].offset; - packet->page_buf[1].pfn = virt_to_phys((void *)&req->request_msg - + packet->page_buf[0].len) >> PAGE_SHIFT; - packet->page_buf[1].offset = 0; - packet->page_buf[1].len = req->request_msg.msg_len - - packet->page_buf[0].len; + pb[0].len = PAGE_SIZE - + pb[0].offset; + pb[1].pfn = virt_to_phys((void *)&req->request_msg + + pb[0].len) >> PAGE_SHIFT; + pb[1].offset = 0; + pb[1].len = req->request_msg.msg_len - + pb[0].len; } - packet->send_completion = NULL; - packet->xmit_more = false; - - ret = netvsc_send(dev->net_dev->dev, packet); + ret = netvsc_send(dev->net_dev->dev, packet, NULL, &pb, NULL); return ret; } @@ -348,14 +344,17 @@ static inline void *rndis_get_ppi(struct rndis_packet *rpkt, u32 type) return NULL; } -static void rndis_filter_receive_data(struct rndis_device *dev, +static int rndis_filter_receive_data(struct rndis_device *dev, struct rndis_message *msg, - struct hv_netvsc_packet *pkt) + struct hv_netvsc_packet *pkt, + void **data, + struct vmbus_channel *channel) { struct rndis_packet *rndis_pkt; u32 data_offset; struct ndis_pkt_8021q_info *vlan; struct ndis_tcp_ip_checksum_info *csum_info; + u16 vlan_tci = 0; rndis_pkt = &msg->msg.pkt; @@ -373,7 +372,7 @@ static void rndis_filter_receive_data(struct rndis_device *dev, "overflow detected (got %u, min %u)" "...dropping this message!\n", pkt->total_data_buflen, rndis_pkt->data_len); - return; + return NVSP_STAT_FAIL; } /* @@ -382,22 +381,23 @@ static void rndis_filter_receive_data(struct rndis_device *dev, * the data packet to the stack, without the rndis trailer padding */ pkt->total_data_buflen = rndis_pkt->data_len; - pkt->data = (void *)((unsigned long)pkt->data + data_offset); + *data = (void *)((unsigned long)(*data) + data_offset); vlan = rndis_get_ppi(rndis_pkt, IEEE_8021Q_INFO); if (vlan) { - pkt->vlan_tci = VLAN_TAG_PRESENT | vlan->vlanid | + vlan_tci = VLAN_TAG_PRESENT | vlan->vlanid | (vlan->pri << VLAN_PRIO_SHIFT); - } else { - pkt->vlan_tci = 0; } csum_info = rndis_get_ppi(rndis_pkt, TCPIP_CHKSUM_PKTINFO); - netvsc_recv_callback(dev->net_dev->dev, pkt, csum_info); + return netvsc_recv_callback(dev->net_dev->dev, pkt, data, + csum_info, channel, vlan_tci); } int rndis_filter_receive(struct hv_device *dev, - struct hv_netvsc_packet *pkt) + struct hv_netvsc_packet *pkt, + void **data, + struct vmbus_channel *channel) { struct netvsc_device *net_dev = hv_get_drvdata(dev); struct rndis_device *rndis_dev; @@ -406,7 +406,7 @@ int rndis_filter_receive(struct hv_device *dev, int ret = 0; if (!net_dev) { - ret = -EINVAL; + ret = NVSP_STAT_FAIL; goto exit; } @@ -416,7 +416,7 @@ int rndis_filter_receive(struct hv_device *dev, if (!net_dev->extension) { netdev_err(ndev, "got rndis message but no rndis device - " "dropping this message!\n"); - ret = -ENODEV; + ret = NVSP_STAT_FAIL; goto exit; } @@ -424,11 +424,11 @@ int rndis_filter_receive(struct hv_device *dev, if (rndis_dev->state == RNDIS_DEV_UNINITIALIZED) { netdev_err(ndev, "got rndis message but rndis device " "uninitialized...dropping this message!\n"); - ret = -ENODEV; + ret = NVSP_STAT_FAIL; goto exit; } - rndis_msg = pkt->data; + rndis_msg = *data; if (netif_msg_rx_err(net_dev->nd_ctx)) dump_rndis_message(dev, rndis_msg); @@ -436,7 +436,8 @@ int rndis_filter_receive(struct hv_device *dev, switch (rndis_msg->ndis_msg_type) { case RNDIS_MSG_PACKET: /* data msg */ - rndis_filter_receive_data(rndis_dev, rndis_msg, pkt); + ret = rndis_filter_receive_data(rndis_dev, rndis_msg, pkt, + data, channel); break; case RNDIS_MSG_INIT_C: @@ -459,9 +460,6 @@ int rndis_filter_receive(struct hv_device *dev, } exit: - if (ret != 0) - pkt->status = NVSP_STAT_FAIL; - return ret; } diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 03d4809a91268f..d4083c381cd1bb 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -361,6 +361,19 @@ static struct phy_driver bcm7xxx_driver[] = { .resume = bcm7xxx_config_init, .driver = { .owner = THIS_MODULE }, }, { + .phy_id = PHY_ID_BCM7435, + .phy_id_mask = 0xfffffff0, + .name = "Broadcom BCM7435", + .features = PHY_GBIT_FEATURES | + SUPPORTED_Pause | SUPPORTED_Asym_Pause, + .flags = PHY_IS_INTERNAL, + .config_init = bcm7xxx_config_init, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .suspend = bcm7xxx_suspend, + .resume = bcm7xxx_config_init, + .driver = { .owner = THIS_MODULE }, +}, { .phy_id = PHY_BCM_OUI_4, .phy_id_mask = 0xffff0000, .name = "Broadcom BCM7XXX 40nm", @@ -395,6 +408,7 @@ static struct mdio_device_id __maybe_unused bcm7xxx_tbl[] = { { PHY_ID_BCM7425, 0xfffffff0, }, { PHY_ID_BCM7429, 0xfffffff0, }, { PHY_ID_BCM7439, 0xfffffff0, }, + { PHY_ID_BCM7435, 0xfffffff0, }, { PHY_ID_BCM7445, 0xfffffff0, }, { PHY_BCM_OUI_4, 0xffff0000 }, { PHY_BCM_OUI_5, 0xffffff00 }, diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index 32f10662f4ac7c..4ebf601073d97d 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -107,10 +107,7 @@ static int dp83867_of_init(struct phy_device *phydev) struct device_node *of_node = dev->of_node; int ret; - if (!of_node && dev->parent->of_node) - of_node = dev->parent->of_node; - - if (!phydev->dev.of_node) + if (!of_node) return -ENODEV; ret = of_property_read_u32(of_node, "ti,rx-internal-delay", diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 277e6827d7cd9f..b8da2eabac3ef0 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -1202,4 +1202,4 @@ module_exit(pppoe_exit); MODULE_AUTHOR("Michal Ostrowski <mostrows@speakeasy.net>"); MODULE_DESCRIPTION("PPP over Ethernet driver"); MODULE_LICENSE("GPL"); -MODULE_ALIAS_NETPROTO(PF_PPPOX); +MODULE_ALIAS_NET_PF_PROTO(PF_PPPOX, PX_PROTO_OE); diff --git a/drivers/net/ppp/pppox.c b/drivers/net/ppp/pppox.c index 0200de74eebc33..b9c8be6283d375 100644 --- a/drivers/net/ppp/pppox.c +++ b/drivers/net/ppp/pppox.c @@ -113,7 +113,7 @@ static int pppox_create(struct net *net, struct socket *sock, int protocol, rc = -EPROTONOSUPPORT; if (!pppox_protos[protocol]) - request_module("pppox-proto-%d", protocol); + request_module("net-pf-%d-proto-%d", PF_PPPOX, protocol); if (!pppox_protos[protocol] || !try_module_get(pppox_protos[protocol]->owner)) goto out; @@ -147,3 +147,4 @@ module_exit(pppox_exit); MODULE_AUTHOR("Michal Ostrowski <mostrows@speakeasy.net>"); MODULE_DESCRIPTION("PPP over Ethernet driver (generic socket layer)"); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NETPROTO(PF_PPPOX); diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index fc69e41d09506e..e18e0980bc61c3 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -718,3 +718,4 @@ module_exit(pptp_exit_module); MODULE_DESCRIPTION("Point-to-Point Tunneling Protocol"); MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)"); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO(PF_PPPOX, PX_PROTO_PPTP); diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 651d35ea22c5f8..059c0f60a2b28f 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -91,10 +91,24 @@ void team_modeop_port_change_dev_addr(struct team *team, } EXPORT_SYMBOL(team_modeop_port_change_dev_addr); +static void team_lower_state_changed(struct team_port *port) +{ + struct netdev_lag_lower_state_info info; + + info.link_up = port->linkup; + info.tx_enabled = team_port_enabled(port); + netdev_lower_state_changed(port->dev, &info); +} + static void team_refresh_port_linkup(struct team_port *port) { - port->linkup = port->user.linkup_enabled ? port->user.linkup : - port->state.linkup; + bool new_linkup = port->user.linkup_enabled ? port->user.linkup : + port->state.linkup; + + if (port->linkup != new_linkup) { + port->linkup = new_linkup; + team_lower_state_changed(port); + } } @@ -932,6 +946,7 @@ static void team_port_enable(struct team *team, team->ops.port_enabled(team, port); team_notify_peers(team); team_mcast_rejoin(team); + team_lower_state_changed(port); } static void __reconstruct_port_hlist(struct team *team, int rm_index) @@ -963,6 +978,7 @@ static void team_port_disable(struct team *team, team_adjust_ops(team); team_notify_peers(team); team_mcast_rejoin(team); + team_lower_state_changed(port); } #define TEAM_VLAN_FEATURES (NETIF_F_ALL_CSUM | NETIF_F_SG | \ @@ -1078,23 +1094,24 @@ static void team_port_disable_netpoll(struct team_port *port) } #endif -static int team_upper_dev_link(struct net_device *dev, - struct net_device *port_dev) +static int team_upper_dev_link(struct team *team, struct team_port *port) { + struct netdev_lag_upper_info lag_upper_info; int err; - err = netdev_master_upper_dev_link(port_dev, dev); + lag_upper_info.tx_type = team->mode->lag_tx_type; + err = netdev_master_upper_dev_link(port->dev, team->dev, NULL, + &lag_upper_info); if (err) return err; - port_dev->priv_flags |= IFF_TEAM_PORT; + port->dev->priv_flags |= IFF_TEAM_PORT; return 0; } -static void team_upper_dev_unlink(struct net_device *dev, - struct net_device *port_dev) +static void team_upper_dev_unlink(struct team *team, struct team_port *port) { - netdev_upper_dev_unlink(port_dev, dev); - port_dev->priv_flags &= ~IFF_TEAM_PORT; + netdev_upper_dev_unlink(port->dev, team->dev); + port->dev->priv_flags &= ~IFF_TEAM_PORT; } static void __team_port_change_port_added(struct team_port *port, bool linkup); @@ -1194,7 +1211,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev) goto err_handler_register; } - err = team_upper_dev_link(dev, port_dev); + err = team_upper_dev_link(team, port); if (err) { netdev_err(dev, "Device %s failed to set upper link\n", portname); @@ -1220,7 +1237,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev) return 0; err_option_port_add: - team_upper_dev_unlink(dev, port_dev); + team_upper_dev_unlink(team, port); err_set_upper_link: netdev_rx_handler_unregister(port_dev); @@ -1264,7 +1281,7 @@ static int team_port_del(struct team *team, struct net_device *port_dev) team_port_disable(team, port); list_del_rcu(&port->list); - team_upper_dev_unlink(dev, port_dev); + team_upper_dev_unlink(team, port); netdev_rx_handler_unregister(port_dev); team_port_disable_netpoll(port); vlan_vids_del_by_dev(port_dev, dev); @@ -2054,6 +2071,7 @@ static void team_setup(struct net_device *dev) dev->flags |= IFF_MULTICAST; dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING); dev->priv_flags |= IFF_NO_QUEUE; + dev->priv_flags |= IFF_TEAM; /* * Indicate we support unicast address filtering. That way core won't @@ -2420,9 +2438,13 @@ static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info) struct nlattr *nl_option; LIST_HEAD(opt_inst_list); + rtnl_lock(); + team = team_nl_team_get(info); - if (!team) - return -EINVAL; + if (!team) { + err = -EINVAL; + goto rtnl_unlock; + } err = -EINVAL; if (!info->attrs[TEAM_ATTR_LIST_OPTION]) { @@ -2549,7 +2571,8 @@ static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info) team_put: team_nl_team_put(team); - +rtnl_unlock: + rtnl_unlock(); return err; } diff --git a/drivers/net/team/team_mode_activebackup.c b/drivers/net/team/team_mode_activebackup.c index 40fd3381b6932f..3f189823ba3b2e 100644 --- a/drivers/net/team/team_mode_activebackup.c +++ b/drivers/net/team/team_mode_activebackup.c @@ -127,6 +127,7 @@ static const struct team_mode ab_mode = { .owner = THIS_MODULE, .priv_size = sizeof(struct ab_priv), .ops = &ab_mode_ops, + .lag_tx_type = NETDEV_LAG_TX_TYPE_ACTIVEBACKUP, }; static int __init ab_init_module(void) diff --git a/drivers/net/team/team_mode_broadcast.c b/drivers/net/team/team_mode_broadcast.c index c366cd299c06a6..302ff35b0cbc72 100644 --- a/drivers/net/team/team_mode_broadcast.c +++ b/drivers/net/team/team_mode_broadcast.c @@ -56,6 +56,7 @@ static const struct team_mode bc_mode = { .kind = "broadcast", .owner = THIS_MODULE, .ops = &bc_mode_ops, + .lag_tx_type = NETDEV_LAG_TX_TYPE_BROADCAST, }; static int __init bc_init_module(void) diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c index a1536d0d83a970..cdb19b385d42c8 100644 --- a/drivers/net/team/team_mode_loadbalance.c +++ b/drivers/net/team/team_mode_loadbalance.c @@ -661,6 +661,7 @@ static const struct team_mode lb_mode = { .priv_size = sizeof(struct lb_priv), .port_priv_size = sizeof(struct lb_port_priv), .ops = &lb_mode_ops, + .lag_tx_type = NETDEV_LAG_TX_TYPE_HASH, }; static int __init lb_init_module(void) diff --git a/drivers/net/team/team_mode_random.c b/drivers/net/team/team_mode_random.c index cd2f692b8074e6..215f845782db40 100644 --- a/drivers/net/team/team_mode_random.c +++ b/drivers/net/team/team_mode_random.c @@ -46,6 +46,7 @@ static const struct team_mode rnd_mode = { .kind = "random", .owner = THIS_MODULE, .ops = &rnd_mode_ops, + .lag_tx_type = NETDEV_LAG_TX_TYPE_RANDOM, }; static int __init rnd_init_module(void) diff --git a/drivers/net/team/team_mode_roundrobin.c b/drivers/net/team/team_mode_roundrobin.c index 53665850b59e28..0aa234118c0351 100644 --- a/drivers/net/team/team_mode_roundrobin.c +++ b/drivers/net/team/team_mode_roundrobin.c @@ -58,6 +58,7 @@ static const struct team_mode rr_mode = { .owner = THIS_MODULE, .priv_size = sizeof(struct rr_priv), .ops = &rr_mode_ops, + .lag_tx_type = NETDEV_LAG_TX_TYPE_ROUNDROBIN, }; static int __init rr_init_module(void) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 3b1ba823776842..b45e5cae2a6b05 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -283,6 +283,48 @@ static DEVICE_ATTR(rx_max, S_IRUGO | S_IWUSR, cdc_ncm_show_rx_max, cdc_ncm_store static DEVICE_ATTR(tx_max, S_IRUGO | S_IWUSR, cdc_ncm_show_tx_max, cdc_ncm_store_tx_max); static DEVICE_ATTR(tx_timer_usecs, S_IRUGO | S_IWUSR, cdc_ncm_show_tx_timer_usecs, cdc_ncm_store_tx_timer_usecs); +static ssize_t ndp_to_end_show(struct device *d, struct device_attribute *attr, char *buf) +{ + struct usbnet *dev = netdev_priv(to_net_dev(d)); + struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; + + return sprintf(buf, "%c\n", ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END ? 'Y' : 'N'); +} + +static ssize_t ndp_to_end_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) +{ + struct usbnet *dev = netdev_priv(to_net_dev(d)); + struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; + bool enable; + + if (strtobool(buf, &enable)) + return -EINVAL; + + /* no change? */ + if (enable == (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END)) + return len; + + if (enable && !ctx->delayed_ndp16) { + ctx->delayed_ndp16 = kzalloc(ctx->max_ndp_size, GFP_KERNEL); + if (!ctx->delayed_ndp16) + return -ENOMEM; + } + + /* flush pending data before changing flag */ + netif_tx_lock_bh(dev->net); + usbnet_start_xmit(NULL, dev->net); + spin_lock_bh(&ctx->mtx); + if (enable) + ctx->drvflags |= CDC_NCM_FLAG_NDP_TO_END; + else + ctx->drvflags &= ~CDC_NCM_FLAG_NDP_TO_END; + spin_unlock_bh(&ctx->mtx); + netif_tx_unlock_bh(dev->net); + + return len; +} +static DEVICE_ATTR_RW(ndp_to_end); + #define NCM_PARM_ATTR(name, format, tocpu) \ static ssize_t cdc_ncm_show_##name(struct device *d, struct device_attribute *attr, char *buf) \ { \ @@ -305,6 +347,7 @@ NCM_PARM_ATTR(wNtbOutMaxDatagrams, "%u", le16_to_cpu); static struct attribute *cdc_ncm_sysfs_attrs[] = { &dev_attr_min_tx_pkt.attr, + &dev_attr_ndp_to_end.attr, &dev_attr_rx_max.attr, &dev_attr_tx_max.attr, &dev_attr_tx_timer_usecs.attr, diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 9a5be8b851867e..babc84a3946c21 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -14,7 +14,9 @@ #include <linux/netdevice.h> #include <linux/ethtool.h> #include <linux/etherdevice.h> +#include <linux/if_arp.h> #include <linux/mii.h> +#include <linux/rtnetlink.h> #include <linux/usb.h> #include <linux/usb/cdc.h> #include <linux/usb/usbnet.h> @@ -48,11 +50,100 @@ struct qmi_wwan_state { struct usb_driver *subdriver; atomic_t pmcount; - unsigned long unused; + unsigned long flags; struct usb_interface *control; struct usb_interface *data; }; +enum qmi_wwan_flags { + QMI_WWAN_FLAG_RAWIP = 1 << 0, +}; + +static void qmi_wwan_netdev_setup(struct net_device *net) +{ + struct usbnet *dev = netdev_priv(net); + struct qmi_wwan_state *info = (void *)&dev->data; + + if (info->flags & QMI_WWAN_FLAG_RAWIP) { + net->header_ops = NULL; /* No header */ + net->type = ARPHRD_NONE; + net->hard_header_len = 0; + net->addr_len = 0; + net->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; + netdev_dbg(net, "mode: raw IP\n"); + } else if (!net->header_ops) { /* don't bother if already set */ + ether_setup(net); + netdev_dbg(net, "mode: Ethernet\n"); + } + + /* recalculate buffers after changing hard_header_len */ + usbnet_change_mtu(net, net->mtu); +} + +static ssize_t raw_ip_show(struct device *d, struct device_attribute *attr, char *buf) +{ + struct usbnet *dev = netdev_priv(to_net_dev(d)); + struct qmi_wwan_state *info = (void *)&dev->data; + + return sprintf(buf, "%c\n", info->flags & QMI_WWAN_FLAG_RAWIP ? 'Y' : 'N'); +} + +static ssize_t raw_ip_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) +{ + struct usbnet *dev = netdev_priv(to_net_dev(d)); + struct qmi_wwan_state *info = (void *)&dev->data; + bool enable; + int ret; + + if (strtobool(buf, &enable)) + return -EINVAL; + + /* no change? */ + if (enable == (info->flags & QMI_WWAN_FLAG_RAWIP)) + return len; + + if (!rtnl_trylock()) + return restart_syscall(); + + /* we don't want to modify a running netdev */ + if (netif_running(dev->net)) { + netdev_err(dev->net, "Cannot change a running device\n"); + ret = -EBUSY; + goto err; + } + + /* let other drivers deny the change */ + ret = call_netdevice_notifiers(NETDEV_PRE_TYPE_CHANGE, dev->net); + ret = notifier_to_errno(ret); + if (ret) { + netdev_err(dev->net, "Type change was refused\n"); + goto err; + } + + if (enable) + info->flags |= QMI_WWAN_FLAG_RAWIP; + else + info->flags &= ~QMI_WWAN_FLAG_RAWIP; + qmi_wwan_netdev_setup(dev->net); + call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE, dev->net); + ret = len; +err: + rtnl_unlock(); + return ret; +} + +static DEVICE_ATTR_RW(raw_ip); + +static struct attribute *qmi_wwan_sysfs_attrs[] = { + &dev_attr_raw_ip.attr, + NULL, +}; + +static struct attribute_group qmi_wwan_sysfs_attr_group = { + .name = "qmi", + .attrs = qmi_wwan_sysfs_attrs, +}; + /* default ethernet address used by the modem */ static const u8 default_modem_addr[ETH_ALEN] = {0x02, 0x50, 0xf3}; @@ -80,6 +171,8 @@ static const u8 buggy_fw_addr[ETH_ALEN] = {0x00, 0xa0, 0xc6, 0x00, 0x00, 0x00}; */ static int qmi_wwan_rx_fixup(struct usbnet *dev, struct sk_buff *skb) { + struct qmi_wwan_state *info = (void *)&dev->data; + bool rawip = info->flags & QMI_WWAN_FLAG_RAWIP; __be16 proto; /* This check is no longer done by usbnet */ @@ -94,15 +187,25 @@ static int qmi_wwan_rx_fixup(struct usbnet *dev, struct sk_buff *skb) proto = htons(ETH_P_IPV6); break; case 0x00: + if (rawip) + return 0; if (is_multicast_ether_addr(skb->data)) return 1; /* possibly bogus destination - rewrite just in case */ skb_reset_mac_header(skb); goto fix_dest; default: + if (rawip) + return 0; /* pass along other packets without modifications */ return 1; } + if (rawip) { + skb->dev = dev->net; /* normally set by eth_type_trans */ + skb->protocol = proto; + return 1; + } + if (skb_headroom(skb) < ETH_HLEN) return 0; skb_push(skb, ETH_HLEN); @@ -223,6 +326,20 @@ err: return rv; } +/* Send CDC SetControlLineState request, setting or clearing the DTR. + * "Required for Autoconnect and 9x30 to wake up" according to the + * GobiNet driver. The requirement has been verified on an MDM9230 + * based Sierra Wireless MC7455 + */ +static int qmi_wwan_change_dtr(struct usbnet *dev, bool on) +{ + u8 intf = dev->intf->cur_altsetting->desc.bInterfaceNumber; + + return usbnet_write_cmd(dev, USB_CDC_REQ_SET_CONTROL_LINE_STATE, + USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE, + on ? 0x01 : 0x00, intf, NULL, 0); +} + static int qmi_wwan_bind(struct usbnet *dev, struct usb_interface *intf) { int status = -1; @@ -280,6 +397,24 @@ static int qmi_wwan_bind(struct usbnet *dev, struct usb_interface *intf) usb_driver_release_interface(driver, info->data); } + /* disabling remote wakeup on MDM9x30 devices has the same + * effect as clearing DTR. The device will not respond to QMI + * requests until we set DTR again. This is similar to a + * QMI_CTL SYNC request, clearing a lot of firmware state + * including the client ID allocations. + * + * Our usage model allows a session to span multiple + * open/close events, so we must prevent the firmware from + * clearing out state the clients might need. + * + * MDM9x30 is the first QMI chipset with USB3 support. Abuse + * this fact to enable the quirk. + */ + if (le16_to_cpu(dev->udev->descriptor.bcdUSB) >= 0x0201) { + qmi_wwan_manage_power(dev, 1); + qmi_wwan_change_dtr(dev, true); + } + /* Never use the same address on both ends of the link, even if the * buggy firmware told us to. Or, if device is assigned the well-known * buggy firmware MAC address, replace it with a random address, @@ -294,6 +429,7 @@ static int qmi_wwan_bind(struct usbnet *dev, struct usb_interface *intf) dev->net->dev_addr[0] &= 0xbf; /* clear "IP" bit */ } dev->net->netdev_ops = &qmi_wwan_netdev_ops; + dev->net->sysfs_groups[0] = &qmi_wwan_sysfs_attr_group; err: return status; } @@ -307,6 +443,12 @@ static void qmi_wwan_unbind(struct usbnet *dev, struct usb_interface *intf) if (info->subdriver && info->subdriver->disconnect) info->subdriver->disconnect(info->control); + /* disable MDM9x30 quirk */ + if (le16_to_cpu(dev->udev->descriptor.bcdUSB) >= 0x0201) { + qmi_wwan_change_dtr(dev, false); + qmi_wwan_manage_power(dev, 0); + } + /* allow user to unbind using either control or data */ if (intf == info->control) other = info->data; @@ -715,8 +857,6 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1199, 0x9056, 8)}, /* Sierra Wireless Modem */ {QMI_FIXED_INTF(0x1199, 0x9057, 8)}, {QMI_FIXED_INTF(0x1199, 0x9061, 8)}, /* Sierra Wireless Modem */ - {QMI_FIXED_INTF(0x1199, 0x9070, 8)}, /* Sierra Wireless MC74xx/EM74xx */ - {QMI_FIXED_INTF(0x1199, 0x9070, 10)}, /* Sierra Wireless MC74xx/EM74xx */ {QMI_FIXED_INTF(0x1199, 0x9071, 8)}, /* Sierra Wireless MC74xx/EM74xx */ {QMI_FIXED_INTF(0x1199, 0x9071, 10)}, /* Sierra Wireless MC74xx/EM74xx */ {QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */ diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 0744bf2ef2d6e4..0b0ba7ef14e4e9 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -324,7 +324,10 @@ void usbnet_skb_return (struct usbnet *dev, struct sk_buff *skb) return; } - skb->protocol = eth_type_trans (skb, dev->net); + /* only update if unset to allow minidriver rx_fixup override */ + if (skb->protocol == 0) + skb->protocol = eth_type_trans (skb, dev->net); + dev->net->stats.rx_packets++; dev->net->stats.rx_bytes += skb->len; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index d997aedccbe453..56abdf224d35a6 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -624,7 +624,7 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev) goto out_fail; } - ret = netdev_master_upper_dev_link(port_dev, dev); + ret = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL); if (ret < 0) goto out_unregister; diff --git a/drivers/net/wan/hdlc.c b/drivers/net/wan/hdlc.c index 51f6cee8aab2d8..9bd4aa8083ce78 100644 --- a/drivers/net/wan/hdlc.c +++ b/drivers/net/wan/hdlc.c @@ -266,8 +266,8 @@ struct net_device *alloc_hdlcdev(void *priv) void unregister_hdlc_device(struct net_device *dev) { rtnl_lock(); - unregister_netdevice(dev); detach_hdlc_protocol(dev); + unregister_netdevice(dev); rtnl_unlock(); } @@ -276,7 +276,11 @@ void unregister_hdlc_device(struct net_device *dev) int attach_hdlc_protocol(struct net_device *dev, struct hdlc_proto *proto, size_t size) { - detach_hdlc_protocol(dev); + int err; + + err = detach_hdlc_protocol(dev); + if (err) + return err; if (!try_module_get(proto->module)) return -ENOSYS; @@ -289,15 +293,24 @@ int attach_hdlc_protocol(struct net_device *dev, struct hdlc_proto *proto, } } dev_to_hdlc(dev)->proto = proto; + return 0; } -void detach_hdlc_protocol(struct net_device *dev) +int detach_hdlc_protocol(struct net_device *dev) { hdlc_device *hdlc = dev_to_hdlc(dev); + int err; if (hdlc->proto) { + err = call_netdevice_notifiers(NETDEV_PRE_TYPE_CHANGE, dev); + err = notifier_to_errno(err); + if (err) { + netdev_err(dev, "Refused to change device type\n"); + return err; + } + if (hdlc->proto->detach) hdlc->proto->detach(dev); module_put(hdlc->proto->module); @@ -306,6 +319,8 @@ void detach_hdlc_protocol(struct net_device *dev) kfree(hdlc->state); hdlc->state = NULL; hdlc_setup_dev(dev); + + return 0; } diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c index 3f20808b5ff826..a408abc25512ac 100644 --- a/drivers/net/wan/hdlc_cisco.c +++ b/drivers/net/wan/hdlc_cisco.c @@ -378,6 +378,7 @@ static int cisco_ioctl(struct net_device *dev, struct ifreq *ifr) spin_lock_init(&state(hdlc)->lock); dev->header_ops = &cisco_header_ops; dev->type = ARPHRD_CISCO; + call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE, dev); netif_dormant_on(dev); return 0; } diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index 89541cc90e877b..b6e0cfb095d35c 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -1240,6 +1240,7 @@ static int fr_ioctl(struct net_device *dev, struct ifreq *ifr) } memcpy(&state(hdlc)->settings, &new_settings, size); dev->type = ARPHRD_FRAD; + call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE, dev); return 0; case IF_PROTO_FR_ADD_PVC: diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c index 0d7645581f912d..47fdb87d3567ce 100644 --- a/drivers/net/wan/hdlc_ppp.c +++ b/drivers/net/wan/hdlc_ppp.c @@ -687,6 +687,7 @@ static int ppp_ioctl(struct net_device *dev, struct ifreq *ifr) dev->hard_header_len = sizeof(struct hdlc_header); dev->header_ops = &ppp_header_ops; dev->type = ARPHRD_PPP; + call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE, dev); netif_dormant_on(dev); return 0; } diff --git a/drivers/net/wan/hdlc_raw.c b/drivers/net/wan/hdlc_raw.c index 5dc153e8a29d14..4feb45001aacf4 100644 --- a/drivers/net/wan/hdlc_raw.c +++ b/drivers/net/wan/hdlc_raw.c @@ -84,6 +84,7 @@ static int raw_ioctl(struct net_device *dev, struct ifreq *ifr) return result; memcpy(hdlc->state, &new_settings, size); dev->type = ARPHRD_RAWHDLC; + call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE, dev); netif_dormant_off(dev); return 0; } diff --git a/drivers/net/wan/hdlc_raw_eth.c b/drivers/net/wan/hdlc_raw_eth.c index 3ab72b3082dee5..2f11836078ab5a 100644 --- a/drivers/net/wan/hdlc_raw_eth.c +++ b/drivers/net/wan/hdlc_raw_eth.c @@ -102,6 +102,7 @@ static int raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr) ether_setup(dev); dev->tx_queue_len = old_qlen; eth_hw_addr_random(dev); + call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE, dev); netif_dormant_off(dev); return 0; } diff --git a/drivers/net/wan/hdlc_x25.c b/drivers/net/wan/hdlc_x25.c index a49aec5efd2072..e867638067a65c 100644 --- a/drivers/net/wan/hdlc_x25.c +++ b/drivers/net/wan/hdlc_x25.c @@ -213,6 +213,7 @@ static int x25_ioctl(struct net_device *dev, struct ifreq *ifr) if ((result = attach_hdlc_protocol(dev, &proto, 0))) return result; dev->type = ARPHRD_X25; + call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE, dev); netif_dormant_off(dev); return 0; } diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 91e004518237f7..742bf4a230e837 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -694,6 +694,29 @@ static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent, return NULL; } +static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent, + const unsigned char *path, + const void *ns) +{ + static char path_buf[PATH_MAX]; /* protected by kernfs_mutex */ + size_t len = strlcpy(path_buf, path, PATH_MAX); + char *p = path_buf; + char *name; + + lockdep_assert_held(&kernfs_mutex); + + if (len >= PATH_MAX) + return NULL; + + while ((name = strsep(&p, "/")) && parent) { + if (*name == '\0') + continue; + parent = kernfs_find_ns(parent, name, ns); + } + + return parent; +} + /** * kernfs_find_and_get_ns - find and get kernfs_node with the given name * @parent: kernfs_node to search under @@ -719,6 +742,29 @@ struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns); /** + * kernfs_walk_and_get_ns - find and get kernfs_node with the given path + * @parent: kernfs_node to search under + * @path: path to look for + * @ns: the namespace tag to use + * + * Look for kernfs_node with path @path under @parent and get a reference + * if found. This function may sleep and returns pointer to the found + * kernfs_node on success, %NULL on failure. + */ +struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent, + const char *path, const void *ns) +{ + struct kernfs_node *kn; + + mutex_lock(&kernfs_mutex); + kn = kernfs_walk_ns(parent, path, ns); + kernfs_get(kn); + mutex_unlock(&kernfs_mutex); + + return kn; +} + +/** * kernfs_create_root - create a new kernfs hierarchy * @scops: optional syscall operations for the hierarchy * @flags: KERNFS_ROOT_* flags diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 59f4a73044199a..f0ba9c2ec639d5 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -26,6 +26,7 @@ #define PHY_ID_BCM7366 0x600d8490 #define PHY_ID_BCM7425 0x600d86b0 #define PHY_ID_BCM7429 0x600d8730 +#define PHY_ID_BCM7435 0x600d8750 #define PHY_ID_BCM7439 0x600d8480 #define PHY_ID_BCM7439_2 0xae025080 #define PHY_ID_BCM7445 0x600d8510 diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 60d44b26276d84..9dc226345e4e13 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -235,6 +235,14 @@ struct cgroup { int id; /* + * The depth this cgroup is at. The root is at depth zero and each + * step down the hierarchy increments the level. This along with + * ancestor_ids[] can determine whether a given cgroup is a + * descendant of another without traversing the hierarchy. + */ + int level; + + /* * Each non-empty css_set associated with this cgroup contributes * one to populated_cnt. All children with non-zero popuplated_cnt * of their own contribute one. The count is zero iff there's no @@ -289,6 +297,9 @@ struct cgroup { /* used to schedule release agent */ struct work_struct release_agent_work; + + /* ids of the ancestors at each level including self */ + int ancestor_ids[]; }; /* @@ -308,6 +319,9 @@ struct cgroup_root { /* The root cgroup. Root is destroyed on its release. */ struct cgroup cgrp; + /* for cgrp->ancestor_ids[0] */ + int cgrp_ancestor_id_storage; + /* Number of cgroups in the hierarchy, used only for /proc/cgroups */ atomic_t nr_cgrps; @@ -528,4 +542,116 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {} #endif /* CONFIG_CGROUPS */ +#ifdef CONFIG_SOCK_CGROUP_DATA + +/* + * sock_cgroup_data is embedded at sock->sk_cgrp_data and contains + * per-socket cgroup information except for memcg association. + * + * On legacy hierarchies, net_prio and net_cls controllers directly set + * attributes on each sock which can then be tested by the network layer. + * On the default hierarchy, each sock is associated with the cgroup it was + * created in and the networking layer can match the cgroup directly. + * + * To avoid carrying all three cgroup related fields separately in sock, + * sock_cgroup_data overloads (prioidx, classid) and the cgroup pointer. + * On boot, sock_cgroup_data records the cgroup that the sock was created + * in so that cgroup2 matches can be made; however, once either net_prio or + * net_cls starts being used, the area is overriden to carry prioidx and/or + * classid. The two modes are distinguished by whether the lowest bit is + * set. Clear bit indicates cgroup pointer while set bit prioidx and + * classid. + * + * While userland may start using net_prio or net_cls at any time, once + * either is used, cgroup2 matching no longer works. There is no reason to + * mix the two and this is in line with how legacy and v2 compatibility is + * handled. On mode switch, cgroup references which are already being + * pointed to by socks may be leaked. While this can be remedied by adding + * synchronization around sock_cgroup_data, given that the number of leaked + * cgroups is bound and highly unlikely to be high, this seems to be the + * better trade-off. + */ +struct sock_cgroup_data { + union { +#ifdef __LITTLE_ENDIAN + struct { + u8 is_data; + u8 padding; + u16 prioidx; + u32 classid; + } __packed; +#else + struct { + u32 classid; + u16 prioidx; + u8 padding; + u8 is_data; + } __packed; +#endif + u64 val; + }; +}; + +/* + * There's a theoretical window where the following accessors race with + * updaters and return part of the previous pointer as the prioidx or + * classid. Such races are short-lived and the result isn't critical. + */ +static inline u16 sock_cgroup_prioidx(struct sock_cgroup_data *skcd) +{ + /* fallback to 1 which is always the ID of the root cgroup */ + return (skcd->is_data & 1) ? skcd->prioidx : 1; +} + +static inline u32 sock_cgroup_classid(struct sock_cgroup_data *skcd) +{ + /* fallback to 0 which is the unconfigured default classid */ + return (skcd->is_data & 1) ? skcd->classid : 0; +} + +/* + * If invoked concurrently, the updaters may clobber each other. The + * caller is responsible for synchronization. + */ +static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd, + u16 prioidx) +{ + struct sock_cgroup_data skcd_buf = { .val = READ_ONCE(skcd->val) }; + + if (sock_cgroup_prioidx(&skcd_buf) == prioidx) + return; + + if (!(skcd_buf.is_data & 1)) { + skcd_buf.val = 0; + skcd_buf.is_data = 1; + } + + skcd_buf.prioidx = prioidx; + WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */ +} + +static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd, + u32 classid) +{ + struct sock_cgroup_data skcd_buf = { .val = READ_ONCE(skcd->val) }; + + if (sock_cgroup_classid(&skcd_buf) == classid) + return; + + if (!(skcd_buf.is_data & 1)) { + skcd_buf.val = 0; + skcd_buf.is_data = 1; + } + + skcd_buf.classid = classid; + WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */ +} + +#else /* CONFIG_SOCK_CGROUP_DATA */ + +struct sock_cgroup_data { +}; + +#endif /* CONFIG_SOCK_CGROUP_DATA */ + #endif /* _LINUX_CGROUP_DEFS_H */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 22e3754f89c511..a8ba1ea0ea5a20 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -81,7 +81,8 @@ struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup, struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, struct cgroup_subsys *ss); -bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor); +struct cgroup *cgroup_get_from_path(const char *path); + int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); @@ -352,6 +353,11 @@ static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n) percpu_ref_put_many(&css->refcnt, n); } +static inline void cgroup_put(struct cgroup *cgrp) +{ + css_put(&cgrp->self); +} + /** * task_css_set_check - obtain a task's css_set with extra access conditions * @task: the task to obtain css_set for @@ -459,6 +465,23 @@ static inline struct cgroup *task_cgroup(struct task_struct *task, return task_css(task, subsys_id)->cgroup; } +/** + * cgroup_is_descendant - test ancestry + * @cgrp: the cgroup to be tested + * @ancestor: possible ancestor of @cgrp + * + * Test whether @cgrp is a descendant of @ancestor. It also returns %true + * if @cgrp == @ancestor. This function is safe to call as long as @cgrp + * and @ancestor are accessible. + */ +static inline bool cgroup_is_descendant(struct cgroup *cgrp, + struct cgroup *ancestor) +{ + if (cgrp->root != ancestor->root || cgrp->level < ancestor->level) + return false; + return cgrp->ancestor_ids[ancestor->level] == ancestor->id; +} + /* no synchronization, the result can only be used as a hint */ static inline bool cgroup_is_populated(struct cgroup *cgrp) { @@ -555,4 +578,45 @@ static inline int cgroup_init(void) { return 0; } #endif /* !CONFIG_CGROUPS */ +/* + * sock->sk_cgrp_data handling. For more info, see sock_cgroup_data + * definition in cgroup-defs.h. + */ +#ifdef CONFIG_SOCK_CGROUP_DATA + +#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) +extern spinlock_t cgroup_sk_update_lock; +#endif + +void cgroup_sk_alloc_disable(void); +void cgroup_sk_alloc(struct sock_cgroup_data *skcd); +void cgroup_sk_free(struct sock_cgroup_data *skcd); + +static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd) +{ +#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) + unsigned long v; + + /* + * @skcd->val is 64bit but the following is safe on 32bit too as we + * just need the lower ulong to be written and read atomically. + */ + v = READ_ONCE(skcd->val); + + if (v & 1) + return &cgrp_dfl_root.cgrp; + + return (struct cgroup *)(unsigned long)v ?: &cgrp_dfl_root.cgrp; +#else + return (struct cgroup *)(unsigned long)skcd->val; +#endif +} + +#else /* CONFIG_CGROUP_DATA */ + +static inline void cgroup_sk_alloc(struct sock_cgroup_data *skcd) {} +static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {} + +#endif /* CONFIG_CGROUP_DATA */ + #endif /* _LINUX_CGROUP_H */ diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h index 1acb1445e05fe5..e31bcd4c7859e5 100644 --- a/include/linux/hdlc.h +++ b/include/linux/hdlc.h @@ -101,7 +101,7 @@ netdev_tx_t hdlc_start_xmit(struct sk_buff *skb, struct net_device *dev); int attach_hdlc_protocol(struct net_device *dev, struct hdlc_proto *proto, size_t size); /* May be used by hardware driver to gain control over HDLC device */ -void detach_hdlc_protocol(struct net_device *dev); +int detach_hdlc_protocol(struct net_device *dev); static __inline__ __be16 hdlc_type_trans(struct sk_buff *skb, struct net_device *dev) diff --git a/include/linux/if_team.h b/include/linux/if_team.h index a6aa970758a220..b84e49c3a738fc 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -164,6 +164,7 @@ struct team_mode { size_t priv_size; size_t port_priv_size; const struct team_mode_ops *ops; + enum netdev_lag_tx_type lag_tx_type; }; #define TEAM_PORT_HASHBITS 4 diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 67ce5bd3b56a54..05f5879821b814 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -73,7 +73,7 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb) /* found in socket.c */ extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *)); -static inline bool is_vlan_dev(struct net_device *dev) +static inline bool is_vlan_dev(const struct net_device *dev) { return dev->priv_flags & IFF_802_1Q_VLAN; } diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 5d4e9c4b821ddd..af51df35d74909 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -274,6 +274,8 @@ void pr_cont_kernfs_path(struct kernfs_node *kn); struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn); struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name, const void *ns); +struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent, + const char *path, const void *ns); void kernfs_get(struct kernfs_node *kn); void kernfs_put(struct kernfs_node *kn); @@ -350,6 +352,10 @@ static inline struct kernfs_node * kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name, const void *ns) { return NULL; } +static inline struct kernfs_node * +kernfs_walk_and_get_ns(struct kernfs_node *parent, const char *path, + const void *ns) +{ return NULL; } static inline void kernfs_get(struct kernfs_node *kn) { } static inline void kernfs_put(struct kernfs_node *kn) { } @@ -431,6 +437,12 @@ kernfs_find_and_get(struct kernfs_node *kn, const char *name) } static inline struct kernfs_node * +kernfs_walk_and_get(struct kernfs_node *kn, const char *path) +{ + return kernfs_walk_and_get_ns(kn, path, NULL); +} + +static inline struct kernfs_node * kernfs_create_dir(struct kernfs_node *parent, const char *name, umode_t mode, void *priv) { diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h index 5a06d969338e66..2e8af001c5dadb 100644 --- a/include/linux/mlx4/driver.h +++ b/include/linux/mlx4/driver.h @@ -75,6 +75,11 @@ static inline int mlx4_is_bonded(struct mlx4_dev *dev) return !!(dev->flags & MLX4_FLAG_BONDED); } +static inline int mlx4_is_mf_bonded(struct mlx4_dev *dev) +{ + return (mlx4_is_bonded(dev) && mlx4_is_mfunc(dev)); +} + struct mlx4_port_map { u8 port1; u8 port2; diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 0b473cbfa7ef10..7d3a85faefb73e 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -251,6 +251,7 @@ enum mlx5_event { MLX5_EVENT_TYPE_PAGE_REQUEST = 0xb, MLX5_EVENT_TYPE_PAGE_FAULT = 0xc, + MLX5_EVENT_TYPE_NIC_VPORT_CHANGE = 0xd, }; enum { @@ -520,6 +521,12 @@ struct mlx5_eqe_page_fault { __be32 flags_qpn; } __packed; +struct mlx5_eqe_vport_change { + u8 rsvd0[2]; + __be16 vport_num; + __be32 rsvd1[6]; +} __packed; + union ev_data { __be32 raw[7]; struct mlx5_eqe_cmd cmd; @@ -532,6 +539,7 @@ union ev_data { struct mlx5_eqe_stall_vl stall_vl; struct mlx5_eqe_page_req req_pages; struct mlx5_eqe_page_fault page_fault; + struct mlx5_eqe_vport_change vport_change; } __packed; struct mlx5_eqe { @@ -1067,6 +1075,12 @@ enum { }; enum { + MLX5_ESW_VPORT_ADMIN_STATE_DOWN = 0x0, + MLX5_ESW_VPORT_ADMIN_STATE_UP = 0x1, + MLX5_ESW_VPORT_ADMIN_STATE_AUTO = 0x2, +}; + +enum { MLX5_L3_PROT_TYPE_IPV4 = 0, MLX5_L3_PROT_TYPE_IPV6 = 1, }; @@ -1102,6 +1116,12 @@ enum { MLX5_FLOW_CONTEXT_DEST_TYPE_TIR = 2, }; +enum mlx5_list_type { + MLX5_NVPRT_LIST_TYPE_UC = 0x0, + MLX5_NVPRT_LIST_TYPE_MC = 0x1, + MLX5_NVPRT_LIST_TYPE_VLAN = 0x2, +}; + enum { MLX5_RQC_RQ_TYPE_MEMORY_RQ_INLINE = 0x0, MLX5_RQC_RQ_TYPE_MEMORY_RQ_RPM = 0x1, @@ -1124,6 +1144,8 @@ enum mlx5_cap_type { MLX5_CAP_IPOIB_OFFLOADS, MLX5_CAP_EOIB_OFFLOADS, MLX5_CAP_FLOW_TABLE, + MLX5_CAP_ESWITCH_FLOW_TABLE, + MLX5_CAP_ESWITCH, /* NUM OF CAP Types */ MLX5_CAP_NUM }; @@ -1161,6 +1183,28 @@ enum mlx5_cap_type { #define MLX5_CAP_FLOWTABLE_MAX(mdev, cap) \ MLX5_GET(flow_table_nic_cap, mdev->hca_caps_max[MLX5_CAP_FLOW_TABLE], cap) +#define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \ + MLX5_GET(flow_table_eswitch_cap, \ + mdev->hca_caps_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap) + +#define MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, cap) \ + MLX5_GET(flow_table_eswitch_cap, \ + mdev->hca_caps_max[MLX5_CAP_ESWITCH_FLOW_TABLE], cap) + +#define MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) \ + MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_nic_esw_fdb.cap) + +#define MLX5_CAP_ESW_FLOWTABLE_FDB_MAX(mdev, cap) \ + MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_nic_esw_fdb.cap) + +#define MLX5_CAP_ESW(mdev, cap) \ + MLX5_GET(e_switch_cap, \ + mdev->hca_caps_cur[MLX5_CAP_ESWITCH], cap) + +#define MLX5_CAP_ESW_MAX(mdev, cap) \ + MLX5_GET(e_switch_cap, \ + mdev->hca_caps_max[MLX5_CAP_ESWITCH], cap) + #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->hca_caps_cur[MLX5_CAP_ODP], cap) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5c857f2a20d7b5..ac098b6b97bf95 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -426,11 +426,23 @@ struct mlx5_mr_table { struct radix_tree_root tree; }; +struct mlx5_vf_context { + int enabled; +}; + +struct mlx5_core_sriov { + struct mlx5_vf_context *vfs_ctx; + int num_vfs; + int enabled_vfs; +}; + struct mlx5_irq_info { cpumask_var_t mask; char name[MLX5_MAX_IRQ_NAME]; }; +struct mlx5_eswitch; + struct mlx5_priv { char name[MLX5_MAX_NAME_LEN]; struct mlx5_eq_table eq_table; @@ -447,6 +459,7 @@ struct mlx5_priv { int fw_pages; atomic_t reg_pages; struct list_head free_list; + int vfs_pages; struct mlx5_core_health health; @@ -485,6 +498,10 @@ struct mlx5_priv { struct list_head dev_list; struct list_head ctx_list; spinlock_t ctx_lock; + + struct mlx5_eswitch *eswitch; + struct mlx5_core_sriov sriov; + unsigned long pci_dev_data; }; enum mlx5_device_state { @@ -739,6 +756,8 @@ void mlx5_pagealloc_init(struct mlx5_core_dev *dev); void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev); int mlx5_pagealloc_start(struct mlx5_core_dev *dev); void mlx5_pagealloc_stop(struct mlx5_core_dev *dev); +int mlx5_sriov_init(struct mlx5_core_dev *dev); +int mlx5_sriov_cleanup(struct mlx5_core_dev *dev); void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, s32 npages); int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot); @@ -884,6 +903,15 @@ struct mlx5_profile { } mr_cache[MAX_MR_CACHE_ENTRIES]; }; +enum { + MLX5_PCI_DEV_IS_VF = 1 << 0, +}; + +static inline int mlx5_core_is_pf(struct mlx5_core_dev *dev) +{ + return !(dev->priv.pci_dev_data & MLX5_PCI_DEV_IS_VF); +} + static inline int mlx5_get_gid_table_len(u16 param) { if (param > 4) { diff --git a/include/linux/mlx5/flow_table.h b/include/linux/mlx5/flow_table.h index 5f922c6d4fc237..0f2a15cf33173d 100644 --- a/include/linux/mlx5/flow_table.h +++ b/include/linux/mlx5/flow_table.h @@ -41,6 +41,15 @@ struct mlx5_flow_table_group { u32 match_criteria[MLX5_ST_SZ_DW(fte_match_param)]; }; +struct mlx5_flow_destination { + enum mlx5_flow_destination_type type; + union { + u32 tir_num; + void *ft; + u32 vport_num; + }; +}; + void *mlx5_create_flow_table(struct mlx5_core_dev *dev, u8 level, u8 table_type, u16 num_groups, struct mlx5_flow_table_group *group); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 1565324eb620cc..f5d94495758a60 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -447,6 +447,29 @@ struct mlx5_ifc_flow_table_nic_cap_bits { u8 reserved_3[0x7200]; }; +struct mlx5_ifc_flow_table_eswitch_cap_bits { + u8 reserved_0[0x200]; + + struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_esw_fdb; + + struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_esw_acl_ingress; + + struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_esw_acl_egress; + + u8 reserved_1[0x7800]; +}; + +struct mlx5_ifc_e_switch_cap_bits { + u8 vport_svlan_strip[0x1]; + u8 vport_cvlan_strip[0x1]; + u8 vport_svlan_insert[0x1]; + u8 vport_cvlan_insert_if_not_exist[0x1]; + u8 vport_cvlan_insert_overwrite[0x1]; + u8 reserved_0[0x1b]; + + u8 reserved_1[0x7e0]; +}; + struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 csum_cap[0x1]; u8 vlan_cap[0x1]; @@ -665,7 +688,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_17[0x1]; u8 ets[0x1]; u8 nic_flow_table[0x1]; - u8 reserved_18[0x4]; + u8 eswitch_flow_table[0x1]; + u8 early_vf_enable; + u8 reserved_18[0x2]; u8 local_ca_ack_delay[0x5]; u8 reserved_19[0x6]; u8 port_type[0x2]; @@ -787,27 +812,36 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_60[0x1b]; u8 log_max_wq_sz[0x5]; - u8 reserved_61[0xa0]; - + u8 nic_vport_change_event[0x1]; + u8 reserved_61[0xa]; + u8 log_max_vlan_list[0x5]; u8 reserved_62[0x3]; + u8 log_max_current_mc_list[0x5]; + u8 reserved_63[0x3]; + u8 log_max_current_uc_list[0x5]; + + u8 reserved_64[0x80]; + + u8 reserved_65[0x3]; u8 log_max_l2_table[0x5]; - u8 reserved_63[0x8]; + u8 reserved_66[0x8]; u8 log_uar_page_sz[0x10]; - u8 reserved_64[0x100]; + u8 reserved_67[0xe0]; - u8 reserved_65[0x1f]; + u8 reserved_68[0x1f]; u8 cqe_zip[0x1]; u8 cqe_zip_timeout[0x10]; u8 cqe_zip_max_num[0x10]; - u8 reserved_66[0x220]; + u8 reserved_69[0x220]; }; -enum { - MLX5_DEST_FORMAT_STRUCT_DESTINATION_TYPE_FLOW_TABLE_ = 0x1, - MLX5_DEST_FORMAT_STRUCT_DESTINATION_TYPE_TIR = 0x2, +enum mlx5_flow_destination_type { + MLX5_FLOW_DESTINATION_TYPE_VPORT = 0x0, + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE = 0x1, + MLX5_FLOW_DESTINATION_TYPE_TIR = 0x2, }; struct mlx5_ifc_dest_format_struct_bits { @@ -900,6 +934,13 @@ struct mlx5_ifc_mac_address_layout_bits { u8 mac_addr_31_0[0x20]; }; +struct mlx5_ifc_vlan_layout_bits { + u8 reserved_0[0x14]; + u8 vlan[0x0c]; + + u8 reserved_1[0x20]; +}; + struct mlx5_ifc_cong_control_r_roce_ecn_np_bits { u8 reserved_0[0xa0]; @@ -1829,6 +1870,8 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_roce_cap_bits roce_cap; struct mlx5_ifc_per_protocol_networking_offload_caps_bits per_protocol_networking_offload_caps; struct mlx5_ifc_flow_table_nic_cap_bits flow_table_nic_cap; + struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap; + struct mlx5_ifc_e_switch_cap_bits e_switch_cap; u8 reserved_0[0x8000]; }; @@ -2133,24 +2176,35 @@ struct mlx5_ifc_rmpc_bits { struct mlx5_ifc_wq_bits wq; }; -enum { - MLX5_NIC_VPORT_CONTEXT_ALLOWED_LIST_TYPE_CURRENT_UC_MAC_ADDRESS = 0x0, -}; - struct mlx5_ifc_nic_vport_context_bits { u8 reserved_0[0x1f]; u8 roce_en[0x1]; - u8 reserved_1[0x760]; + u8 arm_change_event[0x1]; + u8 reserved_1[0x1a]; + u8 event_on_mtu[0x1]; + u8 event_on_promisc_change[0x1]; + u8 event_on_vlan_change[0x1]; + u8 event_on_mc_address_change[0x1]; + u8 event_on_uc_address_change[0x1]; - u8 reserved_2[0x5]; + u8 reserved_2[0xf0]; + + u8 mtu[0x10]; + + u8 reserved_3[0x640]; + + u8 promisc_uc[0x1]; + u8 promisc_mc[0x1]; + u8 promisc_all[0x1]; + u8 reserved_4[0x2]; u8 allowed_list_type[0x3]; - u8 reserved_3[0xc]; + u8 reserved_5[0xc]; u8 allowed_list_size[0xc]; struct mlx5_ifc_mac_address_layout_bits permanent_address; - u8 reserved_4[0x20]; + u8 reserved_6[0x20]; u8 current_uc_mac_address[0][0x40]; }; @@ -2263,6 +2317,26 @@ struct mlx5_ifc_hca_vport_context_bits { u8 reserved_6[0xca0]; }; +struct mlx5_ifc_esw_vport_context_bits { + u8 reserved_0[0x3]; + u8 vport_svlan_strip[0x1]; + u8 vport_cvlan_strip[0x1]; + u8 vport_svlan_insert[0x1]; + u8 vport_cvlan_insert[0x2]; + u8 reserved_1[0x18]; + + u8 reserved_2[0x20]; + + u8 svlan_cfi[0x1]; + u8 svlan_pcp[0x3]; + u8 svlan_id[0xc]; + u8 cvlan_cfi[0x1]; + u8 cvlan_pcp[0x3]; + u8 cvlan_id[0xc]; + + u8 reserved_3[0x7a0]; +}; + enum { MLX5_EQC_STATUS_OK = 0x0, MLX5_EQC_STATUS_EQ_WRITE_FAILURE = 0xa, @@ -2940,6 +3014,7 @@ struct mlx5_ifc_query_vport_state_out_bits { enum { MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT = 0x0, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT = 0x1, }; struct mlx5_ifc_query_vport_state_in_bits { @@ -3700,6 +3775,64 @@ struct mlx5_ifc_query_flow_group_in_bits { u8 reserved_5[0x120]; }; +struct mlx5_ifc_query_esw_vport_context_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_esw_vport_context_bits esw_vport_context; +}; + +struct mlx5_ifc_query_esw_vport_context_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_modify_esw_vport_context_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_esw_vport_context_fields_select_bits { + u8 reserved[0x1c]; + u8 vport_cvlan_insert[0x1]; + u8 vport_svlan_insert[0x1]; + u8 vport_cvlan_strip[0x1]; + u8 vport_svlan_strip[0x1]; +}; + +struct mlx5_ifc_modify_esw_vport_context_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + struct mlx5_ifc_esw_vport_context_fields_select_bits field_select; + + struct mlx5_ifc_esw_vport_context_bits esw_vport_context; +}; + struct mlx5_ifc_query_eq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; @@ -4228,7 +4361,10 @@ struct mlx5_ifc_modify_nic_vport_context_out_bits { }; struct mlx5_ifc_modify_nic_vport_field_select_bits { - u8 reserved_0[0x1c]; + u8 reserved_0[0x19]; + u8 mtu[0x1]; + u8 change_event[0x1]; + u8 promisc[0x1]; u8 permanent_address[0x1]; u8 addresses_list[0x1]; u8 roce_en[0x1]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 967e0fd06e8972..638f2ca7a527c0 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -34,9 +34,17 @@ #define __MLX5_VPORT_H__ #include <linux/mlx5/driver.h> +#include <linux/mlx5/device.h> -u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod); -void mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr); +u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport); +u8 mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, + u16 vport); +int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, + u16 vport, u8 state); +int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, + u16 vport, u8 *addr); +int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev, + u16 vport, u8 *addr); int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, u8 port_num, u16 vf_num, u16 gid_index, union ib_gid *gid); @@ -51,5 +59,30 @@ int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev, u64 *sys_image_guid); int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, u64 *node_guid); +int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, + u32 vport, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int *list_size); +int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int list_size); +int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev, + u32 vport, + int *promisc_uc, + int *promisc_mc, + int *promisc_all); +int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev, + int promisc_uc, + int promisc_mc, + int promisc_all); +int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev, + u32 vport, + u16 vlans[], + int *size); +int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, + u16 vlans[], + int list_size); #endif /* __MLX5_VPORT_H__ */ diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 79aaa9fc1a15d6..bf9b322cb0b03a 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -9,38 +9,28 @@ #ifdef CONFIG_IP_MROUTE static inline int ip_mroute_opt(int opt) { - return (opt >= MRT_BASE) && (opt <= MRT_MAX); + return opt >= MRT_BASE && opt <= MRT_MAX; } -#else -static inline int ip_mroute_opt(int opt) -{ - return 0; -} -#endif -#ifdef CONFIG_IP_MROUTE -extern int ip_mroute_setsockopt(struct sock *, int, char __user *, unsigned int); -extern int ip_mroute_getsockopt(struct sock *, int, char __user *, int __user *); -extern int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg); -extern int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); -extern int ip_mr_init(void); +int ip_mroute_setsockopt(struct sock *, int, char __user *, unsigned int); +int ip_mroute_getsockopt(struct sock *, int, char __user *, int __user *); +int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg); +int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); +int ip_mr_init(void); #else -static inline -int ip_mroute_setsockopt(struct sock *sock, - int optname, char __user *optval, unsigned int optlen) +static inline int ip_mroute_setsockopt(struct sock *sock, int optname, + char __user *optval, unsigned int optlen) { return -ENOPROTOOPT; } -static inline -int ip_mroute_getsockopt(struct sock *sock, - int optname, char __user *optval, int __user *optlen) +static inline int ip_mroute_getsockopt(struct sock *sock, int optname, + char __user *optval, int __user *optlen) { return -ENOPROTOOPT; } -static inline -int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) +static inline int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) { return -ENOIOCTLCMD; } @@ -49,6 +39,11 @@ static inline int ip_mr_init(void) { return 0; } + +static inline int ip_mroute_opt(int opt) +{ + return 0; +} #endif struct vif_device { @@ -64,6 +59,32 @@ struct vif_device { #define VIFF_STATIC 0x8000 +#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL) +#define MFC_LINES 64 + +struct mr_table { + struct list_head list; + possible_net_t net; + u32 id; + struct sock __rcu *mroute_sk; + struct timer_list ipmr_expire_timer; + struct list_head mfc_unres_queue; + struct list_head mfc_cache_array[MFC_LINES]; + struct vif_device vif_table[MAXVIFS]; + int maxvif; + atomic_t cache_resolve_queue_len; + bool mroute_do_assert; + bool mroute_do_pim; + int mroute_reg_vif_num; +}; + +/* mfc_flags: + * MFC_STATIC - the entry was added statically (not by a routing daemon) + */ +enum { + MFC_STATIC = BIT(0), +}; + struct mfc_cache { struct list_head list; __be32 mfc_mcastgrp; /* Group the entry belongs to */ @@ -89,19 +110,14 @@ struct mfc_cache { struct rcu_head rcu; }; -#define MFC_STATIC 1 -#define MFC_NOTIFY 2 - -#define MFC_LINES 64 - #ifdef __BIG_ENDIAN #define MFC_HASH(a,b) (((((__force u32)(__be32)a)>>24)^(((__force u32)(__be32)b)>>26))&(MFC_LINES-1)) #else #define MFC_HASH(a,b) ((((__force u32)(__be32)a)^(((__force u32)(__be32)b)>>2))&(MFC_LINES-1)) -#endif +#endif struct rtmsg; -extern int ipmr_get_route(struct net *net, struct sk_buff *skb, - __be32 saddr, __be32 daddr, - struct rtmsg *rtm, int nowait); +int ipmr_get_route(struct net *net, struct sk_buff *skb, + __be32 saddr, __be32 daddr, + struct rtmsg *rtm, int nowait); #endif diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3093a7212a68d1..1bb21ff0fa6414 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -132,7 +132,9 @@ static inline bool dev_xmit_complete(int rc) * used. */ -#if defined(CONFIG_WLAN) || IS_ENABLED(CONFIG_AX25) +#if defined(CONFIG_HYPERV_NET) +# define LL_MAX_HEADER 128 +#elif defined(CONFIG_WLAN) || IS_ENABLED(CONFIG_AX25) # if defined(CONFIG_MAC80211_MESH) # define LL_MAX_HEADER 128 # else @@ -1271,6 +1273,7 @@ struct net_device_ops { * @IFF_NO_QUEUE: device can run without qdisc attached * @IFF_OPENVSWITCH: device is a Open vSwitch master * @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device + * @IFF_TEAM: device is a team device */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1297,6 +1300,7 @@ enum netdev_priv_flags { IFF_NO_QUEUE = 1<<21, IFF_OPENVSWITCH = 1<<22, IFF_L3MDEV_SLAVE = 1<<23, + IFF_TEAM = 1<<24, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1323,6 +1327,7 @@ enum netdev_priv_flags { #define IFF_NO_QUEUE IFF_NO_QUEUE #define IFF_OPENVSWITCH IFF_OPENVSWITCH #define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE +#define IFF_TEAM IFF_TEAM /** * struct net_device - The DEVICE structure. @@ -2106,6 +2111,24 @@ struct pcpu_sw_netstats { #define netdev_alloc_pcpu_stats(type) \ __netdev_alloc_pcpu_stats(type, GFP_KERNEL); +enum netdev_lag_tx_type { + NETDEV_LAG_TX_TYPE_UNKNOWN, + NETDEV_LAG_TX_TYPE_RANDOM, + NETDEV_LAG_TX_TYPE_BROADCAST, + NETDEV_LAG_TX_TYPE_ROUNDROBIN, + NETDEV_LAG_TX_TYPE_ACTIVEBACKUP, + NETDEV_LAG_TX_TYPE_HASH, +}; + +struct netdev_lag_upper_info { + enum netdev_lag_tx_type tx_type; +}; + +struct netdev_lag_lower_state_info { + u8 link_up : 1, + tx_enabled : 1; +}; + #include <linux/notifier.h> /* netdevice notifier chain. Please remember to update the rtnetlink @@ -2141,6 +2164,7 @@ struct pcpu_sw_netstats { #define NETDEV_CHANGEINFODATA 0x0018 #define NETDEV_BONDING_INFO 0x0019 #define NETDEV_PRECHANGEUPPER 0x001A +#define NETDEV_CHANGELOWERSTATE 0x001B int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); @@ -2159,6 +2183,12 @@ struct netdev_notifier_changeupper_info { struct net_device *upper_dev; /* new upper dev */ bool master; /* is upper dev master */ bool linking; /* is the nofication for link or unlink */ + void *upper_info; /* upper dev info */ +}; + +struct netdev_notifier_changelowerstate_info { + struct netdev_notifier_info info; /* must be first */ + void *lower_state_info; /* is lower dev state */ }; static inline void netdev_notifier_info_init(struct netdev_notifier_info *info, @@ -3615,15 +3645,15 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev); struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev); int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev); int netdev_master_upper_dev_link(struct net_device *dev, - struct net_device *upper_dev); -int netdev_master_upper_dev_link_private(struct net_device *dev, - struct net_device *upper_dev, - void *private); + struct net_device *upper_dev, + void *upper_priv, void *upper_info); void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev); void netdev_adjacent_rename_links(struct net_device *dev, char *oldname); void *netdev_lower_dev_get_private(struct net_device *dev, struct net_device *lower_dev); +void netdev_lower_state_changed(struct net_device *lower_dev, + void *lower_state_info); /* RSS keys are 40 or 52 bytes long */ #define NETDEV_RSS_KEY_LEN 52 @@ -3631,7 +3661,7 @@ extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN]; void netdev_rss_key_fill(void *buffer, size_t len); int dev_get_nest_level(struct net_device *dev, - bool (*type_check)(struct net_device *dev)); + bool (*type_check)(const struct net_device *dev)); int skb_checksum_help(struct sk_buff *skb); struct sk_buff *__skb_gso_segment(struct sk_buff *skb, netdev_features_t features, bool tx_path); @@ -3828,32 +3858,32 @@ static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol, skb->mac_len = mac_len; } -static inline bool netif_is_macvlan(struct net_device *dev) +static inline bool netif_is_macvlan(const struct net_device *dev) { return dev->priv_flags & IFF_MACVLAN; } -static inline bool netif_is_macvlan_port(struct net_device *dev) +static inline bool netif_is_macvlan_port(const struct net_device *dev) { return dev->priv_flags & IFF_MACVLAN_PORT; } -static inline bool netif_is_ipvlan(struct net_device *dev) +static inline bool netif_is_ipvlan(const struct net_device *dev) { return dev->priv_flags & IFF_IPVLAN_SLAVE; } -static inline bool netif_is_ipvlan_port(struct net_device *dev) +static inline bool netif_is_ipvlan_port(const struct net_device *dev) { return dev->priv_flags & IFF_IPVLAN_MASTER; } -static inline bool netif_is_bond_master(struct net_device *dev) +static inline bool netif_is_bond_master(const struct net_device *dev) { return dev->flags & IFF_MASTER && dev->priv_flags & IFF_BONDING; } -static inline bool netif_is_bond_slave(struct net_device *dev) +static inline bool netif_is_bond_slave(const struct net_device *dev) { return dev->flags & IFF_SLAVE && dev->priv_flags & IFF_BONDING; } @@ -3888,6 +3918,26 @@ static inline bool netif_is_ovs_master(const struct net_device *dev) return dev->priv_flags & IFF_OPENVSWITCH; } +static inline bool netif_is_team_master(const struct net_device *dev) +{ + return dev->priv_flags & IFF_TEAM; +} + +static inline bool netif_is_team_port(const struct net_device *dev) +{ + return dev->priv_flags & IFF_TEAM_PORT; +} + +static inline bool netif_is_lag_master(const struct net_device *dev) +{ + return netif_is_bond_master(dev) || netif_is_team_master(dev); +} + +static inline bool netif_is_lag_port(const struct net_device *dev) +{ + return netif_is_bond_slave(dev) || netif_is_team_port(dev); +} + /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */ static inline void netif_keep_dst(struct net_device *dev) { diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index d9ba49cedc5dfe..1acbefc4bbdab3 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2495,6 +2495,8 @@ #define PCI_DEVICE_ID_KORENIX_JETCARDF2 0x1700 #define PCI_DEVICE_ID_KORENIX_JETCARDF3 0x17ff +#define PCI_VENDOR_ID_NETRONOME 0x19ee + #define PCI_VENDOR_ID_QMI 0x1a32 #define PCI_VENDOR_ID_AZWAVE 0x1a3b diff --git a/include/linux/pim.h b/include/linux/pim.h index 252bf6644c518d..e1d756f813487e 100644 --- a/include/linux/pim.h +++ b/include/linux/pim.h @@ -13,6 +13,11 @@ #define PIM_NULL_REGISTER cpu_to_be32(0x40000000) +static inline bool ipmr_pimsm_enabled(void) +{ + return IS_BUILTIN(CONFIG_IP_PIMSM_V1) || IS_BUILTIN(CONFIG_IP_PIMSM_V2); +} + /* PIMv2 register message header layout (ietf-draft-idmr-pimvsm-v2-00.ps */ struct pimreghdr { diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index dc9a1353f97105..d4a32e87818049 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -25,6 +25,12 @@ #include <linux/qed/common_hsi.h> #include <linux/qed/qed_chain.h> +enum qed_led_mode { + QED_LED_MODE_OFF, + QED_LED_MODE_ON, + QED_LED_MODE_RESTORE +}; + #define DIRECT_REG_WR(reg_addr, val) writel((u32)val, \ (void __iomem *)(reg_addr)) @@ -252,6 +258,17 @@ struct qed_common_ops { void (*chain_free)(struct qed_dev *cdev, struct qed_chain *p_chain); + +/** + * @brief set_led - Configure LED mode + * + * @param cdev + * @param mode - LED mode + * + * @return 0 on success, error otherwise. + */ + int (*set_led)(struct qed_dev *cdev, + enum qed_led_mode mode); }; /** diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c9c394bf077101..9b9b9ead7bb3e5 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2785,6 +2785,12 @@ static inline void skb_frag_list_init(struct sk_buff *skb) #define skb_walk_frags(skb, iter) \ for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next) + +int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, + const struct sk_buff *skb); +struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned flags, + int *peeked, int *off, int *err, + struct sk_buff **last); struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, int *peeked, int *off, int *err); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, diff --git a/include/linux/wait.h b/include/linux/wait.h index 1e1bf9f963a947..6aa09a875fbdca 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -107,6 +107,27 @@ static inline int waitqueue_active(wait_queue_head_t *q) return !list_empty(&q->task_list); } +/** + * wq_has_sleeper - check if there are any waiting processes + * @wq: wait queue head + * + * Returns true if wq has waiting processes + * + * Please refer to the comment for waitqueue_active. + */ +static inline bool wq_has_sleeper(wait_queue_head_t *wq) +{ + /* + * We need to be sure we are in sync with the + * add_wait_queue modifications to the wait queue. + * + * This memory barrier should be paired with one on the + * waiting side. + */ + smp_mb(); + return waitqueue_active(wq); +} + extern void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait); extern void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait); extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait); diff --git a/include/net/bonding.h b/include/net/bonding.h index c1740a2794a37b..ee6c52053aa324 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -165,7 +165,8 @@ struct slave { u8 backup:1, /* indicates backup slave. Value corresponds with BOND_STATE_ACTIVE and BOND_STATE_BACKUP */ inactive:1, /* indicates inactive slave */ - should_notify:1; /* indicateds whether the state changed */ + should_notify:1, /* indicates whether the state changed */ + should_notify_link:1; /* indicates whether the link changed */ u8 duplex; u32 original_mtu; u32 link_failure_count; @@ -246,6 +247,7 @@ struct bonding { ((struct slave *) rtnl_dereference(dev->rx_handler_data)) void bond_queue_slave_event(struct slave *slave); +void bond_lower_state_changed(struct slave *slave); struct bond_vlan_tag { __be16 vlan_proto; @@ -327,6 +329,7 @@ static inline void bond_set_active_slave(struct slave *slave) if (slave->backup) { slave->backup = 0; bond_queue_slave_event(slave); + bond_lower_state_changed(slave); rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); } } @@ -336,6 +339,7 @@ static inline void bond_set_backup_slave(struct slave *slave) if (!slave->backup) { slave->backup = 1; bond_queue_slave_event(slave); + bond_lower_state_changed(slave); rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); } } @@ -348,6 +352,7 @@ static inline void bond_set_slave_state(struct slave *slave, slave->backup = slave_state; if (notify) { + bond_lower_state_changed(slave); rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); bond_queue_slave_event(slave); slave->should_notify = 0; @@ -379,6 +384,7 @@ static inline void bond_slave_state_notify(struct bonding *bond) bond_for_each_slave(bond, tmp, iter) { if (tmp->should_notify) { + bond_lower_state_changed(tmp); rtmsg_ifinfo(RTM_NEWLINK, tmp->dev, 0, GFP_ATOMIC); tmp->should_notify = 0; } @@ -504,10 +510,37 @@ static inline bool bond_is_slave_inactive(struct slave *slave) return slave->inactive; } -static inline void bond_set_slave_link_state(struct slave *slave, int state) +static inline void bond_set_slave_link_state(struct slave *slave, int state, + bool notify) { + if (slave->link == state) + return; + slave->link = state; - bond_queue_slave_event(slave); + if (notify) { + bond_queue_slave_event(slave); + bond_lower_state_changed(slave); + slave->should_notify_link = 0; + } else { + if (slave->should_notify_link) + slave->should_notify_link = 0; + else + slave->should_notify_link = 1; + } +} + +static inline void bond_slave_link_notify(struct bonding *bond) +{ + struct list_head *iter; + struct slave *tmp; + + bond_for_each_slave(bond, tmp, iter) { + if (tmp->should_notify_link) { + bond_queue_slave_event(tmp); + bond_lower_state_changed(tmp); + tmp->should_notify_link = 0; + } + } } static inline __be32 bond_confirm_addr(struct net_device *dev, __be32 dst, __be32 local) diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index ccd6d8bffa4d8d..c0a92e2c286d6c 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -41,13 +41,12 @@ static inline u32 task_cls_classid(struct task_struct *p) return classid; } -static inline void sock_update_classid(struct sock *sk) +static inline void sock_update_classid(struct sock_cgroup_data *skcd) { u32 classid; classid = task_cls_classid(current); - if (classid != sk->sk_classid) - sk->sk_classid = classid; + sock_cgroup_set_classid(skcd, classid); } static inline u32 task_get_classid(const struct sk_buff *skb) @@ -64,17 +63,17 @@ static inline u32 task_get_classid(const struct sk_buff *skb) * softirqs always disables bh. */ if (in_serving_softirq()) { - /* If there is an sk_classid we'll use that. */ + /* If there is an sock_cgroup_classid we'll use that. */ if (!skb->sk) return 0; - classid = skb->sk->sk_classid; + classid = sock_cgroup_classid(&skb->sk->sk_cgrp_data); } return classid; } #else /* !CONFIG_CGROUP_NET_CLASSID */ -static inline void sock_update_classid(struct sock *sk) +static inline void sock_update_classid(struct sock_cgroup_data *skcd) { } diff --git a/include/net/dsa.h b/include/net/dsa.h index 3f23dd9d6a6924..26a0e86e611e8d 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -117,13 +117,6 @@ struct dsa_switch_tree { s8 cpu_port; /* - * Link state polling. - */ - int link_poll_needed; - struct work_struct link_poll_work; - struct timer_list link_poll_timer; - - /* * Data for the individual switch chips. */ struct dsa_switch *ds[DSA_MAX_SWITCHES]; @@ -232,11 +225,6 @@ struct dsa_switch_driver { int regnum, u16 val); /* - * Link state polling and IRQ handling. - */ - void (*poll_link)(struct dsa_switch *ds); - - /* * Link state adjustment (called from libphy) */ void (*adjust_link)(struct dsa_switch *ds, int port, diff --git a/include/net/netprio_cgroup.h b/include/net/netprio_cgroup.h index f2a9597ff53c08..604190596cde8c 100644 --- a/include/net/netprio_cgroup.h +++ b/include/net/netprio_cgroup.h @@ -25,8 +25,6 @@ struct netprio_map { u32 priomap[]; }; -void sock_update_netprioidx(struct sock *sk); - static inline u32 task_netprioidx(struct task_struct *p) { struct cgroup_subsys_state *css; @@ -38,13 +36,25 @@ static inline u32 task_netprioidx(struct task_struct *p) rcu_read_unlock(); return idx; } + +static inline void sock_update_netprioidx(struct sock_cgroup_data *skcd) +{ + if (in_interrupt()) + return; + + sock_cgroup_set_prioidx(skcd, task_netprioidx(current)); +} + #else /* !CONFIG_CGROUP_NET_PRIO */ + static inline u32 task_netprioidx(struct task_struct *p) { return 0; } -#define sock_update_netprioidx(sk) +static inline void sock_update_netprioidx(struct sock_cgroup_data *skcd) +{ +} #endif /* CONFIG_CGROUP_NET_PRIO */ #endif /* _NET_CLS_CGROUP_H */ diff --git a/include/net/sock.h b/include/net/sock.h index 52d27ee924f478..0ca22b014de1a0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -58,6 +58,8 @@ #include <linux/memcontrol.h> #include <linux/static_key.h> #include <linux/sched.h> +#include <linux/wait.h> +#include <linux/cgroup-defs.h> #include <linux/filter.h> #include <linux/rculist_nulls.h> @@ -287,7 +289,6 @@ struct cg_proto; * @sk_ack_backlog: current listen backlog * @sk_max_ack_backlog: listen backlog set in listen() * @sk_priority: %SO_PRIORITY setting - * @sk_cgrp_prioidx: socket group's priority map index * @sk_type: socket type (%SOCK_STREAM, etc) * @sk_protocol: which protocol this socket belongs in this network family * @sk_peer_pid: &struct pid for this socket's peer @@ -308,7 +309,7 @@ struct cg_proto; * @sk_send_head: front of stuff to transmit * @sk_security: used by security modules * @sk_mark: generic packet mark - * @sk_classid: this socket's cgroup classid + * @sk_cgrp_data: cgroup data for this cgroup * @sk_cgrp: this socket's cgroup-specific proto data * @sk_write_pending: a write to stream socket waits to start * @sk_state_change: callback to indicate change in the state of the sock @@ -424,9 +425,7 @@ struct sock { u32 sk_ack_backlog; u32 sk_max_ack_backlog; __u32 sk_priority; -#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) - __u32 sk_cgrp_prioidx; -#endif + __u32 sk_mark; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; long sk_rcvtimeo; @@ -444,10 +443,7 @@ struct sock { #ifdef CONFIG_SECURITY void *sk_security; #endif - __u32 sk_mark; -#ifdef CONFIG_CGROUP_NET_CLASSID - u32 sk_classid; -#endif + struct sock_cgroup_data sk_cgrp_data; struct cg_proto *sk_cgrp; void (*sk_state_change)(struct sock *sk); void (*sk_data_ready)(struct sock *sk); @@ -775,9 +771,9 @@ static inline int sk_memalloc_socks(void) #endif -static inline gfp_t sk_gfp_atomic(const struct sock *sk, gfp_t gfp_mask) +static inline gfp_t sk_gfp_mask(const struct sock *sk, gfp_t gfp_mask) { - return GFP_ATOMIC | (sk->sk_allocation & __GFP_MEMALLOC); + return gfp_mask | (sk->sk_allocation & __GFP_MEMALLOC); } static inline void sk_acceptq_removed(struct sock *sk) @@ -1880,12 +1876,12 @@ static inline bool sk_has_allocations(const struct sock *sk) } /** - * wq_has_sleeper - check if there are any waiting processes + * skwq_has_sleeper - check if there are any waiting processes * @wq: struct socket_wq * * Returns true if socket_wq has waiting processes * - * The purpose of the wq_has_sleeper and sock_poll_wait is to wrap the memory + * The purpose of the skwq_has_sleeper and sock_poll_wait is to wrap the memory * barrier call. They were added due to the race found within the tcp code. * * Consider following tcp code paths: @@ -1911,15 +1907,9 @@ static inline bool sk_has_allocations(const struct sock *sk) * data on the socket. * */ -static inline bool wq_has_sleeper(struct socket_wq *wq) +static inline bool skwq_has_sleeper(struct socket_wq *wq) { - /* We need to be sure we are in sync with the - * add_wait_queue modifications to the wait queue. - * - * This memory barrier is paired in the sock_poll_wait. - */ - smp_mb(); - return wq && waitqueue_active(&wq->wait); + return wq && wq_has_sleeper(&wq->wait); } /** diff --git a/kernel/cgroup.c b/kernel/cgroup.c index f1603c153890d2..4f8f7927b4222a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -57,8 +57,8 @@ #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ #include <linux/kthread.h> #include <linux/delay.h> - #include <linux/atomic.h> +#include <net/sock.h> /* * pidlists linger the following amount before being destroyed. The goal @@ -434,11 +434,6 @@ static bool cgroup_tryget(struct cgroup *cgrp) return css_tryget(&cgrp->self); } -static void cgroup_put(struct cgroup *cgrp) -{ - css_put(&cgrp->self); -} - struct cgroup_subsys_state *of_css(struct kernfs_open_file *of) { struct cgroup *cgrp = of->kn->parent->priv; @@ -459,25 +454,6 @@ struct cgroup_subsys_state *of_css(struct kernfs_open_file *of) } EXPORT_SYMBOL_GPL(of_css); -/** - * cgroup_is_descendant - test ancestry - * @cgrp: the cgroup to be tested - * @ancestor: possible ancestor of @cgrp - * - * Test whether @cgrp is a descendant of @ancestor. It also returns %true - * if @cgrp == @ancestor. This function is safe to call as long as @cgrp - * and @ancestor are accessible. - */ -bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor) -{ - while (cgrp) { - if (cgrp == ancestor) - return true; - cgrp = cgroup_parent(cgrp); - } - return false; -} - static int notify_on_release(const struct cgroup *cgrp) { return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); @@ -1903,6 +1879,7 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned long ss_mask) if (ret < 0) goto out; root_cgrp->id = ret; + root_cgrp->ancestor_ids[0] = ret; ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, 0, GFP_KERNEL); @@ -4846,11 +4823,11 @@ err_free_css: static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode) { - struct cgroup *parent, *cgrp; + struct cgroup *parent, *cgrp, *tcgrp; struct cgroup_root *root; struct cgroup_subsys *ss; struct kernfs_node *kn; - int ssid, ret; + int level, ssid, ret; /* Do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable. */ @@ -4861,9 +4838,11 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, if (!parent) return -ENODEV; root = parent->root; + level = parent->level + 1; /* allocate the cgroup and its ID, 0 is reserved for the root */ - cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL); + cgrp = kzalloc(sizeof(*cgrp) + + sizeof(cgrp->ancestor_ids[0]) * (level + 1), GFP_KERNEL); if (!cgrp) { ret = -ENOMEM; goto out_unlock; @@ -4887,6 +4866,10 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, cgrp->self.parent = &parent->self; cgrp->root = root; + cgrp->level = level; + + for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) + cgrp->ancestor_ids[tcgrp->level] = tcgrp->id; if (notify_on_release(parent)) set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); @@ -5765,6 +5748,93 @@ struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss) return id > 0 ? idr_find(&ss->css_idr, id) : NULL; } +/** + * cgroup_get_from_path - lookup and get a cgroup from its default hierarchy path + * @path: path on the default hierarchy + * + * Find the cgroup at @path on the default hierarchy, increment its + * reference count and return it. Returns pointer to the found cgroup on + * success, ERR_PTR(-ENOENT) if @path doens't exist and ERR_PTR(-ENOTDIR) + * if @path points to a non-directory. + */ +struct cgroup *cgroup_get_from_path(const char *path) +{ + struct kernfs_node *kn; + struct cgroup *cgrp; + + mutex_lock(&cgroup_mutex); + + kn = kernfs_walk_and_get(cgrp_dfl_root.cgrp.kn, path); + if (kn) { + if (kernfs_type(kn) == KERNFS_DIR) { + cgrp = kn->priv; + cgroup_get(cgrp); + } else { + cgrp = ERR_PTR(-ENOTDIR); + } + kernfs_put(kn); + } else { + cgrp = ERR_PTR(-ENOENT); + } + + mutex_unlock(&cgroup_mutex); + return cgrp; +} +EXPORT_SYMBOL_GPL(cgroup_get_from_path); + +/* + * sock->sk_cgrp_data handling. For more info, see sock_cgroup_data + * definition in cgroup-defs.h. + */ +#ifdef CONFIG_SOCK_CGROUP_DATA + +#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) + +spinlock_t cgroup_sk_update_lock; +static bool cgroup_sk_alloc_disabled __read_mostly; + +void cgroup_sk_alloc_disable(void) +{ + if (cgroup_sk_alloc_disabled) + return; + pr_info("cgroup: disabling cgroup2 socket matching due to net_prio or net_cls activation\n"); + cgroup_sk_alloc_disabled = true; +} + +#else + +#define cgroup_sk_alloc_disabled false + +#endif + +void cgroup_sk_alloc(struct sock_cgroup_data *skcd) +{ + if (cgroup_sk_alloc_disabled) + return; + + rcu_read_lock(); + + while (true) { + struct css_set *cset; + + cset = task_css_set(current); + if (likely(cgroup_tryget(cset->dfl_cgrp))) { + skcd->val = (unsigned long)cset->dfl_cgrp; + break; + } + cpu_relax(); + } + + rcu_read_unlock(); +} + +void cgroup_sk_free(struct sock_cgroup_data *skcd) +{ + cgroup_put(sock_cgroup_ptr(skcd)); +} + +#endif /* CONFIG_SOCK_CGROUP_DATA */ + #ifdef CONFIG_CGROUP_DEBUG static struct cgroup_subsys_state * debug_css_alloc(struct cgroup_subsys_state *parent_css) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 8c15b29d5adc64..0d76ecce27bb4d 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1484,6 +1484,29 @@ config OF_RECONFIG_NOTIFIER_ERROR_INJECT If unsure, say N. +config NETDEV_NOTIFIER_ERROR_INJECT + tristate "Netdev notifier error injection module" + depends on NET && NOTIFIER_ERROR_INJECTION + help + This option provides the ability to inject artificial errors to + netdevice notifier chain callbacks. It is controlled through debugfs + interface /sys/kernel/debug/notifier-error-inject/netdev + + If the notifier call chain should be failed with some events + notified, write the error code to "actions/<notifier event>/error". + + Example: Inject netdevice mtu change error (-22 = -EINVAL) + + # cd /sys/kernel/debug/notifier-error-inject/netdev + # echo -22 > actions/NETDEV_CHANGEMTU/error + # ip link set eth0 mtu 1024 + RTNETLINK answers: Invalid argument + + To compile this code as a module, choose M here: the module will + be called netdev-notifier-error-inject. + + If unsure, say N. + config FAULT_INJECTION bool "Fault-injection framework" depends on DEBUG_KERNEL diff --git a/lib/Makefile b/lib/Makefile index 7f1de26613d26e..180dd4d0dd412b 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -120,6 +120,7 @@ obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o obj-$(CONFIG_PM_NOTIFIER_ERROR_INJECT) += pm-notifier-error-inject.o +obj-$(CONFIG_NETDEV_NOTIFIER_ERROR_INJECT) += netdev-notifier-error-inject.o obj-$(CONFIG_MEMORY_NOTIFIER_ERROR_INJECT) += memory-notifier-error-inject.o obj-$(CONFIG_OF_RECONFIG_NOTIFIER_ERROR_INJECT) += \ of-reconfig-notifier-error-inject.o diff --git a/lib/netdev-notifier-error-inject.c b/lib/netdev-notifier-error-inject.c new file mode 100644 index 00000000000000..13e9c62e216ff0 --- /dev/null +++ b/lib/netdev-notifier-error-inject.c @@ -0,0 +1,55 @@ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/netdevice.h> + +#include "notifier-error-inject.h" + +static int priority; +module_param(priority, int, 0); +MODULE_PARM_DESC(priority, "specify netdevice notifier priority"); + +static struct notifier_err_inject netdev_notifier_err_inject = { + .actions = { + { NOTIFIER_ERR_INJECT_ACTION(NETDEV_REGISTER) }, + { NOTIFIER_ERR_INJECT_ACTION(NETDEV_CHANGEMTU) }, + { NOTIFIER_ERR_INJECT_ACTION(NETDEV_CHANGENAME) }, + { NOTIFIER_ERR_INJECT_ACTION(NETDEV_PRE_UP) }, + { NOTIFIER_ERR_INJECT_ACTION(NETDEV_PRE_TYPE_CHANGE) }, + { NOTIFIER_ERR_INJECT_ACTION(NETDEV_POST_INIT) }, + { NOTIFIER_ERR_INJECT_ACTION(NETDEV_PRECHANGEMTU) }, + { NOTIFIER_ERR_INJECT_ACTION(NETDEV_PRECHANGEUPPER) }, + { NOTIFIER_ERR_INJECT_ACTION(NETDEV_CHANGEUPPER) }, + {} + } +}; + +static struct dentry *dir; + +static int netdev_err_inject_init(void) +{ + int err; + + dir = notifier_err_inject_init("netdev", notifier_err_inject_dir, + &netdev_notifier_err_inject, priority); + if (IS_ERR(dir)) + return PTR_ERR(dir); + + err = register_netdevice_notifier(&netdev_notifier_err_inject.nb); + if (err) + debugfs_remove_recursive(dir); + + return err; +} + +static void netdev_err_inject_exit(void) +{ + unregister_netdevice_notifier(&netdev_notifier_err_inject.nb); + debugfs_remove_recursive(dir); +} + +module_init(netdev_err_inject_init); +module_exit(netdev_err_inject_exit); + +MODULE_DESCRIPTION("Netdevice notifier error injection module"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Nikolay Aleksandrov <razor@blackwall.org>"); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index a54ff8949f9116..0f3be3fad2b2b7 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -231,9 +231,6 @@ static int rhashtable_rehash_attach(struct rhashtable *ht, */ rcu_assign_pointer(old_tbl->future_tbl, new_tbl); - /* Ensure the new table is visible to readers. */ - smp_wmb(); - spin_unlock_bh(old_tbl->locks); return 0; diff --git a/net/Kconfig b/net/Kconfig index 127da94ae25eb7..11f8c22af34d09 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -250,9 +250,14 @@ config XPS depends on SMP default y +config SOCK_CGROUP_DATA + bool + default n + config CGROUP_NET_PRIO bool "Network priority cgroup" depends on CGROUPS + select SOCK_CGROUP_DATA ---help--- Cgroup subsystem for use in assigning processes to network priorities on a per-interface basis. @@ -260,6 +265,7 @@ config CGROUP_NET_PRIO config CGROUP_NET_CLASSID bool "Network classid cgroup" depends on CGROUPS + select SOCK_CGROUP_DATA ---help--- Cgroup subsystem for use as general purpose socket classid marker that is being used in cls_cgroup and for netfilter matching. diff --git a/net/atm/common.c b/net/atm/common.c index 49a872db7e4258..6dc12305799e45 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -96,7 +96,7 @@ static void vcc_def_wakeup(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up(&wq->wait); rcu_read_unlock(); } @@ -117,7 +117,7 @@ static void vcc_write_space(struct sock *sk) if (vcc_writable(sk)) { wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible(&wq->wait); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index f11345e163d7f5..a58184fdf5fdc4 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -464,7 +464,8 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, hard_iface->soft_iface = soft_iface; bat_priv = netdev_priv(hard_iface->soft_iface); - ret = netdev_master_upper_dev_link(hard_iface->net_dev, soft_iface); + ret = netdev_master_upper_dev_link(hard_iface->net_dev, + soft_iface, NULL, NULL); if (ret) goto err_dev; @@ -708,7 +709,8 @@ static int batadv_hard_if_event(struct notifier_block *this, } hard_iface = batadv_hardif_get_by_netdev(net_dev); - if (!hard_iface && event == NETDEV_REGISTER) + if (!hard_iface && (event == NETDEV_REGISTER || + event == NETDEV_POST_TYPE_CHANGE)) hard_iface = batadv_hardif_add_interface(net_dev); if (!hard_iface) @@ -723,6 +725,7 @@ static int batadv_hard_if_event(struct notifier_block *this, batadv_hardif_deactivate_interface(hard_iface); break; case NETDEV_UNREGISTER: + case NETDEV_PRE_TYPE_CHANGE: list_del_rcu(&hard_iface->list); batadv_hardif_remove_interface(hard_iface); diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index ec02f5869a7802..8d1d4a22c50df1 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -493,7 +493,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) dev->priv_flags |= IFF_BRIDGE_PORT; - err = netdev_master_upper_dev_link(dev, br->dev); + err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL); if (err) goto err5; diff --git a/net/core/datagram.c b/net/core/datagram.c index d62af69ad844de..fa9dc6450b0813 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -83,8 +83,8 @@ static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int syn /* * Wait for the last received packet to be different from skb */ -static int wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, - const struct sk_buff *skb) +int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, + const struct sk_buff *skb) { int error; DEFINE_WAIT_FUNC(wait, receiver_wake_function); @@ -130,6 +130,7 @@ out_noerr: error = 1; goto out; } +EXPORT_SYMBOL(__skb_wait_for_more_packets); static struct sk_buff *skb_set_peeked(struct sk_buff *skb) { @@ -161,13 +162,15 @@ done: } /** - * __skb_recv_datagram - Receive a datagram skbuff + * __skb_try_recv_datagram - Receive a datagram skbuff * @sk: socket * @flags: MSG_ flags * @peeked: returns non-zero if this packet has been seen before * @off: an offset in bytes to peek skb from. Returns an offset * within an skb where data actually starts * @err: error code returned + * @last: set to last peeked message to inform the wait function + * what to look for when peeking * * Get a datagram skbuff, understands the peeking, nonblocking wakeups * and possible races. This replaces identical code in packet, raw and @@ -175,9 +178,11 @@ done: * the long standing peek and read race for datagram sockets. If you * alter this routine remember it must be re-entrant. * - * This function will lock the socket if a skb is returned, so the caller - * needs to unlock the socket in that case (usually by calling - * skb_free_datagram) + * This function will lock the socket if a skb is returned, so + * the caller needs to unlock the socket in that case (usually by + * calling skb_free_datagram). Returns NULL with *err set to + * -EAGAIN if no data was available or to some other value if an + * error was detected. * * * It does not lock socket since today. This function is * * free of race conditions. This measure should/can improve @@ -191,13 +196,13 @@ done: * quite explicitly by POSIX 1003.1g, don't change them without having * the standard around please. */ -struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, - int *peeked, int *off, int *err) +struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, + int *peeked, int *off, int *err, + struct sk_buff **last) { struct sk_buff_head *queue = &sk->sk_receive_queue; - struct sk_buff *skb, *last; + struct sk_buff *skb; unsigned long cpu_flags; - long timeo; /* * Caller is allowed not to check sk->sk_err before skb_recv_datagram() */ @@ -206,8 +211,6 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, if (error) goto no_packet; - timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); - do { /* Again only user level code calls this function, so nothing * interrupt level will suddenly eat the receive_queue. @@ -217,10 +220,10 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, */ int _off = *off; - last = (struct sk_buff *)queue; + *last = (struct sk_buff *)queue; spin_lock_irqsave(&queue->lock, cpu_flags); skb_queue_walk(queue, skb) { - last = skb; + *last = skb; *peeked = skb->peeked; if (flags & MSG_PEEK) { if (_off >= skb->len && (skb->len || _off || @@ -231,8 +234,11 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, skb = skb_set_peeked(skb); error = PTR_ERR(skb); - if (IS_ERR(skb)) - goto unlock_err; + if (IS_ERR(skb)) { + spin_unlock_irqrestore(&queue->lock, + cpu_flags); + goto no_packet; + } atomic_inc(&skb->users); } else @@ -242,25 +248,38 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, *off = _off; return skb; } + spin_unlock_irqrestore(&queue->lock, cpu_flags); + } while (sk_can_busy_loop(sk) && + sk_busy_loop(sk, flags & MSG_DONTWAIT)); - if (sk_can_busy_loop(sk) && - sk_busy_loop(sk, flags & MSG_DONTWAIT)) - continue; + error = -EAGAIN; - /* User doesn't want to wait */ - error = -EAGAIN; - if (!timeo) - goto no_packet; +no_packet: + *err = error; + return NULL; +} +EXPORT_SYMBOL(__skb_try_recv_datagram); - } while (!wait_for_more_packets(sk, err, &timeo, last)); +struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, + int *peeked, int *off, int *err) +{ + struct sk_buff *skb, *last; + long timeo; - return NULL; + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + + do { + skb = __skb_try_recv_datagram(sk, flags, peeked, off, err, + &last); + if (skb) + return skb; + + if (*err != -EAGAIN) + break; + } while (timeo && + !__skb_wait_for_more_packets(sk, err, &timeo, last)); -unlock_err: - spin_unlock_irqrestore(&queue->lock, cpu_flags); -no_packet: - *err = error; return NULL; } EXPORT_SYMBOL(__skb_recv_datagram); diff --git a/net/core/dev.c b/net/core/dev.c index 5df6cbce727c14..8f705fcedb94b0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2929,7 +2929,8 @@ static void skb_update_prio(struct sk_buff *skb) struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); if (!skb->priority && skb->sk && map) { - unsigned int prioidx = skb->sk->sk_cgrp_prioidx; + unsigned int prioidx = + sock_cgroup_prioidx(&skb->sk->sk_cgrp_data); if (prioidx < map->priomap_len) skb->priority = map->priomap[prioidx]; @@ -5421,7 +5422,7 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, static int __netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, bool master, - void *private) + void *upper_priv, void *upper_info) { struct netdev_notifier_changeupper_info changeupper_info; struct netdev_adjacent *i, *j, *to_i, *to_j; @@ -5445,6 +5446,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, changeupper_info.upper_dev = upper_dev; changeupper_info.master = master; changeupper_info.linking = true; + changeupper_info.upper_info = upper_info; ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev, &changeupper_info.info); @@ -5452,7 +5454,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (ret) return ret; - ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private, + ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, upper_priv, master); if (ret) return ret; @@ -5490,8 +5492,12 @@ static int __netdev_upper_dev_link(struct net_device *dev, goto rollback_lower_mesh; } - call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, - &changeupper_info.info); + ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, + &changeupper_info.info); + ret = notifier_to_errno(ret); + if (ret) + goto rollback_lower_mesh; + return 0; rollback_lower_mesh: @@ -5545,7 +5551,7 @@ rollback_mesh: int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev) { - return __netdev_upper_dev_link(dev, upper_dev, false, NULL); + return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL); } EXPORT_SYMBOL(netdev_upper_dev_link); @@ -5553,6 +5559,8 @@ EXPORT_SYMBOL(netdev_upper_dev_link); * netdev_master_upper_dev_link - Add a master link to the upper device * @dev: device * @upper_dev: new upper device + * @upper_priv: upper device private + * @upper_info: upper info to be passed down via notifier * * Adds a link to device which is upper to this one. In this case, only * one master upper device can be linked, although other non-master devices @@ -5561,20 +5569,14 @@ EXPORT_SYMBOL(netdev_upper_dev_link); * counts are adjusted and the function returns zero. */ int netdev_master_upper_dev_link(struct net_device *dev, - struct net_device *upper_dev) + struct net_device *upper_dev, + void *upper_priv, void *upper_info) { - return __netdev_upper_dev_link(dev, upper_dev, true, NULL); + return __netdev_upper_dev_link(dev, upper_dev, true, + upper_priv, upper_info); } EXPORT_SYMBOL(netdev_master_upper_dev_link); -int netdev_master_upper_dev_link_private(struct net_device *dev, - struct net_device *upper_dev, - void *private) -{ - return __netdev_upper_dev_link(dev, upper_dev, true, private); -} -EXPORT_SYMBOL(netdev_master_upper_dev_link_private); - /** * netdev_upper_dev_unlink - Removes a link to upper device * @dev: device @@ -5733,7 +5735,7 @@ EXPORT_SYMBOL(netdev_lower_dev_get_private); int dev_get_nest_level(struct net_device *dev, - bool (*type_check)(struct net_device *dev)) + bool (*type_check)(const struct net_device *dev)) { struct net_device *lower = NULL; struct list_head *iter; @@ -5755,6 +5757,26 @@ int dev_get_nest_level(struct net_device *dev, } EXPORT_SYMBOL(dev_get_nest_level); +/** + * netdev_lower_change - Dispatch event about lower device state change + * @lower_dev: device + * @lower_state_info: state to dispatch + * + * Send NETDEV_CHANGELOWERSTATE to netdev notifiers with info. + * The caller must hold the RTNL lock. + */ +void netdev_lower_state_changed(struct net_device *lower_dev, + void *lower_state_info) +{ + struct netdev_notifier_changelowerstate_info changelowerstate_info; + + ASSERT_RTNL(); + changelowerstate_info.lower_state_info = lower_state_info; + call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, lower_dev, + &changelowerstate_info.info); +} +EXPORT_SYMBOL(netdev_lower_state_changed); + static void dev_change_rx_flags(struct net_device *dev, int flags) { const struct net_device_ops *ops = dev->netdev_ops; diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 2e4df84c34a194..04257a0e353433 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -61,9 +61,12 @@ static int update_classid_sock(const void *v, struct file *file, unsigned n) int err; struct socket *sock = sock_from_file(file, &err); - if (sock) - sock->sk->sk_classid = (u32)(unsigned long)v; - + if (sock) { + spin_lock(&cgroup_sk_update_lock); + sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, + (unsigned long)v); + spin_unlock(&cgroup_sk_update_lock); + } return 0; } @@ -98,6 +101,8 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, { struct cgroup_cls_state *cs = css_cls_state(css); + cgroup_sk_alloc_disable(); + cs->classid = (u32)value; update_classid(css, (void *)(unsigned long)cs->classid); diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index cbd0a199bf52c7..053d60c33395ee 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -27,6 +27,12 @@ #include <linux/fdtable.h> +/* + * netprio allocates per-net_device priomap array which is indexed by + * css->id. Limiting css ID to 16bits doesn't lose anything. + */ +#define NETPRIO_ID_MAX USHRT_MAX + #define PRIOMAP_MIN_SZ 128 /* @@ -144,6 +150,9 @@ static int cgrp_css_online(struct cgroup_subsys_state *css) struct net_device *dev; int ret = 0; + if (css->id > NETPRIO_ID_MAX) + return -ENOSPC; + if (!parent_css) return 0; @@ -200,6 +209,8 @@ static ssize_t write_priomap(struct kernfs_open_file *of, if (!dev) return -ENODEV; + cgroup_sk_alloc_disable(); + rtnl_lock(); ret = netprio_set_prio(of_css(of), dev, prio); @@ -213,8 +224,12 @@ static int update_netprio(const void *v, struct file *file, unsigned n) { int err; struct socket *sock = sock_from_file(file, &err); - if (sock) - sock->sk->sk_cgrp_prioidx = (u32)(unsigned long)v; + if (sock) { + spin_lock(&cgroup_sk_update_lock); + sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data, + (unsigned long)v); + spin_unlock(&cgroup_sk_update_lock); + } return 0; } diff --git a/net/core/scm.c b/net/core/scm.c index 8a1741b14302bd..14596fb3717270 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -289,8 +289,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) /* Bump the usage count and install the file. */ sock = sock_from_file(fp[i], &err); if (sock) { - sock_update_netprioidx(sock->sk); - sock_update_classid(sock->sk); + sock_update_netprioidx(&sock->sk->sk_cgrp_data); + sock_update_classid(&sock->sk->sk_cgrp_data); } fd_install(new_fd, get_file(fp[i])); } diff --git a/net/core/sock.c b/net/core/sock.c index e31dfcee1729aa..1278d7b7bd9a8d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1363,6 +1363,7 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, if (!try_module_get(prot->owner)) goto out_free_sec; sk_tx_queue_clear(sk); + cgroup_sk_alloc(&sk->sk_cgrp_data); } return sk; @@ -1385,6 +1386,7 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) owner = prot->owner; slab = prot->slab; + cgroup_sk_free(&sk->sk_cgrp_data); security_sk_free(sk); if (slab != NULL) kmem_cache_free(slab, sk); @@ -1393,17 +1395,6 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) module_put(owner); } -#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) -void sock_update_netprioidx(struct sock *sk) -{ - if (in_interrupt()) - return; - - sk->sk_cgrp_prioidx = task_netprioidx(current); -} -EXPORT_SYMBOL_GPL(sock_update_netprioidx); -#endif - /** * sk_alloc - All socket objects are allocated here * @net: the applicable net namespace @@ -1432,8 +1423,8 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, sock_net_set(sk, net); atomic_set(&sk->sk_wmem_alloc, 1); - sock_update_classid(sk); - sock_update_netprioidx(sk); + sock_update_classid(&sk->sk_cgrp_data); + sock_update_netprioidx(&sk->sk_cgrp_data); } return sk; @@ -2282,7 +2273,7 @@ static void sock_def_wakeup(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_all(&wq->wait); rcu_read_unlock(); } @@ -2293,7 +2284,7 @@ static void sock_def_error_report(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_poll(&wq->wait, POLLERR); sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); rcu_read_unlock(); @@ -2305,7 +2296,7 @@ static void sock_def_readable(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); @@ -2323,7 +2314,7 @@ static void sock_def_write_space(struct sock *sk) */ if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); diff --git a/net/core/stream.c b/net/core/stream.c index b96f7a79e54458..159516a11b7e84 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -35,7 +35,7 @@ void sk_stream_write_space(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) diff --git a/net/dccp/output.c b/net/dccp/output.c index 4ce912e691d03e..b66c84db0766f5 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -201,7 +201,7 @@ void dccp_write_space(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible(&wq->wait); /* Should agree with poll, otherwise some programs break */ if (sock_writeable(sk)) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index b7448c8490ac44..208d1b257194a0 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -456,8 +456,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds) if (!ds->ports[port]) continue; - unregister_netdev(ds->ports[port]); - free_netdev(ds->ports[port]); + dsa_slave_destroy(ds->ports[port]); } mdiobus_unregister(ds->slave_mii_bus); @@ -508,33 +507,6 @@ static int dsa_switch_resume(struct dsa_switch *ds) } #endif - -/* link polling *************************************************************/ -static void dsa_link_poll_work(struct work_struct *ugly) -{ - struct dsa_switch_tree *dst; - int i; - - dst = container_of(ugly, struct dsa_switch_tree, link_poll_work); - - for (i = 0; i < dst->pd->nr_chips; i++) { - struct dsa_switch *ds = dst->ds[i]; - - if (ds != NULL && ds->drv->poll_link != NULL) - ds->drv->poll_link(ds); - } - - mod_timer(&dst->link_poll_timer, round_jiffies(jiffies + HZ)); -} - -static void dsa_link_poll_timer(unsigned long _dst) -{ - struct dsa_switch_tree *dst = (void *)_dst; - - schedule_work(&dst->link_poll_work); -} - - /* platform driver init and cleanup *****************************************/ static int dev_is_class(struct device *dev, void *class) { @@ -877,8 +849,6 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, } dst->ds[i] = ds; - if (ds->drv->poll_link != NULL) - dst->link_poll_needed = 1; ++configured; } @@ -897,15 +867,6 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, wmb(); dev->dsa_ptr = (void *)dst; - if (dst->link_poll_needed) { - INIT_WORK(&dst->link_poll_work, dsa_link_poll_work); - init_timer(&dst->link_poll_timer); - dst->link_poll_timer.data = (unsigned long)dst; - dst->link_poll_timer.function = dsa_link_poll_timer; - dst->link_poll_timer.expires = round_jiffies(jiffies + HZ); - add_timer(&dst->link_poll_timer); - } - return 0; } @@ -957,8 +918,10 @@ static int dsa_probe(struct platform_device *pdev) platform_set_drvdata(pdev, dst); ret = dsa_setup_dst(dst, dev, &pdev->dev, pd); - if (ret) + if (ret) { + dev_put(dev); goto out; + } return 0; @@ -972,17 +935,14 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst) { int i; - if (dst->link_poll_needed) - del_timer_sync(&dst->link_poll_timer); - - flush_work(&dst->link_poll_work); - for (i = 0; i < dst->pd->nr_chips; i++) { struct dsa_switch *ds = dst->ds[i]; if (ds) dsa_switch_destroy(ds); } + + dev_put(dst->master_netdev); } static int dsa_remove(struct platform_device *pdev) @@ -1028,6 +988,14 @@ static int dsa_suspend(struct device *d) struct dsa_switch_tree *dst = platform_get_drvdata(pdev); int i, ret = 0; + dst->master_netdev->dsa_ptr = NULL; + + /* If we used a tagging format that doesn't have an ethertype + * field, make sure that all packets from this point get sent + * without the tag and go through the regular receive path. + */ + wmb(); + for (i = 0; i < dst->pd->nr_chips; i++) { struct dsa_switch *ds = dst->ds[i]; diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 311796c809afc2..1d1a54687e4abc 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -61,6 +61,7 @@ extern const struct dsa_device_ops notag_netdev_ops; void dsa_slave_mii_bus_init(struct dsa_switch *ds); int dsa_slave_create(struct dsa_switch *ds, struct device *parent, int port, char *name); +void dsa_slave_destroy(struct net_device *slave_dev); int dsa_slave_suspend(struct net_device *slave_dev); int dsa_slave_resume(struct net_device *slave_dev); int dsa_slave_netdevice_event(struct notifier_block *unused, diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 7bc787b095c897..1e9e9424a33daf 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1212,6 +1212,17 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, return 0; } +void dsa_slave_destroy(struct net_device *slave_dev) +{ + struct dsa_slave_priv *p = netdev_priv(slave_dev); + + netif_carrier_off(slave_dev); + if (p->phy) + phy_disconnect(p->phy); + unregister_netdev(slave_dev); + free_netdev(slave_dev); +} + static bool dsa_slave_dev_check(struct net_device *dev) { return dev->netdev_ops == &dsa_slave_netdev_ops; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 614521437e3015..04a48c0159cca5 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -24,7 +24,6 @@ #include <linux/tcp.h> #include <linux/udp.h> #include <linux/if_arp.h> -#include <linux/mroute.h> #include <linux/if_vlan.h> #include <linux/init.h> #include <linux/in6.h> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 4233cbe47052d8..e0b94cd843d7f8 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -76,7 +76,6 @@ #include <linux/igmp.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_bridge.h> -#include <linux/mroute.h> #include <linux/netlink.h> #include <linux/tcp.h> diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index cbb51f3fac06c6..0f6e9ee031c4b7 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -30,7 +30,6 @@ #include <linux/tcp.h> #include <linux/udp.h> #include <linux/if_arp.h> -#include <linux/mroute.h> #include <linux/init.h> #include <linux/in6.h> #include <linux/inetdevice.h> diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 6cb9009c3d9678..1db8418aa62e67 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -24,7 +24,6 @@ #include <linux/netdevice.h> #include <linux/in.h> #include <linux/if_arp.h> -#include <linux/mroute.h> #include <linux/init.h> #include <linux/in6.h> #include <linux/inetdevice.h> diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 4d8f0b6987777a..02d9c21e2953b8 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -30,7 +30,6 @@ #include <linux/tcp.h> #include <linux/udp.h> #include <linux/if_arp.h> -#include <linux/mroute.h> #include <linux/init.h> #include <linux/netfilter_ipv4.h> #include <linux/if_ether.h> diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index f34c31defafe08..1f067294cbc530 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -103,7 +103,6 @@ #include <linux/tcp.h> #include <linux/udp.h> #include <linux/if_arp.h> -#include <linux/mroute.h> #include <linux/init.h> #include <linux/netfilter_ipv4.h> #include <linux/if_ether.h> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index cbf075a294469a..395e2814a46d99 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -66,22 +66,7 @@ #include <net/netlink.h> #include <net/fib_rules.h> #include <linux/netconf.h> - -struct mr_table { - struct list_head list; - possible_net_t net; - u32 id; - struct sock __rcu *mroute_sk; - struct timer_list ipmr_expire_timer; - struct list_head mfc_unres_queue; - struct list_head mfc_cache_array[MFC_LINES]; - struct vif_device vif_table[MAXVIFS]; - int maxvif; - atomic_t cache_resolve_queue_len; - bool mroute_do_assert; - bool mroute_do_pim; - int mroute_reg_vif_num; -}; +#include <net/nexthop.h> struct ipmr_rule { struct fib_rule common; @@ -91,11 +76,6 @@ struct ipmr_result { struct mr_table *mrt; }; -static inline bool pimsm_enabled(void) -{ - return IS_BUILTIN(CONFIG_IP_PIMSM_V1) || IS_BUILTIN(CONFIG_IP_PIMSM_V2); -} - /* Big lock, protecting vif table, mrt cache and mroute socket state. * Note that the changes are semaphored via rtnl_lock. */ @@ -104,8 +84,6 @@ static DEFINE_RWLOCK(mrt_lock); /* Multicast router control variables */ -#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL) - /* Special spinlock for queue of unresolved entries */ static DEFINE_SPINLOCK(mfc_unres_lock); @@ -761,7 +739,7 @@ static int vif_add(struct net *net, struct mr_table *mrt, switch (vifc->vifc_flags) { case VIFF_REGISTER: - if (!pimsm_enabled()) + if (!ipmr_pimsm_enabled()) return -EINVAL; /* Special Purpose VIF in PIM * All the packets will be sent to the daemon @@ -1300,12 +1278,14 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, switch (optname) { case MRT_INIT: - if (optlen != sizeof(int)) + if (optlen != sizeof(int)) { ret = -EINVAL; - if (rtnl_dereference(mrt->mroute_sk)) + break; + } + if (rtnl_dereference(mrt->mroute_sk)) { ret = -EADDRINUSE; - if (ret) break; + } ret = ip_ra_control(sk, 1, mrtsock_destruct); if (ret == 0) { @@ -1388,7 +1368,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, mrt->mroute_do_assert = val; break; case MRT_PIM: - if (!pimsm_enabled()) { + if (!ipmr_pimsm_enabled()) { ret = -ENOPROTOOPT; break; } @@ -1462,7 +1442,7 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int val = 0x0305; break; case MRT_PIM: - if (!pimsm_enabled()) + if (!ipmr_pimsm_enabled()) return -ENOPROTOOPT; val = mrt->mroute_do_pim; break; @@ -2192,8 +2172,6 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, } read_lock(&mrt_lock); - if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY)) - cache->mfc_flags |= MFC_NOTIFY; err = __ipmr_fill_mroute(mrt, skb, cache, rtm); read_unlock(&mrt_lock); rcu_read_unlock(); @@ -2355,6 +2333,130 @@ done: return skb->len; } +static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = { + [RTA_SRC] = { .type = NLA_U32 }, + [RTA_DST] = { .type = NLA_U32 }, + [RTA_IIF] = { .type = NLA_U32 }, + [RTA_TABLE] = { .type = NLA_U32 }, + [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, +}; + +static bool ipmr_rtm_validate_proto(unsigned char rtm_protocol) +{ + switch (rtm_protocol) { + case RTPROT_STATIC: + case RTPROT_MROUTED: + return true; + } + return false; +} + +static int ipmr_nla_get_ttls(const struct nlattr *nla, struct mfcctl *mfcc) +{ + struct rtnexthop *rtnh = nla_data(nla); + int remaining = nla_len(nla), vifi = 0; + + while (rtnh_ok(rtnh, remaining)) { + mfcc->mfcc_ttls[vifi] = rtnh->rtnh_hops; + if (++vifi == MAXVIFS) + break; + rtnh = rtnh_next(rtnh, &remaining); + } + + return remaining > 0 ? -EINVAL : vifi; +} + +/* returns < 0 on error, 0 for ADD_MFC and 1 for ADD_MFC_PROXY */ +static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh, + struct mfcctl *mfcc, int *mrtsock, + struct mr_table **mrtret) +{ + struct net_device *dev = NULL; + u32 tblid = RT_TABLE_DEFAULT; + struct mr_table *mrt; + struct nlattr *attr; + struct rtmsg *rtm; + int ret, rem; + + ret = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy); + if (ret < 0) + goto out; + rtm = nlmsg_data(nlh); + + ret = -EINVAL; + if (rtm->rtm_family != RTNL_FAMILY_IPMR || rtm->rtm_dst_len != 32 || + rtm->rtm_type != RTN_MULTICAST || + rtm->rtm_scope != RT_SCOPE_UNIVERSE || + !ipmr_rtm_validate_proto(rtm->rtm_protocol)) + goto out; + + memset(mfcc, 0, sizeof(*mfcc)); + mfcc->mfcc_parent = -1; + ret = 0; + nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), rem) { + switch (nla_type(attr)) { + case RTA_SRC: + mfcc->mfcc_origin.s_addr = nla_get_be32(attr); + break; + case RTA_DST: + mfcc->mfcc_mcastgrp.s_addr = nla_get_be32(attr); + break; + case RTA_IIF: + dev = __dev_get_by_index(net, nla_get_u32(attr)); + if (!dev) { + ret = -ENODEV; + goto out; + } + break; + case RTA_MULTIPATH: + if (ipmr_nla_get_ttls(attr, mfcc) < 0) { + ret = -EINVAL; + goto out; + } + break; + case RTA_PREFSRC: + ret = 1; + break; + case RTA_TABLE: + tblid = nla_get_u32(attr); + break; + } + } + mrt = ipmr_get_table(net, tblid); + if (!mrt) { + ret = -ENOENT; + goto out; + } + *mrtret = mrt; + *mrtsock = rtm->rtm_protocol == RTPROT_MROUTED ? 1 : 0; + if (dev) + mfcc->mfcc_parent = ipmr_find_vif(mrt, dev); + +out: + return ret; +} + +/* takes care of both newroute and delroute */ +static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct net *net = sock_net(skb->sk); + int ret, mrtsock, parent; + struct mr_table *tbl; + struct mfcctl mfcc; + + mrtsock = 0; + tbl = NULL; + ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl); + if (ret < 0) + return ret; + + parent = ret ? mfcc.mfcc_parent : -1; + if (nlh->nlmsg_type == RTM_NEWROUTE) + return ipmr_mfc_add(net, tbl, &mfcc, mrtsock, parent); + else + return ipmr_mfc_delete(tbl, &mfcc, parent); +} + #ifdef CONFIG_PROC_FS /* The /proc interfaces to multicast routing : * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif @@ -2708,6 +2810,10 @@ int __init ip_mr_init(void) #endif rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute, NULL); + rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE, + ipmr_rtm_route, NULL, NULL); + rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE, + ipmr_rtm_route, NULL, NULL); return 0; #ifdef CONFIG_IP_PIMSM_V2 diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index cb7ca569052c5b..a800cee8803570 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2296,7 +2296,7 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, return; if (tcp_write_xmit(sk, cur_mss, nonagle, 0, - sk_gfp_atomic(sk, GFP_ATOMIC))) + sk_gfp_mask(sk, GFP_ATOMIC))) tcp_check_probe_timer(sk); } @@ -3352,8 +3352,9 @@ void tcp_send_ack(struct sock *sk) * tcp_transmit_skb() will set the ownership to this * sock. */ - buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); - if (!buff) { + buff = alloc_skb(MAX_TCP_HEADER, + sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN)); + if (unlikely(!buff)) { inet_csk_schedule_ack(sk); inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, @@ -3375,7 +3376,7 @@ void tcp_send_ack(struct sock *sk) /* Send it off, this clears delayed acks for us. */ skb_mstamp_get(&buff->skb_mstamp); - tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC)); + tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0); } EXPORT_SYMBOL_GPL(tcp_send_ack); @@ -3396,7 +3397,8 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib) struct sk_buff *skb; /* We don't queue it, tcp_transmit_skb() sets ownership. */ - skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); + skb = alloc_skb(MAX_TCP_HEADER, + sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN)); if (!skb) return -1; @@ -3409,7 +3411,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib) tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); skb_mstamp_get(&skb->skb_mstamp); NET_INC_STATS(sock_net(sk), mib); - return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); + return tcp_transmit_skb(sk, skb, 0, (__force gfp_t)0); } void tcp_send_window_probe(struct sock *sk) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 61f26851655ccd..5e9111da449d35 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3287,7 +3287,8 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, case NETDEV_PRE_TYPE_CHANGE: case NETDEV_POST_TYPE_CHANGE: - addrconf_type_change(dev, event); + if (idev) + addrconf_type_change(dev, event); break; } @@ -5200,6 +5201,20 @@ int addrconf_sysctl_forward(struct ctl_table *ctl, int write, } static +int addrconf_sysctl_hop_limit(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table lctl; + int min_hl = 1, max_hl = 255; + + lctl = *ctl; + lctl.extra1 = &min_hl; + lctl.extra2 = &max_hl; + + return proc_dointvec_minmax(&lctl, write, buffer, lenp, ppos); +} + +static int addrconf_sysctl_mtu(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -5454,7 +5469,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.hop_limit, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = addrconf_sysctl_hop_limit, }, { .procname = "mtu", diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 3c7b9310b33fdb..938d03ce5e4b71 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -24,7 +24,6 @@ #include <linux/tcp.h> #include <linux/udp.h> #include <linux/if_arp.h> -#include <linux/mroute.h> #include <linux/init.h> #include <linux/in6.h> #include <linux/inetdevice.h> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e7aab561b7b463..c16e3fbf685475 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1136,7 +1136,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * */ tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr, AF_INET6, key->key, key->keylen, - sk_gfp_atomic(sk, GFP_ATOMIC)); + sk_gfp_mask(sk, GFP_ATOMIC)); } #endif @@ -1152,7 +1152,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * /* Clone pktoptions received with SYN, if we own the req */ if (ireq->pktopts) { newnp->pktoptions = skb_clone(ireq->pktopts, - sk_gfp_atomic(sk, GFP_ATOMIC)); + sk_gfp_mask(sk, GFP_ATOMIC)); consume_skb(ireq->pktopts); ireq->pktopts = NULL; if (newnp->pktoptions) @@ -1218,7 +1218,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) --ANK (980728) */ if (np->rxopt.all) - opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC)); + opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ struct dst_entry *dst = sk->sk_rx_dst; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 435608c4306d4a..3ea4c98d94dcee 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -303,7 +303,7 @@ static void iucv_sock_wake_msglim(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_all(&wq->wait); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); rcu_read_unlock(); diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 1ad18c55064caf..d93f113cb522ac 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1862,5 +1862,5 @@ MODULE_AUTHOR("James Chapman <jchapman@katalix.com>"); MODULE_DESCRIPTION("PPP over L2TP over UDP"); MODULE_LICENSE("GPL"); MODULE_VERSION(PPPOL2TP_DRV_VERSION); -MODULE_ALIAS("pppox-proto-" __stringify(PX_PROTO_OL2TP)); +MODULE_ALIAS_NET_PF_PROTO(PF_PPPOX, PX_PROTO_OL2TP); MODULE_ALIAS_L2TP_PWTYPE(11); diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index c70d750148b667..4b3b9b310c3a00 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -96,22 +96,15 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) } EXPORT_SYMBOL_GPL(mpls_pkt_too_big); -static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, - struct sk_buff *skb, bool bos) +static u32 mpls_multipath_hash(struct mpls_route *rt, + struct sk_buff *skb, bool bos) { struct mpls_entry_decoded dec; struct mpls_shim_hdr *hdr; bool eli_seen = false; int label_index; - int nh_index = 0; u32 hash = 0; - /* No need to look further into packet if there's only - * one path - */ - if (rt->rt_nhn == 1) - goto out; - for (label_index = 0; label_index < MAX_MP_SELECT_LABELS && !bos; label_index++) { if (!pskb_may_pull(skb, sizeof(*hdr) * label_index)) @@ -165,7 +158,38 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, } } - nh_index = hash % rt->rt_nhn; + return hash; +} + +static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, + struct sk_buff *skb, bool bos) +{ + int alive = ACCESS_ONCE(rt->rt_nhn_alive); + u32 hash = 0; + int nh_index = 0; + int n = 0; + + /* No need to look further into packet if there's only + * one path + */ + if (rt->rt_nhn == 1) + goto out; + + if (alive <= 0) + return NULL; + + hash = mpls_multipath_hash(rt, skb, bos); + nh_index = hash % alive; + if (alive == rt->rt_nhn) + goto out; + for_nexthops(rt) { + if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) + continue; + if (n == nh_index) + return nh; + n++; + } endfor_nexthops(rt); + out: return &rt->rt_nh[nh_index]; } @@ -365,6 +389,7 @@ static struct mpls_route *mpls_rt_alloc(int num_nh, u8 max_alen) GFP_KERNEL); if (rt) { rt->rt_nhn = num_nh; + rt->rt_nhn_alive = num_nh; rt->rt_max_alen = max_alen_aligned; } @@ -536,6 +561,16 @@ static int mpls_nh_assign_dev(struct net *net, struct mpls_route *rt, RCU_INIT_POINTER(nh->nh_dev, dev); + if (!(dev->flags & IFF_UP)) { + nh->nh_flags |= RTNH_F_DEAD; + } else { + unsigned int flags; + + flags = dev_get_flags(dev); + if (!(flags & (IFF_RUNNING | IFF_LOWER_UP))) + nh->nh_flags |= RTNH_F_LINKDOWN; + } + return 0; errout: @@ -570,6 +605,9 @@ static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg, if (err) goto errout; + if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) + rt->rt_nhn_alive--; + return 0; errout: @@ -577,8 +615,8 @@ errout: } static int mpls_nh_build(struct net *net, struct mpls_route *rt, - struct mpls_nh *nh, int oif, - struct nlattr *via, struct nlattr *newdst) + struct mpls_nh *nh, int oif, struct nlattr *via, + struct nlattr *newdst) { int err = -ENOMEM; @@ -681,11 +719,13 @@ static int mpls_nh_build_multi(struct mpls_route_config *cfg, goto errout; err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh, - rtnh->rtnh_ifindex, nla_via, - nla_newdst); + rtnh->rtnh_ifindex, nla_via, nla_newdst); if (err) goto errout; + if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) + rt->rt_nhn_alive--; + rtnh = rtnh_next(rtnh, &remaining); nhs++; } endfor_nexthops(rt); @@ -875,34 +915,74 @@ free: return ERR_PTR(err); } -static void mpls_ifdown(struct net_device *dev) +static void mpls_ifdown(struct net_device *dev, int event) { struct mpls_route __rcu **platform_label; struct net *net = dev_net(dev); - struct mpls_dev *mdev; unsigned index; platform_label = rtnl_dereference(net->mpls.platform_label); for (index = 0; index < net->mpls.platform_labels; index++) { struct mpls_route *rt = rtnl_dereference(platform_label[index]); + if (!rt) continue; - for_nexthops(rt) { + + change_nexthops(rt) { if (rtnl_dereference(nh->nh_dev) != dev) continue; - nh->nh_dev = NULL; + switch (event) { + case NETDEV_DOWN: + case NETDEV_UNREGISTER: + nh->nh_flags |= RTNH_F_DEAD; + /* fall through */ + case NETDEV_CHANGE: + nh->nh_flags |= RTNH_F_LINKDOWN; + ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1; + break; + } + if (event == NETDEV_UNREGISTER) + RCU_INIT_POINTER(nh->nh_dev, NULL); } endfor_nexthops(rt); } - mdev = mpls_dev_get(dev); - if (!mdev) - return; - mpls_dev_sysctl_unregister(mdev); + return; +} + +static void mpls_ifup(struct net_device *dev, unsigned int nh_flags) +{ + struct mpls_route __rcu **platform_label; + struct net *net = dev_net(dev); + unsigned index; + int alive; + + platform_label = rtnl_dereference(net->mpls.platform_label); + for (index = 0; index < net->mpls.platform_labels; index++) { + struct mpls_route *rt = rtnl_dereference(platform_label[index]); + + if (!rt) + continue; + + alive = 0; + change_nexthops(rt) { + struct net_device *nh_dev = + rtnl_dereference(nh->nh_dev); + + if (!(nh->nh_flags & nh_flags)) { + alive++; + continue; + } + if (nh_dev != dev) + continue; + alive++; + nh->nh_flags &= ~nh_flags; + } endfor_nexthops(rt); - RCU_INIT_POINTER(dev->mpls_ptr, NULL); + ACCESS_ONCE(rt->rt_nhn_alive) = alive; + } - kfree_rcu(mdev, rcu); + return; } static int mpls_dev_notify(struct notifier_block *this, unsigned long event, @@ -910,9 +990,9 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct mpls_dev *mdev; + unsigned int flags; - switch(event) { - case NETDEV_REGISTER: + if (event == NETDEV_REGISTER) { /* For now just support ethernet devices */ if ((dev->type == ARPHRD_ETHER) || (dev->type == ARPHRD_LOOPBACK)) { @@ -920,10 +1000,39 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, if (IS_ERR(mdev)) return notifier_from_errno(PTR_ERR(mdev)); } - break; + return NOTIFY_OK; + } + mdev = mpls_dev_get(dev); + if (!mdev) + return NOTIFY_OK; + + switch (event) { + case NETDEV_DOWN: + mpls_ifdown(dev, event); + break; + case NETDEV_UP: + flags = dev_get_flags(dev); + if (flags & (IFF_RUNNING | IFF_LOWER_UP)) + mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN); + else + mpls_ifup(dev, RTNH_F_DEAD); + break; + case NETDEV_CHANGE: + flags = dev_get_flags(dev); + if (flags & (IFF_RUNNING | IFF_LOWER_UP)) + mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN); + else + mpls_ifdown(dev, event); + break; case NETDEV_UNREGISTER: - mpls_ifdown(dev); + mpls_ifdown(dev, event); + mdev = mpls_dev_get(dev); + if (mdev) { + mpls_dev_sysctl_unregister(mdev); + RCU_INIT_POINTER(dev->mpls_ptr, NULL); + kfree_rcu(mdev, rcu); + } break; case NETDEV_CHANGENAME: mdev = mpls_dev_get(dev); @@ -1237,9 +1346,15 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, dev = rtnl_dereference(nh->nh_dev); if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex)) goto nla_put_failure; + if (nh->nh_flags & RTNH_F_LINKDOWN) + rtm->rtm_flags |= RTNH_F_LINKDOWN; + if (nh->nh_flags & RTNH_F_DEAD) + rtm->rtm_flags |= RTNH_F_DEAD; } else { struct rtnexthop *rtnh; struct nlattr *mp; + int dead = 0; + int linkdown = 0; mp = nla_nest_start(skb, RTA_MULTIPATH); if (!mp) @@ -1253,6 +1368,15 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, dev = rtnl_dereference(nh->nh_dev); if (dev) rtnh->rtnh_ifindex = dev->ifindex; + if (nh->nh_flags & RTNH_F_LINKDOWN) { + rtnh->rtnh_flags |= RTNH_F_LINKDOWN; + linkdown++; + } + if (nh->nh_flags & RTNH_F_DEAD) { + rtnh->rtnh_flags |= RTNH_F_DEAD; + dead++; + } + if (nh->nh_labels && nla_put_labels(skb, RTA_NEWDST, nh->nh_labels, nh->nh_label)) @@ -1266,6 +1390,11 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh; } endfor_nexthops(rt); + if (linkdown == rt->rt_nhn) + rtm->rtm_flags |= RTNH_F_LINKDOWN; + if (dead == rt->rt_nhn) + rtm->rtm_flags |= RTNH_F_DEAD; + nla_nest_end(skb, mp); } diff --git a/net/mpls/internal.h b/net/mpls/internal.h index bde52ce88c949e..732a5c17e98629 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -41,6 +41,7 @@ enum mpls_payload_type { struct mpls_nh { /* next hop label forwarding entry */ struct net_device __rcu *nh_dev; + unsigned int nh_flags; u32 nh_label[MAX_NEW_LABELS]; u8 nh_labels; u8 nh_via_alen; @@ -74,6 +75,7 @@ struct mpls_route { /* next hop label forwarding entry */ u8 rt_payload_type; u8 rt_max_alen; unsigned int rt_nhn; + unsigned int rt_nhn_alive; struct mpls_nh rt_nh[0]; }; diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 9dfaf4d55ee0b1..1915cab7f32d34 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -174,7 +174,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, sk = skb_to_full_sk(skb); if (!sk || !sk_fullsock(sk)) goto err; - *dest = sk->sk_classid; + *dest = sock_cgroup_classid(&sk->sk_cgrp_data); break; #endif default: diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index a1d126f2946305..54eaeb45ce9963 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -42,7 +42,8 @@ cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) if (skb->sk == NULL || !sk_fullsock(skb->sk)) return false; - return (info->id == skb->sk->sk_classid) ^ info->invert; + return (info->id == sock_cgroup_classid(&skb->sk->sk_cgrp_data)) ^ + info->invert; } static struct xt_match cgroup_mt_reg __read_mostly = { diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 6b0190b987ec62..6a6adf3143638f 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -105,7 +105,7 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name) rtnl_lock(); err = netdev_master_upper_dev_link(vport->dev, - get_dpdev(vport->dp)); + get_dpdev(vport->dp), NULL, NULL); if (err) goto error_unlock; diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 1f8a144a5dc2c7..7e2d1057d8bc1c 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -67,7 +67,7 @@ static void rxrpc_write_space(struct sock *sk) if (rxrpc_writable(sk)) { struct socket_wq *wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible(&wq->wait); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 03c8256063ec63..106bb09636f1cf 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6982,7 +6982,7 @@ void sctp_data_ready(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLRDNORM | POLLRDBAND); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); diff --git a/net/tipc/node.c b/net/tipc/node.c index 3f7a4ed719900b..fa97d9649a2851 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1189,20 +1189,19 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, spin_unlock_bh(&le->lock); } tipc_node_read_unlock(n); - if (likely(!skb_queue_empty(&xmitq))) { + if (likely(!rc)) tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr); - return 0; - } - if (unlikely(rc == -ENOBUFS)) + else if (rc == -ENOBUFS) tipc_node_link_down(n, bearer_id, false); tipc_node_put(n); return rc; } - if (unlikely(!in_own_node(net, dnode))) - return rc; - tipc_sk_rcv(net, list); - return 0; + if (likely(in_own_node(net, dnode))) { + tipc_sk_rcv(net, list); + return 0; + } + return rc; } /* tipc_node_xmit_skb(): send single buffer to destination diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b53246fb041283..69c29050f14abe 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1491,7 +1491,7 @@ static void tipc_write_space(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); rcu_read_unlock(); @@ -1508,7 +1508,7 @@ static void tipc_data_ready(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLRDNORM | POLLRDBAND); rcu_read_unlock(); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 45aebd966978bd..1c3c1f3a3ec4f7 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -451,7 +451,7 @@ static void unix_write_space(struct sock *sk) rcu_read_lock(); if (unix_writable(sk)) { wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); @@ -2078,8 +2078,8 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, struct scm_cookie scm; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); - int noblock = flags & MSG_DONTWAIT; - struct sk_buff *skb; + struct sk_buff *skb, *last; + long timeo; int err; int peeked, skip; @@ -2087,30 +2087,38 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, if (flags&MSG_OOB) goto out; - err = mutex_lock_interruptible(&u->readlock); - if (unlikely(err)) { - /* recvmsg() in non blocking mode is supposed to return -EAGAIN - * sk_rcvtimeo is not honored by mutex_lock_interruptible() - */ - err = noblock ? -EAGAIN : -ERESTARTSYS; - goto out; - } + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); - skip = sk_peek_offset(sk, flags); + do { + mutex_lock(&u->readlock); - skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err); - if (!skb) { + skip = sk_peek_offset(sk, flags); + skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err, + &last); + if (skb) + break; + + mutex_unlock(&u->readlock); + + if (err != -EAGAIN) + break; + } while (timeo && + !__skb_wait_for_more_packets(sk, &err, &timeo, last)); + + if (!skb) { /* implies readlock unlocked */ unix_state_lock(sk); /* Signal EOF on disconnected non-blocking SEQPACKET socket. */ if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN && (sk->sk_shutdown & RCV_SHUTDOWN)) err = 0; unix_state_unlock(sk); - goto out_unlock; + goto out; } - wake_up_interruptible_sync_poll(&u->peer_wait, - POLLOUT | POLLWRNORM | POLLWRBAND); + if (wq_has_sleeper(&u->peer_wait)) + wake_up_interruptible_sync_poll(&u->peer_wait, + POLLOUT | POLLWRNORM | + POLLWRBAND); if (msg->msg_name) unix_copy_addr(msg, skb->sk); @@ -2162,7 +2170,6 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, out_free: skb_free_datagram(sk, skb); -out_unlock: mutex_unlock(&u->readlock); out: return err; |