mlxsw: pci: Use NAPI for event processing

Spectrum ASICs only support a single interrupt, that means that all the events are handled by one IRQ (interrupt request) handler. Once an interrupt is received, we schedule tasklet to handle events from EQ and then schedule tasklets to handle completions from CQs. Tasklet runs in softIRQ (software IRQ) context, and will be run on the same CPU which scheduled it. That means that today we use only one CPU to handle all the packets (both network packets and EMADs) from hardware. This can be improved using NAPI. The idea is to use NAPI instance per CQ, which is mapped 1:1 to DQ (RDQ or SDQ). NAPI poll method can be run in kernel thread, so then the driver will be able to handle WQEs in several CPUs. Convert the existing code to use NAPI APIs. Add NAPI instance as part of 'struct mlxsw_pci_queue' and initialize it as part of CQs initialization. Set the appropriate poll method and dummy net device, according to queue number, similar to tasklet setup. For CQs which are used for completions of RDQ, use Rx poll method and 'napi_dev_rx', which is set as 'threaded'. It means that Rx poll method will run in kernel context, so several RDQs will be handled in parallel. For CQs which are used for completions of SDQ, use Tx poll method and 'napi_dev_tx', this method will run in softIRQ context, as it is recommended in NAPI documentation, as Tx packets' processing is short task. Convert mlxsw_pci_cq_{rx,tx}_tasklet() to poll methods. Handle 'budget' argument - ignore it in Tx poll method, as it is recommended to not limit Tx processing. For Rx processing, handle up to 'budget' completions. Return 'work_done' which is the amount of completions that were handled. Handle the following cases: 1. After processing 'budget' completions, the driver still has work to do: Return work-done = budget. In that case, the NAPI instance will be polled again (without the need to be rescheduled). Do not re-arm the queue, as NAPI will handle the reschedule, so we do not have to involve hardware to send an additional interrupt for the completions that should be processed. 2. Event processing has been completed: Call napi_complete_done() to mark NAPI processing as completed, which means that the poll method will not be rescheduled. Re-arm the queue, as all completions were handled. In case that poll method handled exactly 'budget' completions, return work-done = budget -1, to distinguish from the case that driver still has completions to handle. Otherwise, return the amount of completions that were handled. Signed-off-by: Amit Cohen <amcohen@nvidia.com> Reviewed-by: Ido Schimmel <idosch@nvidia.com> Signed-off-by: Petr Machata <petrm@nvidia.com> Signed-off-by: David S. Miller <davem@davemloft.net>
author: Amit Cohen <amcohen@nvidia.com> 2024-04-26 14:42:26 +0200
committer: David S. Miller <davem@davemloft.net> 2024-04-29 10:47:05 +0100
commit: 3b0b3019dbea1a110ff01896e00fe30804bf78bc (patch)
tree: c0d86970d8ee07ef2745d7b0417423971eb7b55f
parent: c0d9267873bc0960f65ea43cca72d63dbe34202f (diff)
download: bluetooth-next-3b0b3019dbea1a110ff01896e00fe30804bf78bc.tar.gz
1 files changed, 77 insertions, 19 deletions
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 7724f9a614791a..bf66d996e32e72 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -87,7 +87,7 @@ struct mlxsw_pci_queue {
 		struct {
 			enum mlxsw_pci_cqe_v v;
 			struct mlxsw_pci_queue *dq;
-			struct tasklet_struct tasklet;
+			struct napi_struct napi;
 		} cq;
 		struct {
 			struct tasklet_struct tasklet;
@@ -684,16 +684,29 @@ static char *mlxsw_pci_cq_sw_cqe_get(struct mlxsw_pci_queue *q)
 	return elem;
 }
 
-#define MLXSW_PCI_CQ_MAX_HANDLE 64
+static bool mlxsw_pci_cq_cqe_to_handle(struct mlxsw_pci_queue *q)
+{
+	struct mlxsw_pci_queue_elem_info *elem_info;
+	bool owner_bit;
 
-static void mlxsw_pci_cq_rx_tasklet(struct tasklet_struct *t)
+	elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
+	owner_bit = mlxsw_pci_cqe_owner_get(q->u.cq.v, elem_info->elem);
+	return !mlxsw_pci_elem_hw_owned(q, owner_bit);
+}
+
+static int mlxsw_pci_napi_poll_cq_rx(struct napi_struct *napi, int budget)
 {
-	struct mlxsw_pci_queue *q = from_tasklet(q, t, u.cq.tasklet);
+	struct mlxsw_pci_queue *q = container_of(napi, struct mlxsw_pci_queue,
+						 u.cq.napi);
 	struct mlxsw_pci_queue *rdq = q->u.cq.dq;
 	struct mlxsw_pci *mlxsw_pci = q->pci;
-	int items = 0;
+	int work_done = 0;
 	char *cqe;
 
+	/* If the budget is 0, Rx processing should be skipped. */
+	if (unlikely(!budget))
+		return 0;
+
 	while ((cqe = mlxsw_pci_cq_sw_cqe_get(q))) {
 		u16 wqe_counter = mlxsw_pci_cqe_wqe_counter_get(cqe);
 		u8 sendq = mlxsw_pci_cqe_sr_get(q->u.cq.v, cqe);
@@ -712,22 +725,44 @@ static void mlxsw_pci_cq_rx_tasklet(struct tasklet_struct *t)
 		mlxsw_pci_cqe_rdq_handle(mlxsw_pci, rdq,
 					 wqe_counter, q->u.cq.v, cqe);
 
-		if (++items == MLXSW_PCI_CQ_MAX_HANDLE)
+		if (++work_done == budget)
 			break;
 	}
 
 	mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q);
 	mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, rdq);
-	mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q);
+
+	if (work_done < budget)
+		goto processing_completed;
+
+	/* The driver still has outstanding work to do, budget was exhausted.
+	 * Return exactly budget. In that case, the NAPI instance will be polled
+	 * again.
+	 */
+	if (mlxsw_pci_cq_cqe_to_handle(q))
+		goto out;
+
+	/* The driver processed all the completions and handled exactly
+	 * 'budget'. Return 'budget - 1' to distinguish from the case that
+	 * driver still has completions to handle.
+	 */
+	if (work_done == budget)
+		work_done--;
+
+processing_completed:
+	if (napi_complete_done(napi, work_done))
+		mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q);
+out:
+	return work_done;
 }
 
-static void mlxsw_pci_cq_tx_tasklet(struct tasklet_struct *t)
+static int mlxsw_pci_napi_poll_cq_tx(struct napi_struct *napi, int budget)
 {
-	struct mlxsw_pci_queue *q = from_tasklet(q, t, u.cq.tasklet);
+	struct mlxsw_pci_queue *q = container_of(napi, struct mlxsw_pci_queue,
+						 u.cq.napi);
 	struct mlxsw_pci_queue *sdq = q->u.cq.dq;
 	struct mlxsw_pci *mlxsw_pci = q->pci;
-	int credits = q->count >> 1;
-	int items = 0;
+	int work_done = 0;
 	char *cqe;
 
 	while ((cqe = mlxsw_pci_cq_sw_cqe_get(q))) {
@@ -752,11 +787,21 @@ static void mlxsw_pci_cq_tx_tasklet(struct tasklet_struct *t)
 		mlxsw_pci_cqe_sdq_handle(mlxsw_pci, sdq,
 					 wqe_counter, q->u.cq.v, ncqe);
 
-		if (++items == credits)
-			break;
+		work_done++;
 	}
 
+	/* If the budget is 0 napi_complete_done() should never be called. */
+	if (unlikely(!budget))
+		goto processing_completed;
+
+	work_done = min(work_done, budget - 1);
+	if (unlikely(!napi_complete_done(napi, work_done)))
+		goto out;
+
+processing_completed:
 	mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q);
+out:
+	return work_done;
 }
 
 static enum mlxsw_pci_cq_type
@@ -772,17 +817,29 @@ mlxsw_pci_cq_type(const struct mlxsw_pci *mlxsw_pci,
 	return MLXSW_PCI_CQ_RDQ;
 }
 
-static void mlxsw_pci_cq_tasklet_setup(struct mlxsw_pci_queue *q,
-				       enum mlxsw_pci_cq_type cq_type)
+static void mlxsw_pci_cq_napi_setup(struct mlxsw_pci_queue *q,
+				    enum mlxsw_pci_cq_type cq_type)
 {
+	struct mlxsw_pci *mlxsw_pci = q->pci;
+
 	switch (cq_type) {
 	case MLXSW_PCI_CQ_SDQ:
-		tasklet_setup(&q->u.cq.tasklet, mlxsw_pci_cq_tx_tasklet);
+		netif_napi_add(mlxsw_pci->napi_dev_tx, &q->u.cq.napi,
+			       mlxsw_pci_napi_poll_cq_tx);
 		break;
 	case MLXSW_PCI_CQ_RDQ:
-		tasklet_setup(&q->u.cq.tasklet, mlxsw_pci_cq_rx_tasklet);
+		netif_napi_add(mlxsw_pci->napi_dev_rx, &q->u.cq.napi,
+			       mlxsw_pci_napi_poll_cq_rx);
 		break;
 	}
+
+	napi_enable(&q->u.cq.napi);
+}
+
+static void mlxsw_pci_cq_napi_teardown(struct mlxsw_pci_queue *q)
+{
+	napi_disable(&q->u.cq.napi);
+	netif_napi_del(&q->u.cq.napi);
 }
 
 static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
@@ -817,7 +874,7 @@ static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
 	err = mlxsw_cmd_sw2hw_cq(mlxsw_pci->core, mbox, q->num);
 	if (err)
 		return err;
-	mlxsw_pci_cq_tasklet_setup(q, mlxsw_pci_cq_type(mlxsw_pci, q));
+	mlxsw_pci_cq_napi_setup(q, mlxsw_pci_cq_type(mlxsw_pci, q));
 	mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q);
 	mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q);
 	return 0;
@@ -826,6 +883,7 @@ static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
 static void mlxsw_pci_cq_fini(struct mlxsw_pci *mlxsw_pci,
 			      struct mlxsw_pci_queue *q)
 {
+	mlxsw_pci_cq_napi_teardown(q);
 	mlxsw_cmd_hw2sw_cq(mlxsw_pci->core, q->num);
 }
 
@@ -886,7 +944,7 @@ static void mlxsw_pci_eq_tasklet(struct tasklet_struct *t)
 	cq_count = mlxsw_pci->num_cqs;
 	for_each_set_bit(cqn, active_cqns, cq_count) {
 		q = mlxsw_pci_cq_get(mlxsw_pci, cqn);
-		tasklet_schedule(&q->u.cq.tasklet);
+		napi_schedule(&q->u.cq.napi);
 	}
 }
author	Amit Cohen <amcohen@nvidia.com>	2024-04-26 14:42:26 +0200
committer	David S. Miller <davem@davemloft.net>	2024-04-29 10:47:05 +0100
commit	3b0b3019dbea1a110ff01896e00fe30804bf78bc (patch)
tree	c0d86970d8ee07ef2745d7b0417423971eb7b55f
parent	c0d9267873bc0960f65ea43cca72d63dbe34202f (diff)
download	bluetooth-next-3b0b3019dbea1a110ff01896e00fe30804bf78bc.tar.gz