author    Willy Tarreau <w@1wt.eu>    2013-06-27 14:52:43 +0200
committer Willy Tarreau <w@1wt.eu>    2016-09-21 12:13:43 +0200
commit    da8914fa4d6ef52283bb66b86f645e44e8635a5d (patch)
tree      0f284a07ed8f10aece0852361f164b1877770c7c
parent    e25cba9205a182f829ee4ddae5c528863568ea9d (diff)
download  testing-txmark/3.11-ndiv.tar.gz

net: mvneta: add support for multiplying Tx packets using SO_MARK (branch txmark/3.11-ndiv)
Sometimes it's quite convenient to be able to emit wire-rate packets, but passing each packet via its own skb is very expensive and generally makes this impossible. Here we use a different approach.

The xmit function checks if the skb belongs to a SOCK_PACKET socket, and if so, it uses sk->sk_mark as the number of replicas to send (0 by default). It then iterates over the same skb for the announced number of replicas, decrementing sk_mark as it queues them.

In order to know when to release the DMA mapping, an extra skb_get() is performed per packet. That way we can check skb_shared() upon Tx completion, and release the skb and the DMA mapping only once the last packet for that skb has completed.

The caller just has to set the desired number of replicas by issuing a setsockopt(SOL_SOCKET, SO_MARK) on the socket. On return it knows how many of them were left to be sent, so it can count the effective number of packets sent (1 + initial - final).

Using this mechanism, it is possible to produce 1.485 Mpps on a very small fanless machine.
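For illustration, a minimal sketch of the caller side described above, assuming a kernel carrying this patch. Note that while the text above says SOCK_PACKET, the driver actually tests sk->sk_family == AF_PACKET, so a regular AF_PACKET/SOCK_RAW socket is used here; the interface name, frame contents and replica count are illustrative assumptions, and both the packet socket and SO_MARK require CAP_NET_ADMIN.

#include <arpa/inet.h>
#include <linux/if_packet.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	/* Minimal broadcast Ethernet frame, padded to 60 bytes. */
	unsigned char frame[60] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff,	/* dst */
		0x02, 0x00, 0x00, 0x00, 0x00, 0x01,	/* src (locally administered) */
		0x08, 0x00,				/* EtherType: IPv4 */
	};
	int initial = 100000, final = 0;
	socklen_t optlen = sizeof(final);
	struct sockaddr_ll sll = {
		.sll_family   = AF_PACKET,
		.sll_protocol = htons(ETH_P_ALL),
		.sll_ifindex  = if_nametoindex("eth0"),	/* assumed interface */
	};

	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	if (fd < 0 || bind(fd, (struct sockaddr *)&sll, sizeof(sll)) < 0) {
		perror("socket/bind");
		return 1;
	}

	/* Ask the patched driver for up to <initial> extra copies. */
	setsockopt(fd, SOL_SOCKET, SO_MARK, &initial, sizeof(initial));

	send(fd, frame, sizeof(frame), 0);

	/* The driver decrements sk_mark per replica and stops early when
	 * the Tx ring fills up, so read back what is left to compute the
	 * effective count (1 + initial - final), as the text explains.
	 */
	getsockopt(fd, SOL_SOCKET, SO_MARK, &final, &optlen);
	printf("emitted %d packet(s)\n", 1 + initial - final);

	close(fd);
	return 0;
}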
-rw-r--r--    drivers/net/ethernet/marvell/mvneta.c    105
1 file changed, 67 insertions(+), 38 deletions(-)
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 775c01c0dfaaf7..eef7fb190d317e 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -1294,6 +1294,13 @@ static void mvneta_txq_bufs_free(const struct mvneta_port *pp,
continue;
}
+ /* We always hold one extra reference on the skb per queued packet,
+ * so we can drop one reference here and then check whether other
+ * pending replicas still share the skb.
+ */
+ dev_kfree_skb_any(skb);
+ if (skb_shared(skb))
+ continue;
+
dma_unmap_single(pp->dev->dev.parent, tx_desc->buf_phys_addr,
tx_desc->data_size, DMA_TO_DEVICE);
dev_kfree_skb_any(skb);
@@ -1574,8 +1581,9 @@ static int mvneta_tx(struct sk_buff *skb, struct net_device *dev)
struct mvneta_tx_queue *txq = &pp->txqs[txq_id];
struct mvneta_tx_desc *tx_desc;
struct netdev_queue *nq;
- int frags = 0;
+ int frags = 0, sent = 0, len;
u32 tx_cmd;
+ u32 phys_addr;
if (!netif_running(dev))
goto out;
@@ -1587,58 +1595,79 @@ static int mvneta_tx(struct sk_buff *skb, struct net_device *dev)
frags = skb_shinfo(skb)->nr_frags + 1;
nq = netdev_get_tx_queue(dev, txq_id);
- /* Get a descriptor for the first part of the packet */
- tx_desc = mvneta_txq_next_desc_get(txq);
+#if 0
+ printk(KERN_DEBUG "%s tx: q=%d fr=%d sk=%p fa=%d rto=%d sto=%d pri=%u mk=%u low=%d\n",
+ dev_name(&dev->dev), txq_id, frags,
+ skb->sk,
+ skb->sk ? (int)skb->sk->sk_family : -1,
+ skb->sk ? (int)skb->sk->sk_rcvtimeo : -1,
+ skb->sk ? (int)skb->sk->sk_sndtimeo : -1,
+ skb->sk ? (int)skb->sk->sk_priority : -1,
+ skb->sk ? (int)skb->sk->sk_mark : -1,
+ skb->sk ? (int)skb->sk->sk_rcvlowat : -1);
+#endif
tx_cmd = mvneta_skb_tx_csum(pp, skb);
+ tx_cmd |= (frags == 1) ? MVNETA_TXD_FLZ_DESC : MVNETA_TXD_F_DESC;
- tx_desc->data_size = skb_headlen(skb);
+ len = skb_headlen(skb);
+ phys_addr = dma_map_single(dev->dev.parent, skb->data, len, DMA_TO_DEVICE);
- tx_desc->buf_phys_addr = dma_map_single(dev->dev.parent, skb->data,
- tx_desc->data_size,
- DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(dev->dev.parent,
- tx_desc->buf_phys_addr))) {
- mvneta_txq_desc_put(txq);
+ if (unlikely(dma_mapping_error(dev->dev.parent, phys_addr))) {
frags = 0;
goto out;
}
- if (frags == 1) {
- /* First and Last descriptor */
- tx_cmd |= MVNETA_TXD_FLZ_DESC;
- tx_desc->command = tx_cmd;
- txq->tx_skb[txq->txq_put_index] = skb;
- mvneta_txq_inc_put(txq);
- } else {
- /* First but not Last */
- tx_cmd |= MVNETA_TXD_F_DESC;
- txq->tx_skb[txq->txq_put_index] = NULL;
- mvneta_txq_inc_put(txq);
- tx_desc->command = tx_cmd;
- /* Continue with other skb fragments */
- if (mvneta_tx_frag_process(pp, skb, txq)) {
- dma_unmap_single(dev->dev.parent,
- tx_desc->buf_phys_addr,
- tx_desc->data_size,
- DMA_TO_DEVICE);
- mvneta_txq_desc_put(txq);
- frags = 0;
- goto out;
+ while (1) {
+ /* Get a descriptor for the first part of the packet */
+ tx_desc = mvneta_txq_next_desc_get(txq);
+ tx_desc->data_size = len;
+ tx_desc->buf_phys_addr = phys_addr;
+
+ if (frags == 1) {
+ /* First and Last descriptor */
+ tx_desc->command = tx_cmd;
+ txq->tx_skb[txq->txq_put_index] = skb;
+ mvneta_txq_inc_put(txq);
+ } else {
+ /* First but not Last */
+ txq->tx_skb[txq->txq_put_index] = NULL;
+ mvneta_txq_inc_put(txq);
+ tx_desc->command = tx_cmd;
+ /* Continue with other skb fragments */
+ if (mvneta_tx_frag_process(pp, skb, txq)) {
+ dma_unmap_single(dev->dev.parent,
+ phys_addr, len,
+ DMA_TO_DEVICE);
+ mvneta_txq_desc_put(txq);
+ frags = 0;
+ goto out;
+ }
}
- }
- txq->count += frags;
- mvneta_txq_pend_desc_add(pp, txq, frags);
+ skb_get(skb); /* keep one refcount per packet to be sent */
+
+ txq->count += frags;
+ sent += frags;
+ mvneta_txq_pend_desc_add(pp, txq, frags);
+
+ if (!skb->sk || skb->sk->sk_family != AF_PACKET || !skb->sk->sk_mark)
+ break;
+
+ if (txq->size - txq->count < MAX_SKB_FRAGS + 1)
+ break;
+
+ skb->sk->sk_mark--;
+ }
if (txq->size - txq->count < MAX_SKB_FRAGS + 1)
netif_tx_stop_queue(nq);
out:
- if (frags > 0) {
+ if (sent > 0) {
u64_stats_update_begin(&pp->tx_stats.syncp);
- pp->tx_stats.packets++;
- pp->tx_stats.bytes += skb->len;
+ pp->tx_stats.packets += sent;
+ pp->tx_stats.bytes += sent * skb->len;
u64_stats_update_end(&pp->tx_stats.syncp);
} else {
@@ -1652,7 +1681,7 @@ out:
/* If after calling mvneta_txq_done, count equals
* frags, we need to set the timer
*/
- if (txq->count == frags && frags > 0)
+ if (sent > 0 && txq->count == sent)
mvneta_add_tx_done_timer(pp);
return NETDEV_TX_OK;
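As a reviewer's aid, the refcount accounting above can be reduced to a tiny standalone model: one extra reference is taken per transmitted copy, and the completion path drops a reference first, then unmaps and frees only once the skb is no longer shared. The names below (tx_one_replica, complete_one) and the plain counter standing in for skb->users are illustrative, not part of the patch.

#include <assert.h>
#include <stdio.h>

static int users = 1;	/* like a freshly allocated skb: one reference */
static int unmapped;	/* the DMA mapping must be released exactly once */

static void tx_one_replica(void)
{
	users++;		/* skb_get() in the mvneta_tx() loop */
}

static void complete_one(void)
{
	users--;		/* first dev_kfree_skb_any(): drop one reference */
	if (users > 1)		/* skb_shared(): other replicas still queued */
		return;
	unmapped++;		/* dma_unmap_single() runs for the last replica */
	users--;		/* final dev_kfree_skb_any() releases the skb */
}

int main(void)
{
	int replicas = 4, i;

	for (i = 0; i < replicas; i++)
		tx_one_replica();	/* one extra reference per packet sent */

	for (i = 0; i < replicas; i++)
		complete_one();		/* mvneta_txq_bufs_free(), per packet */

	assert(users == 0 && unmapped == 1);
	printf("all replicas completed, buffer unmapped exactly once\n");
	return 0;
}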