diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 8c344f70849fb..bdbafe09714d4 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -1294,6 +1294,17 @@ struct macb_queue {
 	struct work_struct	tx_error_task;
 	bool			txubr_pending;
 	bool			tx_pending;
+
+	/* TX stall watchdog -- see macb_tx_stall_watchdog() in macb_main.c.
+	 * tx_stall_tail_moved is set by macb_tx_complete() under tx_ptr_lock
+	 * whenever tx_tail advances, and cleared by the watchdog tick on the
+	 * same lock. A bool avoids the index-aliasing false-positive that a
+	 * snapshot-of-tx_tail comparison would have when the ring index space
+	 * happens to wrap to the same value between two ticks.
+	 */
+	struct delayed_work	tx_stall_watchdog_work;
+	bool			tx_stall_tail_moved;
+
 	struct napi_struct	napi_tx;
 
 	dma_addr_t		rx_ring_dma;
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 606e1151ef71d..423aac02a0d47 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -1505,6 +1505,8 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
 				  packets, bytes);
 
 	queue->tx_tail = tail;
+	if (packets)
+		queue->tx_stall_tail_moved = true;
 	if (__netif_subqueue_stopped(bp->dev, queue_index) &&
 	    CIRC_CNT(queue->tx_head, queue->tx_tail,
 		     bp->tx_ring_size) <= MACB_TX_WAKEUP_THRESH(bp))
@@ -1949,6 +1951,12 @@ static void macb_tx_restart(struct macb_queue *queue)
 
 	spin_lock(&bp->lock);
 	macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
+	/* Flush the PCIe posted-write queue so the TSTART doorbell
+	 * reliably reaches the MAC. Without this, the write can sit
+	 * in the fabric and the MAC never advances, causing a silent
+	 * TX stall.
+	 */
+	(void)macb_readl(bp, NCR);
 	spin_unlock(&bp->lock);
 
 out_tx_ptr_unlock:
@@ -1993,17 +2001,24 @@ static int macb_tx_poll(struct napi_struct *napi, int budget)
 	if (work_done < budget && napi_complete_done(napi, work_done)) {
 		queue_writel(queue, IER, MACB_BIT(TCOMP));
 
-		/* Packet completions only seem to propagate to raise
-		 * interrupts when interrupts are enabled at the time, so if
-		 * packets were sent while interrupts were disabled,
-		 * they will not cause another interrupt to be generated when
-		 * interrupts are re-enabled.
-		 * Check for this case here to avoid losing a wakeup. This can
-		 * potentially race with the interrupt handler doing the same
-		 * actions if an interrupt is raised just after enabling them,
-		 * but this should be harmless.
+		/* TCOMP events that fire while the interrupt is masked do
+		 * not re-fire when IER is re-enabled. Catch this two ways
+		 * to avoid losing a wakeup:
+		 *
+		 * (1) Read ISR -- catches completions the hardware flagged
+		 *     but that we did not see as an interrupt. The MMIO
+		 *     read doubles as a PCIe read barrier, flushing any
+		 *     in-flight descriptor TX_USED DMA writes into memory.
+		 * (2) macb_tx_complete_pending() inspects the ring after
+		 *     that flush, catching a descriptor whose TX_USED is
+		 *     now visible as a result of the barrier.
+		 *
+		 * This can race with the interrupt handler taking the same
+		 * path if an interrupt fires just after the IER write;
+		 * rescheduling NAPI in that case is harmless.
 		 */
-		if (macb_tx_complete_pending(queue)) {
+		if ((queue_readl(queue, ISR) & MACB_BIT(TCOMP)) ||
+		    macb_tx_complete_pending(queue)) {
 			queue_writel(queue, IDR, MACB_BIT(TCOMP));
 			if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
 				queue_writel(queue, ISR, MACB_BIT(TCOMP));
@@ -2015,6 +2030,63 @@ static int macb_tx_poll(struct napi_struct *napi, int budget)
 	return work_done;
 }
 
+#define MACB_TX_STALL_INTERVAL_MS 1000
+
+/* TX stall watchdog.
+ *
+ * Defence-in-depth against lost TCOMP interrupts. macb already has a
+ * recovery chain (tx_pending -> txubr_pending -> macb_tx_restart())
+ * that fires on TCOMP; if TCOMP itself is lost the TX ring stalls
+ * silently until something else kicks TSTART. This watchdog runs
+ * once per second per queue and calls macb_tx_restart() if the ring
+ * is non-empty and tx_tail has not advanced since the previous tick.
+ *
+ * Movement is tracked via the tx_stall_tail_moved boolean rather
+ * than by snapshotting tx_tail. Per-queue ring indices are bounded
+ * (and reused), so a snapshot comparison can false-positive when the
+ * index happens to land on the same value between two ticks under
+ * sustained load. The boolean is set by macb_tx_complete() whenever
+ * tx_tail advances and cleared by this watchdog after each tick;
+ * both writes are under tx_ptr_lock, so no atomic is required.
+ *
+ * macb_tx_restart() already checks the hardware's TBQP against the
+ * driver's head index before re-asserting TSTART, so on a healthy
+ * ring this is a no-op at the hardware level. The watchdog only
+ * adds the missing trigger.
+ */
+static void macb_tx_stall_watchdog(struct work_struct *work)
+{
+	struct macb_queue *queue = container_of(to_delayed_work(work),
+						struct macb_queue,
+						tx_stall_watchdog_work);
+	struct macb *bp = queue->bp;
+	unsigned int cur_tail, cur_head;
+	bool stalled = false;
+	unsigned long flags;
+
+	if (!netif_running(bp->dev))
+		return;
+
+	spin_lock_irqsave(&queue->tx_ptr_lock, flags);
+	cur_tail = queue->tx_tail;
+	cur_head = queue->tx_head;
+	if (cur_head != cur_tail && !queue->tx_stall_tail_moved)
+		stalled = true;
+	queue->tx_stall_tail_moved = false;
+	spin_unlock_irqrestore(&queue->tx_ptr_lock, flags);
+
+	if (stalled) {
+		netdev_warn_once(bp->dev,
+				 "TX stall detected on queue %u (tail=%u head=%u); re-kicking TSTART\n",
+				 (unsigned int)(queue - bp->queues),
+				 cur_tail, cur_head);
+		macb_tx_restart(queue);
+	}
+
+	schedule_delayed_work(&queue->tx_stall_watchdog_work,
+			      msecs_to_jiffies(MACB_TX_STALL_INTERVAL_MS));
+}
+
 static void macb_hresp_error_task(struct work_struct *work)
 {
 	struct macb *bp = from_work(bp, work, hresp_err_bh_work);
@@ -2630,6 +2702,10 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 		queue->tx_pending = 1;
 	macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
+	/* Flush the PCIe posted-write queue; see the comment in
+	 * macb_tx_restart() for the reasoning.
+	 */
+	(void)macb_readl(bp, NCR);
 	spin_unlock(&bp->lock);
 
 	if (CIRC_SPACE(queue->tx_head, queue->tx_tail, bp->tx_ring_size) < 1)
@@ -3277,6 +3353,9 @@ static int macb_open(struct net_device *dev)
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
 		napi_enable(&queue->napi_rx);
 		napi_enable(&queue->napi_tx);
+		queue->tx_stall_tail_moved = true;
+		schedule_delayed_work(&queue->tx_stall_watchdog_work,
+				      msecs_to_jiffies(MACB_TX_STALL_INTERVAL_MS));
 	}
 
 	macb_init_hw(bp);
@@ -3323,6 +3402,7 @@ static int macb_close(struct net_device *dev)
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
 		napi_disable(&queue->napi_rx);
 		napi_disable(&queue->napi_tx);
+		cancel_delayed_work_sync(&queue->tx_stall_watchdog_work);
 		netdev_tx_reset_queue(netdev_get_tx_queue(dev, q));
 	}
 
@@ -4921,6 +5001,8 @@ static int macb_init(struct platform_device *pdev)
 		}
 
 		INIT_WORK(&queue->tx_error_task, macb_tx_error_task);
+		INIT_DELAYED_WORK(&queue->tx_stall_watchdog_work,
+				  macb_tx_stall_watchdog);
 
 		q++;
 	}
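
A note for reviewers on the tx_stall_tail_moved design: the index-aliasing
argument is easy to check without hardware. The sketch below is a standalone
userspace model (illustrative names, not driver code) that pits the boolean
detector used by this patch against a naive snapshot-of-tail detector. With
exactly one ring's worth of completions between two ticks, the masked tail
index lands back on its previous value, so the snapshot detector reports a
stall that never happened; the boolean detector does not.

	#include <stdbool.h>
	#include <stdio.h>

	#define RING_SIZE 64	/* stands in for bp->tx_ring_size */

	struct sim_queue {
		unsigned int head, tail;	/* free-running counts, masked on use */
		bool tail_moved;		/* models tx_stall_tail_moved */
	};

	/* Models macb_tx_complete(): retire n descriptors, flag progress. */
	static void sim_complete(struct sim_queue *q, unsigned int n)
	{
		if (n) {
			q->tail += n;
			q->tail_moved = true;
		}
	}

	/* Snapshot detector: remembers the masked tail from the last tick. */
	static bool stalled_by_snapshot(const struct sim_queue *q,
					unsigned int *last)
	{
		unsigned int cur = q->tail % RING_SIZE;
		bool stalled = q->head != q->tail && cur == *last;

		*last = cur;
		return stalled;
	}

	/* Flag detector: what macb_tx_stall_watchdog() does. */
	static bool stalled_by_flag(struct sim_queue *q)
	{
		bool stalled = q->head != q->tail && !q->tail_moved;

		q->tail_moved = false;	/* re-arm for the next tick */
		return stalled;
	}

	int main(void)
	{
		struct sim_queue q = { .head = 3 * RING_SIZE, .tail = 0 };
		unsigned int last = 0;	/* tick N: masked tail was 0 */

		/* Exactly RING_SIZE completions land between tick N and
		 * tick N+1, so the masked tail wraps onto the snapshot.
		 */
		sim_complete(&q, RING_SIZE);

		printf("snapshot detector: %s\n",
		       stalled_by_snapshot(&q, &last) ?
		       "STALL (false positive)" : "ok");
		printf("flag detector:     %s\n",
		       stalled_by_flag(&q) ? "STALL" : "ok (correct)");
		return 0;
	}

The real driver masks tx_tail through its ring-wrap helper rather than with
a modulo, but the aliasing mechanism is the same: any detector keyed on the
index value alone cannot distinguish "no movement" from "moved a multiple of
the ring size".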
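On the TSTART read-back: the pattern leans on the PCIe rule that a non-posted
read from a device cannot complete until earlier posted writes to that device
have drained from the fabric. Below is a minimal compile-time sketch of the
code shape only, under stated assumptions: the register model is a plain
uint32_t accessed through a volatile pointer, the bit value is illustrative,
and the actual ordering guarantee exists only on a real MMIO mapping via
macb_readl()/macb_writel(), not on ordinary memory as modelled here.

	#include <stdint.h>

	#define TSTART_BIT	(1u << 9)	/* illustrative NCR.TSTART position */

	/* reg models a memory-mapped NCR; volatile keeps the compiler from
	 * eliding or reordering the accesses, mirroring readl()/writel().
	 */
	static inline void kick_tstart(volatile uint32_t *reg)
	{
		*reg |= TSTART_BIT;	/* posted write: may linger in the fabric */
		(void)*reg;		/* non-posted read: completes only after
					 * the write has reached the device, so
					 * the doorbell is known to have landed */
	}

	int main(void)
	{
		uint32_t fake_ncr = 0;	/* stands in for the mapped register */

		kick_tstart(&fake_ncr);
		return fake_ncr == TSTART_BIT ? 0 : 1;
	}

The cost of the read-back is one MMIO round trip per TSTART, paid only on the
transmit-kick paths, which seems a fair trade against a silent ring stall.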