diff --git a/sys/dev/cxgb/cxgb_adapter.h b/sys/dev/cxgb/cxgb_adapter.h index 6910ba95a476..65c3fe703a79 100644 --- a/sys/dev/cxgb/cxgb_adapter.h +++ b/sys/dev/cxgb/cxgb_adapter.h @@ -298,8 +298,7 @@ struct sge_txq { unsigned long txq_frees; struct mtx lock; struct sg_ent txq_sgl[TX_MAX_SEGS / 2 + 1]; - bus_dma_segment_t txq_segs[TX_MAX_SEGS]; -#define TXQ_NAME_LEN 32 + #define TXQ_NAME_LEN 32 char lockbuf[TXQ_NAME_LEN]; }; diff --git a/sys/dev/cxgb/cxgb_multiq.c b/sys/dev/cxgb/cxgb_multiq.c index 537b1db8681d..b1c402c7e370 100644 --- a/sys/dev/cxgb/cxgb_multiq.c +++ b/sys/dev/cxgb/cxgb_multiq.c @@ -422,6 +422,7 @@ cxgb_pcpu_start_(struct sge_qset *qs, struct mbuf *immpkt, int tx_flush) txq = &qs->txq[TXQ_ETH]; mtx_assert(&txq->lock, MA_OWNED); + KASSERT(qs->idx == 0, ("invalid qs %d", qs->idx)); retry: if (!pi->link_config.link_ok) @@ -667,13 +668,14 @@ cxgb_pcpu_startup_threads(struct adapter *sc) #else nqsets = 1; #endif - for (j = 0; j < pi->nqsets; ++j) { + for (j = 0; j < nqsets; ++j) { struct sge_qset *qs; qs = &sc->sge.qs[pi->first_qset + j]; qs->port = pi; qs->qs_cpuid = ((pi->first_qset + j) % mp_ncpus); - device_printf(sc->dev, "starting thread for %d\n", qs->qs_cpuid); + device_printf(sc->dev, "starting thread for %d\n", + qs->qs_cpuid); kproc_create(cxgb_pcpu_start_proc, qs, &p, RFNOWAIT, 0, "cxgbsp"); @@ -686,11 +688,18 @@ void cxgb_pcpu_shutdown_threads(struct adapter *sc) { int i, j; + int nqsets; + +#ifdef IFNET_MULTIQUEUE + nqsets = pi->nqsets; +#else + nqsets = 1; +#endif for (i = 0; i < sc->params.nports; i++) { struct port_info *pi = &sc->port[i]; int first = pi->first_qset; - for (j = 0; j < pi->nqsets; j++) { + for (j = 0; j < nqsets; j++) { struct sge_qset *qs = &sc->sge.qs[first + j]; qs->qs_flags |= QS_EXITING; diff --git a/sys/dev/cxgb/cxgb_sge.c b/sys/dev/cxgb/cxgb_sge.c index c854d9d15837..06ab98d13b9b 100644 --- a/sys/dev/cxgb/cxgb_sge.c +++ b/sys/dev/cxgb/cxgb_sge.c @@ -927,7 +927,17 @@ txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs) txq->unacked &= 7; txqs->pidx = txq->pidx; txq->pidx += ndesc; - +#ifdef INVARIANTS + if (((txqs->pidx > txq->cidx) && + (txq->pidx < txqs->pidx) && + (txq->pidx >= txq->cidx)) || + ((txqs->pidx < txq->cidx) && + (txq->pidx >= txq-> cidx)) || + ((txqs->pidx < txq->cidx) && + (txq->cidx < txqs->pidx))) + panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d", + txqs->pidx, txq->pidx, txq->cidx); +#endif if (txq->pidx >= txq->size) { txq->pidx -= txq->size; txq->gen ^= 1; @@ -1205,23 +1215,23 @@ t3_encap(struct sge_qset *qs, struct mbuf **m, int count) struct work_request_hdr *wrp; struct tx_sw_desc *txsd; struct sg_ent *sgp, *sgl; - bus_dma_segment_t *segs; uint32_t wr_hi, wr_lo, sgl_flits; + bus_dma_segment_t segs[TX_MAX_SEGS]; struct tx_desc *txd; struct mbuf_vec *mv; struct mbuf_iovec *mi; DPRINTF("t3_encap cpu=%d ", curcpu); + KASSERT(qs->idx == 0, ("invalid qs %d", qs->idx)); mi = NULL; pi = qs->port; sc = pi->adapter; txq = &qs->txq[TXQ_ETH]; - txsd = &txq->sdesc[txq->pidx]; txd = &txq->desc[txq->pidx]; + txsd = &txq->sdesc[txq->pidx]; sgl = txq->txq_sgl; - segs = txq->txq_segs; m0 = *m; DPRINTF("t3_encap port_id=%d qsidx=%d ", pi->port_id, pi->first_qset); @@ -1240,6 +1250,8 @@ t3_encap(struct sge_qset *qs, struct mbuf **m, int count) #endif KASSERT(txsd->mi.mi_base == NULL, ("overwrting valid entry mi_base==%p", txsd->mi.mi_base)); + if (cxgb_debug) + printf("uipc_mvec PIO_LEN=%ld\n", PIO_LEN); if (count > 1) { panic("count > 1 not support in CVS\n"); @@ -1253,7 +1265,7 @@ t3_encap(struct sge_qset *qs, struct mbuf **m, int count) } KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d count=%d", nsegs, count)); - if (m0->m_pkthdr.len > PIO_LEN) { + if (!(m0->m_pkthdr.len <= PIO_LEN)) { mi_collapse_mbuf(&txsd->mi, m0); mi = &txsd->mi; } @@ -1393,8 +1405,11 @@ t3_encap(struct sge_qset *qs, struct mbuf **m, int count) write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo); check_ring_tx_db(pi->adapter, txq); - if ((m0->m_type == MT_DATA) && ((m0->m_flags & (M_EXT|M_NOFREE)) == M_EXT)) { + if ((m0->m_type == MT_DATA) && + ((m0->m_flags & (M_EXT|M_NOFREE)) == M_EXT) && + (m0->m_ext.ext_type != EXT_PACKET)) { m0->m_flags &= ~M_EXT ; + mbufs_outstanding--; m_free(m0); } @@ -1797,6 +1812,7 @@ t3_free_tx_desc(struct sge_txq *q, int reclaimable) cidx = q->cidx; txsd = &q->sdesc[cidx]; DPRINTF("reclaiming %d WR\n", reclaimable); + mtx_assert(&q->lock, MA_OWNED); while (reclaimable--) { DPRINTF("cidx=%d d=%p\n", cidx, txsd); if (txsd->mi.mi_base != NULL) { @@ -2208,6 +2224,7 @@ t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, } init_qset_cntxt(q, id); + q->idx = id; if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, @@ -3203,7 +3220,11 @@ t3_add_attach_sysctls(adapter_t *sc) SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ext_freed", CTLFLAG_RD, &cxgb_ext_freed, - 0, "#times a cluster was freed through ext_free"); + 0, "#times a cluster was freed through ext_free"); + SYSCTL_ADD_INT(ctx, children, OID_AUTO, + "mbufs_outstanding", + CTLFLAG_RD, &mbufs_outstanding, + 0, "#mbufs in flight in the driver"); } diff --git a/sys/dev/cxgb/sys/mvec.h b/sys/dev/cxgb/sys/mvec.h index 57a6478cd530..8395eaede3ef 100644 --- a/sys/dev/cxgb/sys/mvec.h +++ b/sys/dev/cxgb/sys/mvec.h @@ -45,6 +45,7 @@ void cxgb_cache_refill(void); extern int cxgb_cached_allocations; extern int cxgb_cached; extern int cxgb_ext_freed; +extern int mbufs_outstanding; #define mtomv(m) ((struct mbuf_vec *)((m)->m_pktdat)) #define M_IOVEC 0x100000 /* mbuf immediate data area is used for cluster ptrs */ @@ -162,7 +163,7 @@ m_explode(struct mbuf *m) static __inline void busdma_map_mbuf_fast(struct mbuf *m, bus_dma_segment_t *seg) { - seg->ds_addr = pmap_kextract((vm_offset_t)m->m_data); + seg->ds_addr = pmap_kextract(mtod(m, vm_offset_t)); seg->ds_len = m->m_len; } @@ -229,11 +230,17 @@ m_free_iovec(struct mbuf *m, int type) static __inline void m_freem_iovec(struct mbuf_iovec *mi) { - struct mbuf *m; + struct mbuf *m = (struct mbuf *)mi->mi_base; switch (mi->mi_type) { case EXT_MBUF: - m_free_fast((struct mbuf *)mi->mi_base); +#ifdef PIO_LEN + KASSERT(m->m_pkthdr.len > PIO_LEN, ("freeing PIO buf")); +#endif + KASSERT((mi->mi_flags & M_NOFREE) == 0, ("no free set on mbuf")); + KASSERT(m->m_next == NULL, ("freeing chain")); + mbufs_outstanding--; + m_free_fast(m); break; case EXT_IOVEC: case EXT_CLIOVEC: diff --git a/sys/dev/cxgb/sys/uipc_mvec.c b/sys/dev/cxgb/sys/uipc_mvec.c index 9dc902b69663..4d4354f3b803 100644 --- a/sys/dev/cxgb/sys/uipc_mvec.c +++ b/sys/dev/cxgb/sys/uipc_mvec.c @@ -75,6 +75,7 @@ extern uint32_t mb_free_vec_free; uma_zone_t zone_miovec; static int mi_inited = 0; +int mbufs_outstanding = 0; void mi_init(void) @@ -198,12 +199,18 @@ busdma_map_sg_collapse(struct mbuf **m, bus_dma_segment_t *segs, int *nsegs) struct mbuf *marray[TX_MAX_SEGS]; int i, type, seg_count, defragged = 0, err = 0; struct mbuf_vec *mv; + int skipped, freed, outstanding; + + KASSERT(n->m_pkthdr.len, ("packet has zero header len")); if (n->m_flags & M_PKTHDR && !SLIST_EMPTY(&n->m_pkthdr.tags)) m_tag_delete_chain(n, NULL); + if (cxgb_debug) + printf("cxgb_sge PIO_LEN=%ld\n", PIO_LEN); + if (n->m_pkthdr.len <= PIO_LEN) return (0); retry: @@ -211,16 +218,21 @@ retry: if (n->m_next == NULL) { busdma_map_mbuf_fast(n, segs); *nsegs = 1; + if ((n->m_flags & M_NOFREE) == 0) + mbufs_outstanding++; return (0); } + skipped = freed = outstanding = 0; while (n && seg_count < TX_MAX_SEGS) { marray[seg_count] = n; - + /* * firmware doesn't like empty segments */ - if (__predict_true(n->m_len != 0)) + if (__predict_true(n->m_len != 0)) seg_count++; + else + skipped++; n = n->m_next; } @@ -265,24 +277,35 @@ retry: } n = *m; while (n) { - if (n->m_ext.ext_type == EXT_PACKET) + if (n->m_len == 0) + /* do nothing - free if mbuf or cluster */; + else if (((n->m_flags & M_EXT) == 0) || + ((n->m_flags & M_EXT) && (n->m_ext.ext_type == EXT_PACKET)) || + (n->m_flags & M_NOFREE)) goto skip; - else if (n->m_len == 0) - /* do nothing */; else if ((n->m_flags & (M_EXT|M_NOFREE)) == M_EXT) n->m_flags &= ~M_EXT; - else - goto skip; mhead = n->m_next; m_free(n); n = mhead; + freed++; continue; skip: + /* + * is an immediate mbuf or is from the packet zone + */ + outstanding++; n = n->m_next; } *nsegs = seg_count; *m = m0; DPRINTF("pktlen=%d m0=%p *m=%p m=%p\n", m0->m_pkthdr.len, m0, *m, m); + mbufs_outstanding += outstanding; + KASSERT(outstanding + freed == skipped + seg_count, + ("outstanding=%d freed=%d skipped=%d seg_count=%d", + outstanding, freed, skipped, seg_count)); + + return (0); err_out: m_freem(*m); @@ -325,6 +348,7 @@ busdma_map_sg_vec(struct mbuf **m, struct mbuf **mret, bus_dma_segment_t *segs, (*mp)->m_next = (*mp)->m_nextpkt = NULL; if (((*mp)->m_flags & (M_EXT|M_NOFREE)) == M_EXT) { (*mp)->m_flags &= ~M_EXT; + mbufs_outstanding--; m_free(*mp); } } @@ -336,29 +360,17 @@ busdma_map_sg_vec(struct mbuf **m, struct mbuf **mret, bus_dma_segment_t *segs, void mb_free_ext_fast(struct mbuf_iovec *mi, int type, int idx) { - u_int cnt; int dofree; caddr_t cl; /* Account for lazy ref count assign. */ dofree = (mi->mi_refcnt == NULL); - - /* - * This is tricky. We need to make sure to decrement the - * refcount in a safe way but to also clean up if we're the - * last reference. This method seems to do it without race. - */ - while (dofree == 0) { - cnt = *(mi->mi_refcnt); - if (cnt == 1) { - dofree = 1; - break; - } - if (atomic_cmpset_int(mi->mi_refcnt, cnt, cnt - 1)) { - if (cnt == 1) - dofree = 1; - break; - } + if (dofree == 0) { + KASSERT(mi->mi_type != EXT_MBUF, + ("refcnt must be null for mbuf")); + if (*(mi->mi_refcnt) == 1 || + atomic_fetchadd_int(mi->mi_refcnt, -1) == 1) + dofree = 1; } if (dofree == 0) return; @@ -366,6 +378,8 @@ mb_free_ext_fast(struct mbuf_iovec *mi, int type, int idx) cl = mi->mi_base; switch (type) { case EXT_MBUF: + KASSERT((mi->mi_flags & M_NOFREE) == 0, ("no free set on mbuf")); + mbufs_outstanding--; m_free_fast((struct mbuf *)cl); break; case EXT_CLUSTER: @@ -395,6 +409,7 @@ mb_free_ext_fast(struct mbuf_iovec *mi, int type, int idx) mi->mi_ext.ext_args); break; case EXT_PACKET: + mbufs_outstanding--; if (*(mi->mi_refcnt) == 0) *(mi->mi_refcnt) = 1; uma_zfree(zone_pack, mi->mi_mbuf);