diff --git a/sys/dev/cxgb/common/cxgb_vsc8211.c b/sys/dev/cxgb/common/cxgb_vsc8211.c
index c3b3532599ed..382ecc73ccac 100644
--- a/sys/dev/cxgb/common/cxgb_vsc8211.c
+++ b/sys/dev/cxgb/common/cxgb_vsc8211.c
@@ -248,4 +248,5 @@ void t3_vsc8211_phy_prep(struct cphy *phy, adapter_t *adapter, int phy_addr,
 			 const struct mdio_ops *mdio_ops)
 {
 	cphy_init(phy, adapter, phy_addr, &vsc8211_ops, mdio_ops);
+	t3_os_sleep(20);	/* PHY needs ~10ms to start responding to MDIO */
 }
diff --git a/sys/dev/cxgb/cxgb_adapter.h b/sys/dev/cxgb/cxgb_adapter.h
index 6605ea9693e4..9964babbed33 100644
--- a/sys/dev/cxgb/cxgb_adapter.h
+++ b/sys/dev/cxgb/cxgb_adapter.h
@@ -117,10 +117,13 @@ struct port_info {
 #else
 	struct mtx	lock;
 #endif
-	int		port_id;
-	uint8_t		hw_addr[ETHER_ADDR_LEN];
+	uint8_t		port_id;
+	uint8_t		tx_chan;
+	uint8_t		txpkt_intf;
 	uint8_t		nqsets;
 	uint8_t		first_qset;
+
+	uint8_t		hw_addr[ETHER_ADDR_LEN];
 	struct taskqueue	*tq;
 	struct task	start_task;
 	struct task	timer_reclaim_task;
@@ -515,7 +518,7 @@ void t3_sge_deinit_sw(adapter_t *);
 
 void t3_rx_eth_lro(adapter_t *adap, struct sge_rspq *rq, struct mbuf *m,
     int ethpad, uint32_t rss_hash, uint32_t rss_csum, int lro);
-void t3_rx_eth(struct port_info *p, struct sge_rspq *rq, struct mbuf *m, int ethpad);
+void t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad);
 void t3_lro_flush(adapter_t *adap, struct sge_qset *qs, struct lro_state *state);
 
 void t3_add_sysctls(adapter_t *sc);
diff --git a/sys/dev/cxgb/cxgb_lro.c b/sys/dev/cxgb/cxgb_lro.c
index 9f64a0de255d..a502859572b2 100644
--- a/sys/dev/cxgb/cxgb_lro.c
+++ b/sys/dev/cxgb/cxgb_lro.c
@@ -199,7 +199,7 @@ lro_flush_session(struct sge_qset *qs, struct t3_lro_session *s, struct mbuf *m)
 
 	MBUF_HEADER_CHECK(sm);
 	sm->m_flags |= M_LRO;
-	t3_rx_eth(qs->port, &qs->rspq, sm, 2);
+	t3_rx_eth(qs->port->adapter, &qs->rspq, sm, 2);
 
 	if (m) {
 		s->head = m;
@@ -341,7 +341,6 @@ t3_rx_eth_lro(adapter_t *adap, struct sge_rspq *rq, struct mbuf *m,
 	struct ip *ih;
 	struct tcphdr *th;
 	struct t3_lro_session *s = NULL;
-	struct port_info *pi = qs->port;
 
 	if (lro == 0)
 		goto no_lro;
@@ -349,9 +348,6 @@ t3_rx_eth_lro(adapter_t *adap, struct sge_rspq *rq, struct mbuf *m,
 	if (!can_lro_packet(cpl, rss_csum))
 		goto no_lro;
 
-	if (&adap->port[cpl->iff] != pi)
-		panic("bad port index %d\n", cpl->iff);
-
 	ih = (struct ip *)(eh + 1);
 	th = (struct tcphdr *)(ih + 1);
 
@@ -366,9 +362,11 @@ t3_rx_eth_lro(adapter_t *adap, struct sge_rspq *rq, struct mbuf *m,
 		if (lro_update_session(s, m)) {
 			lro_flush_session(qs, s, m);
 		}
+#ifdef notyet
 		if (__predict_false(s->head->m_pkthdr.len + pi->ifp->if_mtu > 65535)) {
 			lro_flush_session(qs, s, NULL);
-		}
+		}
+#endif
 	}
 
 	qs->port_stats[SGE_PSTATS_LRO_QUEUED]++;
@@ -380,7 +378,8 @@ no_lro:
 	if (m->m_len == 0 || m->m_pkthdr.len == 0 || (m->m_flags & M_PKTHDR) == 0)
 		DPRINTF("rx_eth_lro mbuf len=%d pktlen=%d flags=0x%x\n",
 		    m->m_len, m->m_pkthdr.len, m->m_flags);
-	t3_rx_eth(pi, rq, m, ethpad);
+
+	t3_rx_eth(adap, rq, m, ethpad);
 }
 
 void
diff --git a/sys/dev/cxgb/cxgb_main.c b/sys/dev/cxgb/cxgb_main.c
index ef10aabc3e44..583214d7dff7 100644
--- a/sys/dev/cxgb/cxgb_main.c
+++ b/sys/dev/cxgb/cxgb_main.c
@@ -113,6 +113,8 @@ static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned i
 static void cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf);
 static int cxgb_get_regs_len(void);
 static int offload_open(struct port_info *pi);
+static void touch_bars(device_t dev);
+
 #ifdef notyet
 static int offload_close(struct toedev *tdev);
 #endif
@@ -412,7 +414,8 @@ cxgb_controller_attach(device_t dev)
 		    "PCIe x%d Link, expect reduced performance\n", sc->link_width);
 	}
-	
+
+	touch_bars(dev);
 	pci_enable_busmaster(dev);
 	/*
 	 * Allocate the registers and make them available to the driver.
@@ -551,17 +554,23 @@ cxgb_controller_attach(device_t dev)
 	 * will be done in these children.
 	 */
 	for (i = 0; i < (sc)->params.nports; i++) {
+		struct port_info *pi;
+
 		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
 			device_printf(dev, "failed to add child port\n");
 			error = EINVAL;
 			goto out;
 		}
-		sc->port[i].adapter = sc;
-		sc->port[i].nqsets = port_qsets;
-		sc->port[i].first_qset = i*port_qsets;
-		sc->port[i].port_id = i;
+		pi = &sc->port[i];
+		pi->adapter = sc;
+		pi->nqsets = port_qsets;
+		pi->first_qset = i*port_qsets;
+		pi->port_id = i;
+		pi->tx_chan = i >= ai->nports0;
+		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
+		sc->rxpkt_map[pi->txpkt_intf] = i;
 		sc->portdev[i] = child;
-		device_set_softc(child, &sc->port[i]);
+		device_set_softc(child, pi);
 	}
 	if ((error = bus_generic_attach(dev)) != 0)
 		goto out;
@@ -633,22 +642,25 @@ cxgb_free(struct adapter *sc)
 		    sc->msix_regs_res);
 	}
 
-	t3_sge_deinit_sw(sc);
-
 	if (sc->tq != NULL) {
 		taskqueue_drain(sc->tq, &sc->ext_intr_task);
 		taskqueue_drain(sc->tq, &sc->tick_task);
-		taskqueue_free(sc->tq);
-	}
-
-	tsleep(&sc, 0, "cxgb unload", hz);
+	}
+	t3_sge_deinit_sw(sc);
+	/*
+	 * Wait for last callout
+	 */
+	tsleep(&sc, 0, "cxgb unload", 3*hz);
+
 	for (i = 0; i < (sc)->params.nports; ++i) {
 		if (sc->portdev[i] != NULL)
 			device_delete_child(sc->dev, sc->portdev[i]);
 	}
 
 	bus_generic_detach(sc->dev);
+	if (sc->tq != NULL)
+		taskqueue_free(sc->tq);
 #ifdef notyet
 	if (is_offload(sc)) {
 		cxgb_adapter_unofld(sc);
@@ -804,16 +816,19 @@ setup_sge_qsets(adapter_t *sc)
 	else
 		irq_idx = 0;
 
-	for (qset_idx = 0, i = 0; i < (sc)->params.nports; ++i) {
+	for (qset_idx = 0, i = 0; i < (sc)->params.nports; i++) {
 		struct port_info *pi = &sc->port[i];
 
-		for (j = 0; j < pi->nqsets; ++j, ++qset_idx) {
+		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
+			printf("allocating qset_idx=%d for port_id=%d\n",
+			    qset_idx, pi->port_id);
 			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
 			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
 			    &sc->params.sge.qset[qset_idx], ntxq, pi);
 			if (err) {
 				t3_free_sge_resources(sc);
-				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n", err);
+				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n",
+				    err);
 				return (err);
 			}
 		}
@@ -859,7 +874,7 @@ cxgb_setup_msix(adapter_t *sc, int msix_count)
 		if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
 #ifdef INTR_FILTERS
-			NULL,
+			NULL,
 #endif
 			cxgb_async_intr, sc, &sc->intr_tag)) {
 			device_printf(sc->dev, "Cannot set up interrupt\n");
@@ -881,10 +896,12 @@ cxgb_setup_msix(adapter_t *sc, int msix_count)
 				return (EINVAL);
 			}
 			sc->msix_irq_rid[k] = rid;
+			printf("setting up interrupt for port=%d\n",
+			    qs->port->port_id);
 			if (bus_setup_intr(sc->dev, sc->msix_irq_res[k],
 			    INTR_MPSAFE|INTR_TYPE_NET,
 #ifdef INTR_FILTERS
-				NULL,
+				NULL,
 #endif
 				t3_intr_msix, qs, &sc->msix_intr_tag[k])) {
 				device_printf(sc->dev, "Cannot set up "
@@ -1077,8 +1094,11 @@ cxgb_port_detach(device_t dev)
 		p->tq = NULL;
 	}
 
-	PORT_LOCK_DEINIT(p);
 	ether_ifdetach(p->ifp);
+	/*
+	 * the lock may be acquired in ifdetach
+	 */
+	PORT_LOCK_DEINIT(p);
 	if_free(p->ifp);
 
 	if (p->port_cdev != NULL)
@@ -1251,7 +1271,8 @@ cxgb_link_start(struct port_info *p)
 	ifp = p->ifp;
 
 	t3_init_rx_mode(&rm, p);
-	t3_mac_reset(mac);
+	if (!mac->multiport)
+		t3_mac_reset(mac);
 	t3_mac_set_mtu(mac, ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
 	t3_mac_set_address(mac, 0, p->hw_addr);
 	t3_mac_set_rx_mode(mac, &rm);
@@ -1278,13 +1299,16 @@ setup_rss(adapter_t *adap)
 	uint8_t cpus[SGE_QSETS + 1];
 	uint16_t rspq_map[RSS_TABLE_SIZE];
 
-	nq[0] = adap->port[0].nqsets;
-	nq[1] = max((u_int)adap->port[1].nqsets, 1U);
-
 	for (i = 0; i < SGE_QSETS; ++i)
 		cpus[i] = i;
 	cpus[SGE_QSETS] = 0xff;
 
+	nq[0] = nq[1] = 0;
+	for_each_port(adap, i) {
+		const struct port_info *pi = adap2pinfo(adap, i);
+
+		nq[pi->tx_chan] += pi->nqsets;
+	}
 	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
 		rspq_map[i] = nq[0] ? i % nq[0] : 0;
 		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
@@ -1534,7 +1558,8 @@ cxgb_up(struct adapter *sc)
 	if ((sc->flags & USING_MSIX) == 0) {
 		if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
 		    &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
-			device_printf(sc->dev, "Cannot allocate interrupt rid=%d\n", sc->irq_rid);
+			device_printf(sc->dev, "Cannot allocate interrupt rid=%d\n",
+			    sc->irq_rid);
 			err = EINVAL;
 			goto out;
 		}
@@ -1599,11 +1624,15 @@ cxgb_down_locked(struct adapter *sc)
 	callout_drain(&sc->cxgb_tick_ch);
 	callout_drain(&sc->sge_timer_ch);
 
-	if (sc->tq != NULL)
+	if (sc->tq != NULL) {
 		taskqueue_drain(sc->tq, &sc->slow_intr_task);
-	for (i = 0; i < sc->params.nports; i++)
+		for (i = 0; i < sc->params.nports; i++)
+			taskqueue_drain(sc->tq, &sc->port[i].timer_reclaim_task);
+	}
+#ifdef notyet
+
 		if (sc->port[i].tq != NULL)
-			taskqueue_drain(sc->port[i].tq, &sc->port[i].timer_reclaim_task);
+#endif
 
 }
@@ -1718,7 +1747,8 @@ cxgb_init_locked(struct port_info *p)
 	cxgb_link_start(p);
 	t3_link_changed(sc, p->port_id);
 	ifp->if_baudrate = p->link_config.speed * 1000000;
-	
+
+	printf("enabling interrupts on port=%d\n", p->port_id);
 	t3_port_intr_enable(sc, p->port_id);
 
 	callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
@@ -1891,7 +1921,7 @@ cxgb_start_tx(struct ifnet *ifp, uint32_t txmax)
 	struct sge_txq *txq;
 	struct port_info *p = ifp->if_softc;
 	struct mbuf *m0, *m = NULL;
-	int err, in_use_init;
+	int err, in_use_init, qsidx = 0;
 
 	if (!p->link_config.link_ok)
 		return (ENXIO);
@@ -1899,7 +1929,10 @@ cxgb_start_tx(struct ifnet *ifp, uint32_t txmax)
 	if (IFQ_DRV_IS_EMPTY(&ifp->if_snd))
 		return (ENOBUFS);
 
-	qs = &p->adapter->sge.qs[p->first_qset];
+	if (p->adapter->params.nports <= 2)
+		qsidx = p->first_qset;
+
+	qs = &p->adapter->sge.qs[qsidx];
 	txq = &qs->txq[TXQ_ETH];
 	err = 0;
@@ -2160,6 +2193,24 @@ cxgb_tick_handler(void *arg, int count)
 		check_t3b2_mac(sc);
 }
 
+static void
+touch_bars(device_t dev)
+{
+	/*
+	 * Don't enable yet
+	 */
+#if !defined(__LP64__) && 0
+	u32 v;
+
+	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
+	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
+	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
+	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
+	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
+	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
+#endif
+}
+
 #if 0
 static void *
 filter_get_idx(struct seq_file *seq, loff_t pos)
diff --git a/sys/dev/cxgb/cxgb_sge.c b/sys/dev/cxgb/cxgb_sge.c
index 0a1d3eef62ff..eb44d289046f 100644
--- a/sys/dev/cxgb/cxgb_sge.c
+++ b/sys/dev/cxgb/cxgb_sge.c
@@ -1169,7 +1169,7 @@ t3_encap(struct port_info *p, struct mbuf **m)
 	struct tx_sw_desc *stx;
 	struct txq_state txqs;
 	unsigned int ndesc, flits, cntrl, mlen;
-	int err, nsegs, tso_info = 0;
+	int err, nsegs, tso_info = 0, qsidx = 0;
 
 	struct work_request_hdr *wrp;
 	struct tx_sw_desc *txsd;
@@ -1179,11 +1179,16 @@ t3_encap(struct port_info *p, struct mbuf **m)
 	struct tx_desc *txd;
 	struct cpl_tx_pkt *cpl;
 
-
-	DPRINTF("t3_encap ");
+
 	m0 = *m;
 	sc = p->adapter;
-	qs = &sc->sge.qs[p->first_qset];
+	if (sc->params.nports <= 2)
+		qsidx = p->first_qset;
+
+	DPRINTF("t3_encap qsidx=%d", qsidx);
+
+	qs = &sc->sge.qs[qsidx];
+
 	txq = &qs->txq[TXQ_ETH];
 	stx = &txq->sdesc[txq->pidx];
 	txd = &txq->desc[txq->pidx];
@@ -1191,12 +1196,12 @@ t3_encap(struct port_info *p, struct mbuf **m)
 	mlen = m0->m_pkthdr.len;
 	cpl->len = htonl(mlen | 0x80000000);
 
-	DPRINTF("mlen=%d\n", mlen);
+	DPRINTF("mlen=%d pktintf=%d\n", mlen, p->txpkt_intf);
 	/*
 	 * XXX handle checksum, TSO, and VLAN here
 	 *
 	 */
-	cntrl = V_TXPKT_INTF(p->port_id);
+	cntrl = V_TXPKT_INTF(p->txpkt_intf);
 
 	/*
 	 * XXX need to add VLAN support for 6.x
@@ -1247,14 +1252,14 @@ t3_encap(struct port_info *p, struct mbuf **m)
 
 	if (mlen <= WR_LEN - sizeof(*cpl)) {
 		txq_prod(txq, 1, &txqs);
-		txq->sdesc[txqs.pidx].m = m0;
-		m_set_priority(m0, txqs.pidx);
+		txq->sdesc[txqs.pidx].m = NULL;
 
 		if (m0->m_len == m0->m_pkthdr.len)
 			memcpy(&txd->flit[2], mtod(m0, uint8_t *), mlen);
 		else
 			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
+		m_freem(m0);
 
 		flits = (mlen + 7) / 8 + 2;
 		cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
 		    V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
@@ -1792,10 +1797,12 @@ calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
 static int
 ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
 {
-	unsigned int pidx, gen, ndesc;
+	int ret, nsegs;
+	unsigned int ndesc;
+	unsigned int pidx, gen;
 	struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
 	bus_dma_segment_t segs[TX_MAX_SEGS];
-	int i, cleaned, ret, nsegs;
+	int i, cleaned;
 	struct tx_sw_desc *stx = &q->sdesc[q->pidx];
 
 	mtx_lock(&q->lock);
@@ -2111,7 +2118,7 @@ t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
 	q->fl[1].type = EXT_JUMBOP;
 
 	q->lro.enabled = lro_default;
-	
+
 	mtx_lock(&sc->sge.reg_lock);
 	ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
 	    q->rspq.phys_addr, q->rspq.size,
@@ -2190,14 +2197,13 @@ err:
 }
 
 void
-t3_rx_eth(struct port_info *pi, struct sge_rspq *rq, struct mbuf *m, int ethpad)
+t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
 {
 	struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
+	struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
 	struct ifnet *ifp = pi->ifp;
 
 	DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
-	if (&pi->adapter->port[cpl->iff] != pi)
-		panic("bad port index %d m->m_data=%p\n", cpl->iff, mtod(m, uint8_t *));
 
 	if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
 	    cpl->csum_valid && cpl->csum == 0xffff) {
@@ -2506,9 +2512,9 @@ process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
 		printf("next_holdoff=%d\n", rq->next_holdoff);
 		last_holdoff = rq->next_holdoff;
 	}
-
-	t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
-	    V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
+	if (work)
+		t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
+		    V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
 
 	return work;
 }
@@ -2523,10 +2529,9 @@ process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
 void
 t3b_intr(void *data)
 {
-	uint32_t map;
+	uint32_t i, map;
 	adapter_t *adap = data;
 	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
-	struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
 
 	t3_write_reg(adap, A_PL_CLI, 0);
 	map = t3_read_reg(adap, A_SG_DATA_INTR);
@@ -2538,13 +2543,9 @@ t3b_intr(void *data)
 		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
 
 	mtx_lock(&q0->lock);
-
-	if (__predict_true(map & 1))
-		process_responses_gts(adap, q0);
-
-	if (map & 2)
-		process_responses_gts(adap, q1);
-
+	for_each_port(adap, i)
+		if (map & (1 << i))
+			process_responses_gts(adap, &adap->sge.qs[i].rspq);
 	mtx_unlock(&q0->lock);
 }
@@ -2559,19 +2560,13 @@ t3_intr_msi(void *data)
 {
 	adapter_t *adap = data;
 	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
-	struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
-	int new_packets = 0;
-
-	mtx_lock(&q0->lock);
-	if (process_responses_gts(adap, q0)) {
-		new_packets = 1;
-	}
-
-	if (adap->params.nports == 2 &&
-	    process_responses_gts(adap, q1)) {
-		new_packets = 1;
-	}
+	int i, new_packets = 0;
+	mtx_lock(&q0->lock);
+
+	for_each_port(adap, i)
+		if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
+			new_packets = 1;
 	mtx_unlock(&q0->lock);
 
 	if (new_packets == 0)
 		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);