Fixes for 4 port and small packet optimization

- remove cpl->iff panic - we can't know the port number from the rspq on the 4-port
- pick the ifnet based on the interface in the CPL header
- switch to using qset 0 for egress on the 4-port for now - may change
  when we start using RSS
- move ether_ifdetach to before the port lock gets deinitialized to avoid
  hang in the case where there are BPF peers (cxgb_ioctl is called indirectly
  when BPF peers are present)
- don't call t3_mac_reset if multiport is set, this was causing tx errors
  by misconfiguring the MAC on the 4-port
- change V_TXPKT_INTF to use txpkt_intf as the interfaces are not contiguous
- free the mbuf immediately in the case where the payload is small enough to be copied
  into the rspq
- only update the coalesce timer for a queue if packets were taken off of it
- add in missed 20ms DELAY in the initialization of the vsc8211

- prompt MFC as this only applies to the 4-port which is currently completely
  broken - OK'd by kensmith

Supported by: Chelsio
Approved by: re (blanket)
MFC after: 0 days
This commit is contained in:
Kip Macy 2007-08-25 21:07:37 +00:00
parent 63f45c4bdf
commit 7ac2e6c362
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=171978
5 changed files with 126 additions and 77 deletions

View File

@ -248,4 +248,5 @@ void t3_vsc8211_phy_prep(struct cphy *phy, adapter_t *adapter, int phy_addr,
const struct mdio_ops *mdio_ops)
{
cphy_init(phy, adapter, phy_addr, &vsc8211_ops, mdio_ops);
t3_os_sleep(20); /* PHY needs ~10ms to start responding to MDIO */
}

View File

@ -117,10 +117,13 @@ struct port_info {
#else
struct mtx lock;
#endif
int port_id;
uint8_t hw_addr[ETHER_ADDR_LEN];
uint8_t port_id;
uint8_t tx_chan;
uint8_t txpkt_intf;
uint8_t nqsets;
uint8_t first_qset;
uint8_t hw_addr[ETHER_ADDR_LEN];
struct taskqueue *tq;
struct task start_task;
struct task timer_reclaim_task;
@ -515,7 +518,7 @@ void t3_sge_deinit_sw(adapter_t *);
void t3_rx_eth_lro(adapter_t *adap, struct sge_rspq *rq, struct mbuf *m,
int ethpad, uint32_t rss_hash, uint32_t rss_csum, int lro);
void t3_rx_eth(struct port_info *p, struct sge_rspq *rq, struct mbuf *m, int ethpad);
void t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad);
void t3_lro_flush(adapter_t *adap, struct sge_qset *qs, struct lro_state *state);
void t3_add_sysctls(adapter_t *sc);

View File

@ -199,7 +199,7 @@ lro_flush_session(struct sge_qset *qs, struct t3_lro_session *s, struct mbuf *m)
MBUF_HEADER_CHECK(sm);
sm->m_flags |= M_LRO;
t3_rx_eth(qs->port, &qs->rspq, sm, 2);
t3_rx_eth(qs->port->adapter, &qs->rspq, sm, 2);
if (m) {
s->head = m;
@ -341,7 +341,6 @@ t3_rx_eth_lro(adapter_t *adap, struct sge_rspq *rq, struct mbuf *m,
struct ip *ih;
struct tcphdr *th;
struct t3_lro_session *s = NULL;
struct port_info *pi = qs->port;
if (lro == 0)
goto no_lro;
@ -349,9 +348,6 @@ t3_rx_eth_lro(adapter_t *adap, struct sge_rspq *rq, struct mbuf *m,
if (!can_lro_packet(cpl, rss_csum))
goto no_lro;
if (&adap->port[cpl->iff] != pi)
panic("bad port index %d\n", cpl->iff);
ih = (struct ip *)(eh + 1);
th = (struct tcphdr *)(ih + 1);
@ -366,9 +362,11 @@ t3_rx_eth_lro(adapter_t *adap, struct sge_rspq *rq, struct mbuf *m,
if (lro_update_session(s, m)) {
lro_flush_session(qs, s, m);
}
#ifdef notyet
if (__predict_false(s->head->m_pkthdr.len + pi->ifp->if_mtu > 65535)) {
lro_flush_session(qs, s, NULL);
}
}
#endif
}
qs->port_stats[SGE_PSTATS_LRO_QUEUED]++;
@ -380,7 +378,8 @@ no_lro:
if (m->m_len == 0 || m->m_pkthdr.len == 0 || (m->m_flags & M_PKTHDR) == 0)
DPRINTF("rx_eth_lro mbuf len=%d pktlen=%d flags=0x%x\n",
m->m_len, m->m_pkthdr.len, m->m_flags);
t3_rx_eth(pi, rq, m, ethpad);
t3_rx_eth(adap, rq, m, ethpad);
}
void

View File

@ -113,6 +113,8 @@ static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned i
static void cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf);
static int cxgb_get_regs_len(void);
static int offload_open(struct port_info *pi);
static void touch_bars(device_t dev);
#ifdef notyet
static int offload_close(struct toedev *tdev);
#endif
@ -412,7 +414,8 @@ cxgb_controller_attach(device_t dev)
"PCIe x%d Link, expect reduced performance\n",
sc->link_width);
}
touch_bars(dev);
pci_enable_busmaster(dev);
/*
* Allocate the registers and make them available to the driver.
@ -551,17 +554,23 @@ cxgb_controller_attach(device_t dev)
* will be done in these children.
*/
for (i = 0; i < (sc)->params.nports; i++) {
struct port_info *pi;
if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
device_printf(dev, "failed to add child port\n");
error = EINVAL;
goto out;
}
sc->port[i].adapter = sc;
sc->port[i].nqsets = port_qsets;
sc->port[i].first_qset = i*port_qsets;
sc->port[i].port_id = i;
pi = &sc->port[i];
pi->adapter = sc;
pi->nqsets = port_qsets;
pi->first_qset = i*port_qsets;
pi->port_id = i;
pi->tx_chan = i >= ai->nports0;
pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
sc->rxpkt_map[pi->txpkt_intf] = i;
sc->portdev[i] = child;
device_set_softc(child, &sc->port[i]);
device_set_softc(child, pi);
}
if ((error = bus_generic_attach(dev)) != 0)
goto out;
@ -633,22 +642,25 @@ cxgb_free(struct adapter *sc)
sc->msix_regs_res);
}
t3_sge_deinit_sw(sc);
if (sc->tq != NULL) {
taskqueue_drain(sc->tq, &sc->ext_intr_task);
taskqueue_drain(sc->tq, &sc->tick_task);
taskqueue_free(sc->tq);
}
tsleep(&sc, 0, "cxgb unload", hz);
}
t3_sge_deinit_sw(sc);
/*
* Wait for last callout
*/
tsleep(&sc, 0, "cxgb unload", 3*hz);
for (i = 0; i < (sc)->params.nports; ++i) {
if (sc->portdev[i] != NULL)
device_delete_child(sc->dev, sc->portdev[i]);
}
bus_generic_detach(sc->dev);
if (sc->tq != NULL)
taskqueue_free(sc->tq);
#ifdef notyet
if (is_offload(sc)) {
cxgb_adapter_unofld(sc);
@ -804,16 +816,19 @@ setup_sge_qsets(adapter_t *sc)
else
irq_idx = 0;
for (qset_idx = 0, i = 0; i < (sc)->params.nports; ++i) {
for (qset_idx = 0, i = 0; i < (sc)->params.nports; i++) {
struct port_info *pi = &sc->port[i];
for (j = 0; j < pi->nqsets; ++j, ++qset_idx) {
for (j = 0; j < pi->nqsets; j++, qset_idx++) {
printf("allocating qset_idx=%d for port_id=%d\n",
qset_idx, pi->port_id);
err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
(sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
&sc->params.sge.qset[qset_idx], ntxq, pi);
if (err) {
t3_free_sge_resources(sc);
device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n", err);
device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n",
err);
return (err);
}
}
@ -859,7 +874,7 @@ cxgb_setup_msix(adapter_t *sc, int msix_count)
if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
NULL,
NULL,
#endif
cxgb_async_intr, sc, &sc->intr_tag)) {
device_printf(sc->dev, "Cannot set up interrupt\n");
@ -881,10 +896,12 @@ cxgb_setup_msix(adapter_t *sc, int msix_count)
return (EINVAL);
}
sc->msix_irq_rid[k] = rid;
printf("setting up interrupt for port=%d\n",
qs->port->port_id);
if (bus_setup_intr(sc->dev, sc->msix_irq_res[k],
INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
NULL,
NULL,
#endif
t3_intr_msix, qs, &sc->msix_intr_tag[k])) {
device_printf(sc->dev, "Cannot set up "
@ -1077,8 +1094,11 @@ cxgb_port_detach(device_t dev)
p->tq = NULL;
}
PORT_LOCK_DEINIT(p);
ether_ifdetach(p->ifp);
/*
* the lock may be acquired in ifdetach
*/
PORT_LOCK_DEINIT(p);
if_free(p->ifp);
if (p->port_cdev != NULL)
@ -1251,7 +1271,8 @@ cxgb_link_start(struct port_info *p)
ifp = p->ifp;
t3_init_rx_mode(&rm, p);
t3_mac_reset(mac);
if (!mac->multiport)
t3_mac_reset(mac);
t3_mac_set_mtu(mac, ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
t3_mac_set_address(mac, 0, p->hw_addr);
t3_mac_set_rx_mode(mac, &rm);
@ -1278,13 +1299,16 @@ setup_rss(adapter_t *adap)
uint8_t cpus[SGE_QSETS + 1];
uint16_t rspq_map[RSS_TABLE_SIZE];
nq[0] = adap->port[0].nqsets;
nq[1] = max((u_int)adap->port[1].nqsets, 1U);
for (i = 0; i < SGE_QSETS; ++i)
cpus[i] = i;
cpus[SGE_QSETS] = 0xff;
nq[0] = nq[1] = 0;
for_each_port(adap, i) {
const struct port_info *pi = adap2pinfo(adap, i);
nq[pi->tx_chan] += pi->nqsets;
}
for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
rspq_map[i] = nq[0] ? i % nq[0] : 0;
rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
@ -1534,7 +1558,8 @@ cxgb_up(struct adapter *sc)
if ((sc->flags & USING_MSIX) == 0) {
if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
&sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
device_printf(sc->dev, "Cannot allocate interrupt rid=%d\n", sc->irq_rid);
device_printf(sc->dev, "Cannot allocate interrupt rid=%d\n",
sc->irq_rid);
err = EINVAL;
goto out;
}
@ -1599,11 +1624,15 @@ cxgb_down_locked(struct adapter *sc)
callout_drain(&sc->cxgb_tick_ch);
callout_drain(&sc->sge_timer_ch);
if (sc->tq != NULL)
if (sc->tq != NULL) {
taskqueue_drain(sc->tq, &sc->slow_intr_task);
for (i = 0; i < sc->params.nports; i++)
for (i = 0; i < sc->params.nports; i++)
taskqueue_drain(sc->tq, &sc->port[i].timer_reclaim_task);
}
#ifdef notyet
if (sc->port[i].tq != NULL)
taskqueue_drain(sc->port[i].tq, &sc->port[i].timer_reclaim_task);
#endif
}
@ -1718,7 +1747,8 @@ cxgb_init_locked(struct port_info *p)
cxgb_link_start(p);
t3_link_changed(sc, p->port_id);
ifp->if_baudrate = p->link_config.speed * 1000000;
printf("enabling interrupts on port=%d\n", p->port_id);
t3_port_intr_enable(sc, p->port_id);
callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
@ -1891,7 +1921,7 @@ cxgb_start_tx(struct ifnet *ifp, uint32_t txmax)
struct sge_txq *txq;
struct port_info *p = ifp->if_softc;
struct mbuf *m0, *m = NULL;
int err, in_use_init;
int err, in_use_init, qsidx = 0;
if (!p->link_config.link_ok)
return (ENXIO);
@ -1899,7 +1929,10 @@ cxgb_start_tx(struct ifnet *ifp, uint32_t txmax)
if (IFQ_DRV_IS_EMPTY(&ifp->if_snd))
return (ENOBUFS);
qs = &p->adapter->sge.qs[p->first_qset];
if (p->adapter->params.nports <= 2)
qsidx = p->first_qset;
qs = &p->adapter->sge.qs[qsidx];
txq = &qs->txq[TXQ_ETH];
err = 0;
@ -2160,6 +2193,24 @@ cxgb_tick_handler(void *arg, int count)
check_t3b2_mac(sc);
}
static void
touch_bars(device_t dev)
{
/*
* Don't enable yet
*/
#if !defined(__LP64__) && 0
u32 v;
pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
#endif
}
#if 0
static void *
filter_get_idx(struct seq_file *seq, loff_t pos)

View File

@ -1169,7 +1169,7 @@ t3_encap(struct port_info *p, struct mbuf **m)
struct tx_sw_desc *stx;
struct txq_state txqs;
unsigned int ndesc, flits, cntrl, mlen;
int err, nsegs, tso_info = 0;
int err, nsegs, tso_info = 0, qsidx = 0;
struct work_request_hdr *wrp;
struct tx_sw_desc *txsd;
@ -1179,11 +1179,16 @@ t3_encap(struct port_info *p, struct mbuf **m)
struct tx_desc *txd;
struct cpl_tx_pkt *cpl;
DPRINTF("t3_encap ");
m0 = *m;
sc = p->adapter;
qs = &sc->sge.qs[p->first_qset];
if (sc->params.nports <= 2)
qsidx = p->first_qset;
DPRINTF("t3_encap qsidx=%d", qsidx);
qs = &sc->sge.qs[qsidx];
txq = &qs->txq[TXQ_ETH];
stx = &txq->sdesc[txq->pidx];
txd = &txq->desc[txq->pidx];
@ -1191,12 +1196,12 @@ t3_encap(struct port_info *p, struct mbuf **m)
mlen = m0->m_pkthdr.len;
cpl->len = htonl(mlen | 0x80000000);
DPRINTF("mlen=%d\n", mlen);
DPRINTF("mlen=%d pktintf=%d\n", mlen, p->txpkt_intf);
/*
* XXX handle checksum, TSO, and VLAN here
*
*/
cntrl = V_TXPKT_INTF(p->port_id);
cntrl = V_TXPKT_INTF(p->txpkt_intf);
/*
* XXX need to add VLAN support for 6.x
@ -1247,14 +1252,14 @@ t3_encap(struct port_info *p, struct mbuf **m)
if (mlen <= WR_LEN - sizeof(*cpl)) {
txq_prod(txq, 1, &txqs);
txq->sdesc[txqs.pidx].m = m0;
m_set_priority(m0, txqs.pidx);
txq->sdesc[txqs.pidx].m = NULL;
if (m0->m_len == m0->m_pkthdr.len)
memcpy(&txd->flit[2], mtod(m0, uint8_t *), mlen);
else
m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
m_freem(m0);
flits = (mlen + 7) / 8 + 2;
cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
@ -1792,10 +1797,12 @@ calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
static int
ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
{
unsigned int pidx, gen, ndesc;
int ret, nsegs;
unsigned int ndesc;
unsigned int pidx, gen;
struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
bus_dma_segment_t segs[TX_MAX_SEGS];
int i, cleaned, ret, nsegs;
int i, cleaned;
struct tx_sw_desc *stx = &q->sdesc[q->pidx];
mtx_lock(&q->lock);
@ -2111,7 +2118,7 @@ t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
q->fl[1].type = EXT_JUMBOP;
q->lro.enabled = lro_default;
mtx_lock(&sc->sge.reg_lock);
ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
q->rspq.phys_addr, q->rspq.size,
@ -2190,14 +2197,13 @@ err:
}
void
t3_rx_eth(struct port_info *pi, struct sge_rspq *rq, struct mbuf *m, int ethpad)
t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
{
struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
struct ifnet *ifp = pi->ifp;
DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
if (&pi->adapter->port[cpl->iff] != pi)
panic("bad port index %d m->m_data=%p\n", cpl->iff, mtod(m, uint8_t *));
if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
cpl->csum_valid && cpl->csum == 0xffff) {
@ -2506,9 +2512,9 @@ process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
printf("next_holdoff=%d\n", rq->next_holdoff);
last_holdoff = rq->next_holdoff;
}
t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
if (work)
t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
return work;
}
@ -2523,10 +2529,9 @@ process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
void
t3b_intr(void *data)
{
uint32_t map;
uint32_t i, map;
adapter_t *adap = data;
struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
t3_write_reg(adap, A_PL_CLI, 0);
map = t3_read_reg(adap, A_SG_DATA_INTR);
@ -2538,13 +2543,9 @@ t3b_intr(void *data)
taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
mtx_lock(&q0->lock);
if (__predict_true(map & 1))
process_responses_gts(adap, q0);
if (map & 2)
process_responses_gts(adap, q1);
for_each_port(adap, i)
if (map & (1 << i))
process_responses_gts(adap, &adap->sge.qs[i].rspq);
mtx_unlock(&q0->lock);
}
@ -2559,19 +2560,13 @@ t3_intr_msi(void *data)
{
adapter_t *adap = data;
struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
int new_packets = 0;
mtx_lock(&q0->lock);
if (process_responses_gts(adap, q0)) {
new_packets = 1;
}
if (adap->params.nports == 2 &&
process_responses_gts(adap, q1)) {
new_packets = 1;
}
int i, new_packets = 0;
mtx_lock(&q0->lock);
for_each_port(adap, i)
if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
new_packets = 1;
mtx_unlock(&q0->lock);
if (new_packets == 0)
taskqueue_enqueue(adap->tq, &adap->slow_intr_task);