From 8cc724d9be9be3db8f59374d6e848ae169695cc3 Mon Sep 17 00:00:00 2001
From: Adrian Chadd
Date: Thu, 4 Apr 2013 08:21:56 +0000
Subject: [PATCH] Fix the busdma logic to work with EDMA chipsets when using
 bounce buffers (ie, >4GB on amd64.)

The underlying problem was that PREREAD doesn't sync the mbuf with the
DMA memory (ie, the bounce buffer), so the bounce buffer could contain
stale information.  The driver therefore always treated the buffer as
completed and things just went off the rails.

This change does the following:

* Make ath_rx_pkt() always consume the mbuf somehow; it no longer passes
  error mbufs (eg CRC errors, crypt errors, etc) back up to the RX path
  to recycle.  This means a new mbuf is always allocated each time, but
  it's cleaner.

* Push the RX buffer map/unmap into the RX path rather than ath_rx_pkt().
  Thus ath_rx_pkt() now assumes (a) it has to consume the mbuf somehow,
  and (b) the mbuf has already been unmapped and synced.

* For the legacy path, the descriptor isn't mapped; it comes out of
  coherent DMA memory anyway, so leave it there.

* For the EDMA path, the RX descriptor has to be cleared before it's
  passed to the hardware, so that when we check it with a POSTREAD sync
  we see either a blank (not finished) or a filled-out descriptor
  (finished).  Otherwise we read stale data from the DMA memory.

* .. so, for the EDMA RX path, we need PREREAD|PREWRITE to sync the
  data -> DMA memory, then POSTREAD|POSTWRITE to finish syncing the
  DMA memory -> data.

* Whilst we're here, make sure the EDMA buffer setup (ie, bzero'ing the
  descriptor part) is done before the mbuf is mapped/synced.

NOTE: there have been a lot of commits besides this one tidying up the
busdma handling in ath(4).  Please check the recent commit history.

Discussed with and thanks to:	scottl

Tested:

* AR5416 (non-EDMA) on i386, with the DMA tag for the driver set to
  2^30, not 2^32, STA
* AR9580 (EDMA) on i386, as above, STA
* User-tested AR9380 on amd64 with 32GB RAM

PR:		kern/177530
---
 sys/dev/ath/if_ath_rx.c      | 63 +++++++++++++++------------
 sys/dev/ath/if_ath_rx.h      |  2 +-
 sys/dev/ath/if_ath_rx_edma.c | 82 +++++++++++++++++++++---------------
 3 files changed, 84 insertions(+), 63 deletions(-)

diff --git a/sys/dev/ath/if_ath_rx.c b/sys/dev/ath/if_ath_rx.c
index 354d08517990..b8ff581a2ef0 100644
--- a/sys/dev/ath/if_ath_rx.c
+++ b/sys/dev/ath/if_ath_rx.c
@@ -502,12 +502,21 @@ ath_handle_micerror(struct ieee80211com *ic,
     }
 }
 
+/*
+ * Process a single packet.
+ *
+ * The mbuf must already be synced, unmapped and removed from bf->bf_m
+ * by this stage.
+ *
+ * The mbuf must be consumed by this routine - either passed up the
+ * net80211 stack, put on the holding queue, or freed.
+ */
 int
 ath_rx_pkt(struct ath_softc *sc, struct ath_rx_status *rs, HAL_STATUS status,
-    uint64_t tsf, int nf, HAL_RX_QUEUE qtype, struct ath_buf *bf)
+    uint64_t tsf, int nf, HAL_RX_QUEUE qtype, struct ath_buf *bf,
+    struct mbuf *m)
 {
     struct ath_hal *ah = sc->sc_ah;
-    struct mbuf *m = bf->bf_m;
     uint64_t rstamp;
     int len, type;
     struct ifnet *ifp = sc->sc_ifp;
@@ -548,10 +557,6 @@ ath_rx_pkt(struct ath_softc *sc, struct ath_rx_status *rs, HAL_STATUS status,
         /* Process DFS radar events */
         if ((rs->rs_phyerr == HAL_PHYERR_RADAR) ||
             (rs->rs_phyerr == HAL_PHYERR_FALSE_RADAR_EXT)) {
-            /* Since we're touching the frame data, sync it */
-            bus_dmamap_sync(sc->sc_dmat,
-                bf->bf_dmamap,
-                BUS_DMASYNC_POSTREAD);
             /* Now pass it to the radar processing code */
             ath_dfs_process_phy_err(sc, m, rstamp, rs);
         }
@@ -593,9 +598,6 @@ ath_rx_pkt(struct ath_softc *sc, struct ath_rx_status *rs, HAL_STATUS status,
             /* XXX frag's and qos frames */
             len = rs->rs_datalen;
             if (len >= sizeof (struct ieee80211_frame)) {
-                bus_dmamap_sync(sc->sc_dmat,
-                    bf->bf_dmamap,
-                    BUS_DMASYNC_POSTREAD);
                 ath_handle_micerror(ic,
                     mtod(m, struct ieee80211_frame *),
                     sc->sc_splitmic ?
@@ -619,35 +621,20 @@ rx_error:
          */
         if (ieee80211_radiotap_active(ic) &&
             (rs->rs_status & sc->sc_monpass)) {
-            bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap,
-                BUS_DMASYNC_POSTREAD);
-            bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap);
             /* NB: bpf needs the mbuf length setup */
             len = rs->rs_datalen;
             m->m_pkthdr.len = m->m_len = len;
-            bf->bf_m = NULL;
             ath_rx_tap(ifp, m, rs, rstamp, nf);
 #ifdef ATH_ENABLE_RADIOTAP_VENDOR_EXT
             ath_rx_tap_vendor(ifp, m, rs, rstamp, nf);
 #endif /* ATH_ENABLE_RADIOTAP_VENDOR_EXT */
             ieee80211_radiotap_rx_all(ic, m);
-            m_freem(m);
         }
         /* XXX pass MIC errors up for s/w reclaculation */
+        m_freem(m); m = NULL;
         goto rx_next;
     }
 rx_accept:
-    /*
-     * Sync and unmap the frame.  At this point we're
-     * committed to passing the mbuf somewhere so clear
-     * bf_m; this means a new mbuf must be allocated
-     * when the rx descriptor is setup again to receive
-     * another frame.
-     */
-    bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_POSTREAD);
-    bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap);
-    bf->bf_m = NULL;
-
     len = rs->rs_datalen;
     m->m_len = len;
 
@@ -665,6 +652,7 @@ rx_accept:
         m->m_pkthdr.rcvif = ifp;
         m->m_pkthdr.len = len;
         re->m_rxpending = m;
+        m = NULL;
         goto rx_next;
     } else if (re->m_rxpending != NULL) {
         /*
@@ -744,7 +732,7 @@ rx_accept:
             /* NB: in particular this captures ack's */
             ieee80211_radiotap_rx_all(ic, m);
         }
-        m_freem(m);
+        m_freem(m); m = NULL;
         goto rx_next;
     }
 
@@ -788,6 +776,7 @@ rx_accept:
      */
     type = ieee80211_input(ni, m, rs->rs_rssi, nf);
     ieee80211_free_node(ni);
+    m = NULL;
     /*
      * Arrange to update the last rx timestamp only for
      * frames from our ap when operating in station mode.
@@ -799,7 +788,14 @@ rx_accept:
             is_good = 1;
     } else {
         type = ieee80211_input_all(ic, m, rs->rs_rssi, nf);
+        m = NULL;
     }
+
+    /*
+     * At this point we have passed the frame up the stack; thus
+     * the mbuf is no longer ours.
+     */
+
     /*
      * Track rx rssi and do any rx antenna management.
      */
@@ -837,6 +833,16 @@ rx_accept:
             ath_led_event(sc, 0);
     }
 rx_next:
+    /*
+     * Debugging - complain if we didn't NULL the mbuf pointer
+     * here.
+     */
+    if (m != NULL) {
+        device_printf(sc->sc_dev,
+            "%s: mbuf %p should've been freed!\n",
+            __func__,
+            m);
+    }
     return (is_good);
 }
 
@@ -952,7 +958,10 @@ ath_rx_proc(struct ath_softc *sc, int resched)
         /*
          * Process a single frame.
          */
-        if (ath_rx_pkt(sc, rs, status, tsf, nf, HAL_RX_QUEUE_HP, bf))
+        bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_POSTREAD);
+        bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap);
+        bf->bf_m = NULL;
+        if (ath_rx_pkt(sc, rs, status, tsf, nf, HAL_RX_QUEUE_HP, bf, m))
             ngood++;
 rx_proc_next:
         TAILQ_INSERT_TAIL(&sc->sc_rxbuf, bf, bf_list);
diff --git a/sys/dev/ath/if_ath_rx.h b/sys/dev/ath/if_ath_rx.h
index 504ff80b4bff..f831148bfd25 100644
--- a/sys/dev/ath/if_ath_rx.h
+++ b/sys/dev/ath/if_ath_rx.h
@@ -58,7 +58,7 @@ extern int ath_startrecv(struct ath_softc *sc);
 
 extern	int ath_rx_pkt(struct ath_softc *sc, struct ath_rx_status *rs,
         HAL_STATUS status, uint64_t tsf, int nf, HAL_RX_QUEUE qtype,
-        struct ath_buf *bf);
+        struct ath_buf *bf, struct mbuf *m);
 
 extern	void ath_recv_setup_legacy(struct ath_softc *sc);
 
diff --git a/sys/dev/ath/if_ath_rx_edma.c b/sys/dev/ath/if_ath_rx_edma.c
index 7dd697439fc1..9d8bd31d4d8d 100644
--- a/sys/dev/ath/if_ath_rx_edma.c
+++ b/sys/dev/ath/if_ath_rx_edma.c
@@ -362,11 +362,10 @@ ath_edma_recv_proc_queue(struct ath_softc *sc, HAL_RX_QUEUE qtype,
 
         /*
          * Sync descriptor memory - this also syncs the buffer for us.
-         *
          * EDMA descriptors are in cached memory.
          */
         bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap,
-            BUS_DMASYNC_POSTREAD);
+            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
         rs = &bf->bf_status.ds_rxstat;
         bf->bf_rxstatus = ath_hal_rxprocdesc(ah, ds,
             bf->bf_daddr, NULL, rs);
@@ -384,15 +383,16 @@ ath_edma_recv_proc_queue(struct ath_softc *sc, HAL_RX_QUEUE qtype,
 
             /*
              * Completed descriptor.
-             *
-             * In the future we'll call ath_rx_pkt(), but it first
-             * has to be taught about EDMA RX queues (so it can
-             * access sc_rxpending correctly.)
              */
             DPRINTF(sc, ATH_DEBUG_EDMA_RX,
                 "%s: Q%d: completed!\n", __func__, qtype);
             npkts++;
 
+            /*
+             * We've been synced already, so unmap.
+             */
+            bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap);
+
             /*
              * Remove the FIFO entry and place it on the completion
              * queue.
@@ -468,6 +468,7 @@ ath_edma_recv_proc_deferred_queue(struct ath_softc *sc, HAL_RX_QUEUE qtype,
     struct ath_rx_status *rs;
     int16_t nf;
     ath_bufhead rxlist;
+    struct mbuf *m;
 
     TAILQ_INIT(&rxlist);
 
@@ -492,12 +493,11 @@ ath_edma_recv_proc_deferred_queue(struct ath_softc *sc, HAL_RX_QUEUE qtype,
         m_adj(bf->bf_m, sc->sc_rx_statuslen);
 
         /* Handle the frame */
-        /*
-         * Note: this may or may not free bf->bf_m and sync/unmap
-         * the frame.
-         */
+
         rs = &bf->bf_status.ds_rxstat;
-        if (ath_rx_pkt(sc, rs, bf->bf_rxstatus, tsf, nf, qtype, bf))
+        m = bf->bf_m;
+        bf->bf_m = NULL;
+        if (ath_rx_pkt(sc, rs, bf->bf_rxstatus, tsf, nf, qtype, bf, m))
             ngood++;
     }
 
@@ -604,11 +604,28 @@ ath_edma_rxbuf_init(struct ath_softc *sc, struct ath_buf *bf)
 
     m->m_pkthdr.len = m->m_len = m->m_ext.ext_size;
 
+    /*
+     * Populate ath_buf fields.
+     */
+    bf->bf_desc = mtod(m, struct ath_desc *);
+    bf->bf_lastds = bf->bf_desc;	/* XXX only really for TX? */
+    bf->bf_m = m;
+
+    /*
+     * Zero the descriptor and ensure it makes it out to the
+     * bounce buffer if one is required.
+     *
+     * XXX PREWRITE will copy the whole buffer; we only needed it
+     * to sync the first 32 DWORDS.  Oh well.
+     */
+    memset(bf->bf_desc, '\0', sc->sc_rx_statuslen);
+
     /*
      * Create DMA mapping.
      */
     error = bus_dmamap_load_mbuf_sg(sc->sc_dmat, bf->bf_dmamap, m,
         bf->bf_segs, &bf->bf_nseg, BUS_DMA_NOWAIT);
+
     if (error != 0) {
         device_printf(sc->sc_dev, "%s: failed; error=%d\n",
             __func__,
@@ -618,30 +635,27 @@ ath_edma_rxbuf_init(struct ath_softc *sc, struct ath_buf *bf)
     }
 
     /*
-     * Populate ath_buf fields.
+     * Set daddr to the physical mapping page.
      */
-
-    bf->bf_desc = mtod(m, struct ath_desc *);
     bf->bf_daddr = bf->bf_segs[0].ds_addr;
-    bf->bf_lastds = bf->bf_desc;	/* XXX only really for TX? */
-    bf->bf_m = m;
-    /* Zero the descriptor */
-    memset(bf->bf_desc, '\0', sc->sc_rx_statuslen);
-
-#if 0
     /*
-     * Adjust mbuf header and length/size to compensate for the
-     * descriptor size.
+     * Prepare for the upcoming read.
+     *
+     * We need to both sync some data into the buffer (the zero'ed
+     * descriptor payload) and also prepare for the read that's going
+     * to occur.
      */
-    m_adj(m, sc->sc_rx_statuslen);
-#endif
+    bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap,
+        BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
     /* Finish! */
-
     return (0);
 }
 
+/*
+ * Allocate a RX buffer.
+ */
 static struct ath_buf *
 ath_edma_rxbuf_alloc(struct ath_softc *sc)
 {
@@ -653,8 +667,11 @@ ath_edma_rxbuf_alloc(struct ath_softc *sc)
     /* Allocate buffer */
     bf = TAILQ_FIRST(&sc->sc_rxbuf);
     /* XXX shouldn't happen upon startup? */
-    if (bf == NULL)
+    if (bf == NULL) {
+        device_printf(sc->sc_dev, "%s: nothing on rxbuf?!\n",
+            __func__);
         return (NULL);
+    }
 
     /* Remove it from the free list */
     TAILQ_REMOVE(&sc->sc_rxbuf, bf, bf_list);
@@ -743,18 +760,13 @@ ath_edma_rxfifo_alloc(struct ath_softc *sc, HAL_RX_QUEUE qtype, int nbufs)
 
         re->m_fifo[re->m_fifo_tail] = bf;
 
-        /*
-         * Flush the descriptor contents before it's handed to the
-         * hardware.
-         */
-        bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap,
-            BUS_DMASYNC_PREREAD);
-
         /* Write to the RX FIFO */
-        DPRINTF(sc, ATH_DEBUG_EDMA_RX, "%s: Q%d: putrxbuf=%p\n",
+        DPRINTF(sc, ATH_DEBUG_EDMA_RX,
+            "%s: Q%d: putrxbuf=%p (0x%jx)\n",
             __func__, qtype,
-            bf->bf_desc);
+            bf->bf_desc,
+            (uintmax_t) bf->bf_daddr);
         ath_hal_putrxbuf(sc->sc_ah, bf->bf_daddr, qtype);
         re->m_fifo_depth++;
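
As a companion to the sync ordering the commit message describes (zero the status
descriptor, PREREAD|PREWRITE before the hardware owns the buffer, then
POSTREAD|POSTWRITE plus unmap once it completes), here is a minimal sketch of that
EDMA RX buffer lifecycle.  It is illustrative only and not part of the patch: the
function names edma_rxbuf_arm()/edma_rxbuf_complete() are hypothetical, while the
busdma calls and the ath_softc/ath_buf fields mirror the ones used in the diff above.

/*
 * Illustrative sketch only (not from this commit): the EDMA RX buffer
 * lifecycle described in the commit message, using the FreeBSD busdma
 * API and driver-style fields (sc_dmat, bf_dmamap, sc_rx_statuslen).
 * The function names here are hypothetical.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/mbuf.h>

#include <machine/bus.h>

#include <dev/ath/if_athvar.h>	/* struct ath_softc, struct ath_buf */

static int
edma_rxbuf_arm(struct ath_softc *sc, struct ath_buf *bf, struct mbuf *m)
{
	int error;

	/* Clear the RX status descriptor at the head of the buffer. */
	memset(mtod(m, void *), 0, sc->sc_rx_statuslen);

	/* Map the mbuf for DMA. */
	error = bus_dmamap_load_mbuf_sg(sc->sc_dmat, bf->bf_dmamap, m,
	    bf->bf_segs, &bf->bf_nseg, BUS_DMA_NOWAIT);
	if (error != 0)
		return (error);

	/*
	 * Push the zeroed descriptor out to DMA memory (PREWRITE) and
	 * prepare for the upcoming device write (PREREAD), so a bounce
	 * buffer, if one is in use, carries no stale completion bits.
	 */
	bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	return (0);
}

static void
edma_rxbuf_complete(struct ath_softc *sc, struct ath_buf *bf)
{
	/*
	 * Pull the device-written descriptor and payload back from DMA
	 * memory (POSTREAD|POSTWRITE), then unmap.  After this the mbuf
	 * belongs to whoever consumes it, eg ath_rx_pkt().
	 */
	bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
	bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap);
}

The reason PREWRITE is paired with PREREAD here is exactly the bug described above:
with bounce buffers, PREREAD alone never copies the freshly zeroed descriptor into
the bounce page, so the later completion check can see stale status bits.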