From 0e195446b70cdb3130378505b24603031c1f3ce0 Mon Sep 17 00:00:00 2001 From: David Greenman Date: Wed, 20 Apr 1994 07:06:57 +0000 Subject: [PATCH] Bug fixes and performance improvements from John Dyson and myself: 1) check va before clearing the page clean flag. Not doing so was causing the vnode pager error 5 messages when paging from NFS. (pmap.c) 2) put back interrupt protection in idle_loop. Bruce didn't think it was necessary, John insists that it is (and I agree). (swtch.s) 3) various improvements to the clustering code (vm_machdep.c). It's now enabled/used by default. 4) bad disk blocks are now handled properly when doing clustered IOs. (wd.c, vm_machdep.c) 5) bogus bad block handling fixed in wd.c. 6) algorithm improvements to the pageout/pagescan daemons. It's amazing how well 4MB machines work now. --- sys/amd64/amd64/cpu_switch.S | 7 +- sys/amd64/amd64/pmap.c | 29 +++++-- sys/amd64/amd64/swtch.s | 7 +- sys/amd64/amd64/vm_machdep.c | 127 ++++++++++++++++++++++------ sys/dev/fdc/fdc.c | 3 +- sys/dev/mcd/mcd.c | 5 +- sys/i386/i386/pmap.c | 29 +++++-- sys/i386/i386/swtch.s | 7 +- sys/i386/i386/vm_machdep.c | 127 ++++++++++++++++++++++------ sys/i386/isa/fd.c | 3 +- sys/i386/isa/mcd.c | 5 +- sys/i386/isa/wd.c | 158 +++++++++++++++++++++++------------ sys/isa/fd.c | 3 +- sys/scsi/cd.c | 5 +- sys/scsi/scsi_base.c | 12 ++- sys/scsi/sd.c | 9 +- 16 files changed, 392 insertions(+), 144 deletions(-) diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S index 17f246c7d228..4dbc672b923e 100644 --- a/sys/amd64/amd64/cpu_switch.S +++ b/sys/amd64/amd64/cpu_switch.S @@ -33,7 +33,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $Id: swtch.s,v 1.4 1994/01/31 10:26:59 davidg Exp $ + * $Id: swtch.s,v 1.5 1994/04/02 07:00:30 davidg Exp $ */ #include "npx.h" /* for NNPX */ @@ -156,8 +156,10 @@ _idle: ALIGN_TEXT idle_loop: + cli cmpl $0,_whichqs - jne sw1 + jne sw1a + sti hlt /* wait for interrupt */ jmp idle_loop @@ -214,6 +216,7 @@ ENTRY(swtch) /* save is done, now choose a new process or idle */ sw1: cli +sw1a: movl _whichqs,%edi 2: /* XXX - bsf is sloow */ diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 9502df0ff356..cc2e9a435571 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -39,7 +39,7 @@ * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 - * $Id: pmap.c,v 1.22 1994/03/30 02:17:45 davidg Exp $ + * $Id: pmap.c,v 1.23 1994/04/14 07:49:38 davidg Exp $ */ /* @@ -153,6 +153,7 @@ static inline pv_entry_t get_pv_entry(); void pmap_alloc_pv_entry(); void pmap_clear_modify(); void i386_protection_init(); +extern vm_offset_t pager_sva, pager_eva; #if BSDVM_COMPAT #include "msgbuf.h" @@ -851,8 +852,10 @@ pmap_remove(pmap, sva, eva) if (pmap_is_managed(pa)) { if ((((int) oldpte & PG_M) && (sva < USRSTACK || sva > UPT_MAX_ADDRESS)) || (sva >= USRSTACK && sva < USRSTACK+(UPAGES*NBPG))) { - m = PHYS_TO_VM_PAGE(pa); - m->flags &= ~PG_CLEAN; + if (sva < pager_sva || sva >= pager_eva) { + m = PHYS_TO_VM_PAGE(pa); + m->flags &= ~PG_CLEAN; + } } pv = pa_to_pvh(pa); @@ -946,8 +949,10 @@ pmap_remove(pmap, sva, eva) if ((((int) oldpte & PG_M) && (va < USRSTACK || va > UPT_MAX_ADDRESS)) || (va >= USRSTACK && va < USRSTACK+(UPAGES*NBPG))) { - m = PHYS_TO_VM_PAGE(pa); - m->flags &= ~PG_CLEAN; + if (va < pager_sva || va >= pager_eva) { + m = PHYS_TO_VM_PAGE(pa); + m->flags &= ~PG_CLEAN; + } } pv = pa_to_pvh(pa); @@ -1018,7 +1023,9 @@ pmap_remove_all(pa) if ( (m->flags & PG_CLEAN) && ((((int) *pte) & PG_M) && (pv->pv_va < USRSTACK || pv->pv_va > UPT_MAX_ADDRESS)) || (pv->pv_va >= USRSTACK && pv->pv_va < USRSTACK+(UPAGES*NBPG))) { - m->flags &= ~PG_CLEAN; + if 
(pv->pv_va < pager_sva || pv->pv_va >= pager_eva) { + m->flags &= ~PG_CLEAN; + } } *pte = 0; @@ -1724,8 +1731,16 @@ pmap_testbit(pa, bit) * then mark UPAGES as always modified, and * ptes as never modified. */ + if (bit & PG_U ) { + if ((pv->pv_va >= pager_sva) && (pv->pv_va < pager_eva)) { + continue; + } + } if (bit & PG_M ) { if (pv->pv_va >= USRSTACK) { + if (pv->pv_va >= pager_sva && pv->pv_va < pager_eva) { + continue; + } if (pv->pv_va < USRSTACK+(UPAGES*NBPG)) { splx(s); return TRUE; @@ -1780,8 +1795,6 @@ pmap_changebit(pa, bit, setem) * don't write protect pager mappings */ if (!setem && (bit == PG_RW)) { - extern vm_offset_t pager_sva, pager_eva; - if (va >= pager_sva && va < pager_eva) continue; } diff --git a/sys/amd64/amd64/swtch.s b/sys/amd64/amd64/swtch.s index 17f246c7d228..4dbc672b923e 100644 --- a/sys/amd64/amd64/swtch.s +++ b/sys/amd64/amd64/swtch.s @@ -33,7 +33,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: swtch.s,v 1.4 1994/01/31 10:26:59 davidg Exp $ + * $Id: swtch.s,v 1.5 1994/04/02 07:00:30 davidg Exp $ */ #include "npx.h" /* for NNPX */ @@ -156,8 +156,10 @@ _idle: ALIGN_TEXT idle_loop: + cli cmpl $0,_whichqs - jne sw1 + jne sw1a + sti hlt /* wait for interrupt */ jmp idle_loop @@ -214,6 +216,7 @@ ENTRY(swtch) /* save is done, now choose a new process or idle */ sw1: cli +sw1a: movl _whichqs,%edi 2: /* XXX - bsf is sloow */ diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index c9b1964ce82a..89eb127f715e 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -38,7 +38,7 @@ * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ - * $Id: vm_machdep.c,v 1.18 1994/04/05 03:23:09 davidg Exp $ + * $Id: vm_machdep.c,v 1.19 1994/04/14 07:49:40 davidg Exp $ */ #include "npx.h" @@ -56,6 +56,11 @@ #define b_cylin b_resid +#define MAXCLSTATS 256 +int clstats[MAXCLSTATS]; +int rqstats[MAXCLSTATS]; + + #ifndef 
NOBOUNCE caddr_t bouncememory; @@ -121,10 +126,20 @@ vm_bounce_kva_free(addr, size, now) int s = splbio(); kvaf[kvasfreecnt].addr = addr; kvaf[kvasfreecnt++].size = size; - if( now) + if( now) { + /* + * this will do wakeups + */ vm_bounce_kva(0,0); - else - wakeup((caddr_t) io_map); + } else { + if (bmwait) { + /* + * if anyone is waiting on the bounce-map, then wakeup + */ + wakeup((caddr_t) io_map); + bmwait = 0; + } + } splx(s); } @@ -224,6 +239,49 @@ more: return kva; } +/* + * same as vm_bounce_kva -- but really allocate + */ +vm_offset_t +vm_bounce_kva_alloc(count) +int count; +{ + int i; + vm_offset_t kva; + vm_offset_t pa; + if( bouncepages == 0) { + kva = (vm_offset_t) malloc(count*NBPG, M_TEMP, M_WAITOK); + return kva; + } + kva = vm_bounce_kva(count, 1); + for(i=0;ib_un.b_addr); vm_bounce_kva_free( bouncekva, countvmpg*NBPG, 0); - if (bmwait) { - /* - * if anyone is waiting on the bounce-map, then wakeup - */ - wakeup((caddr_t) io_map); - bmwait = 0; - } - bp->b_un.b_addr = bp->b_savekva; bp->b_savekva = 0; bp->b_flags &= ~B_BOUNCE; @@ -476,6 +526,8 @@ cldisksort(struct buf *dp, struct buf *bp, vm_offset_t maxio) vm_offset_t orig1begin, orig2begin; vm_offset_t kvanew, kvaorig; + if( bp->b_bcount < MAXCLSTATS*PAGE_SIZE) + ++rqstats[bp->b_bcount/PAGE_SIZE]; /* * If nothing on the activity queue, then * we become the only thing. @@ -494,22 +546,22 @@ cldisksort(struct buf *dp, struct buf *bp, vm_offset_t maxio) * and add ourselves to it. */ - if (bp->b_cylin < ap->b_cylin) { + if (bp->b_pblkno < ap->b_pblkno) { while (ap->av_forw) { /* * Check for an ``inversion'' in the - * normally ascending cylinder numbers, + * normally ascending block numbers, * indicating the start of the second request list. */ - if (ap->av_forw->b_cylin < ap->b_cylin) { + if (ap->av_forw->b_pblkno < ap->b_pblkno) { /* * Search the second request list * for the first request at a larger - * cylinder number. We go before that; + * block number. 
We go before that; * if there is no such request, we go at end. */ do { - if (bp->b_cylin < ap->av_forw->b_cylin) + if (bp->b_pblkno < ap->av_forw->b_pblkno) goto insert; ap = ap->av_forw; } while (ap->av_forw); @@ -532,21 +584,33 @@ cldisksort(struct buf *dp, struct buf *bp, vm_offset_t maxio) * We want to go after the current request * if there is an inversion after it (i.e. it is * the end of the first request list), or if - * the next request is a larger cylinder than our request. + * the next request is a larger block than our request. */ - if (ap->av_forw->b_cylin < ap->b_cylin || - bp->b_cylin < ap->av_forw->b_cylin ) + if (ap->av_forw->b_pblkno < ap->b_pblkno || + bp->b_pblkno < ap->av_forw->b_pblkno ) goto insert; ap = ap->av_forw; } insert: + +#if 0 + /* + * read clustering with new read-ahead disk drives hurts mostly, so + * we don't bother... + */ + if( bp->b_flags & B_READ) + goto nocluster; +#endif /* * we currently only cluster I/O transfers that are at page-aligned * kvas and transfers that are multiples of page lengths. */ - if(((bp->b_bcount & PAGE_MASK) == 0) && + if ((bp->b_flags & B_BAD) == 0 && + ((bp->b_bcount & PAGE_MASK) == 0) && (((vm_offset_t) bp->b_un.b_addr & PAGE_MASK) == 0)) { + if( maxio > MAXCLSTATS*PAGE_SIZE) + maxio = MAXCLSTATS*PAGE_SIZE; /* * merge with previous? * conditions: @@ -558,9 +622,10 @@ insert: * is a multiple of a page in length. * 5) And the total I/O size would be below the maximum. 
*/ - if( (ap->b_blkno + (ap->b_bcount / DEV_BSIZE) == bp->b_blkno) && + if( (ap->b_pblkno + (ap->b_bcount / DEV_BSIZE) == bp->b_pblkno) && (dp->b_actf != ap) && ((ap->b_flags & ~B_CLUSTER) == bp->b_flags) && + ((ap->b_flags & B_BAD) == 0) && ((ap->b_bcount & PAGE_MASK) == 0) && (((vm_offset_t) ap->b_un.b_addr & PAGE_MASK) == 0) && (ap->b_bcount + bp->b_bcount < maxio)) { @@ -597,6 +662,7 @@ insert: * build the new bp to be handed off to the device */ + --clstats[ap->b_bcount/PAGE_SIZE]; *newbp = *ap; newbp->b_flags |= B_CLUSTER; newbp->b_un.b_addr = (caddr_t) kvanew; @@ -604,6 +670,7 @@ insert: newbp->b_bufsize = newbp->b_bcount; newbp->b_clusterf = ap; newbp->b_clusterl = bp; + ++clstats[newbp->b_bcount/PAGE_SIZE]; /* * enter the new bp onto the device queue @@ -635,6 +702,7 @@ insert: * free the old kva */ vm_bounce_kva_free( orig1begin, ap->b_bufsize, 0); + --clstats[ap->b_bcount/PAGE_SIZE]; ap->b_un.b_addr = (caddr_t) kvanew; @@ -645,6 +713,7 @@ insert: ap->b_bcount += bp->b_bcount; ap->b_bufsize = ap->b_bcount; + ++clstats[ap->b_bcount/PAGE_SIZE]; } return; /* @@ -657,8 +726,9 @@ insert: * 5) And the total I/O size would be below the maximum. 
*/ } else if( ap->av_forw && - (bp->b_blkno + (bp->b_bcount / DEV_BSIZE) == ap->av_forw->b_blkno) && + (bp->b_pblkno + (bp->b_bcount / DEV_BSIZE) == ap->av_forw->b_pblkno) && (bp->b_flags == (ap->av_forw->b_flags & ~B_CLUSTER)) && + ((ap->av_forw->b_flags & B_BAD) == 0) && ((ap->av_forw->b_bcount & PAGE_MASK) == 0) && (((vm_offset_t) ap->av_forw->b_un.b_addr & PAGE_MASK) == 0) && (ap->av_forw->b_bcount + bp->b_bcount < maxio)) { @@ -678,7 +748,6 @@ insert: goto nocluster; } - /* * if next isn't a cluster we need to create one */ @@ -694,18 +763,18 @@ insert: } cldiskvamerge( kvanew, orig1begin, orig1pages, orig2begin, orig2pages); - - pmap_update(); - ap = ap->av_forw; + --clstats[ap->b_bcount/PAGE_SIZE]; *newbp = *ap; newbp->b_flags |= B_CLUSTER; newbp->b_un.b_addr = (caddr_t) kvanew; newbp->b_blkno = bp->b_blkno; + newbp->b_pblkno = bp->b_pblkno; newbp->b_bcount += bp->b_bcount; newbp->b_bufsize = newbp->b_bcount; newbp->b_clusterf = bp; newbp->b_clusterl = ap; + ++clstats[newbp->b_bcount/PAGE_SIZE]; if( ap->av_forw) ap->av_forw->av_back = newbp; @@ -734,10 +803,13 @@ insert: ap->b_clusterf->av_back = bp; ap->b_clusterf = bp; bp->av_back = NULL; + --clstats[ap->b_bcount/PAGE_SIZE]; ap->b_blkno = bp->b_blkno; + ap->b_pblkno = bp->b_pblkno; ap->b_bcount += bp->b_bcount; ap->b_bufsize = ap->b_bcount; + ++clstats[ap->b_bcount/PAGE_SIZE]; } return; @@ -747,6 +819,7 @@ insert: * don't merge */ nocluster: + ++clstats[bp->b_bcount/PAGE_SIZE]; bp->av_forw = ap->av_forw; if( bp->av_forw) bp->av_forw->av_back = bp; diff --git a/sys/dev/fdc/fdc.c b/sys/dev/fdc/fdc.c index d2eb7aa92fcb..d05c3612b67f 100644 --- a/sys/dev/fdc/fdc.c +++ b/sys/dev/fdc/fdc.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. 
* * from: @(#)fd.c 7.4 (Berkeley) 5/25/91 - * $Id: fd.c,v 1.23 1994/03/02 18:34:41 ache Exp $ + * $Id: fd.c,v 1.24 1994/03/08 16:25:29 nate Exp $ * */ @@ -413,6 +413,7 @@ void fdstrategy(struct buf *bp) goto bad; } bp->b_cylin = blknum / (fd->ft->sectrac * fd->ft->heads); + bp->b_pblkno = bp->b_blkno; dp = &(fdc->head); s = splbio(); disksort(dp, bp); diff --git a/sys/dev/mcd/mcd.c b/sys/dev/mcd/mcd.c index 2e499679164f..8fbfdc4212d5 100644 --- a/sys/dev/mcd/mcd.c +++ b/sys/dev/mcd/mcd.c @@ -39,7 +39,7 @@ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $Id: mcd.c,v 1.13 1994/03/06 14:14:49 jkh Exp $ + * $Id: mcd.c,v 1.14 1994/03/21 20:59:55 ats Exp $ */ static char COPYRIGHT[] = "mcd-driver (C)1993 by H.Veit & B.Moore"; @@ -350,6 +350,9 @@ MCD_TRACE("strategy: drive not valid\n",0,0,0,0); if (bounds_check_with_label(bp,&cd->dlabel,1) <= 0) { goto done; } + } else { + bp->b_pblkno = bp->b_blkno; + bp->b_cylin = 0; } /* queue it */ diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index 9502df0ff356..cc2e9a435571 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -39,7 +39,7 @@ * SUCH DAMAGE. 
* * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 - * $Id: pmap.c,v 1.22 1994/03/30 02:17:45 davidg Exp $ + * $Id: pmap.c,v 1.23 1994/04/14 07:49:38 davidg Exp $ */ /* @@ -153,6 +153,7 @@ static inline pv_entry_t get_pv_entry(); void pmap_alloc_pv_entry(); void pmap_clear_modify(); void i386_protection_init(); +extern vm_offset_t pager_sva, pager_eva; #if BSDVM_COMPAT #include "msgbuf.h" @@ -851,8 +852,10 @@ pmap_remove(pmap, sva, eva) if (pmap_is_managed(pa)) { if ((((int) oldpte & PG_M) && (sva < USRSTACK || sva > UPT_MAX_ADDRESS)) || (sva >= USRSTACK && sva < USRSTACK+(UPAGES*NBPG))) { - m = PHYS_TO_VM_PAGE(pa); - m->flags &= ~PG_CLEAN; + if (sva < pager_sva || sva >= pager_eva) { + m = PHYS_TO_VM_PAGE(pa); + m->flags &= ~PG_CLEAN; + } } pv = pa_to_pvh(pa); @@ -946,8 +949,10 @@ pmap_remove(pmap, sva, eva) if ((((int) oldpte & PG_M) && (va < USRSTACK || va > UPT_MAX_ADDRESS)) || (va >= USRSTACK && va < USRSTACK+(UPAGES*NBPG))) { - m = PHYS_TO_VM_PAGE(pa); - m->flags &= ~PG_CLEAN; + if (va < pager_sva || va >= pager_eva) { + m = PHYS_TO_VM_PAGE(pa); + m->flags &= ~PG_CLEAN; + } } pv = pa_to_pvh(pa); @@ -1018,7 +1023,9 @@ pmap_remove_all(pa) if ( (m->flags & PG_CLEAN) && ((((int) *pte) & PG_M) && (pv->pv_va < USRSTACK || pv->pv_va > UPT_MAX_ADDRESS)) || (pv->pv_va >= USRSTACK && pv->pv_va < USRSTACK+(UPAGES*NBPG))) { - m->flags &= ~PG_CLEAN; + if (pv->pv_va < pager_sva || pv->pv_va >= pager_eva) { + m->flags &= ~PG_CLEAN; + } } *pte = 0; @@ -1724,8 +1731,16 @@ pmap_testbit(pa, bit) * then mark UPAGES as always modified, and * ptes as never modified. 
*/ + if (bit & PG_U ) { + if ((pv->pv_va >= pager_sva) && (pv->pv_va < pager_eva)) { + continue; + } + } if (bit & PG_M ) { if (pv->pv_va >= USRSTACK) { + if (pv->pv_va >= pager_sva && pv->pv_va < pager_eva) { + continue; + } if (pv->pv_va < USRSTACK+(UPAGES*NBPG)) { splx(s); return TRUE; @@ -1780,8 +1795,6 @@ pmap_changebit(pa, bit, setem) * don't write protect pager mappings */ if (!setem && (bit == PG_RW)) { - extern vm_offset_t pager_sva, pager_eva; - if (va >= pager_sva && va < pager_eva) continue; } diff --git a/sys/i386/i386/swtch.s b/sys/i386/i386/swtch.s index 17f246c7d228..4dbc672b923e 100644 --- a/sys/i386/i386/swtch.s +++ b/sys/i386/i386/swtch.s @@ -33,7 +33,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: swtch.s,v 1.4 1994/01/31 10:26:59 davidg Exp $ + * $Id: swtch.s,v 1.5 1994/04/02 07:00:30 davidg Exp $ */ #include "npx.h" /* for NNPX */ @@ -156,8 +156,10 @@ _idle: ALIGN_TEXT idle_loop: + cli cmpl $0,_whichqs - jne sw1 + jne sw1a + sti hlt /* wait for interrupt */ jmp idle_loop @@ -214,6 +216,7 @@ ENTRY(swtch) /* save is done, now choose a new process or idle */ sw1: cli +sw1a: movl _whichqs,%edi 2: /* XXX - bsf is sloow */ diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index c9b1964ce82a..89eb127f715e 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -38,7 +38,7 @@ * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ - * $Id: vm_machdep.c,v 1.18 1994/04/05 03:23:09 davidg Exp $ + * $Id: vm_machdep.c,v 1.19 1994/04/14 07:49:40 davidg Exp $ */ #include "npx.h" @@ -56,6 +56,11 @@ #define b_cylin b_resid +#define MAXCLSTATS 256 +int clstats[MAXCLSTATS]; +int rqstats[MAXCLSTATS]; + + #ifndef NOBOUNCE caddr_t bouncememory; @@ -121,10 +126,20 @@ vm_bounce_kva_free(addr, size, now) int s = splbio(); kvaf[kvasfreecnt].addr = addr; kvaf[kvasfreecnt++].size = size; - if( now) + if( now) { + /* + * this will do 
wakeups + */ vm_bounce_kva(0,0); - else - wakeup((caddr_t) io_map); + } else { + if (bmwait) { + /* + * if anyone is waiting on the bounce-map, then wakeup + */ + wakeup((caddr_t) io_map); + bmwait = 0; + } + } splx(s); } @@ -224,6 +239,49 @@ more: return kva; } +/* + * same as vm_bounce_kva -- but really allocate + */ +vm_offset_t +vm_bounce_kva_alloc(count) +int count; +{ + int i; + vm_offset_t kva; + vm_offset_t pa; + if( bouncepages == 0) { + kva = (vm_offset_t) malloc(count*NBPG, M_TEMP, M_WAITOK); + return kva; + } + kva = vm_bounce_kva(count, 1); + for(i=0;ib_un.b_addr); vm_bounce_kva_free( bouncekva, countvmpg*NBPG, 0); - if (bmwait) { - /* - * if anyone is waiting on the bounce-map, then wakeup - */ - wakeup((caddr_t) io_map); - bmwait = 0; - } - bp->b_un.b_addr = bp->b_savekva; bp->b_savekva = 0; bp->b_flags &= ~B_BOUNCE; @@ -476,6 +526,8 @@ cldisksort(struct buf *dp, struct buf *bp, vm_offset_t maxio) vm_offset_t orig1begin, orig2begin; vm_offset_t kvanew, kvaorig; + if( bp->b_bcount < MAXCLSTATS*PAGE_SIZE) + ++rqstats[bp->b_bcount/PAGE_SIZE]; /* * If nothing on the activity queue, then * we become the only thing. @@ -494,22 +546,22 @@ cldisksort(struct buf *dp, struct buf *bp, vm_offset_t maxio) * and add ourselves to it. */ - if (bp->b_cylin < ap->b_cylin) { + if (bp->b_pblkno < ap->b_pblkno) { while (ap->av_forw) { /* * Check for an ``inversion'' in the - * normally ascending cylinder numbers, + * normally ascending block numbers, * indicating the start of the second request list. */ - if (ap->av_forw->b_cylin < ap->b_cylin) { + if (ap->av_forw->b_pblkno < ap->b_pblkno) { /* * Search the second request list * for the first request at a larger - * cylinder number. We go before that; + * block number. We go before that; * if there is no such request, we go at end. 
*/ do { - if (bp->b_cylin < ap->av_forw->b_cylin) + if (bp->b_pblkno < ap->av_forw->b_pblkno) goto insert; ap = ap->av_forw; } while (ap->av_forw); @@ -532,21 +584,33 @@ cldisksort(struct buf *dp, struct buf *bp, vm_offset_t maxio) * We want to go after the current request * if there is an inversion after it (i.e. it is * the end of the first request list), or if - * the next request is a larger cylinder than our request. + * the next request is a larger block than our request. */ - if (ap->av_forw->b_cylin < ap->b_cylin || - bp->b_cylin < ap->av_forw->b_cylin ) + if (ap->av_forw->b_pblkno < ap->b_pblkno || + bp->b_pblkno < ap->av_forw->b_pblkno ) goto insert; ap = ap->av_forw; } insert: + +#if 0 + /* + * read clustering with new read-ahead disk drives hurts mostly, so + * we don't bother... + */ + if( bp->b_flags & B_READ) + goto nocluster; +#endif /* * we currently only cluster I/O transfers that are at page-aligned * kvas and transfers that are multiples of page lengths. */ - if(((bp->b_bcount & PAGE_MASK) == 0) && + if ((bp->b_flags & B_BAD) == 0 && + ((bp->b_bcount & PAGE_MASK) == 0) && (((vm_offset_t) bp->b_un.b_addr & PAGE_MASK) == 0)) { + if( maxio > MAXCLSTATS*PAGE_SIZE) + maxio = MAXCLSTATS*PAGE_SIZE; /* * merge with previous? * conditions: @@ -558,9 +622,10 @@ insert: * is a multiple of a page in length. * 5) And the total I/O size would be below the maximum. 
*/ - if( (ap->b_blkno + (ap->b_bcount / DEV_BSIZE) == bp->b_blkno) && + if( (ap->b_pblkno + (ap->b_bcount / DEV_BSIZE) == bp->b_pblkno) && (dp->b_actf != ap) && ((ap->b_flags & ~B_CLUSTER) == bp->b_flags) && + ((ap->b_flags & B_BAD) == 0) && ((ap->b_bcount & PAGE_MASK) == 0) && (((vm_offset_t) ap->b_un.b_addr & PAGE_MASK) == 0) && (ap->b_bcount + bp->b_bcount < maxio)) { @@ -597,6 +662,7 @@ insert: * build the new bp to be handed off to the device */ + --clstats[ap->b_bcount/PAGE_SIZE]; *newbp = *ap; newbp->b_flags |= B_CLUSTER; newbp->b_un.b_addr = (caddr_t) kvanew; @@ -604,6 +670,7 @@ insert: newbp->b_bufsize = newbp->b_bcount; newbp->b_clusterf = ap; newbp->b_clusterl = bp; + ++clstats[newbp->b_bcount/PAGE_SIZE]; /* * enter the new bp onto the device queue @@ -635,6 +702,7 @@ insert: * free the old kva */ vm_bounce_kva_free( orig1begin, ap->b_bufsize, 0); + --clstats[ap->b_bcount/PAGE_SIZE]; ap->b_un.b_addr = (caddr_t) kvanew; @@ -645,6 +713,7 @@ insert: ap->b_bcount += bp->b_bcount; ap->b_bufsize = ap->b_bcount; + ++clstats[ap->b_bcount/PAGE_SIZE]; } return; /* @@ -657,8 +726,9 @@ insert: * 5) And the total I/O size would be below the maximum. 
*/ } else if( ap->av_forw && - (bp->b_blkno + (bp->b_bcount / DEV_BSIZE) == ap->av_forw->b_blkno) && + (bp->b_pblkno + (bp->b_bcount / DEV_BSIZE) == ap->av_forw->b_pblkno) && (bp->b_flags == (ap->av_forw->b_flags & ~B_CLUSTER)) && + ((ap->av_forw->b_flags & B_BAD) == 0) && ((ap->av_forw->b_bcount & PAGE_MASK) == 0) && (((vm_offset_t) ap->av_forw->b_un.b_addr & PAGE_MASK) == 0) && (ap->av_forw->b_bcount + bp->b_bcount < maxio)) { @@ -678,7 +748,6 @@ insert: goto nocluster; } - /* * if next isn't a cluster we need to create one */ @@ -694,18 +763,18 @@ insert: } cldiskvamerge( kvanew, orig1begin, orig1pages, orig2begin, orig2pages); - - pmap_update(); - ap = ap->av_forw; + --clstats[ap->b_bcount/PAGE_SIZE]; *newbp = *ap; newbp->b_flags |= B_CLUSTER; newbp->b_un.b_addr = (caddr_t) kvanew; newbp->b_blkno = bp->b_blkno; + newbp->b_pblkno = bp->b_pblkno; newbp->b_bcount += bp->b_bcount; newbp->b_bufsize = newbp->b_bcount; newbp->b_clusterf = bp; newbp->b_clusterl = ap; + ++clstats[newbp->b_bcount/PAGE_SIZE]; if( ap->av_forw) ap->av_forw->av_back = newbp; @@ -734,10 +803,13 @@ insert: ap->b_clusterf->av_back = bp; ap->b_clusterf = bp; bp->av_back = NULL; + --clstats[ap->b_bcount/PAGE_SIZE]; ap->b_blkno = bp->b_blkno; + ap->b_pblkno = bp->b_pblkno; ap->b_bcount += bp->b_bcount; ap->b_bufsize = ap->b_bcount; + ++clstats[ap->b_bcount/PAGE_SIZE]; } return; @@ -747,6 +819,7 @@ insert: * don't merge */ nocluster: + ++clstats[bp->b_bcount/PAGE_SIZE]; bp->av_forw = ap->av_forw; if( bp->av_forw) bp->av_forw->av_back = bp; diff --git a/sys/i386/isa/fd.c b/sys/i386/isa/fd.c index d2eb7aa92fcb..d05c3612b67f 100644 --- a/sys/i386/isa/fd.c +++ b/sys/i386/isa/fd.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. 
* * from: @(#)fd.c 7.4 (Berkeley) 5/25/91 - * $Id: fd.c,v 1.23 1994/03/02 18:34:41 ache Exp $ + * $Id: fd.c,v 1.24 1994/03/08 16:25:29 nate Exp $ * */ @@ -413,6 +413,7 @@ void fdstrategy(struct buf *bp) goto bad; } bp->b_cylin = blknum / (fd->ft->sectrac * fd->ft->heads); + bp->b_pblkno = bp->b_blkno; dp = &(fdc->head); s = splbio(); disksort(dp, bp); diff --git a/sys/i386/isa/mcd.c b/sys/i386/isa/mcd.c index 2e499679164f..8fbfdc4212d5 100644 --- a/sys/i386/isa/mcd.c +++ b/sys/i386/isa/mcd.c @@ -39,7 +39,7 @@ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $Id: mcd.c,v 1.13 1994/03/06 14:14:49 jkh Exp $ + * $Id: mcd.c,v 1.14 1994/03/21 20:59:55 ats Exp $ */ static char COPYRIGHT[] = "mcd-driver (C)1993 by H.Veit & B.Moore"; @@ -350,6 +350,9 @@ MCD_TRACE("strategy: drive not valid\n",0,0,0,0); if (bounds_check_with_label(bp,&cd->dlabel,1) <= 0) { goto done; } + } else { + bp->b_pblkno = bp->b_blkno; + bp->b_cylin = 0; } /* queue it */ diff --git a/sys/i386/isa/wd.c b/sys/i386/isa/wd.c index 99587f28acd2..b8cf448d3bfa 100644 --- a/sys/i386/isa/wd.c +++ b/sys/i386/isa/wd.c @@ -37,7 +37,7 @@ static int wdtest = 0; * SUCH DAMAGE. * * from: @(#)wd.c 7.2 (Berkeley) 5/9/91 - * $Id: wd.c,v 1.36 1994/03/06 03:10:58 jkh Exp $ + * $Id: wd.c,v 1.37 1994/04/10 11:17:13 csgr Exp $ */ /* TODO: @@ -83,11 +83,11 @@ static int wdtest = 0; #include "syslog.h" #include "vm/vm.h" -#define TIMEOUT 10000 /* XXX? 
WDCC_DIAGNOSE can take > 1.1 sec */ - +#define TIMEOUT 10000 #define RETRIES 5 /* number of retries before giving up */ #define RECOVERYTIME 500000 /* usec for controller to recover after err */ #define MAXTRANSFER 256 /* max size of transfer in sectors */ +#define BAD144_NO_CYL 0xffff /* XXX should be in dkbad.h; bad144.c uses -1 */ #ifdef notyet #define wdnoreloc(dev) (minor(dev) & 0x80) /* ignore partition table */ @@ -149,6 +149,7 @@ struct disk { struct dos_partition dk_dospartitions[NDOSPART]; /* DOS view of disk */ struct dkbad dk_bad; /* bad sector table */ + long dk_badsect[127]; /* 126 plus trailing -1 marker */ }; static struct disk *wddrives[NWD]; /* table of units */ @@ -159,6 +160,8 @@ static struct buf rwdbuf[NWD]; /* buffers for raw IO */ #endif static long wdxfer[NWD]; /* count of transfers */ + +static void bad144intern(struct disk *); static int wdprobe(struct isa_device *dvp); static int wdattach(struct isa_device *dvp); static void wdustart(struct disk *du); @@ -370,10 +373,32 @@ wdstrategy(register struct buf *bp) du->dk_wlabel) <= 0) goto done; + /* + * Check for *any* block on this transfer being on the bad block list + * if it is, then flag the block as a transfer that requires + * bad block handling. Also, used as a hint for low level disksort + * clustering code to keep from coalescing a bad transfer into + * a normal transfer. Single block transfers for a large number of + * blocks associated with a cluster I/O are undesirable. 
+ */ + if( du->dk_flags & DKFL_BADSECT) { + int i; + int nsecs = howmany(bp->b_bcount, DEV_BSIZE); + int blkend = bp->b_pblkno + nsecs; + for(i=0;du->dk_badsect[i] != -1 && du->dk_badsect[i] < blkend;i++) { + if( du->dk_badsect[i] >= bp->b_pblkno) { + bp->b_flags |= B_BAD; + break; + } + } + } + /* queue transfer on drive, activate drive and controller if idle */ dp = &wdutab[lunit]; s = splbio(); - disksort(dp, bp); + + cldisksort(dp, bp, 254*DEV_BSIZE); + if (dp->b_active == 0) wdustart(du); /* start drive */ @@ -389,8 +414,10 @@ wdstrategy(register struct buf *bp) return; done: + s = splbio(); /* toss transfer, we're done early */ biodone(bp); + splx(s); } /* @@ -477,7 +504,7 @@ loop: } /* calculate transfer details */ - blknum = bp->b_blkno + du->dk_skip; + blknum = bp->b_pblkno + du->dk_skip; #ifdef WDDEBUG if (du->dk_skip == 0) printf("wd%d: wdstart: %s %d@%d; map ", lunit, @@ -486,67 +513,49 @@ loop: else printf(" %d)%x", du->dk_skip, inb(du->dk_port + wd_altsts)); #endif - if (du->dk_skip == 0) - du->dk_bc = bp->b_bcount; lp = &du->dk_dd; secpertrk = lp->d_nsectors; secpercyl = lp->d_secpercyl; - if (wddospart(bp->b_dev)) - blknum += du->dk_dd2.d_partitions[wdpart(bp->b_dev)].p_offset; - else - blknum += lp->d_partitions[wdpart(bp->b_dev)].p_offset; - cylin = blknum / secpercyl; - head = (blknum % secpercyl) / secpertrk; - sector = blknum % secpertrk; - /* - * See if the current block is in the bad block list. - * (If we have one, and not formatting.) - */ - if ((du->dk_flags & (DKFL_SINGLE | DKFL_BADSECT)) - == (DKFL_SINGLE | DKFL_BADSECT)) -#define BAD144_NO_CYL 0xffff /* XXX should be in dkbad.h; bad144.c uses -1 */ - for (bt_ptr = du->dk_bad.bt_bad; bt_ptr->bt_cyl != BAD144_NO_CYL; - bt_ptr++) { - if (bt_ptr->bt_cyl > cylin) - /* Sorted list, and we passed our cylinder. quit. */ - break; - if (bt_ptr->bt_cyl == cylin && - bt_ptr->bt_trksec == (head << 8) + sector) { - /* - * Found bad block. Calculate new block number. 
- * This starts at the end of the disk (skip the - * last track which is used for the bad block list), - * and works backwards to the front of the disk. - */ -#ifdef WDDEBUG - printf("--- badblock code -> Old = %ld; ", blknum); -#endif + if (du->dk_skip == 0) { + du->dk_bc = bp->b_bcount; + if (bp->b_flags & B_BAD) { + du->dk_flags |= DKFL_SINGLE; + } + } + if ((du->dk_flags & (DKFL_SINGLE|DKFL_BADSECT)) /* 19 Aug 92*/ + == (DKFL_SINGLE|DKFL_BADSECT)) { + int i; + + for(i=0; + du->dk_badsect[i] != -1 && du->dk_badsect[i] <= blknum; + i++) { + + if( du->dk_badsect[i] == blknum) { /* * XXX the offset of the bad sector table ought * to be stored in the in-core copy of the table. */ #define BAD144_PART 2 /* XXX scattered magic numbers */ #define BSD_PART 0 /* XXX should be 2 but bad144.c uses 0 */ - if (lp->d_partitions[BSD_PART].p_offset != 0) - blknum = lp->d_partitions[BAD144_PART].p_offset - + lp->d_partitions[BAD144_PART].p_size; - else - blknum = lp->d_secperunit; - blknum -= lp->d_nsectors + (bt_ptr - du->dk_bad.bt_bad) - + 1; - - cylin = blknum / secpercyl; - head = (blknum % secpercyl) / secpertrk; - sector = blknum % secpertrk; -#ifdef WDDEBUG - printf("new = %ld\n", blknum); -#endif - break; + if (lp->d_partitions[BSD_PART].p_offset != 0) + blknum = lp->d_partitions[BAD144_PART].p_offset + + lp->d_partitions[BAD144_PART].p_size; + else + blknum = lp->d_secperunit; + blknum -= lp->d_nsectors + i + 1; + + break; + } } } + + + cylin = blknum / secpercyl; + head = (blknum % secpercyl) / secpertrk; + sector = blknum % secpertrk; wdtab[ctrlr].b_active = 1; /* mark controller active */ @@ -680,7 +689,7 @@ wdintr(int unit) return; case 1: wdstart(unit); - return; + return; case 2: goto done; } @@ -727,6 +736,9 @@ oops: chk = min(DEV_BSIZE / sizeof(short), du->dk_bc / sizeof(short)); /* ready to receive data? 
*/ + if ((du->dk_status & (WDCS_READY | WDCS_SEEKCMPLT | WDCS_DRQ)) + != (WDCS_READY | WDCS_SEEKCMPLT | WDCS_DRQ)) + wderror(bp, du, "wdintr: read intr arrived early"); if (wdwait(du, WDCS_READY | WDCS_SEEKCMPLT | WDCS_DRQ, TIMEOUT) != 0) { wderror(bp, du, "wdintr: read error detected late"); goto oops; @@ -877,8 +889,10 @@ wdopen(dev_t dev, int flags, int fmt, struct proc *p) du->dk_flags |= DKFL_BSDLABEL; du->dk_flags &= ~DKFL_WRITEPROT; - if (du->dk_dd.d_flags & D_BADSECT) + if (du->dk_dd.d_flags & D_BADSECT) { du->dk_flags |= DKFL_BADSECT; + bad144intern(du); + } /* * Force WDRAW partition to be the whole disk. @@ -1045,6 +1059,7 @@ wdcommand(struct disk *du, u_int cylinder, u_int head, u_int sector, static int wdsetctlr(struct disk *du) { + int error = 0; #ifdef WDDEBUG printf("wd(%d,%d): wdsetctlr: C %lu H %lu S %lu\n", du->dk_ctrlr, du->dk_unit, @@ -1065,8 +1080,18 @@ wdsetctlr(struct disk *du) } else { printf("(truncating to 16)\n"); - du->dk_dd.d_ntracks = 16; + du->dk_dd.d_ntracks = 16; + } } + + if (du->dk_dd.d_nsectors == 0 || du->dk_dd.d_nsectors > 255) { + printf("wd%d: cannot handle %lu sectors (max 255)\n", + du->dk_lunit, du->dk_dd.d_nsectors); + error = 1; + } + if (error) { + wdtab[du->dk_ctrlr].b_errcnt += RETRIES; + return (1); } if (wdcommand(du, du->dk_dd.d_ncylinders, du->dk_dd.d_ntracks - 1, 0, du->dk_dd.d_nsectors, WDCC_IDC) != 0 @@ -1772,4 +1797,27 @@ wdwait(struct disk *du, u_char bits_wanted, int timeout) return (-1); } +/* + * Internalize the bad sector table. 
+ */ +void bad144intern(struct disk *du) { + int i; + if (du->dk_flags & DKFL_BADSECT) { + for (i = 0; i < 127; i++) { + du->dk_badsect[i] = -1; + } + for (i = 0; i < 126; i++) { + if (du->dk_bad.bt_bad[i].bt_cyl == 0xffff) { + break; + } else { + du->dk_badsect[i] = + du->dk_bad.bt_bad[i].bt_cyl * du->dk_dd.d_secpercyl + + (du->dk_bad.bt_bad[i].bt_trksec >> 8) * du->dk_dd.d_nsectors ++ + (du->dk_bad.bt_bad[i].bt_trksec & 0x00ff); + } + } + } +} + #endif /* NWDC > 0 */ diff --git a/sys/isa/fd.c b/sys/isa/fd.c index d2eb7aa92fcb..d05c3612b67f 100644 --- a/sys/isa/fd.c +++ b/sys/isa/fd.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. * * from: @(#)fd.c 7.4 (Berkeley) 5/25/91 - * $Id: fd.c,v 1.23 1994/03/02 18:34:41 ache Exp $ + * $Id: fd.c,v 1.24 1994/03/08 16:25:29 nate Exp $ * */ @@ -413,6 +413,7 @@ void fdstrategy(struct buf *bp) goto bad; } bp->b_cylin = blknum / (fd->ft->sectrac * fd->ft->heads); + bp->b_pblkno = bp->b_blkno; dp = &(fdc->head); s = splbio(); disksort(dp, bp); diff --git a/sys/scsi/cd.c b/sys/scsi/cd.c index 630671b6d6df..0d64665856c2 100644 --- a/sys/scsi/cd.c +++ b/sys/scsi/cd.c @@ -14,7 +14,7 @@ * * Ported to run under 386BSD by Julian Elischer (julian@tfs.com) Sept 1992 * - * $Id: cd.c,v 1.16 1994/02/05 09:08:46 swallace Exp $ + * $Id: cd.c,v 1.17 1994/03/23 09:15:51 davidg Exp $ */ #define SPLCD splbio @@ -415,6 +415,9 @@ cdstrategy(bp) if (bounds_check_with_label(bp, &cd->disklabel, 1) <= 0) goto done; /* otherwise, process transfer request */ + } else { + bp->b_pblkno = bp->b_blkno; + bp->b_resid = 0; } opri = SPLCD(); dp = &cd->buf_queue; diff --git a/sys/scsi/scsi_base.c b/sys/scsi/scsi_base.c index d6d569f88697..2fd0c7f17cf7 100644 --- a/sys/scsi/scsi_base.c +++ b/sys/scsi/scsi_base.c @@ -8,7 +8,7 @@ * file. 
* * Written by Julian Elischer (julian@dialix.oz.au) - * $Id: scsi_base.c,v 1.5 1994/01/29 10:30:37 rgrimes Exp $ + * $Id: scsi_base.c,v 1.6 1994/02/07 02:15:01 rgrimes Exp $ */ #define SPLSD splbio @@ -455,7 +455,11 @@ scsi_scsi_cmd(sc_link, scsi_cmd, cmdlen, data_addr, datalen, retval = EFAULT; goto bad; } - xs->data = malloc(datalen, M_TEMP, M_WAITOK); +#ifdef NOBOUNCE + xs->data = malloc(datalen, M_TEMP, M_WAITOK); +#else + xs->data = (caddr_t) vm_bounce_kva_alloc( (datalen + PAGE_SIZE - 1)/PAGE_SIZE); +#endif /* I think waiting is ok *//*XXX */ switch ((int)(flags & (SCSI_DATA_IN | SCSI_DATA_OUT))) { case 0: @@ -538,7 +542,11 @@ retry: bcopy(xs->data, data_addr, datalen); break; } +#ifdef NOBOUNCE free(xs->data, M_TEMP); +#else + vm_bounce_kva_alloc_free(xs->data, (datalen + PAGE_SIZE - 1)/PAGE_SIZE, 0); +#endif } /* * we have finished with the xfer stuct, free it and diff --git a/sys/scsi/sd.c b/sys/scsi/sd.c index 7831071000e8..783de9935ab6 100644 --- a/sys/scsi/sd.c +++ b/sys/scsi/sd.c @@ -14,7 +14,7 @@ * * Ported to run under 386BSD by Julian Elischer (julian@dialix.oz.au) Sept 1992 * - * $Id: sd.c,v 1.21 1994/03/23 09:15:59 davidg Exp $ + * $Id: sd.c,v 1.22 1994/04/05 03:23:32 davidg Exp $ */ #define SPLSD splbio @@ -415,6 +415,9 @@ sdstrategy(bp) if (bounds_check_with_label(bp, &sd->disklabel, sd->wlabel) <= 0) goto done; /* otherwise, process transfer request */ + } else { + bp->b_pblkno = bp->b_blkno; + bp->b_resid = 0; } opri = SPLSD(); dp = &sd->buf_queue; @@ -430,11 +433,7 @@ sdstrategy(bp) /* * Place it in the queue of disk activities for this disk */ -#if 0 cldisksort(dp, bp, 64*1024); -#else - disksort(dp, bp); -#endif /* * Tell the device to get going on the transfer if it's