diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 7fa891f6493f..a0fcd5828df7 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 - * $Id: machdep.c,v 1.189 1996/05/03 21:00:53 phk Exp $ + * $Id: machdep.c,v 1.190 1996/05/10 19:28:44 wollman Exp $ */ #include "npx.h" @@ -378,6 +378,8 @@ again: (nswbuf*MAXPHYS) + pager_map_size, TRUE); exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, (16*ARG_MAX), TRUE); + exech_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, + (32*ARG_MAX), TRUE); u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, (maxproc*UPAGES*PAGE_SIZE), FALSE); diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 04408acb8aa3..f25d932c0a48 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -39,7 +39,7 @@ * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 - * $Id: pmap.c,v 1.88 1996/05/02 22:24:58 phk Exp $ + * $Id: pmap.c,v 1.89 1996/05/03 21:00:57 phk Exp $ */ /* @@ -95,6 +95,7 @@ #include #include #include +#include #include #include @@ -107,16 +108,15 @@ #if defined(DIAGNOSTIC) #define PMAP_DIAGNOSTIC #endif +/* #define OLDREMOVE */ static void init_pv_entries __P((int)); /* * Get PDEs and PTEs for user/kernel address space */ -#define pmap_pde(m, v) (&((m)->pm_pdir[((vm_offset_t)(v) >> PDRSHIFT)&(NPDEPG-1)])) -#define pdir_pde(m, v) (m[((vm_offset_t)(v) >> PDRSHIFT)&(NPDEPG-1)]) - -#define pmap_pte_pa(pte) (*(int *)(pte) & PG_FRAME) +#define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) +#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) #define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) @@ -145,26 +145,36 @@ static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? 
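For reference, a small user-space sketch (not part of the patch) of the index arithmetic behind the simplified pmap_pde()/pdir_pde() macros above: on i386 a 32-bit address shifted right by PDRSHIFT already fits within the 1024-entry page directory, so the old "& (NPDEPG-1)" mask is redundant. The numeric constants, example address, and main() harness are assumptions using the usual i386 values.

/*
 * Illustrative model of the page-directory/page-table index math.
 */
#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT  12
#define PDRSHIFT    22
#define NPDEPG      1024

int
main(void)
{
    unsigned int va = 0xfe012345;                          /* any 32-bit va */
    unsigned int pdi = va >> PDRSHIFT;                     /* directory index */
    unsigned int pti = (va >> PAGE_SHIFT) & (NPDEPG - 1);  /* table index */

    /* a 32-bit va shifted by PDRSHIFT is already < NPDEPG, no mask needed */
    assert(pdi < NPDEPG);
    printf("va 0x%08x -> pde %u, pte %u\n", va, pdi, pti);
    return 0;
}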
*/ static vm_offset_t vm_first_phys; static int nkpt; +static vm_page_t nkpg; +vm_offset_t kernel_vm_end; extern vm_offset_t clean_sva, clean_eva; extern int cpu_class; +#define PV_FREELIST_MIN ((PAGE_SIZE / sizeof (struct pv_entry)) / 2) + +/* + * Data for the pv entry allocation mechanism + */ +static int pv_freelistcnt; +static pv_entry_t pv_freelist; +static vm_offset_t pvva; +static int npvvapg; + /* * All those kernel PT submaps that BSD is so fond of */ pt_entry_t *CMAP1; static pt_entry_t *CMAP2, *ptmmap; -static pv_entry_t pv_table; +static pv_entry_t *pv_table; caddr_t CADDR1, ptvmmap; static caddr_t CADDR2; static pt_entry_t *msgbufmap; struct msgbuf *msgbufp; static void free_pv_entry __P((pv_entry_t pv)); -pt_entry_t * - get_ptbase __P((pmap_t pmap)); -static pv_entry_t - get_pv_entry __P((void)); +static __inline unsigned * get_ptbase __P((pmap_t pmap)); +static pv_entry_t get_pv_entry __P((void)); static void i386_protection_init __P((void)); static void pmap_alloc_pv_entry __P((void)); static void pmap_changebit __P((vm_offset_t pa, int bit, boolean_t setem)); @@ -173,14 +183,25 @@ static void pmap_enter_quick __P((pmap_t pmap, vm_offset_t va, static int pmap_is_managed __P((vm_offset_t pa)); static void pmap_remove_all __P((vm_offset_t pa)); static void pmap_remove_page __P((struct pmap *pmap, vm_offset_t va)); -static __inline void pmap_remove_entry __P((struct pmap *pmap, pv_entry_t pv, +static __inline int pmap_remove_entry __P((struct pmap *pmap, pv_entry_t *pv, vm_offset_t va)); -static void pmap_remove_pte __P((struct pmap *pmap, pt_entry_t *ptq, +static int pmap_remove_pte __P((struct pmap *pmap, unsigned *ptq, vm_offset_t sva)); +static vm_page_t + pmap_pte_vm_page __P((pmap_t pmap, vm_offset_t pt)); static boolean_t pmap_testbit __P((vm_offset_t pa, int bit)); -static void * pmap_getpdir __P((void)); +static __inline void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va, + vm_page_t mpte, vm_offset_t pa)); +static __inline vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va)); +static void pmap_remove_pte_mapping __P((vm_offset_t pa)); +static __inline int pmap_release_free_page __P((pmap_t pmap, vm_page_t p)); +static vm_page_t _pmap_allocpte __P((pmap_t pmap, vm_offset_t va, int ptepindex)); + +#define PDSTACKMAX 16 +static vm_offset_t pdstack[PDSTACKMAX]; +static int pdstackptr; #if defined(PMAP_DIAGNOSTIC) @@ -228,34 +249,38 @@ pmap_update_2pg( vm_offset_t va1, vm_offset_t va2) { } } +static __inline __pure unsigned * +get_ptbase(pmap) + pmap_t pmap; +{ + unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME; + + /* are we current address space or kernel? */ + if (pmap == kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) { + return (unsigned *) PTmap; + } + /* otherwise, we are alternate address space */ + if (frame != (((unsigned) APTDpde) & PG_FRAME)) { + APTDpde = (pd_entry_t) (frame | PG_RW | PG_V); + pmap_update(); + } + return (unsigned *) APTmap; +} + /* * Routine: pmap_pte * Function: * Extract the page table entry associated * with the given map/virtual_address pair. - * [ what about induced faults -wfj] */ -__inline pt_entry_t * __pure +__inline unsigned * __pure pmap_pte(pmap, va) register pmap_t pmap; vm_offset_t va; { - if (pmap && *pmap_pde(pmap, va)) { - vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME; - - /* are we current address space or kernel? 
*/ - if ((pmap == kernel_pmap) || (frame == ((int) PTDpde & PG_FRAME))) - return ((pt_entry_t *) vtopte(va)); - /* otherwise, we are alternate address space */ - else { - if (frame != ((int) APTDpde & PG_FRAME)) { - APTDpde = pmap->pm_pdir[PTDPTDI]; - pmap_update(); - } - return ((pt_entry_t *) avtopte(va)); - } + return get_ptbase(pmap) + i386_btop(va); } return (0); } @@ -266,39 +291,108 @@ pmap_pte(pmap, va) * Extract the physical page address associated * with the given map/virtual_address pair. */ - -vm_offset_t +vm_offset_t __pure pmap_extract(pmap, va) register pmap_t pmap; vm_offset_t va; { - vm_offset_t pa; - if (pmap && *pmap_pde(pmap, va)) { - vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME; - - /* are we current address space or kernel? */ - if ((pmap == kernel_pmap) - || (frame == ((int) PTDpde & PG_FRAME))) { - pa = *(int *) vtopte(va); - /* otherwise, we are alternate address space */ - } else { - if (frame != ((int) APTDpde & PG_FRAME)) { - APTDpde = pmap->pm_pdir[PTDPTDI]; - pmap_update(); - } - pa = *(int *) avtopte(va); - } - return ((pa & PG_FRAME) | (va & ~PG_FRAME)); + unsigned *pte; + pte = get_ptbase(pmap) + i386_btop(va); + return ((*pte & PG_FRAME) | (va & PAGE_MASK)); } return 0; } +/* + * Add a list of wired pages to the kva + * this routine is only used for temporary + * kernel mappings that do not need to have + * page modification or references recorded. + * Note that old mappings are simply written + * over. The page *must* be wired. + */ +void +pmap_qenter(va, m, count) + vm_offset_t va; + vm_page_t *m; + int count; +{ + int i; + int anyvalid = 0; + register unsigned *pte; + + for (i = 0; i < count; i++) { + vm_offset_t tva = va + i * PAGE_SIZE; + unsigned npte = VM_PAGE_TO_PHYS(m[i]) | PG_RW | PG_V; + unsigned opte; + pte = (unsigned *)vtopte(tva); + opte = *pte; + *pte = npte; + if (opte) + pmap_update_1pg(tva); + } +} +/* + * this routine jerks page mappings from the + * kernel -- it is meant only for temporary mappings. + */ +void +pmap_qremove(va, count) + vm_offset_t va; + int count; +{ + int i; + register unsigned *pte; + + for (i = 0; i < count; i++) { + pte = (unsigned *)vtopte(va); + *pte = 0; + pmap_update_1pg(va); + va += PAGE_SIZE; + } +} + +/* + * add a wired page to the kva + * note that in order for the mapping to take effect -- you + * should do a pmap_update after doing the pmap_kenter... + */ +__inline void +pmap_kenter(va, pa) + vm_offset_t va; + register vm_offset_t pa; +{ + register unsigned *pte; + unsigned npte, opte; + + npte = pa | PG_RW | PG_V; + pte = (unsigned *)vtopte(va); + opte = *pte; + *pte = npte; + if (opte) + pmap_update_1pg(va); +} + +/* + * remove a page from the kernel pagetables + */ +__inline void +pmap_kremove(va) + vm_offset_t va; +{ + register unsigned *pte; + + pte = (unsigned *)vtopte(va); + *pte = 0; + pmap_update_1pg(va); +} + /* * determine if a page is managed (memory vs. 
device) */ -static __inline int +static __inline __pure int pmap_is_managed(pa) vm_offset_t pa; { @@ -314,43 +408,21 @@ pmap_is_managed(pa) return 0; } -vm_page_t -pmap_use_pt(pmap, va) - pmap_t pmap; - vm_offset_t va; -{ - vm_offset_t ptepa; - vm_page_t mpte; - - if (va >= UPT_MIN_ADDRESS) - return NULL; - - ptepa = ((vm_offset_t) *pmap_pde(pmap, va)) & PG_FRAME; -#if defined(PMAP_DIAGNOSTIC) - if (!ptepa) - panic("pmap_use_pt: pagetable page missing, va: 0x%x", va); -#endif - - mpte = PHYS_TO_VM_PAGE(ptepa); - ++mpte->hold_count; - return mpte; -} - #if !defined(PMAP_DIAGNOSTIC) __inline #endif -void +int pmap_unuse_pt(pmap, va, mpte) pmap_t pmap; vm_offset_t va; vm_page_t mpte; { if (va >= UPT_MIN_ADDRESS) - return; + return 0; if (mpte == NULL) { vm_offset_t ptepa; - ptepa = ((vm_offset_t) *pmap_pde(pmap, va)) & PG_FRAME; + ptepa = ((vm_offset_t) *pmap_pde(pmap, va)) /* & PG_FRAME */; #if defined(PMAP_DIAGNOSTIC) if (!ptepa) panic("pmap_unuse_pt: pagetable page missing, va: 0x%x", va); @@ -367,23 +439,19 @@ pmap_unuse_pt(pmap, va, mpte) vm_page_unhold(mpte); if ((mpte->hold_count == 0) && - (mpte->wire_count == 0) && - (pmap != kernel_pmap) && - (va < KPT_MIN_ADDRESS)) { + (mpte->wire_count == 0)) { /* * We don't free page-table-pages anymore because it can have a negative * impact on perf at times. Now we just deactivate, and it'll get cleaned - * up if needed... Also, if the page ends up getting used, it will fault - * back into the process address space and be reactivated. + * up if needed... Also, if the page ends up getting used, it will be + * brought back into the process address space by pmap_allocpte and be + * reactivated. */ -#if defined(PMAP_FREE_OLD_PTES) - pmap_page_protect(VM_PAGE_TO_PHYS(mpte), VM_PROT_NONE); - vm_page_free(mpte); -#else mpte->dirty = 0; vm_page_deactivate(mpte); -#endif + return 1; } + return 0; } /* @@ -442,7 +510,7 @@ pmap_bootstrap(firstaddr, loadaddr) v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); va = virtual_avail; - pte = pmap_pte(kernel_pmap, va); + pte = (pt_entry_t *) pmap_pte(kernel_pmap, va); /* * CMAP1/CMAP2 are used for zeroing and copying pages. @@ -464,6 +532,7 @@ pmap_bootstrap(firstaddr, loadaddr) *(int *) CMAP1 = *(int *) CMAP2 = *(int *) PTD = 0; pmap_update(); + } /* @@ -492,10 +561,10 @@ pmap_init(phys_start, phys_end) * Allocate memory for random pmap data structures. Includes the * pv_head_table. 
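A user-space model of the pointer-based pv_head_table introduced here: pv_table becomes one list-head pointer per managed page instead of an embedded header entry, and mappings chain through pv_next. pa_to_pvh() is modelled as a simple index off vm_first_phys, and malloc()/calloc() stand in for the kernel allocators; this is a sketch of the data structure, not the kernel code.

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SHIFT  12

struct pv_entry {
    struct pv_entry *pv_next;
    unsigned long    pv_va;       /* simplified: only the mapped va */
};

static struct pv_entry **pv_table;          /* one head pointer per page */
static unsigned long vm_first_phys = 0x100000;

#define pa_index(pa)   (((pa) - vm_first_phys) >> PAGE_SHIFT)
#define pa_to_pvh(pa)  (&pv_table[pa_index(pa)])

int
main(void)
{
    unsigned long pa = 0x104000, npg = 256;
    struct pv_entry **ppv, *pv;

    pv_table = calloc(npg, sizeof(*pv_table));

    /* head insertion, the way pmap_insert_entry() links a new mapping */
    pv = malloc(sizeof(*pv));
    pv->pv_va = 0x08048000;
    ppv = pa_to_pvh(pa);
    pv->pv_next = *ppv;
    *ppv = pv;

    for (pv = *pa_to_pvh(pa); pv != NULL; pv = pv->pv_next)
        printf("pa 0x%lx mapped at va 0x%lx\n", pa, pv->pv_va);
    return 0;
}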
*/ - s = (vm_size_t) (sizeof(struct pv_entry) * npg); + s = (vm_size_t) (sizeof(struct pv_entry *) * npg); s = round_page(s); addr = (vm_offset_t) kmem_alloc(kernel_map, s); - pv_table = (pv_entry_t) addr; + pv_table = (pv_entry_t *) addr; /* * init the pv free list @@ -529,59 +598,6 @@ pmap_map(virt, start, end, prot) return (virt); } -#if defined(PMAP_KEEP_PDIRS) -int nfreepdir; -caddr_t *pdirlist; -#define NFREEPDIR 3 - -static void * -pmap_getpdir() { - caddr_t *pdir; - if (pdirlist) { - --nfreepdir; - pdir = pdirlist; - pdirlist = (caddr_t *) *pdir; - *pdir = 0; -#if 0 /* Not needed anymore */ - bzero( (caddr_t) pdir, PAGE_SIZE); -#endif - } else { - pdir = (caddr_t *) kmem_alloc(kernel_map, PAGE_SIZE); - } - - return (void *) pdir; -} - -static void -pmap_freepdir(void *pdir) { - if (nfreepdir > NFREEPDIR) { - kmem_free(kernel_map, (vm_offset_t) pdir, PAGE_SIZE); - } else { - int i; - pt_entry_t *s; - s = (pt_entry_t *) pdir; - - /* - * remove wired in kernel mappings - */ - bzero(s + KPTDI, nkpt * PTESIZE); - s[APTDPTDI] = 0; - s[PTDPTDI] = 0; - -#if defined(PMAP_DIAGNOSTIC) - for(i=0;ipm_pdir = pmap_getpdir(); -#else - pmap->pm_pdir = (pd_entry_t *) kmem_alloc(kernel_map, PAGE_SIZE); -#endif + if (pdstackptr > 0) { + --pdstackptr; + pmap->pm_pdir = + (pd_entry_t *)pdstack[pdstackptr]; + } else { + pmap->pm_pdir = + (pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE); + } + + /* + * allocate object for the ptes + */ + pmap->pm_pteobj = vm_object_allocate( OBJT_DEFAULT, + OFF_TO_IDX((KPT_MIN_ADDRESS + 1) - UPT_MIN_ADDRESS)); + + /* + * allocate the page directory page + */ +retry: + ptdpg = vm_page_alloc( pmap->pm_pteobj, OFF_TO_IDX(KPT_MIN_ADDRESS), + VM_ALLOC_ZERO); + if (ptdpg == NULL) { + VM_WAIT; + goto retry; + } + vm_page_wire(ptdpg); + ptdpg->flags &= ~(PG_MAPPED|PG_BUSY); /* not mapped normally */ + ptdpg->valid = VM_PAGE_BITS_ALL; + + pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg)); + if ((ptdpg->flags & PG_ZERO) == 0) + bzero(pmap->pm_pdir, PAGE_SIZE); /* wire in kernel global address entries */ bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE); /* install self-referential address mapping entry */ - *(int *) (pmap->pm_pdir + PTDPTDI) = - ((int) pmap_kextract((vm_offset_t) pmap->pm_pdir)) | PG_V | PG_RW; + *(unsigned *) (pmap->pm_pdir + PTDPTDI) = + VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW | PG_U; pmap->pm_count = 1; } +static __inline int +pmap_release_free_page(pmap, p) + struct pmap *pmap; + vm_page_t p; +{ + int s; + /* + * This code optimizes the case of freeing non-busy + * page-table pages. Those pages are zero now, and + * might as well be placed directly into the zero queue. + */ + s = splvm(); + if (p->flags & PG_BUSY) { + p->flags |= PG_WANTED; + tsleep(p, PVM, "pmaprl", 0); + splx(s); + return 0; + } + + if (p->flags & PG_MAPPED) { + pmap_remove_pte_mapping(VM_PAGE_TO_PHYS(p)); + p->flags &= ~PG_MAPPED; + } + +#if defined(PMAP_DIAGNOSTIC) + if (p->hold_count) + panic("pmap_release: freeing held page table page"); +#endif + /* + * Page directory pages need to have the kernel + * stuff cleared, so they can go into the zero queue also. 
+ */ + if (p->pindex == OFF_TO_IDX(KPT_MIN_ADDRESS)) { + unsigned *pde = (unsigned *) pmap->pm_pdir; + bzero(pde + KPTDI, nkpt * PTESIZE); + pde[APTDPTDI] = 0; + pde[PTDPTDI] = 0; + pmap_kremove((vm_offset_t) pmap->pm_pdir); + } + + vm_page_free(p); + TAILQ_REMOVE(&vm_page_queue_free, p, pageq); + TAILQ_INSERT_HEAD(&vm_page_queue_zero, p, pageq); + p->queue = PQ_ZERO; + splx(s); + ++vm_page_zero_count; + return 1; +} + +/* + * Release any resources held by the given physical map. + * Called when a pmap initialized by pmap_pinit is being released. + * Should only be called if the map contains no valid mappings. + */ +void +pmap_release(pmap) + register struct pmap *pmap; +{ + vm_page_t p,n,ptdpg; + vm_object_t object = pmap->pm_pteobj; + + ptdpg = NULL; +retry: + for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) { + n = TAILQ_NEXT(p, listq); + if (p->pindex == OFF_TO_IDX(KPT_MIN_ADDRESS)) { + ptdpg = p; + continue; + } + if (!pmap_release_free_page(pmap, p)) + goto retry; + } + pmap_release_free_page(pmap, ptdpg); + + vm_object_deallocate(object); + if (pdstackptr < PDSTACKMAX) { + pdstack[pdstackptr] = (vm_offset_t) pmap->pm_pdir; + ++pdstackptr; + } else { + kmem_free(kernel_map, (vm_offset_t) pmap->pm_pdir, PAGE_SIZE); + } +} + /* * grow the number of kernel page table entries, if needed */ -static vm_page_t nkpg; -vm_offset_t kernel_vm_end; - void pmap_growkernel(vm_offset_t addr) { @@ -630,14 +754,14 @@ pmap_growkernel(vm_offset_t addr) kernel_vm_end = KERNBASE; nkpt = 0; while (pdir_pde(PTD, kernel_vm_end)) { - kernel_vm_end = (kernel_vm_end + NBPDR) & ~(NBPDR-1); + kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); ++nkpt; } } - addr = (addr + NBPDR) & ~(NBPDR - 1); + addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); while (kernel_vm_end < addr) { if (pdir_pde(PTD, kernel_vm_end)) { - kernel_vm_end = (kernel_vm_end + NBPDR) & ~(NBPDR-1); + kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); continue; } ++nkpt; @@ -659,7 +783,7 @@ pmap_growkernel(vm_offset_t addr) } } *pmap_pde(kernel_pmap, kernel_vm_end) = pdir_pde(PTD, kernel_vm_end); - kernel_vm_end = (kernel_vm_end + NBPDR) & ~(NBPDR-1); + kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); } splx(s); } @@ -685,22 +809,6 @@ pmap_destroy(pmap) } } -/* - * Release any resources held by the given physical map. - * Called when a pmap initialized by pmap_pinit is being released. - * Should only be called if the map contains no valid mappings. - */ -void -pmap_release(pmap) - register struct pmap *pmap; -{ -#if defined(PMAP_KEEP_PDIRS) - pmap_freepdir( (void *)pmap->pm_pdir); -#else - kmem_free(kernel_map, (vm_offset_t) pmap->pm_pdir, PAGE_SIZE); -#endif -} - /* * Add a reference to the specified pmap. 
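The pdstack array above replaces the old pmap_getpdir()/pmap_freepdir() cache of page-directory pages. A minimal user-space sketch of that mechanism, with the hypothetical pdir_get()/pdir_put() helpers and malloc()/free() standing in for kmem_alloc_pageable()/kmem_free():

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE   4096
#define PDSTACKMAX  16

static void *pdstack[PDSTACKMAX];
static int   pdstackptr;

static void *
pdir_get(void)
{
    if (pdstackptr > 0)
        return pdstack[--pdstackptr];   /* reuse a cached directory */
    return malloc(PAGE_SIZE);           /* else allocate a fresh one */
}

static void
pdir_put(void *pdir)
{
    if (pdstackptr < PDSTACKMAX)
        pdstack[pdstackptr++] = pdir;   /* keep it for the next pmap */
    else
        free(pdir);                     /* cache full, really release it */
}

int
main(void)
{
    void *a = pdir_get(), *b = pdir_get();

    pdir_put(a);
    pdir_put(b);
    printf("cached directories: %d\n", pdstackptr);
    return 0;
}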
*/ @@ -713,16 +821,6 @@ pmap_reference(pmap) } } -#define PV_FREELIST_MIN ((PAGE_SIZE / sizeof (struct pv_entry)) / 2) - -/* - * Data for the pv entry allocation mechanism - */ -static int pv_freelistcnt; -static pv_entry_t pv_freelist; -static vm_offset_t pvva; -static int npvvapg; - /* * free the pv_entry back to the free list */ @@ -730,8 +828,6 @@ static __inline void free_pv_entry(pv) pv_entry_t pv; { - if (!pv) - return; ++pv_freelistcnt; pv->pv_next = pv_freelist; pv_freelist = pv; @@ -777,10 +873,6 @@ pmap_alloc_pv_entry() if (npvvapg) { vm_page_t m; - /* - * we do this to keep recursion away - */ - pv_freelistcnt += PV_FREELIST_MIN; /* * allocate a physical page out of the vm system */ @@ -818,14 +910,11 @@ pmap_alloc_pv_entry() entry++; } } - pv_freelistcnt -= PV_FREELIST_MIN; } if (!pv_freelist) panic("get_pv_entry: cannot get a pv_entry_t"); } - - /* * init the pv_entry allocation system */ @@ -839,30 +928,13 @@ init_pv_entries(npg) * kvm space is fairly cheap, be generous!!! (the system can panic if * this is too small.) */ - npvvapg = btoc((npg * PVSPERPAGE) * sizeof(struct pv_entry)); + npvvapg = ((npg * PVSPERPAGE) * sizeof(struct pv_entry) + + PAGE_SIZE - 1) / PAGE_SIZE; pvva = kmem_alloc_pageable(kernel_map, npvvapg * PAGE_SIZE); /* * get the first batch of entries */ - free_pv_entry(get_pv_entry()); -} - -__inline pt_entry_t * -get_ptbase(pmap) - pmap_t pmap; -{ - vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME; - - /* are we current address space or kernel? */ - if (pmap == kernel_pmap || frame == ((int) PTDpde & PG_FRAME)) { - return PTmap; - } - /* otherwise, we are alternate address space */ - if (frame != ((int) APTDpde & PG_FRAME)) { - APTDpde = pmap->pm_pdir[PTDPTDI]; - pmap_update(); - } - return APTmap; + pmap_alloc_pv_entry(); } /* @@ -871,77 +943,70 @@ get_ptbase(pmap) * to the header. Otherwise we must search the list for * the entry. In either case we free the now unused entry. 
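A stand-alone model of the pv_entry free-list allocator shown above: pages are carved into entries and threaded onto pv_freelist, get_pv_entry() pops one and free_pv_entry() pushes one back. alloc_pv_entries() is a simplified stand-in for pmap_alloc_pv_entry(), and malloc() replaces the kernel page allocation; a sketch only.

#include <assert.h>
#include <stdlib.h>

#define PAGE_SIZE   4096

struct pv_entry {
    struct pv_entry *pv_next;
};

static struct pv_entry *pv_freelist;
static int              pv_freelistcnt;

static void
free_pv_entry(struct pv_entry *pv)
{
    pv_freelistcnt++;
    pv->pv_next = pv_freelist;
    pv_freelist = pv;
}

static void
alloc_pv_entries(void)
{
    struct pv_entry *entry = malloc(PAGE_SIZE);
    int i, n = PAGE_SIZE / sizeof(struct pv_entry);

    for (i = 0; i < n; i++)
        free_pv_entry(entry + i);       /* thread the whole page onto the list */
}

static struct pv_entry *
get_pv_entry(void)
{
    struct pv_entry *tmp;

    if (pv_freelist == NULL)
        alloc_pv_entries();
    tmp = pv_freelist;
    pv_freelist = tmp->pv_next;
    pv_freelistcnt--;
    return tmp;
}

int
main(void)
{
    struct pv_entry *pv = get_pv_entry();

    assert(pv_freelistcnt == PAGE_SIZE / sizeof(struct pv_entry) - 1);
    free_pv_entry(pv);
    return 0;
}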
*/ -static __inline void -pmap_remove_entry(pmap, pv, va) +static __inline int +pmap_remove_entry(pmap, ppv, va) struct pmap *pmap; - pv_entry_t pv; + pv_entry_t *ppv; vm_offset_t va; { pv_entry_t npv; int s; - s = splhigh(); - if (pmap == pv->pv_pmap && va == pv->pv_va) { - pmap_unuse_pt(pmap, va, pv->pv_ptem); - npv = pv->pv_next; - if (npv) { - *pv = *npv; + + s = splvm(); + for (npv = *ppv; npv; (ppv = &npv->pv_next, npv = *ppv)) { + if (pmap == npv->pv_pmap && va == npv->pv_va) { + int rtval = pmap_unuse_pt(pmap, va, npv->pv_ptem); + *ppv = npv->pv_next; free_pv_entry(npv); - } else { - pv->pv_pmap = NULL; - } - } else { - for (npv = pv->pv_next; npv; (pv = npv, npv = pv->pv_next)) { - if (pmap == npv->pv_pmap && va == npv->pv_va) { - pmap_unuse_pt(pmap, va, npv->pv_ptem); - pv->pv_next = npv->pv_next; - free_pv_entry(npv); - break; - } + splx(s); + return rtval; } } splx(s); + return 0; } /* * pmap_remove_pte: do the things to unmap a page in a process */ -static void -pmap_remove_pte(pmap, ptq, sva) +static +#if !defined(PMAP_DIAGNOSTIC) +__inline +#endif +int +pmap_remove_pte(pmap, ptq, va) struct pmap *pmap; - pt_entry_t *ptq; - vm_offset_t sva; + unsigned *ptq; + vm_offset_t va; { - pt_entry_t oldpte; - vm_offset_t pa; - pv_entry_t pv; + unsigned oldpte; + pv_entry_t *ppv; + int i; + int s; oldpte = *ptq; - if (((int)oldpte) & PG_W) - pmap->pm_stats.wired_count--; - pmap->pm_stats.resident_count--; - - pa = ((vm_offset_t)oldpte) & PG_FRAME; - if (pmap_is_managed(pa)) { - if ((int) oldpte & PG_M) { + *ptq = 0; + if (oldpte & PG_W) + pmap->pm_stats.wired_count -= 1; + pmap->pm_stats.resident_count -= 1; + if (oldpte & PG_MANAGED) { + if (oldpte & PG_M) { #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified(oldpte)) { - printf("pmap_remove: modified page not writable: va: 0x%lx, pte: 0x%lx\n", sva, (int) oldpte); + printf("pmap_remove: modified page not writable: va: 0x%lx, pte: 0x%lx\n", va, (int) oldpte); } #endif - - if (sva < USRSTACK + (UPAGES * PAGE_SIZE) || - (sva >= KERNBASE && (sva < clean_sva || sva >= clean_eva))) { - PHYS_TO_VM_PAGE(pa)->dirty = VM_PAGE_BITS_ALL; + if (va < clean_sva || va >= clean_eva) { + PHYS_TO_VM_PAGE(oldpte)->dirty = VM_PAGE_BITS_ALL; } } - pv = pa_to_pvh(pa); - pmap_remove_entry(pmap, pv, sva); + ppv = pa_to_pvh(oldpte); + return pmap_remove_entry(pmap, ppv, va); } else { - pmap_unuse_pt(pmap, sva, NULL); + return pmap_unuse_pt(pmap, va, NULL); } - *ptq = 0; - return; + return 0; } /* @@ -952,24 +1017,26 @@ pmap_remove_page(pmap, va) struct pmap *pmap; register vm_offset_t va; { - register pt_entry_t *ptbase, *ptq; + register unsigned *ptq; + /* * if there is no pte for this address, just skip it!!! */ - if (*pmap_pde(pmap, va) == 0) + if (*pmap_pde(pmap, va) == 0) { return; + } + /* * get a local va for mappings for this pmap. */ - ptbase = get_ptbase(pmap); - ptq = ptbase + i386_btop(va); + ptq = get_ptbase(pmap) + i386_btop(va); if (*ptq) { - pmap_remove_pte(pmap, ptq, va); + (void) pmap_remove_pte(pmap, ptq, va); pmap_update_1pg(va); } return; } - + /* * Remove the given range of addresses from the specified map. 
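The rewritten pmap_remove_entry() walks the chain with a pointer to the previous link, so unlinking the head and unlinking an interior entry are the same operation. A minimal sketch of that idiom, keyed only on the virtual address (remove_entry() is a hypothetical, simplified stand-in):

#include <assert.h>
#include <stdlib.h>

struct pv_entry {
    struct pv_entry *pv_next;
    unsigned long    pv_va;
};

static int
remove_entry(struct pv_entry **ppv, unsigned long va)
{
    struct pv_entry *npv;

    for (npv = *ppv; npv != NULL; ppv = &npv->pv_next, npv = *ppv) {
        if (npv->pv_va == va) {
            *ppv = npv->pv_next;        /* unlink, head or interior alike */
            free(npv);
            return 1;
        }
    }
    return 0;
}

int
main(void)
{
    struct pv_entry *head = NULL;
    unsigned long va;

    for (va = 0x1000; va <= 0x3000; va += 0x1000) {
        struct pv_entry *pv = malloc(sizeof(*pv));
        pv->pv_va = va;
        pv->pv_next = head;
        head = pv;
    }
    assert(remove_entry(&head, 0x2000) == 1);   /* interior entry */
    assert(remove_entry(&head, 0x3000) == 1);   /* current head */
    assert(remove_entry(&head, 0x9000) == 0);   /* not present */
    return 0;
}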
* @@ -982,11 +1049,18 @@ pmap_remove(pmap, sva, eva) register vm_offset_t sva; register vm_offset_t eva; { - register pt_entry_t *ptbase; + register unsigned *ptbase; + vm_offset_t va; vm_offset_t pdnxt; vm_offset_t ptpaddr; vm_offset_t sindex, eindex; vm_page_t mpte; + int s; +#if defined(OLDREMOVE) || defined(I386_CPU) + int anyvalid; +#else + int mustremove; +#endif if (pmap == NULL) return; @@ -1001,6 +1075,16 @@ pmap_remove(pmap, sva, eva) return; } +#if !defined(OLDREMOVE) && !defined(I386_CPU) + if ((pmap == kernel_pmap) || + (pmap->pm_pdir[PTDPTDI] == PTDpde)) + mustremove = 1; + else + mustremove = 0; +#else + anyvalid = 0; +#endif + /* * Get a local virtual address for the mappings that are being * worked with. @@ -1010,6 +1094,7 @@ pmap_remove(pmap, sva, eva) sindex = i386_btop(sva); eindex = i386_btop(eva); + for (; sindex < eindex; sindex = pdnxt) { /* @@ -1046,14 +1131,70 @@ pmap_remove(pmap, sva, eva) } for ( ;sindex != pdnxt; sindex++) { - if (ptbase[sindex] == 0) + vm_offset_t va; + if (ptbase[sindex] == 0) { continue; - pmap_remove_pte(pmap, ptbase + sindex, i386_ptob(sindex)); - if (mpte->hold_count == 0 && mpte->wire_count == 0) + } + va = i386_ptob(sindex); +#if defined(OLDREMOVE) || defined(I386_CPU) + anyvalid = 1; +#else + if (mustremove) + pmap_update_1pg(va); +#endif + if (pmap_remove_pte(pmap, + ptbase + sindex, va)) break; } } - pmap_update(); + +#if defined(OLDREMOVE) || defined(I386_CPU) + if (anyvalid) { + /* are we current address space or kernel? */ + if (pmap == kernel_pmap) { + pmap_update(); + } else if (pmap->pm_pdir[PTDPTDI] == PTDpde) { + pmap_update(); + } + } +#endif +} + + +void +pmap_remove_pte_mapping(pa) + vm_offset_t pa; +{ + register pv_entry_t pv, *ppv, npv; + register unsigned *pte, *ptbase; + vm_offset_t va; + int s; + int anyvalid = 0; + + ppv = pa_to_pvh(pa); + + for (pv = *ppv; pv; pv=pv->pv_next) { + unsigned tpte; + struct pmap *pmap; + + anyvalid = 1; + pmap = pv->pv_pmap; + pte = get_ptbase(pmap) + i386_btop(pv->pv_va); + if (tpte = *pte) { + pmap->pm_stats.resident_count--; + *pte = 0; + if (tpte & PG_W) + pmap->pm_stats.wired_count--; + } + } + + if (anyvalid) { + for (pv = *ppv; pv; pv = npv) { + npv = pv->pv_next; + free_pv_entry(pv); + } + *ppv = NULL; + } } /* @@ -1068,17 +1209,15 @@ pmap_remove(pmap, sva, eva) * inefficient because they iteratively called * pmap_remove (slow...) */ -static void +static __inline void pmap_remove_all(pa) vm_offset_t pa; { - register pv_entry_t pv, opv, npv; - register pt_entry_t *pte, *ptbase; + register pv_entry_t pv, *ppv, npv; + register unsigned *pte, *ptbase; vm_offset_t va; - struct pmap *pmap; vm_page_t m; int s; - int anyvalid = 0; #if defined(PMAP_DIAGNOSTIC) /* @@ -1090,62 +1229,49 @@ pmap_remove_all(pa) } #endif - pa = pa & PG_FRAME; - opv = pa_to_pvh(pa); - if (opv->pv_pmap == NULL) - return; - m = PHYS_TO_VM_PAGE(pa); - s = splhigh(); - pv = opv; - while (pv && ((pmap = pv->pv_pmap) != NULL)) { + ppv = pa_to_pvh(pa); + + s = splvm(); + for (pv = *ppv; pv; pv=pv->pv_next) { int tpte; + struct pmap *pmap; + + pmap = pv->pv_pmap; ptbase = get_ptbase(pmap); va = pv->pv_va; pte = ptbase + i386_btop(va); if (tpte = ((int) *pte)) { + pmap->pm_stats.resident_count--; *pte = 0; if (tpte & PG_W) pmap->pm_stats.wired_count--; - pmap->pm_stats.resident_count--; - anyvalid = 1; - /* * Update the vm_page_t clean and reference bits. 
*/ - if ((tpte & PG_M) != 0) { + if (tpte & PG_M) { #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) tpte)) { printf("pmap_remove_all: modified page not writable: va: 0x%lx, pte: 0x%lx\n", va, tpte); } #endif - if (va < USRSTACK + (UPAGES * PAGE_SIZE) || - (va >= KERNBASE && (va < clean_sva || va >= clean_eva))) { + if (va < clean_sva || va >= clean_eva) { m->dirty = VM_PAGE_BITS_ALL; } } } - pv = pv->pv_next; } - if (opv->pv_pmap != NULL) { - pmap_unuse_pt(opv->pv_pmap, opv->pv_va, opv->pv_ptem); - for (pv = opv->pv_next; pv; pv = npv) { - npv = pv->pv_next; - pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); - free_pv_entry(pv); - } + for (pv = *ppv; pv; pv = npv) { + npv = pv->pv_next; + pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); + free_pv_entry(pv); } - - opv->pv_pmap = NULL; - opv->pv_next = NULL; + *ppv = NULL; splx(s); - if (anyvalid) - pmap_update(); } - /* * Set the physical protection on the * specified range of this map as requested. @@ -1156,13 +1282,14 @@ pmap_protect(pmap, sva, eva, prot) vm_offset_t sva, eva; vm_prot_t prot; { - register pt_entry_t *pte; - register pt_entry_t *ptbase; + register unsigned *pte; + register vm_offset_t va; + register unsigned *ptbase; vm_offset_t pdnxt; vm_offset_t ptpaddr; vm_offset_t sindex, eindex; vm_page_t mpte; - int anychanged; + int anyvalid; if (pmap == NULL) @@ -1175,7 +1302,7 @@ pmap_protect(pmap, sva, eva, prot) if (prot & VM_PROT_WRITE) return; - anychanged = 0; + anyvalid = 0; ptbase = get_ptbase(pmap); @@ -1205,26 +1332,177 @@ pmap_protect(pmap, sva, eva, prot) } for (; sindex != pdnxt; sindex++) { - if (ptbase[sindex] == 0) - continue; - pte = ptbase + sindex; - pbits = *(int *)pte; + + unsigned pbits = ptbase[sindex]; + if (pbits & PG_RW) { if (pbits & PG_M) { - vm_page_t m; - vm_offset_t pa = pbits & PG_FRAME; - m = PHYS_TO_VM_PAGE(pa); + vm_page_t m = PHYS_TO_VM_PAGE(pbits); m->dirty = VM_PAGE_BITS_ALL; } - *(int *)pte &= ~(PG_M|PG_RW); - anychanged=1; + ptbase[sindex] = pbits & ~(PG_M|PG_RW); + anyvalid = 1; } } } - if (anychanged) + if (anyvalid) pmap_update(); } +/* + * Create a pv entry for page at pa for + * (pmap, va). + */ +static __inline void +pmap_insert_entry(pmap, va, mpte, pa) + pmap_t pmap; + vm_offset_t va; + vm_page_t mpte; + vm_offset_t pa; +{ + + int s; + pv_entry_t *ppv, pv; + + s = splvm(); + pv = get_pv_entry(); + pv->pv_va = va; + pv->pv_pmap = pmap; + pv->pv_ptem = mpte; + + ppv = pa_to_pvh(pa); + if (*ppv) + pv->pv_next = *ppv; + else + pv->pv_next = NULL; + *ppv = pv; + splx(s); +} + +/* + * this routine is called if the page table page is not + * mapped correctly. + */ +static vm_page_t +_pmap_allocpte(pmap, va, ptepindex) + pmap_t pmap; + vm_offset_t va; + int ptepindex; +{ + vm_offset_t pteva, ptepa; + vm_page_t m; + + /* + * Find or fabricate a new pagetable page + */ +retry: + m = vm_page_lookup(pmap->pm_pteobj, ptepindex); + if (m == NULL) { + m = vm_page_alloc(pmap->pm_pteobj, ptepindex, VM_ALLOC_ZERO); + if (m == NULL) { + VM_WAIT; + goto retry; + } + if ((m->flags & PG_ZERO) == 0) + pmap_zero_page(VM_PAGE_TO_PHYS(m)); + m->flags &= ~(PG_ZERO|PG_BUSY); + m->valid = VM_PAGE_BITS_ALL; + } + + /* + * mark the object writeable + */ + pmap->pm_pteobj->flags |= OBJ_WRITEABLE; + + /* + * Increment the hold count for the page table page + * (denoting a new mapping.) 
+ */ + ++m->hold_count; + + /* + * Activate the pagetable page, if it isn't already + */ + if (m->queue != PQ_ACTIVE) + vm_page_activate(m); + + /* + * Map the pagetable page into the process address space, if + * it isn't already there. + */ + pteva = ((vm_offset_t) vtopte(va)) & PG_FRAME; + ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; + if (ptepa == 0) { + int s; + pv_entry_t pv, *ppv; + + pmap->pm_stats.resident_count++; + + s = splvm(); + pv = get_pv_entry(); + + pv->pv_va = pteva; + pv->pv_pmap = pmap; + pv->pv_next = NULL; + pv->pv_ptem = NULL; + + ptepa = VM_PAGE_TO_PHYS(m); + pmap->pm_pdir[ptepindex] = + (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_MANAGED); + ppv = pa_to_pvh(ptepa); +#if defined(PMAP_DIAGNOSTIC) + if (*ppv) + panic("pmap_allocpte: page is already mapped"); +#endif + *ppv = pv; + splx(s); + m->flags |= PG_MAPPED; + } else { +#if defined(PMAP_DIAGNOSTIC) + if (VM_PAGE_TO_PHYS(m) != (ptepa & PG_FRAME)) + panic("pmap_allocpte: mismatch"); +#endif + pmap->pm_pdir[ptepindex] = + (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_MANAGED); + pmap_update_1pg(pteva); + m->flags |= PG_MAPPED; + } + return m; +} + +static __inline vm_page_t +pmap_allocpte(pmap, va) + pmap_t pmap; + vm_offset_t va; +{ + int ptepindex; + vm_offset_t ptepa; + vm_page_t m; + + /* + * Calculate pagetable page index + */ + ptepindex = va >> PDRSHIFT; + + /* + * Get the page directory entry + */ + ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; + + /* + * If the page table page is mapped, we just increment the + * hold count, and activate it. + */ + if ((ptepa & (PG_RW|PG_U|PG_V)) == (PG_RW|PG_U|PG_V)) { + m = PHYS_TO_VM_PAGE(ptepa); + ++m->hold_count; + if (m->queue != PQ_ACTIVE) + vm_page_activate(m); + return m; + } + return _pmap_allocpte(pmap, va, ptepindex); +} + /* * Insert the given physical page (p) at * the specified virtual address (v) in the @@ -1245,40 +1523,32 @@ pmap_enter(pmap, va, pa, prot, wired) vm_prot_t prot; boolean_t wired; { - register pt_entry_t *pte; + register unsigned *pte; vm_offset_t opa; - register pv_entry_t pv, npv; vm_offset_t origpte, newpte; + vm_offset_t ptepa; + vm_page_t mpte; + int s; if (pmap == NULL) return; - pv = NULL; - - va = va & PG_FRAME; + va &= PG_FRAME; if (va > VM_MAX_KERNEL_ADDRESS) panic("pmap_enter: toobig"); + mpte = NULL; /* * In the case that a page table page is not * resident, we are creating it here. */ - if ((va < VM_MIN_KERNEL_ADDRESS) && - (curproc != NULL) && - (pmap->pm_map->pmap == pmap)) { - vm_offset_t v; - - v = (vm_offset_t) vtopte(va); - /* Fault the pte only if needed: */ - if (*((int *)vtopte(v)) == 0) - (void) vm_fault(pmap->pm_map, - trunc_page(v), VM_PROT_WRITE, FALSE); - } + if (va < UPT_MIN_ADDRESS) + mpte = pmap_allocpte(pmap, va); + pte = pmap_pte(pmap, va); /* * Page Directory table entry not valid, we need a new PT page */ - pte = pmap_pte(pmap, va); if (pte == NULL) { printf("kernel page directory invalid pdir=%p, va=0x%lx\n", pmap->pm_pdir[PTDPTDI], va); @@ -1286,10 +1556,9 @@ pmap_enter(pmap, va, pa, prot, wired) } origpte = *(vm_offset_t *)pte; + pa &= PG_FRAME; opa = origpte & PG_FRAME; - pa = pa & PG_FRAME; - /* * Mapping has not changed, must be protection or wiring change. */ @@ -1315,55 +1584,35 @@ pmap_enter(pmap, va, pa, prot, wired) * We might be turning off write access to the page, * so we go ahead and sense modify status. 
*/ - if (origpte & PG_M) { + if (origpte & PG_MANAGED) { vm_page_t m; - m = PHYS_TO_VM_PAGE(pa); - m->dirty = VM_PAGE_BITS_ALL; + if (origpte & PG_M) { + m = PHYS_TO_VM_PAGE(pa); + m->dirty = VM_PAGE_BITS_ALL; + } + pa |= PG_MANAGED; } + + if (mpte) + --mpte->hold_count; + goto validate; } /* * Mapping has changed, invalidate old range and fall through to * handle validating new mapping. */ - if (opa) { - pmap_remove_page(pmap, va); - opa = 0; - origpte = 0; - } + if (opa) + (void) pmap_remove_pte(pmap, pte, va); + /* * Enter on the PV list if part of our managed memory Note that we * raise IPL while manipulating pv_table since pmap_enter can be * called at interrupt time. */ if (pmap_is_managed(pa)) { - int s; - - pv = pa_to_pvh(pa); - s = splhigh(); - /* - * No entries yet, use header as the first entry - */ - if (pv->pv_pmap == NULL) { - pv->pv_va = va; - pv->pv_pmap = pmap; - pv->pv_next = NULL; - pv->pv_ptem = NULL; - } - /* - * There is at least one other VA mapping this page. Place - * this entry after the header. - */ - else { - npv = get_pv_entry(); - npv->pv_va = va; - npv->pv_pmap = pmap; - npv->pv_next = pv->pv_next; - pv->pv_next = npv; - pv = npv; - pv->pv_ptem = NULL; - } - splx(s); + pmap_insert_entry(pmap, va, mpte, pa); + pa |= PG_MANAGED; } /* @@ -1383,106 +1632,16 @@ validate: newpte |= PG_W; if (va < UPT_MIN_ADDRESS) newpte |= PG_U; - else if (va < UPT_MAX_ADDRESS) - newpte |= PG_U | PG_RW; /* * if the mapping or permission bits are different, we need * to update the pte. */ if ((origpte & ~(PG_M|PG_A)) != newpte) { - *pte = (pt_entry_t) newpte; + *pte = newpte; if (origpte) pmap_update_1pg(va); } - - if (origpte == 0) { - vm_page_t mpte; - mpte = pmap_use_pt(pmap, va); - if (pv) - pv->pv_ptem = mpte; - } -} - -/* - * Add a list of wired pages to the kva - * this routine is only used for temporary - * kernel mappings that do not need to have - * page modification or references recorded. - * Note that old mappings are simply written - * over. The page *must* be wired. - */ -void -pmap_qenter(va, m, count) - vm_offset_t va; - vm_page_t *m; - int count; -{ - int i; - register pt_entry_t *pte; - - for (i = 0; i < count; i++) { - vm_offset_t tva = va + i * PAGE_SIZE; - pt_entry_t npte = (pt_entry_t) ((int) (VM_PAGE_TO_PHYS(m[i]) | PG_RW | PG_V)); - pt_entry_t opte; - pte = vtopte(tva); - opte = *pte; - *pte = npte; - if (opte) pmap_update_1pg(tva); - } -} -/* - * this routine jerks page mappings from the - * kernel -- it is meant only for temporary mappings. - */ -void -pmap_qremove(va, count) - vm_offset_t va; - int count; -{ - int i; - register pt_entry_t *pte; - - for (i = 0; i < count; i++) { - vm_offset_t tva = va + i * PAGE_SIZE; - pte = vtopte(tva); - *pte = 0; - pmap_update_1pg(tva); - } -} - -/* - * add a wired page to the kva - * note that in order for the mapping to take effect -- you - * should do a pmap_update after doing the pmap_kenter... 
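A toy model of the pmap_kenter()/pmap_kremove() pattern introduced earlier in this patch: the pte is built as pa | PG_RW | PG_V, and the TLB entry is invalidated only when a live mapping is being overwritten. The array page table, the kenter()/kremove() names, and tlb_flush_one() (standing in for pmap_update_1pg()) are illustrative assumptions:

#include <stdio.h>

#define PAGE_SHIFT  12
#define PG_V        0x001
#define PG_RW       0x002

static unsigned int kpt[16];            /* toy kernel page table */

static void
tlb_flush_one(unsigned int va)
{
    printf("invlpg 0x%x\n", va);
}

static void
kenter(unsigned int va, unsigned int pa)
{
    unsigned int *pte = &kpt[va >> PAGE_SHIFT];
    unsigned int opte = *pte;

    *pte = pa | PG_RW | PG_V;
    if (opte)                           /* only flush if something was mapped */
        tlb_flush_one(va);
}

static void
kremove(unsigned int va)
{
    kpt[va >> PAGE_SHIFT] = 0;
    tlb_flush_one(va);
}

int
main(void)
{
    kenter(0x2000, 0x00400000);         /* fresh mapping: no flush needed */
    kenter(0x2000, 0x00800000);         /* replaces a live pte: flush */
    kremove(0x2000);
    return 0;
}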
- */ -void -pmap_kenter(va, pa) - vm_offset_t va; - register vm_offset_t pa; -{ - register pt_entry_t *pte; - pt_entry_t npte, opte; - - npte = (pt_entry_t) ((int) (pa | PG_RW | PG_V)); - pte = vtopte(va); - opte = *pte; - *pte = npte; - if (opte) pmap_update_1pg(va); -} - -/* - * remove a page from the kernel pagetables - */ -void -pmap_kremove(va) - vm_offset_t va; -{ - register pt_entry_t *pte; - - pte = vtopte(va); - *pte = (pt_entry_t) 0; - pmap_update_1pg(va); } /* @@ -1502,46 +1661,27 @@ pmap_enter_quick(pmap, va, pa) vm_offset_t va; register vm_offset_t pa; { - register pt_entry_t *pte; - register pv_entry_t pv, npv; - int s; + register unsigned *pte; + vm_page_t mpte; + + mpte = NULL; + /* + * In the case that a page table page is not + * resident, we are creating it here. + */ + if (va < UPT_MIN_ADDRESS) + mpte = pmap_allocpte(pmap, va); + + pte = (unsigned *)vtopte(va); + if (*pte) + (void) pmap_remove_pte(pmap, pte, va); /* * Enter on the PV list if part of our managed memory Note that we * raise IPL while manipulating pv_table since pmap_enter can be * called at interrupt time. */ - - pte = vtopte(va); - /* a fault on the page table might occur here */ - if (*pte) { - pmap_remove_page(pmap, va); - } - - pv = pa_to_pvh(pa); - s = splhigh(); - /* - * No entries yet, use header as the first entry - */ - if (pv->pv_pmap == NULL) { - pv->pv_pmap = pmap; - pv->pv_va = va; - pv->pv_next = NULL; - } - /* - * There is at least one other VA mapping this page. Place this entry - * after the header. - */ - else { - npv = get_pv_entry(); - npv->pv_va = va; - npv->pv_pmap = pmap; - npv->pv_next = pv->pv_next; - pv->pv_next = npv; - pv = npv; - } - splx(s); - pv->pv_ptem = pmap_use_pt(pmap, va); + pmap_insert_entry(pmap, va, mpte, pa); /* * Increment counters @@ -1551,7 +1691,7 @@ pmap_enter_quick(pmap, va, pa) /* * Now validate mapping with RO protection */ - *pte = (pt_entry_t) ((int) (pa | PG_V | PG_U)); + *pte = pa | PG_V | PG_U | PG_MANAGED; return; } @@ -1583,11 +1723,6 @@ pmap_object_init_pt(pmap, addr, object, pindex, size) return; } - /* - * remove any already used mappings - */ - pmap_remove( pmap, trunc_page(addr), round_page(addr + size)); - /* * if we are processing a major portion of the object, then scan the * entire thing. 
@@ -1595,9 +1730,9 @@ pmap_object_init_pt(pmap, addr, object, pindex, size) if (psize > (object->size >> 2)) { objpgs = psize; - for (p = object->memq.tqh_first; + for (p = TAILQ_FIRST(&object->memq); ((objpgs > 0) && (p != NULL)); - p = p->listq.tqe_next) { + p = TAILQ_NEXT(p, listq)) { tmpidx = p->pindex; if (tmpidx < pindex) { @@ -1614,7 +1749,7 @@ pmap_object_init_pt(pmap, addr, object, pindex, size) vm_page_deactivate(p); vm_page_hold(p); p->flags |= PG_MAPPED; - pmap_enter_quick(pmap, + pmap_enter_quick(pmap, addr + (tmpidx << PAGE_SHIFT), VM_PAGE_TO_PHYS(p)); vm_page_unhold(p); @@ -1627,14 +1762,13 @@ pmap_object_init_pt(pmap, addr, object, pindex, size) */ for (tmpidx = 0; tmpidx < psize; tmpidx += 1) { p = vm_page_lookup(object, tmpidx + pindex); - if (p && (p->busy == 0) && + if (p && ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && + (p->busy == 0) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { - if (p->queue == PQ_CACHE) - vm_page_deactivate(p); vm_page_hold(p); p->flags |= PG_MAPPED; - pmap_enter_quick(pmap, + pmap_enter_quick(pmap, addr + (tmpidx << PAGE_SHIFT), VM_PAGE_TO_PHYS(p)); vm_page_unhold(p); @@ -1670,6 +1804,7 @@ pmap_prefault(pmap, addra, entry, object) vm_offset_t addr; vm_pindex_t pindex; vm_page_t m; + int pageorder_index; if (entry->object.vm_object != object) return; @@ -1686,13 +1821,16 @@ pmap_prefault(pmap, addra, entry, object) for (i = 0; i < PAGEORDER_SIZE; i++) { vm_object_t lobject; - pt_entry_t *pte; + unsigned *pte; addr = addra + pmap_prefault_pageorder[i]; if (addr < starta || addr >= entry->end) continue; - pte = vtopte(addr); + if ((*pmap_pde(pmap, addr)) == NULL) + continue; + + pte = (unsigned *) vtopte(addr); if (*pte) continue; @@ -1718,16 +1856,12 @@ pmap_prefault(pmap, addra, entry, object) (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if (m->queue == PQ_CACHE) { - if ((cnt.v_free_count + cnt.v_cache_count) < - cnt.v_free_min) - break; vm_page_deactivate(m); } vm_page_hold(m); m->flags |= PG_MAPPED; pmap_enter_quick(pmap, addr, VM_PAGE_TO_PHYS(m)); vm_page_unhold(m); - } } } @@ -1745,7 +1879,7 @@ pmap_change_wiring(pmap, va, wired) vm_offset_t va; boolean_t wired; { - register pt_entry_t *pte; + register unsigned *pte; if (pmap == NULL) return; @@ -1780,7 +1914,73 @@ pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) vm_size_t len; vm_offset_t src_addr; { -} + vm_offset_t addr; + vm_offset_t end_addr = src_addr + len; + vm_offset_t pdnxt; + unsigned src_frame, dst_frame; + pd_entry_t pde; + + if (dst_addr != src_addr) + return; + + src_frame = ((unsigned) src_pmap->pm_pdir[PTDPTDI]) & PG_FRAME; + dst_frame = ((unsigned) dst_pmap->pm_pdir[PTDPTDI]) & PG_FRAME; + + if (src_frame != (((unsigned) PTDpde) & PG_FRAME)) + return; + + if (dst_frame != (((unsigned) APTDpde) & PG_FRAME)) { + APTDpde = (pd_entry_t) (dst_frame | PG_RW | PG_V); + pmap_update(); + } + + for(addr = src_addr; addr < end_addr; addr = pdnxt) { + unsigned *src_pte, *dst_pte; + vm_page_t dstmpte, srcmpte; + vm_offset_t srcptepaddr; + + pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1)); + srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[addr >> PDRSHIFT]; + if (srcptepaddr) { + continue; + } + + srcmpte = PHYS_TO_VM_PAGE(srcptepaddr); + if (srcmpte->hold_count == 0) + continue; + + if (pdnxt > end_addr) + pdnxt = end_addr; + + src_pte = (unsigned *) vtopte(addr); + dst_pte = (unsigned *) avtopte(addr); + while (addr < pdnxt) { + unsigned ptetemp; + ptetemp = *src_pte; + if (ptetemp) { + /* + * We have to check after allocpte for the + * pte still being 
around... allocpte can + * block. + */ + dstmpte = pmap_allocpte(dst_pmap, addr); + if (ptetemp = *src_pte) { + *dst_pte = ptetemp; + dst_pmap->pm_stats.resident_count++; + pmap_insert_entry(dst_pmap, addr, dstmpte, + (ptetemp & PG_FRAME)); + } else { + --dstmpte->hold_count; + } + if (dstmpte->hold_count >= srcmpte->hold_count) + break; + } + addr += PAGE_SIZE; + ++src_pte; + ++dst_pte; + } + } +} /* * Routine: pmap_kernel @@ -1808,7 +2008,6 @@ pmap_zero_page(phys) *(int *) CMAP2 = PG_V | PG_RW | (phys & PG_FRAME); bzero(CADDR2, PAGE_SIZE); - *(int *) CMAP2 = 0; pmap_update_1pg((vm_offset_t) CADDR2); } @@ -1872,24 +2071,22 @@ pmap_page_exists(pmap, pa) pmap_t pmap; vm_offset_t pa; { - register pv_entry_t pv; + register pv_entry_t *ppv, pv; int s; if (!pmap_is_managed(pa)) return FALSE; - pv = pa_to_pvh(pa); - s = splhigh(); + s = splvm(); + ppv = pa_to_pvh(pa); /* * Not found, check current mappings returning immediately if found. */ - if (pv->pv_pmap != NULL) { - for (; pv; pv = pv->pv_next) { - if (pv->pv_pmap == pmap) { - splx(s); - return TRUE; - } + for (pv = *ppv; pv; pv = pv->pv_next) { + if (pv->pv_pmap == pmap) { + splx(s); + return TRUE; } } splx(s); @@ -1906,42 +2103,40 @@ pmap_testbit(pa, bit) register vm_offset_t pa; int bit; { - register pv_entry_t pv; - pt_entry_t *pte; + register pv_entry_t *ppv, pv; + unsigned *pte; int s; if (!pmap_is_managed(pa)) return FALSE; - pv = pa_to_pvh(pa); - s = splhigh(); + s = splvm(); + ppv = pa_to_pvh(pa); /* * Not found, check current mappings returning immediately if found. */ - if (pv->pv_pmap != NULL) { - for (; pv; pv = pv->pv_next) { - /* - * if the bit being tested is the modified bit, then - * mark UPAGES as always modified, and ptes as never - * modified. - */ - if (bit & (PG_A|PG_M)) { - if ((pv->pv_va >= clean_sva) && (pv->pv_va < clean_eva)) { - continue; - } - } - if (!pv->pv_pmap) { -#if defined(PMAP_DIAGNOSTIC) - printf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va); -#endif + for (pv = *ppv ;pv; pv = pv->pv_next) { + /* + * if the bit being tested is the modified bit, then + * mark UPAGES as always modified, and ptes as never + * modified. + */ + if (bit & (PG_A|PG_M)) { + if ((pv->pv_va >= clean_sva) && (pv->pv_va < clean_eva)) { continue; } - pte = pmap_pte(pv->pv_pmap, pv->pv_va); - if ((int) *pte & bit) { - splx(s); - return TRUE; - } + } + if (!pv->pv_pmap) { +#if defined(PMAP_DIAGNOSTIC) + printf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va); +#endif + continue; + } + pte = pmap_pte(pv->pv_pmap, pv->pv_va); + if ((int) *pte & bit) { + splx(s); + return TRUE; } } splx(s); @@ -1957,60 +2152,64 @@ pmap_changebit(pa, bit, setem) int bit; boolean_t setem; { - register pv_entry_t pv; - register pt_entry_t *pte; + register pv_entry_t pv, *ppv; + register unsigned *pte, npte; vm_offset_t va; + int changed; int s; if (!pmap_is_managed(pa)) return; - pv = pa_to_pvh(pa); - s = splhigh(); + s = splvm(); + changed = 0; + ppv = pa_to_pvh(pa); /* * Loop over all current mappings setting/clearing as appropos If * setting RO do we need to clear the VAC? 
*/ - if (pv->pv_pmap != NULL) { - for (; pv; pv = pv->pv_next) { - va = pv->pv_va; + for ( pv = *ppv; pv; pv = pv->pv_next) { + va = pv->pv_va; - /* - * don't write protect pager mappings - */ - if (!setem && (bit == PG_RW)) { - if (va >= clean_sva && va < clean_eva) - continue; - } - if (!pv->pv_pmap) { -#if defined(PMAP_DIAGNOSTIC) - printf("Null pmap (cb) at va: 0x%lx\n", va); -#endif + /* + * don't write protect pager mappings + */ + if (!setem && (bit == PG_RW)) { + if (va >= clean_sva && va < clean_eva) continue; - } + } + if (!pv->pv_pmap) { +#if defined(PMAP_DIAGNOSTIC) + printf("Null pmap (cb) at va: 0x%lx\n", va); +#endif + continue; + } - pte = pmap_pte(pv->pv_pmap, va); - if (setem) { - *(int *)pte |= bit; - } else { - if (bit == PG_RW) { - vm_offset_t pbits = *(vm_offset_t *)pte; - if (pbits & PG_M) { - vm_page_t m; - vm_offset_t pa = pbits & PG_FRAME; - m = PHYS_TO_VM_PAGE(pa); - m->dirty = VM_PAGE_BITS_ALL; - } - *(int *)pte &= ~(PG_M|PG_RW); - } else { - *(int *)pte &= ~bit; + pte = pmap_pte(pv->pv_pmap, va); + if (setem) { + *(int *)pte |= bit; + changed = 1; + } else { + vm_offset_t pbits = *(vm_offset_t *)pte; + if (pbits & bit) + changed = 1; + if (bit == PG_RW) { + if (pbits & PG_M) { + vm_page_t m; + vm_offset_t pa = pbits & PG_FRAME; + m = PHYS_TO_VM_PAGE(pa); + m->dirty = VM_PAGE_BITS_ALL; } + *(int *)pte = pbits & ~(PG_M|PG_RW); + } else { + *(int *)pte = pbits & ~bit; } } } splx(s); - pmap_update(); + if (changed) + pmap_update(); } /* @@ -2026,8 +2225,10 @@ pmap_page_protect(phys, prot) if ((prot & VM_PROT_WRITE) == 0) { if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) pmap_changebit(phys, PG_RW, FALSE); - else + else { pmap_remove_all(phys); + pmap_update(); + } } } @@ -2127,7 +2328,7 @@ pmap_mapdev(pa, size) vm_size_t size; { vm_offset_t va, tmpva; - pt_entry_t *pte; + unsigned *pte; size = roundup(size, PAGE_SIZE); @@ -2137,8 +2338,8 @@ pmap_mapdev(pa, size) pa = pa & PG_FRAME; for (tmpva = va; size > 0;) { - pte = vtopte(tmpva); - *pte = (pt_entry_t) ((int) (pa | PG_RW | PG_V | PG_N)); + pte = (unsigned *)vtopte(tmpva); + *pte = pa | PG_RW | PG_V | PG_N; size -= PAGE_SIZE; tmpva += PAGE_SIZE; pa += PAGE_SIZE; @@ -2164,8 +2365,8 @@ pmap_pid_dump(int pid) { pmap = &p->p_vmspace->vm_pmap; for(i=0;i<1024;i++) { pd_entry_t *pde; - pt_entry_t *pte; - unsigned base = i << PD_SHIFT; + unsigned *pte; + unsigned base = i << PDRSHIFT; pde = &pmap->pm_pdir[i]; if (pde && pmap_pde_v(pde)) { @@ -2215,7 +2416,7 @@ pads(pm) pmap_t pm; { unsigned va, i, j; - pt_entry_t *ptep; + unsigned *ptep; if (pm == kernel_pmap) return; @@ -2253,3 +2454,5 @@ pmap_pvdump(pa) printf(" "); } #endif + + diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index c97e50aca6c1..b81cfc10601b 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. 
* * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 - * $Id: trap.c,v 1.74 1996/03/27 17:33:39 bde Exp $ + * $Id: trap.c,v 1.75 1996/03/28 05:40:57 dyson Exp $ */ /* @@ -805,25 +805,11 @@ int trapwrite(addr) v = trunc_page(vtopte(va)); - /* - * wire the pte page - */ - if (va < USRSTACK) { - vm_map_pageable(&vm->vm_map, v, round_page(v+1), FALSE); - } - /* * fault the data page */ rv = vm_fault(&vm->vm_map, va, VM_PROT_READ|VM_PROT_WRITE, FALSE); - /* - * unwire the pte page - */ - if (va < USRSTACK) { - vm_map_pageable(&vm->vm_map, v, round_page(v+1), TRUE); - } - --p->p_lock; if (rv != KERN_SUCCESS) diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index 4c1823e40360..e764b2f7def5 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -38,7 +38,7 @@ * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ - * $Id: vm_machdep.c,v 1.61 1996/05/02 10:43:06 phk Exp $ + * $Id: vm_machdep.c,v 1.62 1996/05/02 14:19:55 phk Exp $ */ #include "npx.h" @@ -862,7 +862,7 @@ int vm_page_zero_idle() { vm_page_t m; if ((cnt.v_free_count > cnt.v_interrupt_free_min) && - (m = vm_page_queue_free.tqh_first)) { + (m = TAILQ_FIRST(&vm_page_queue_free))) { TAILQ_REMOVE(&vm_page_queue_free, m, pageq); enable_intr(); pmap_zero_page(VM_PAGE_TO_PHYS(m)); diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h index a843fbfcbddc..f0d7fe695214 100644 --- a/sys/amd64/include/pmap.h +++ b/sys/amd64/include/pmap.h @@ -42,7 +42,7 @@ * * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91 - * $Id: pmap.h,v 1.37 1996/05/02 14:20:04 phk Exp $ + * $Id: pmap.h,v 1.38 1996/05/02 22:25:18 phk Exp $ */ #ifndef _MACHINE_PMAP_H_ @@ -69,6 +69,7 @@ /* Our various interpretations of the above */ #define PG_W PG_AVAIL1 /* "Wired" pseudoflag */ +#define PG_MANAGED PG_AVAIL2 #define PG_FRAME (~PAGE_MASK) #define PG_PROT (PG_RW|PG_U) /* all protection bits . */ #define PG_N (PG_NC_PWT|PG_NC_PCD) /* Non-cacheable */ @@ -87,12 +88,8 @@ #define VADDR(pdi, pti) ((vm_offset_t)(((pdi)<
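To illustrate the new PG_MANAGED pseudo-flag defined in the pmap.h hunk above: the i386 leaves pte bits 9-11 to software (PG_AVAIL1..3), so a removal path can test the pte itself instead of range-checking the physical address with pmap_is_managed(). The bit values below follow the usual i386 layout and remove_pte() is a hypothetical sketch, not the kernel routine.

#include <stdio.h>

#define PG_V        0x001
#define PG_RW       0x002
#define PG_M        0x040
#define PG_W        0x200       /* PG_AVAIL1: wired pseudoflag */
#define PG_MANAGED  0x400       /* PG_AVAIL2: pv-tracked mapping */
#define PG_FRAME    0xfffff000

static void
remove_pte(unsigned int pte)
{
    if (pte & PG_MANAGED) {
        /* managed: pv list bookkeeping, dirty-bit propagation, ... */
        if (pte & PG_M)
            printf("page 0x%x is dirty\n", pte & PG_FRAME);
    } else {
        /* unmanaged (e.g. device) mapping: nothing to track */
        printf("unmanaged mapping 0x%x\n", pte & PG_FRAME);
    }
}

int
main(void)
{
    remove_pte(0x00400000 | PG_V | PG_RW | PG_M | PG_MANAGED);
    remove_pte(0xfee00000 | PG_V | PG_RW);      /* a device register page */
    return 0;
}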
*/ @@ -713,16 +821,6 @@ pmap_reference(pmap) } } -#define PV_FREELIST_MIN ((PAGE_SIZE / sizeof (struct pv_entry)) / 2) - -/* - * Data for the pv entry allocation mechanism - */ -static int pv_freelistcnt; -static pv_entry_t pv_freelist; -static vm_offset_t pvva; -static int npvvapg; - /* * free the pv_entry back to the free list */ @@ -730,8 +828,6 @@ static __inline void free_pv_entry(pv) pv_entry_t pv; { - if (!pv) - return; ++pv_freelistcnt; pv->pv_next = pv_freelist; pv_freelist = pv; @@ -777,10 +873,6 @@ pmap_alloc_pv_entry() if (npvvapg) { vm_page_t m; - /* - * we do this to keep recursion away - */ - pv_freelistcnt += PV_FREELIST_MIN; /* * allocate a physical page out of the vm system */ @@ -818,14 +910,11 @@ pmap_alloc_pv_entry() entry++; } } - pv_freelistcnt -= PV_FREELIST_MIN; } if (!pv_freelist) panic("get_pv_entry: cannot get a pv_entry_t"); } - - /* * init the pv_entry allocation system */ @@ -839,30 +928,13 @@ init_pv_entries(npg) * kvm space is fairly cheap, be generous!!! (the system can panic if * this is too small.) */ - npvvapg = btoc((npg * PVSPERPAGE) * sizeof(struct pv_entry)); + npvvapg = ((npg * PVSPERPAGE) * sizeof(struct pv_entry) + + PAGE_SIZE - 1) / PAGE_SIZE; pvva = kmem_alloc_pageable(kernel_map, npvvapg * PAGE_SIZE); /* * get the first batch of entries */ - free_pv_entry(get_pv_entry()); -} - -__inline pt_entry_t * -get_ptbase(pmap) - pmap_t pmap; -{ - vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME; - - /* are we current address space or kernel? */ - if (pmap == kernel_pmap || frame == ((int) PTDpde & PG_FRAME)) { - return PTmap; - } - /* otherwise, we are alternate address space */ - if (frame != ((int) APTDpde & PG_FRAME)) { - APTDpde = pmap->pm_pdir[PTDPTDI]; - pmap_update(); - } - return APTmap; + pmap_alloc_pv_entry(); } /* @@ -871,77 +943,70 @@ get_ptbase(pmap) * to the header. Otherwise we must search the list for * the entry. In either case we free the now unused entry. 
*/ -static __inline void -pmap_remove_entry(pmap, pv, va) +static __inline int +pmap_remove_entry(pmap, ppv, va) struct pmap *pmap; - pv_entry_t pv; + pv_entry_t *ppv; vm_offset_t va; { pv_entry_t npv; int s; - s = splhigh(); - if (pmap == pv->pv_pmap && va == pv->pv_va) { - pmap_unuse_pt(pmap, va, pv->pv_ptem); - npv = pv->pv_next; - if (npv) { - *pv = *npv; + + s = splvm(); + for (npv = *ppv; npv; (ppv = &npv->pv_next, npv = *ppv)) { + if (pmap == npv->pv_pmap && va == npv->pv_va) { + int rtval = pmap_unuse_pt(pmap, va, npv->pv_ptem); + *ppv = npv->pv_next; free_pv_entry(npv); - } else { - pv->pv_pmap = NULL; - } - } else { - for (npv = pv->pv_next; npv; (pv = npv, npv = pv->pv_next)) { - if (pmap == npv->pv_pmap && va == npv->pv_va) { - pmap_unuse_pt(pmap, va, npv->pv_ptem); - pv->pv_next = npv->pv_next; - free_pv_entry(npv); - break; - } + splx(s); + return rtval; } } splx(s); + return 0; } /* * pmap_remove_pte: do the things to unmap a page in a process */ -static void -pmap_remove_pte(pmap, ptq, sva) +static +#if !defined(PMAP_DIAGNOSTIC) +__inline +#endif +int +pmap_remove_pte(pmap, ptq, va) struct pmap *pmap; - pt_entry_t *ptq; - vm_offset_t sva; + unsigned *ptq; + vm_offset_t va; { - pt_entry_t oldpte; - vm_offset_t pa; - pv_entry_t pv; + unsigned oldpte; + pv_entry_t *ppv; + int i; + int s; oldpte = *ptq; - if (((int)oldpte) & PG_W) - pmap->pm_stats.wired_count--; - pmap->pm_stats.resident_count--; - - pa = ((vm_offset_t)oldpte) & PG_FRAME; - if (pmap_is_managed(pa)) { - if ((int) oldpte & PG_M) { + *ptq = 0; + if (oldpte & PG_W) + pmap->pm_stats.wired_count -= 1; + pmap->pm_stats.resident_count -= 1; + if (oldpte & PG_MANAGED) { + if (oldpte & PG_M) { #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified(oldpte)) { - printf("pmap_remove: modified page not writable: va: 0x%lx, pte: 0x%lx\n", sva, (int) oldpte); + printf("pmap_remove: modified page not writable: va: 0x%lx, pte: 0x%lx\n", va, (int) oldpte); } #endif - - if (sva < USRSTACK + (UPAGES * PAGE_SIZE) || - (sva >= KERNBASE && (sva < clean_sva || sva >= clean_eva))) { - PHYS_TO_VM_PAGE(pa)->dirty = VM_PAGE_BITS_ALL; + if (va < clean_sva || va >= clean_eva) { + PHYS_TO_VM_PAGE(oldpte)->dirty = VM_PAGE_BITS_ALL; } } - pv = pa_to_pvh(pa); - pmap_remove_entry(pmap, pv, sva); + ppv = pa_to_pvh(oldpte); + return pmap_remove_entry(pmap, ppv, va); } else { - pmap_unuse_pt(pmap, sva, NULL); + return pmap_unuse_pt(pmap, va, NULL); } - *ptq = 0; - return; + return 0; } /* @@ -952,24 +1017,26 @@ pmap_remove_page(pmap, va) struct pmap *pmap; register vm_offset_t va; { - register pt_entry_t *ptbase, *ptq; + register unsigned *ptq; + /* * if there is no pte for this address, just skip it!!! */ - if (*pmap_pde(pmap, va) == 0) + if (*pmap_pde(pmap, va) == 0) { return; + } + /* * get a local va for mappings for this pmap. */ - ptbase = get_ptbase(pmap); - ptq = ptbase + i386_btop(va); + ptq = get_ptbase(pmap) + i386_btop(va); if (*ptq) { - pmap_remove_pte(pmap, ptq, va); + (void) pmap_remove_pte(pmap, ptq, va); pmap_update_1pg(va); } return; } - + /* * Remove the given range of addresses from the specified map. 
* @@ -982,11 +1049,18 @@ pmap_remove(pmap, sva, eva) register vm_offset_t sva; register vm_offset_t eva; { - register pt_entry_t *ptbase; + register unsigned *ptbase; + vm_offset_t va; vm_offset_t pdnxt; vm_offset_t ptpaddr; vm_offset_t sindex, eindex; vm_page_t mpte; + int s; +#if defined(OLDREMOVE) || defined(I386_CPU) + int anyvalid; +#else + int mustremove; +#endif if (pmap == NULL) return; @@ -1001,6 +1075,16 @@ pmap_remove(pmap, sva, eva) return; } +#if !defined(OLDREMOVE) && !defined(I386_CPU) + if ((pmap == kernel_pmap) || + (pmap->pm_pdir[PTDPTDI] == PTDpde)) + mustremove = 1; + else + mustremove = 0; +#else + anyvalid = 0; +#endif + /* * Get a local virtual address for the mappings that are being * worked with. @@ -1010,6 +1094,7 @@ pmap_remove(pmap, sva, eva) sindex = i386_btop(sva); eindex = i386_btop(eva); + for (; sindex < eindex; sindex = pdnxt) { /* @@ -1046,14 +1131,70 @@ pmap_remove(pmap, sva, eva) } for ( ;sindex != pdnxt; sindex++) { - if (ptbase[sindex] == 0) + vm_offset_t va; + if (ptbase[sindex] == 0) { continue; - pmap_remove_pte(pmap, ptbase + sindex, i386_ptob(sindex)); - if (mpte->hold_count == 0 && mpte->wire_count == 0) + } + va = i386_ptob(sindex); +#if defined(OLDREMOVE) || defined(I386_CPU) + anyvalid = 1; +#else + if (mustremove) + pmap_update_1pg(va); +#endif + if (pmap_remove_pte(pmap, + ptbase + sindex, va)) break; } } - pmap_update(); + +#if defined(OLDREMOVE) || defined(I386_CPU) + if (anyvalid) { + /* are we current address space or kernel? */ + if (pmap == kernel_pmap) { + pmap_update(); + } else if (pmap->pm_pdir[PTDPTDI] == PTDpde) { + pmap_update(); + } + } +#endif +} + + +void +pmap_remove_pte_mapping(pa) + vm_offset_t pa; +{ + register pv_entry_t pv, *ppv, npv; + register unsigned *pte, *ptbase; + vm_offset_t va; + int s; + int anyvalid = 0; + + ppv = pa_to_pvh(pa); + + for (pv = *ppv; pv; pv=pv->pv_next) { + unsigned tpte; + struct pmap *pmap; + + anyvalid = 1; + pmap = pv->pv_pmap; + pte = get_ptbase(pmap) + i386_btop(pv->pv_va); + if (tpte = *pte) { + pmap->pm_stats.resident_count--; + *pte = 0; + if (tpte & PG_W) + pmap->pm_stats.wired_count--; + } + } + + if (anyvalid) { + for (pv = *ppv; pv; pv = npv) { + npv = pv->pv_next; + free_pv_entry(pv); + } + *ppv = NULL; + } } /* @@ -1068,17 +1209,15 @@ pmap_remove(pmap, sva, eva) * inefficient because they iteratively called * pmap_remove (slow...) */ -static void +static __inline void pmap_remove_all(pa) vm_offset_t pa; { - register pv_entry_t pv, opv, npv; - register pt_entry_t *pte, *ptbase; + register pv_entry_t pv, *ppv, npv; + register unsigned *pte, *ptbase; vm_offset_t va; - struct pmap *pmap; vm_page_t m; int s; - int anyvalid = 0; #if defined(PMAP_DIAGNOSTIC) /* @@ -1090,62 +1229,49 @@ pmap_remove_all(pa) } #endif - pa = pa & PG_FRAME; - opv = pa_to_pvh(pa); - if (opv->pv_pmap == NULL) - return; - m = PHYS_TO_VM_PAGE(pa); - s = splhigh(); - pv = opv; - while (pv && ((pmap = pv->pv_pmap) != NULL)) { + ppv = pa_to_pvh(pa); + + s = splvm(); + for (pv = *ppv; pv; pv=pv->pv_next) { int tpte; + struct pmap *pmap; + + pmap = pv->pv_pmap; ptbase = get_ptbase(pmap); va = pv->pv_va; pte = ptbase + i386_btop(va); if (tpte = ((int) *pte)) { + pmap->pm_stats.resident_count--; *pte = 0; if (tpte & PG_W) pmap->pm_stats.wired_count--; - pmap->pm_stats.resident_count--; - anyvalid = 1; - /* * Update the vm_page_t clean and reference bits. 
*/ - if ((tpte & PG_M) != 0) { + if (tpte & PG_M) { #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) tpte)) { printf("pmap_remove_all: modified page not writable: va: 0x%lx, pte: 0x%lx\n", va, tpte); } #endif - if (va < USRSTACK + (UPAGES * PAGE_SIZE) || - (va >= KERNBASE && (va < clean_sva || va >= clean_eva))) { + if (va < clean_sva || va >= clean_eva) { m->dirty = VM_PAGE_BITS_ALL; } } } - pv = pv->pv_next; } - if (opv->pv_pmap != NULL) { - pmap_unuse_pt(opv->pv_pmap, opv->pv_va, opv->pv_ptem); - for (pv = opv->pv_next; pv; pv = npv) { - npv = pv->pv_next; - pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); - free_pv_entry(pv); - } + for (pv = *ppv; pv; pv = npv) { + npv = pv->pv_next; + pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); + free_pv_entry(pv); } - - opv->pv_pmap = NULL; - opv->pv_next = NULL; + *ppv = NULL; splx(s); - if (anyvalid) - pmap_update(); } - /* * Set the physical protection on the * specified range of this map as requested. @@ -1156,13 +1282,14 @@ pmap_protect(pmap, sva, eva, prot) vm_offset_t sva, eva; vm_prot_t prot; { - register pt_entry_t *pte; - register pt_entry_t *ptbase; + register unsigned *pte; + register vm_offset_t va; + register unsigned *ptbase; vm_offset_t pdnxt; vm_offset_t ptpaddr; vm_offset_t sindex, eindex; vm_page_t mpte; - int anychanged; + int anyvalid; if (pmap == NULL) @@ -1175,7 +1302,7 @@ pmap_protect(pmap, sva, eva, prot) if (prot & VM_PROT_WRITE) return; - anychanged = 0; + anyvalid = 0; ptbase = get_ptbase(pmap); @@ -1205,26 +1332,177 @@ pmap_protect(pmap, sva, eva, prot) } for (; sindex != pdnxt; sindex++) { - if (ptbase[sindex] == 0) - continue; - pte = ptbase + sindex; - pbits = *(int *)pte; + + unsigned pbits = ptbase[sindex]; + if (pbits & PG_RW) { if (pbits & PG_M) { - vm_page_t m; - vm_offset_t pa = pbits & PG_FRAME; - m = PHYS_TO_VM_PAGE(pa); + vm_page_t m = PHYS_TO_VM_PAGE(pbits); m->dirty = VM_PAGE_BITS_ALL; } - *(int *)pte &= ~(PG_M|PG_RW); - anychanged=1; + ptbase[sindex] = pbits & ~(PG_M|PG_RW); + anyvalid = 1; } } } - if (anychanged) + if (anyvalid) pmap_update(); } +/* + * Create a pv entry for page at pa for + * (pmap, va). + */ +static __inline void +pmap_insert_entry(pmap, va, mpte, pa) + pmap_t pmap; + vm_offset_t va; + vm_page_t mpte; + vm_offset_t pa; +{ + + int s; + pv_entry_t *ppv, pv; + + s = splvm(); + pv = get_pv_entry(); + pv->pv_va = va; + pv->pv_pmap = pmap; + pv->pv_ptem = mpte; + + ppv = pa_to_pvh(pa); + if (*ppv) + pv->pv_next = *ppv; + else + pv->pv_next = NULL; + *ppv = pv; + splx(s); +} + +/* + * this routine is called if the page table page is not + * mapped correctly. + */ +static vm_page_t +_pmap_allocpte(pmap, va, ptepindex) + pmap_t pmap; + vm_offset_t va; + int ptepindex; +{ + vm_offset_t pteva, ptepa; + vm_page_t m; + + /* + * Find or fabricate a new pagetable page + */ +retry: + m = vm_page_lookup(pmap->pm_pteobj, ptepindex); + if (m == NULL) { + m = vm_page_alloc(pmap->pm_pteobj, ptepindex, VM_ALLOC_ZERO); + if (m == NULL) { + VM_WAIT; + goto retry; + } + if ((m->flags & PG_ZERO) == 0) + pmap_zero_page(VM_PAGE_TO_PHYS(m)); + m->flags &= ~(PG_ZERO|PG_BUSY); + m->valid = VM_PAGE_BITS_ALL; + } + + /* + * mark the object writeable + */ + pmap->pm_pteobj->flags |= OBJ_WRITEABLE; + + /* + * Increment the hold count for the page table page + * (denoting a new mapping.) 
+ */ + ++m->hold_count; + + /* + * Activate the pagetable page, if it isn't already + */ + if (m->queue != PQ_ACTIVE) + vm_page_activate(m); + + /* + * Map the pagetable page into the process address space, if + * it isn't already there. + */ + pteva = ((vm_offset_t) vtopte(va)) & PG_FRAME; + ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; + if (ptepa == 0) { + int s; + pv_entry_t pv, *ppv; + + pmap->pm_stats.resident_count++; + + s = splvm(); + pv = get_pv_entry(); + + pv->pv_va = pteva; + pv->pv_pmap = pmap; + pv->pv_next = NULL; + pv->pv_ptem = NULL; + + ptepa = VM_PAGE_TO_PHYS(m); + pmap->pm_pdir[ptepindex] = + (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_MANAGED); + ppv = pa_to_pvh(ptepa); +#if defined(PMAP_DIAGNOSTIC) + if (*ppv) + panic("pmap_allocpte: page is already mapped"); +#endif + *ppv = pv; + splx(s); + m->flags |= PG_MAPPED; + } else { +#if defined(PMAP_DIAGNOSTIC) + if (VM_PAGE_TO_PHYS(m) != (ptepa & PG_FRAME)) + panic("pmap_allocpte: mismatch"); +#endif + pmap->pm_pdir[ptepindex] = + (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_MANAGED); + pmap_update_1pg(pteva); + m->flags |= PG_MAPPED; + } + return m; +} + +static __inline vm_page_t +pmap_allocpte(pmap, va) + pmap_t pmap; + vm_offset_t va; +{ + int ptepindex; + vm_offset_t ptepa; + vm_page_t m; + + /* + * Calculate pagetable page index + */ + ptepindex = va >> PDRSHIFT; + + /* + * Get the page directory entry + */ + ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; + + /* + * If the page table page is mapped, we just increment the + * hold count, and activate it. + */ + if ((ptepa & (PG_RW|PG_U|PG_V)) == (PG_RW|PG_U|PG_V)) { + m = PHYS_TO_VM_PAGE(ptepa); + ++m->hold_count; + if (m->queue != PQ_ACTIVE) + vm_page_activate(m); + return m; + } + return _pmap_allocpte(pmap, va, ptepindex); +} + /* * Insert the given physical page (p) at * the specified virtual address (v) in the @@ -1245,40 +1523,32 @@ pmap_enter(pmap, va, pa, prot, wired) vm_prot_t prot; boolean_t wired; { - register pt_entry_t *pte; + register unsigned *pte; vm_offset_t opa; - register pv_entry_t pv, npv; vm_offset_t origpte, newpte; + vm_offset_t ptepa; + vm_page_t mpte; + int s; if (pmap == NULL) return; - pv = NULL; - - va = va & PG_FRAME; + va &= PG_FRAME; if (va > VM_MAX_KERNEL_ADDRESS) panic("pmap_enter: toobig"); + mpte = NULL; /* * In the case that a page table page is not * resident, we are creating it here. */ - if ((va < VM_MIN_KERNEL_ADDRESS) && - (curproc != NULL) && - (pmap->pm_map->pmap == pmap)) { - vm_offset_t v; - - v = (vm_offset_t) vtopte(va); - /* Fault the pte only if needed: */ - if (*((int *)vtopte(v)) == 0) - (void) vm_fault(pmap->pm_map, - trunc_page(v), VM_PROT_WRITE, FALSE); - } + if (va < UPT_MIN_ADDRESS) + mpte = pmap_allocpte(pmap, va); + pte = pmap_pte(pmap, va); /* * Page Directory table entry not valid, we need a new PT page */ - pte = pmap_pte(pmap, va); if (pte == NULL) { printf("kernel page directory invalid pdir=%p, va=0x%lx\n", pmap->pm_pdir[PTDPTDI], va); @@ -1286,10 +1556,9 @@ pmap_enter(pmap, va, pa, prot, wired) } origpte = *(vm_offset_t *)pte; + pa &= PG_FRAME; opa = origpte & PG_FRAME; - pa = pa & PG_FRAME; - /* * Mapping has not changed, must be protection or wiring change. */ @@ -1315,55 +1584,35 @@ pmap_enter(pmap, va, pa, prot, wired) * We might be turning off write access to the page, * so we go ahead and sense modify status. 
*/ - if (origpte & PG_M) { + if (origpte & PG_MANAGED) { vm_page_t m; - m = PHYS_TO_VM_PAGE(pa); - m->dirty = VM_PAGE_BITS_ALL; + if (origpte & PG_M) { + m = PHYS_TO_VM_PAGE(pa); + m->dirty = VM_PAGE_BITS_ALL; + } + pa |= PG_MANAGED; } + + if (mpte) + --mpte->hold_count; + goto validate; } /* * Mapping has changed, invalidate old range and fall through to * handle validating new mapping. */ - if (opa) { - pmap_remove_page(pmap, va); - opa = 0; - origpte = 0; - } + if (opa) + (void) pmap_remove_pte(pmap, pte, va); + /* * Enter on the PV list if part of our managed memory Note that we * raise IPL while manipulating pv_table since pmap_enter can be * called at interrupt time. */ if (pmap_is_managed(pa)) { - int s; - - pv = pa_to_pvh(pa); - s = splhigh(); - /* - * No entries yet, use header as the first entry - */ - if (pv->pv_pmap == NULL) { - pv->pv_va = va; - pv->pv_pmap = pmap; - pv->pv_next = NULL; - pv->pv_ptem = NULL; - } - /* - * There is at least one other VA mapping this page. Place - * this entry after the header. - */ - else { - npv = get_pv_entry(); - npv->pv_va = va; - npv->pv_pmap = pmap; - npv->pv_next = pv->pv_next; - pv->pv_next = npv; - pv = npv; - pv->pv_ptem = NULL; - } - splx(s); + pmap_insert_entry(pmap, va, mpte, pa); + pa |= PG_MANAGED; } /* @@ -1383,106 +1632,16 @@ validate: newpte |= PG_W; if (va < UPT_MIN_ADDRESS) newpte |= PG_U; - else if (va < UPT_MAX_ADDRESS) - newpte |= PG_U | PG_RW; /* * if the mapping or permission bits are different, we need * to update the pte. */ if ((origpte & ~(PG_M|PG_A)) != newpte) { - *pte = (pt_entry_t) newpte; + *pte = newpte; if (origpte) pmap_update_1pg(va); } - - if (origpte == 0) { - vm_page_t mpte; - mpte = pmap_use_pt(pmap, va); - if (pv) - pv->pv_ptem = mpte; - } -} - -/* - * Add a list of wired pages to the kva - * this routine is only used for temporary - * kernel mappings that do not need to have - * page modification or references recorded. - * Note that old mappings are simply written - * over. The page *must* be wired. - */ -void -pmap_qenter(va, m, count) - vm_offset_t va; - vm_page_t *m; - int count; -{ - int i; - register pt_entry_t *pte; - - for (i = 0; i < count; i++) { - vm_offset_t tva = va + i * PAGE_SIZE; - pt_entry_t npte = (pt_entry_t) ((int) (VM_PAGE_TO_PHYS(m[i]) | PG_RW | PG_V)); - pt_entry_t opte; - pte = vtopte(tva); - opte = *pte; - *pte = npte; - if (opte) pmap_update_1pg(tva); - } -} -/* - * this routine jerks page mappings from the - * kernel -- it is meant only for temporary mappings. - */ -void -pmap_qremove(va, count) - vm_offset_t va; - int count; -{ - int i; - register pt_entry_t *pte; - - for (i = 0; i < count; i++) { - vm_offset_t tva = va + i * PAGE_SIZE; - pte = vtopte(tva); - *pte = 0; - pmap_update_1pg(tva); - } -} - -/* - * add a wired page to the kva - * note that in order for the mapping to take effect -- you - * should do a pmap_update after doing the pmap_kenter... 
- */ -void -pmap_kenter(va, pa) - vm_offset_t va; - register vm_offset_t pa; -{ - register pt_entry_t *pte; - pt_entry_t npte, opte; - - npte = (pt_entry_t) ((int) (pa | PG_RW | PG_V)); - pte = vtopte(va); - opte = *pte; - *pte = npte; - if (opte) pmap_update_1pg(va); -} - -/* - * remove a page from the kernel pagetables - */ -void -pmap_kremove(va) - vm_offset_t va; -{ - register pt_entry_t *pte; - - pte = vtopte(va); - *pte = (pt_entry_t) 0; - pmap_update_1pg(va); } /* @@ -1502,46 +1661,27 @@ pmap_enter_quick(pmap, va, pa) vm_offset_t va; register vm_offset_t pa; { - register pt_entry_t *pte; - register pv_entry_t pv, npv; - int s; + register unsigned *pte; + vm_page_t mpte; + + mpte = NULL; + /* + * In the case that a page table page is not + * resident, we are creating it here. + */ + if (va < UPT_MIN_ADDRESS) + mpte = pmap_allocpte(pmap, va); + + pte = (unsigned *)vtopte(va); + if (*pte) + (void) pmap_remove_pte(pmap, pte, va); /* * Enter on the PV list if part of our managed memory Note that we * raise IPL while manipulating pv_table since pmap_enter can be * called at interrupt time. */ - - pte = vtopte(va); - /* a fault on the page table might occur here */ - if (*pte) { - pmap_remove_page(pmap, va); - } - - pv = pa_to_pvh(pa); - s = splhigh(); - /* - * No entries yet, use header as the first entry - */ - if (pv->pv_pmap == NULL) { - pv->pv_pmap = pmap; - pv->pv_va = va; - pv->pv_next = NULL; - } - /* - * There is at least one other VA mapping this page. Place this entry - * after the header. - */ - else { - npv = get_pv_entry(); - npv->pv_va = va; - npv->pv_pmap = pmap; - npv->pv_next = pv->pv_next; - pv->pv_next = npv; - pv = npv; - } - splx(s); - pv->pv_ptem = pmap_use_pt(pmap, va); + pmap_insert_entry(pmap, va, mpte, pa); /* * Increment counters @@ -1551,7 +1691,7 @@ pmap_enter_quick(pmap, va, pa) /* * Now validate mapping with RO protection */ - *pte = (pt_entry_t) ((int) (pa | PG_V | PG_U)); + *pte = pa | PG_V | PG_U | PG_MANAGED; return; } @@ -1583,11 +1723,6 @@ pmap_object_init_pt(pmap, addr, object, pindex, size) return; } - /* - * remove any already used mappings - */ - pmap_remove( pmap, trunc_page(addr), round_page(addr + size)); - /* * if we are processing a major portion of the object, then scan the * entire thing. 
@@ -1595,9 +1730,9 @@ pmap_object_init_pt(pmap, addr, object, pindex, size) if (psize > (object->size >> 2)) { objpgs = psize; - for (p = object->memq.tqh_first; + for (p = TAILQ_FIRST(&object->memq); ((objpgs > 0) && (p != NULL)); - p = p->listq.tqe_next) { + p = TAILQ_NEXT(p, listq)) { tmpidx = p->pindex; if (tmpidx < pindex) { @@ -1614,7 +1749,7 @@ pmap_object_init_pt(pmap, addr, object, pindex, size) vm_page_deactivate(p); vm_page_hold(p); p->flags |= PG_MAPPED; - pmap_enter_quick(pmap, + pmap_enter_quick(pmap, addr + (tmpidx << PAGE_SHIFT), VM_PAGE_TO_PHYS(p)); vm_page_unhold(p); @@ -1627,14 +1762,13 @@ pmap_object_init_pt(pmap, addr, object, pindex, size) */ for (tmpidx = 0; tmpidx < psize; tmpidx += 1) { p = vm_page_lookup(object, tmpidx + pindex); - if (p && (p->busy == 0) && + if (p && ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && + (p->busy == 0) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { - if (p->queue == PQ_CACHE) - vm_page_deactivate(p); vm_page_hold(p); p->flags |= PG_MAPPED; - pmap_enter_quick(pmap, + pmap_enter_quick(pmap, addr + (tmpidx << PAGE_SHIFT), VM_PAGE_TO_PHYS(p)); vm_page_unhold(p); @@ -1670,6 +1804,7 @@ pmap_prefault(pmap, addra, entry, object) vm_offset_t addr; vm_pindex_t pindex; vm_page_t m; + int pageorder_index; if (entry->object.vm_object != object) return; @@ -1686,13 +1821,16 @@ pmap_prefault(pmap, addra, entry, object) for (i = 0; i < PAGEORDER_SIZE; i++) { vm_object_t lobject; - pt_entry_t *pte; + unsigned *pte; addr = addra + pmap_prefault_pageorder[i]; if (addr < starta || addr >= entry->end) continue; - pte = vtopte(addr); + if ((*pmap_pde(pmap, addr)) == NULL) + continue; + + pte = (unsigned *) vtopte(addr); if (*pte) continue; @@ -1718,16 +1856,12 @@ pmap_prefault(pmap, addra, entry, object) (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if (m->queue == PQ_CACHE) { - if ((cnt.v_free_count + cnt.v_cache_count) < - cnt.v_free_min) - break; vm_page_deactivate(m); } vm_page_hold(m); m->flags |= PG_MAPPED; pmap_enter_quick(pmap, addr, VM_PAGE_TO_PHYS(m)); vm_page_unhold(m); - } } } @@ -1745,7 +1879,7 @@ pmap_change_wiring(pmap, va, wired) vm_offset_t va; boolean_t wired; { - register pt_entry_t *pte; + register unsigned *pte; if (pmap == NULL) return; @@ -1780,7 +1914,73 @@ pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) vm_size_t len; vm_offset_t src_addr; { -} + vm_offset_t addr; + vm_offset_t end_addr = src_addr + len; + vm_offset_t pdnxt; + unsigned src_frame, dst_frame; + pd_entry_t pde; + + if (dst_addr != src_addr) + return; + + src_frame = ((unsigned) src_pmap->pm_pdir[PTDPTDI]) & PG_FRAME; + dst_frame = ((unsigned) dst_pmap->pm_pdir[PTDPTDI]) & PG_FRAME; + + if (src_frame != (((unsigned) PTDpde) & PG_FRAME)) + return; + + if (dst_frame != (((unsigned) APTDpde) & PG_FRAME)) { + APTDpde = (pd_entry_t) (dst_frame | PG_RW | PG_V); + pmap_update(); + } + + for(addr = src_addr; addr < end_addr; addr = pdnxt) { + unsigned *src_pte, *dst_pte; + vm_page_t dstmpte, srcmpte; + vm_offset_t srcptepaddr; + + pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1)); + srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[addr >> PDRSHIFT]; + if (srcptepaddr) { + continue; + } + + srcmpte = PHYS_TO_VM_PAGE(srcptepaddr); + if (srcmpte->hold_count == 0) + continue; + + if (pdnxt > end_addr) + pdnxt = end_addr; + + src_pte = (unsigned *) vtopte(addr); + dst_pte = (unsigned *) avtopte(addr); + while (addr < pdnxt) { + unsigned ptetemp; + ptetemp = *src_pte; + if (ptetemp) { + /* + * We have to check after allocpte for the + * pte still being 
around... allocpte can + * block. + */ + dstmpte = pmap_allocpte(dst_pmap, addr); + if (ptetemp = *src_pte) { + *dst_pte = ptetemp; + dst_pmap->pm_stats.resident_count++; + pmap_insert_entry(dst_pmap, addr, dstmpte, + (ptetemp & PG_FRAME)); + } else { + --dstmpte->hold_count; + } + if (dstmpte->hold_count >= srcmpte->hold_count) + break; + } + addr += PAGE_SIZE; + ++src_pte; + ++dst_pte; + } + } +} /* * Routine: pmap_kernel @@ -1808,7 +2008,6 @@ pmap_zero_page(phys) *(int *) CMAP2 = PG_V | PG_RW | (phys & PG_FRAME); bzero(CADDR2, PAGE_SIZE); - *(int *) CMAP2 = 0; pmap_update_1pg((vm_offset_t) CADDR2); } @@ -1872,24 +2071,22 @@ pmap_page_exists(pmap, pa) pmap_t pmap; vm_offset_t pa; { - register pv_entry_t pv; + register pv_entry_t *ppv, pv; int s; if (!pmap_is_managed(pa)) return FALSE; - pv = pa_to_pvh(pa); - s = splhigh(); + s = splvm(); + ppv = pa_to_pvh(pa); /* * Not found, check current mappings returning immediately if found. */ - if (pv->pv_pmap != NULL) { - for (; pv; pv = pv->pv_next) { - if (pv->pv_pmap == pmap) { - splx(s); - return TRUE; - } + for (pv = *ppv; pv; pv = pv->pv_next) { + if (pv->pv_pmap == pmap) { + splx(s); + return TRUE; } } splx(s); @@ -1906,42 +2103,40 @@ pmap_testbit(pa, bit) register vm_offset_t pa; int bit; { - register pv_entry_t pv; - pt_entry_t *pte; + register pv_entry_t *ppv, pv; + unsigned *pte; int s; if (!pmap_is_managed(pa)) return FALSE; - pv = pa_to_pvh(pa); - s = splhigh(); + s = splvm(); + ppv = pa_to_pvh(pa); /* * Not found, check current mappings returning immediately if found. */ - if (pv->pv_pmap != NULL) { - for (; pv; pv = pv->pv_next) { - /* - * if the bit being tested is the modified bit, then - * mark UPAGES as always modified, and ptes as never - * modified. - */ - if (bit & (PG_A|PG_M)) { - if ((pv->pv_va >= clean_sva) && (pv->pv_va < clean_eva)) { - continue; - } - } - if (!pv->pv_pmap) { -#if defined(PMAP_DIAGNOSTIC) - printf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va); -#endif + for (pv = *ppv ;pv; pv = pv->pv_next) { + /* + * if the bit being tested is the modified bit, then + * mark UPAGES as always modified, and ptes as never + * modified. + */ + if (bit & (PG_A|PG_M)) { + if ((pv->pv_va >= clean_sva) && (pv->pv_va < clean_eva)) { continue; } - pte = pmap_pte(pv->pv_pmap, pv->pv_va); - if ((int) *pte & bit) { - splx(s); - return TRUE; - } + } + if (!pv->pv_pmap) { +#if defined(PMAP_DIAGNOSTIC) + printf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va); +#endif + continue; + } + pte = pmap_pte(pv->pv_pmap, pv->pv_va); + if ((int) *pte & bit) { + splx(s); + return TRUE; } } splx(s); @@ -1957,60 +2152,64 @@ pmap_changebit(pa, bit, setem) int bit; boolean_t setem; { - register pv_entry_t pv; - register pt_entry_t *pte; + register pv_entry_t pv, *ppv; + register unsigned *pte, npte; vm_offset_t va; + int changed; int s; if (!pmap_is_managed(pa)) return; - pv = pa_to_pvh(pa); - s = splhigh(); + s = splvm(); + changed = 0; + ppv = pa_to_pvh(pa); /* * Loop over all current mappings setting/clearing as appropos If * setting RO do we need to clear the VAC? 
*/ - if (pv->pv_pmap != NULL) { - for (; pv; pv = pv->pv_next) { - va = pv->pv_va; + for ( pv = *ppv; pv; pv = pv->pv_next) { + va = pv->pv_va; - /* - * don't write protect pager mappings - */ - if (!setem && (bit == PG_RW)) { - if (va >= clean_sva && va < clean_eva) - continue; - } - if (!pv->pv_pmap) { -#if defined(PMAP_DIAGNOSTIC) - printf("Null pmap (cb) at va: 0x%lx\n", va); -#endif + /* + * don't write protect pager mappings + */ + if (!setem && (bit == PG_RW)) { + if (va >= clean_sva && va < clean_eva) continue; - } + } + if (!pv->pv_pmap) { +#if defined(PMAP_DIAGNOSTIC) + printf("Null pmap (cb) at va: 0x%lx\n", va); +#endif + continue; + } - pte = pmap_pte(pv->pv_pmap, va); - if (setem) { - *(int *)pte |= bit; - } else { - if (bit == PG_RW) { - vm_offset_t pbits = *(vm_offset_t *)pte; - if (pbits & PG_M) { - vm_page_t m; - vm_offset_t pa = pbits & PG_FRAME; - m = PHYS_TO_VM_PAGE(pa); - m->dirty = VM_PAGE_BITS_ALL; - } - *(int *)pte &= ~(PG_M|PG_RW); - } else { - *(int *)pte &= ~bit; + pte = pmap_pte(pv->pv_pmap, va); + if (setem) { + *(int *)pte |= bit; + changed = 1; + } else { + vm_offset_t pbits = *(vm_offset_t *)pte; + if (pbits & bit) + changed = 1; + if (bit == PG_RW) { + if (pbits & PG_M) { + vm_page_t m; + vm_offset_t pa = pbits & PG_FRAME; + m = PHYS_TO_VM_PAGE(pa); + m->dirty = VM_PAGE_BITS_ALL; } + *(int *)pte = pbits & ~(PG_M|PG_RW); + } else { + *(int *)pte = pbits & ~bit; } } } splx(s); - pmap_update(); + if (changed) + pmap_update(); } /* @@ -2026,8 +2225,10 @@ pmap_page_protect(phys, prot) if ((prot & VM_PROT_WRITE) == 0) { if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) pmap_changebit(phys, PG_RW, FALSE); - else + else { pmap_remove_all(phys); + pmap_update(); + } } } @@ -2127,7 +2328,7 @@ pmap_mapdev(pa, size) vm_size_t size; { vm_offset_t va, tmpva; - pt_entry_t *pte; + unsigned *pte; size = roundup(size, PAGE_SIZE); @@ -2137,8 +2338,8 @@ pmap_mapdev(pa, size) pa = pa & PG_FRAME; for (tmpva = va; size > 0;) { - pte = vtopte(tmpva); - *pte = (pt_entry_t) ((int) (pa | PG_RW | PG_V | PG_N)); + pte = (unsigned *)vtopte(tmpva); + *pte = pa | PG_RW | PG_V | PG_N; size -= PAGE_SIZE; tmpva += PAGE_SIZE; pa += PAGE_SIZE; @@ -2164,8 +2365,8 @@ pmap_pid_dump(int pid) { pmap = &p->p_vmspace->vm_pmap; for(i=0;i<1024;i++) { pd_entry_t *pde; - pt_entry_t *pte; - unsigned base = i << PD_SHIFT; + unsigned *pte; + unsigned base = i << PDRSHIFT; pde = &pmap->pm_pdir[i]; if (pde && pmap_pde_v(pde)) { @@ -2215,7 +2416,7 @@ pads(pm) pmap_t pm; { unsigned va, i, j; - pt_entry_t *ptep; + unsigned *ptep; if (pm == kernel_pmap) return; @@ -2253,3 +2454,5 @@ pmap_pvdump(pa) printf(" "); } #endif + + diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c index c97e50aca6c1..b81cfc10601b 100644 --- a/sys/i386/i386/trap.c +++ b/sys/i386/i386/trap.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. 
* * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 - * $Id: trap.c,v 1.74 1996/03/27 17:33:39 bde Exp $ + * $Id: trap.c,v 1.75 1996/03/28 05:40:57 dyson Exp $ */ /* @@ -805,25 +805,11 @@ int trapwrite(addr) v = trunc_page(vtopte(va)); - /* - * wire the pte page - */ - if (va < USRSTACK) { - vm_map_pageable(&vm->vm_map, v, round_page(v+1), FALSE); - } - /* * fault the data page */ rv = vm_fault(&vm->vm_map, va, VM_PROT_READ|VM_PROT_WRITE, FALSE); - /* - * unwire the pte page - */ - if (va < USRSTACK) { - vm_map_pageable(&vm->vm_map, v, round_page(v+1), TRUE); - } - --p->p_lock; if (rv != KERN_SUCCESS) diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index 4c1823e40360..e764b2f7def5 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -38,7 +38,7 @@ * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ - * $Id: vm_machdep.c,v 1.61 1996/05/02 10:43:06 phk Exp $ + * $Id: vm_machdep.c,v 1.62 1996/05/02 14:19:55 phk Exp $ */ #include "npx.h" @@ -862,7 +862,7 @@ int vm_page_zero_idle() { vm_page_t m; if ((cnt.v_free_count > cnt.v_interrupt_free_min) && - (m = vm_page_queue_free.tqh_first)) { + (m = TAILQ_FIRST(&vm_page_queue_free))) { TAILQ_REMOVE(&vm_page_queue_free, m, pageq); enable_intr(); pmap_zero_page(VM_PAGE_TO_PHYS(m)); diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h index a843fbfcbddc..f0d7fe695214 100644 --- a/sys/i386/include/pmap.h +++ b/sys/i386/include/pmap.h @@ -42,7 +42,7 @@ * * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91 - * $Id: pmap.h,v 1.37 1996/05/02 14:20:04 phk Exp $ + * $Id: pmap.h,v 1.38 1996/05/02 22:25:18 phk Exp $ */ #ifndef _MACHINE_PMAP_H_ @@ -69,6 +69,7 @@ /* Our various interpretations of the above */ #define PG_W PG_AVAIL1 /* "Wired" pseudoflag */ +#define PG_MANAGED PG_AVAIL2 #define PG_FRAME (~PAGE_MASK) #define PG_PROT (PG_RW|PG_U) /* all protection bits . 
*/ #define PG_N (PG_NC_PWT|PG_NC_PCD) /* Non-cacheable */ @@ -87,12 +88,8 @@ #define VADDR(pdi, pti) ((vm_offset_t)(((pdi)< @@ -171,7 +171,7 @@ interpret: * Map the image header (first page) of the file into * kernel address space */ - error = vm_mmap(kernel_map, /* map */ + error = vm_mmap(exech_map, /* map */ (vm_offset_t *)&imgp->image_header, /* address */ PAGE_SIZE, /* size */ VM_PROT_READ, /* protection */ @@ -206,7 +206,7 @@ interpret: /* free old vnode and name buffer */ vrele(ndp->ni_vp); FREE(ndp->ni_cnd.cn_pnbuf, M_NAMEI); - if (vm_map_remove(kernel_map, (vm_offset_t)imgp->image_header, + if (vm_map_remove(exech_map, (vm_offset_t)imgp->image_header, (vm_offset_t)imgp->image_header + PAGE_SIZE)) panic("execve: header dealloc failed (1)"); @@ -319,7 +319,7 @@ interpret: * free various allocated resources */ kmem_free(exec_map, (vm_offset_t)imgp->stringbase, ARG_MAX); - if (vm_map_remove(kernel_map, (vm_offset_t)imgp->image_header, + if (vm_map_remove(exech_map, (vm_offset_t)imgp->image_header, (vm_offset_t)imgp->image_header + PAGE_SIZE)) panic("execve: header dealloc failed (2)"); vrele(ndp->ni_vp); @@ -331,7 +331,7 @@ exec_fail_dealloc: if (imgp->stringbase != NULL) kmem_free(exec_map, (vm_offset_t)imgp->stringbase, ARG_MAX); if (imgp->image_header && imgp->image_header != (char *)-1) - if (vm_map_remove(kernel_map, (vm_offset_t)imgp->image_header, + if (vm_map_remove(exech_map, (vm_offset_t)imgp->image_header, (vm_offset_t)imgp->image_header + PAGE_SIZE)) panic("execve: header dealloc failed (3)"); if (ndp->ni_vp) diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index c97e50aca6c1..b81cfc10601b 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 - * $Id: trap.c,v 1.74 1996/03/27 17:33:39 bde Exp $ + * $Id: trap.c,v 1.75 1996/03/28 05:40:57 dyson Exp $ */ /* @@ -805,25 +805,11 @@ int trapwrite(addr) v = trunc_page(vtopte(va)); - /* - * wire the pte page - */ - if (va < USRSTACK) { - vm_map_pageable(&vm->vm_map, v, round_page(v+1), FALSE); - } - /* * fault the data page */ rv = vm_fault(&vm->vm_map, va, VM_PROT_READ|VM_PROT_WRITE, FALSE); - /* - * unwire the pte page - */ - if (va < USRSTACK) { - vm_map_pageable(&vm->vm_map, v, round_page(v+1), TRUE); - } - --p->p_lock; if (rv != KERN_SUCCESS) diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index d449b94b8469..cb76f05531c9 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -18,7 +18,7 @@ * 5. Modifications may be freely made to this file if the above conditions * are met. * - * $Id: vfs_bio.c,v 1.88 1996/03/09 06:46:51 dyson Exp $ + * $Id: vfs_bio.c,v 1.89 1996/05/03 21:01:26 phk Exp $ */ /* @@ -509,7 +509,7 @@ brelse(struct buf * bp) /* buffers with no memory */ if (bp->b_bufsize == 0) { bp->b_qindex = QUEUE_EMPTY; - TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist); + TAILQ_INSERT_HEAD(&bufqueues[QUEUE_EMPTY], bp, b_freelist); LIST_REMOVE(bp, b_hash); LIST_INSERT_HEAD(&invalhash, bp, b_hash); bp->b_dev = NODEV; @@ -742,7 +742,7 @@ start: goto trytofreespace; /* can we constitute a new buffer? */ - if ((bp = bufqueues[QUEUE_EMPTY].tqh_first)) { + if ((bp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTY]))) { if (bp->b_qindex != QUEUE_EMPTY) panic("getnewbuf: inconsistent EMPTY queue, qindex=%d", bp->b_qindex); @@ -756,11 +756,11 @@ trytofreespace: * This is desirable because file data is cached in the * VM/Buffer cache even if a buffer is freed. 
*/ - if ((bp = bufqueues[QUEUE_AGE].tqh_first)) { + if ((bp = TAILQ_FIRST(&bufqueues[QUEUE_AGE]))) { if (bp->b_qindex != QUEUE_AGE) panic("getnewbuf: inconsistent AGE queue, qindex=%d", bp->b_qindex); - } else if ((bp = bufqueues[QUEUE_LRU].tqh_first)) { + } else if ((bp = TAILQ_FIRST(&bufqueues[QUEUE_LRU]))) { if (bp->b_qindex != QUEUE_LRU) panic("getnewbuf: inconsistent LRU queue, qindex=%d", bp->b_qindex); @@ -783,7 +783,7 @@ trytofreespace: (vmiospace < maxvmiobufspace)) { --bp->b_usecount; TAILQ_REMOVE(&bufqueues[QUEUE_LRU], bp, b_freelist); - if (bufqueues[QUEUE_LRU].tqh_first != NULL) { + if (TAILQ_FIRST(&bufqueues[QUEUE_LRU]) != NULL) { TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist); goto start; } @@ -1498,9 +1498,9 @@ count_lock_queue() struct buf *bp; count = 0; - for (bp = bufqueues[QUEUE_LOCKED].tqh_first; + for (bp = TAILQ_FIRST(&bufqueues[QUEUE_LOCKED]); bp != NULL; - bp = bp->b_freelist.tqe_next) + bp = TAILQ_NEXT(bp, b_freelist)) count++; return (count); } @@ -1663,7 +1663,6 @@ vfs_clean_pages(struct buf * bp) void vfs_bio_clrbuf(struct buf *bp) { int i; - int remapbuffer = 0; if( bp->b_flags & B_VMIO) { if( (bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE)) { int mask; @@ -1691,14 +1690,12 @@ vfs_bio_clrbuf(struct buf *bp) { bzero(bp->b_data + (i << PAGE_SHIFT) + j * DEV_BSIZE, DEV_BSIZE); } } - bp->b_pages[i]->valid = VM_PAGE_BITS_ALL; + /* bp->b_pages[i]->valid = VM_PAGE_BITS_ALL; */ } bp->b_resid = 0; } else { clrbuf(bp); } - if (remapbuffer) - pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages); } /* diff --git a/sys/sys/queue.h b/sys/sys/queue.h index 8df0499dbd5e..abe8e98ebd91 100644 --- a/sys/sys/queue.h +++ b/sys/sys/queue.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)queue.h 8.5 (Berkeley) 8/20/94 - * $Id: queue.h,v 1.8 1996/03/31 03:21:45 gibbs Exp $ + * $Id: queue.h,v 1.9 1996/04/08 07:51:57 phk Exp $ */ #ifndef _SYS_QUEUE_H_ @@ -268,7 +268,9 @@ struct { \ #define TAILQ_LAST(head) ((head)->tqh_last) -#define TAILQ_NEXT(elm, field) ((elm)->field.teq_next) +#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next) + +#define TAILQ_PREV(elm, field) ((elm)->field.tqe_prev) #define TAILQ_INIT(head) { \ (head)->tqh_first = NULL; \ diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c index 8b1ddf2ab642..514b4716a652 100644 --- a/sys/vm/device_pager.c +++ b/sys/vm/device_pager.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)device_pager.c 8.1 (Berkeley) 6/11/93 - * $Id: device_pager.c,v 1.21 1996/03/09 06:54:41 dyson Exp $ + * $Id: device_pager.c,v 1.22 1996/05/03 21:01:45 phk Exp $ */ #include @@ -182,7 +182,7 @@ dev_pager_dealloc(object) /* * Free up our fake pages. 
*/ - while ((m = object->un_pager.devp.devp_pglist.tqh_first) != 0) { + while ((m = TAILQ_FIRST(&object->un_pager.devp.devp_pglist)) != 0) { TAILQ_REMOVE(&object->un_pager.devp.devp_pglist, m, pageq); dev_pager_putfake(m); } @@ -265,14 +265,14 @@ dev_pager_getfake(paddr) vm_page_t m; int i; - if (dev_pager_fakelist.tqh_first == NULL) { + if (TAILQ_FIRST(&dev_pager_fakelist) == NULL) { m = (vm_page_t) malloc(PAGE_SIZE * 2, M_VMPGDATA, M_WAITOK); for (i = (PAGE_SIZE * 2) / sizeof(*m); i > 0; i--) { TAILQ_INSERT_TAIL(&dev_pager_fakelist, m, pageq); m++; } } - m = dev_pager_fakelist.tqh_first; + m = TAILQ_FIRST(&dev_pager_fakelist); TAILQ_REMOVE(&dev_pager_fakelist, m, pageq); m->flags = PG_BUSY | PG_FICTITIOUS; diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index b6c7db613f56..4feebd56415e 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -39,7 +39,7 @@ * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$ * * @(#)swap_pager.c 8.9 (Berkeley) 3/21/94 - * $Id: swap_pager.c,v 1.64 1996/05/02 14:21:14 phk Exp $ + * $Id: swap_pager.c,v 1.65 1996/05/03 21:01:47 phk Exp $ */ /* @@ -530,7 +530,7 @@ swap_pager_reclaim() /* for each pager queue */ for (k = 0; swp_qs[k]; k++) { - object = swp_qs[k]->tqh_first; + object = TAILQ_FIRST(swp_qs[k]); while (object && (reclaimcount < MAXRECLAIM)) { /* @@ -555,7 +555,7 @@ swap_pager_reclaim() } } } - object = object->pager_object_list.tqe_next; + object = TAILQ_NEXT(object, pager_object_list); } } @@ -956,8 +956,8 @@ swap_pager_getpages(object, m, count, reqpage) spc = NULL; /* we might not use an spc data structure */ - if ((count == 1) && (swap_pager_free.tqh_first != NULL)) { - spc = swap_pager_free.tqh_first; + if ((count == 1) && (TAILQ_FIRST(&swap_pager_free) != NULL)) { + spc = TAILQ_FIRST(&swap_pager_free); TAILQ_REMOVE(&swap_pager_free, spc, spc_list); kva = spc->spc_kva; bp = spc->spc_bp; @@ -1263,9 +1263,9 @@ swap_pager_putpages(object, m, count, sync, rtvals) /* * get a swap pager clean data structure, block until we get it */ - if (swap_pager_free.tqh_first == NULL || - swap_pager_free.tqh_first->spc_list.tqe_next == NULL || - swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) { + if (TAILQ_FIRST(&swap_pager_free) == NULL || + TAILQ_NEXT(TAILQ_FIRST(&swap_pager_free),spc_list) == NULL || + TAILQ_NEXT(TAILQ_NEXT(TAILQ_FIRST(&swap_pager_free),spc_list),spc_list) == NULL) { s = splbio(); if (curproc == pageproc) { retryfree: @@ -1285,9 +1285,9 @@ retryfree: */ if (tsleep(&swap_pager_free, PVM, "swpfre", hz/5)) { swap_pager_sync(); - if (swap_pager_free.tqh_first == NULL || - swap_pager_free.tqh_first->spc_list.tqe_next == NULL || - swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) { + if (TAILQ_FIRST(&swap_pager_free) == NULL || + TAILQ_NEXT(TAILQ_FIRST(&swap_pager_free),spc_list) == NULL || + TAILQ_NEXT(TAILQ_NEXT(TAILQ_FIRST(&swap_pager_free),spc_list),spc_list) == NULL) { splx(s); return VM_PAGER_AGAIN; } @@ -1297,17 +1297,17 @@ retryfree: * the free swap control blocks. 
*/ swap_pager_sync(); - if (swap_pager_free.tqh_first == NULL || - swap_pager_free.tqh_first->spc_list.tqe_next == NULL || - swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) { + if (TAILQ_FIRST(&swap_pager_free) == NULL || + TAILQ_NEXT(TAILQ_FIRST(&swap_pager_free),spc_list) == NULL || + TAILQ_NEXT(TAILQ_NEXT(TAILQ_FIRST(&swap_pager_free),spc_list),spc_list) == NULL) { goto retryfree; } } } else { pagedaemon_wakeup(); - while (swap_pager_free.tqh_first == NULL || - swap_pager_free.tqh_first->spc_list.tqe_next == NULL || - swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) { + while (TAILQ_FIRST(&swap_pager_free) == NULL || + TAILQ_NEXT(TAILQ_FIRST(&swap_pager_free),spc_list) == NULL || + TAILQ_NEXT(TAILQ_NEXT(TAILQ_FIRST(&swap_pager_free),spc_list),spc_list) == NULL) { swap_pager_needflags |= SWAP_FREE_NEEDED; tsleep(&swap_pager_free, PVM, "swpfre", 0); pagedaemon_wakeup(); @@ -1315,7 +1315,7 @@ retryfree: } splx(s); } - spc = swap_pager_free.tqh_first; + spc = TAILQ_FIRST(&swap_pager_free); TAILQ_REMOVE(&swap_pager_free, spc, spc_list); kva = spc->spc_kva; @@ -1482,7 +1482,7 @@ swap_pager_sync() register int s; tspc = NULL; - if (swap_pager_done.tqh_first == NULL) + if (TAILQ_FIRST(&swap_pager_done) == NULL) return; for (;;) { s = splbio(); @@ -1490,7 +1490,7 @@ swap_pager_sync() * Look up and removal from done list must be done at splbio() * to avoid conflicts with swap_pager_iodone. */ - while ((spc = swap_pager_done.tqh_first) != 0) { + while ((spc = TAILQ_FIRST(&swap_pager_done)) != 0) { pmap_qremove(spc->spc_kva, spc->spc_count); swap_pager_finish(spc); TAILQ_REMOVE(&swap_pager_done, spc, spc_list); @@ -1609,7 +1609,7 @@ swap_pager_iodone(bp) wakeup(spc->spc_object); } if ((swap_pager_needflags & SWAP_FREE_NEEDED) || - swap_pager_inuse.tqh_first == 0) { + TAILQ_FIRST(&swap_pager_inuse) == 0) { swap_pager_needflags &= ~SWAP_FREE_NEEDED; wakeup(&swap_pager_free); } @@ -1623,7 +1623,7 @@ swap_pager_iodone(bp) wakeup(&vm_pageout_pages_needed); vm_pageout_pages_needed = 0; } - if ((swap_pager_inuse.tqh_first == NULL) || + if ((TAILQ_FIRST(&swap_pager_inuse) == NULL) || ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min && nswiodone + cnt.v_free_count + cnt.v_cache_count >= cnt.v_free_min)) { pagedaemon_wakeup(); diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index e9f8f16065e5..904270b63a35 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -66,7 +66,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_fault.c,v 1.42 1996/03/09 06:48:26 dyson Exp $ + * $Id: vm_fault.c,v 1.43 1996/03/28 04:53:23 dyson Exp $ */ /* @@ -269,8 +269,8 @@ RetryFault:; int s; UNLOCK_THINGS; - s = splhigh(); - if ((m->flags & PG_BUSY) || m->busy) { + s = splvm(); + if (((m->flags & PG_BUSY) || m->busy)) { m->flags |= PG_WANTED | PG_REFERENCED; cnt.v_intrans++; tsleep(m, PSWP, "vmpfw", 0); @@ -311,7 +311,7 @@ RetryFault:; * Allocate a new page for this object/offset pair. 
*/ m = vm_page_alloc(object, pindex, - vp?VM_ALLOC_NORMAL:VM_ALLOC_ZERO); + (vp || object->backing_object)?VM_ALLOC_NORMAL:VM_ALLOC_ZERO); if (m == NULL) { UNLOCK_AND_DEALLOCATE; @@ -551,9 +551,9 @@ readrest: vm_pindex_t other_pindex, other_pindex_offset; vm_page_t tm; - other_object = object->shadow_head.tqh_first; + other_object = TAILQ_FIRST(&object->shadow_head); if (other_object == first_object) - other_object = other_object->shadow_list.tqe_next; + other_object = TAILQ_NEXT(other_object, shadow_list); if (!other_object) panic("vm_fault: other object missing"); if (other_object && @@ -712,7 +712,7 @@ readrest: m->valid = VM_PAGE_BITS_ALL; pmap_enter(map->pmap, vaddr, VM_PAGE_TO_PHYS(m), prot, wired); - if (vp && (change_wiring == 0) && (wired == 0)) + if ((change_wiring == 0) && (wired == 0)) pmap_prefault(map->pmap, vaddr, entry, first_object); /* @@ -780,8 +780,9 @@ vm_fault_wire(map, start, end) for (va = start; va < end; va += PAGE_SIZE) { while( curproc != pageproc && - (cnt.v_free_count <= cnt.v_pageout_free_min)) + (cnt.v_free_count <= cnt.v_pageout_free_min)) { VM_WAIT; + } rv = vm_fault(map, va, VM_PROT_READ|VM_PROT_WRITE, TRUE); if (rv) { @@ -817,11 +818,10 @@ vm_fault_unwire(map, start, end) for (va = start; va < end; va += PAGE_SIZE) { pa = pmap_extract(pmap, va); - if (pa == (vm_offset_t) 0) { - panic("unwire: page not in pmap"); + if (pa != (vm_offset_t) 0) { + pmap_change_wiring(pmap, va, FALSE); + vm_page_unwire(PHYS_TO_VM_PAGE(pa)); } - pmap_change_wiring(pmap, va, FALSE); - vm_page_unwire(PHYS_TO_VM_PAGE(pa)); } /* diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index 4e6db8c20f18..8e09433587d3 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -59,7 +59,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. 
* - * $Id: vm_glue.c,v 1.47 1996/04/09 04:36:58 dyson Exp $ + * $Id: vm_glue.c,v 1.48 1996/05/02 09:34:51 phk Exp $ */ #include "opt_ddb.h" @@ -196,16 +196,15 @@ vm_fork(p1, p2) register struct proc *p1, *p2; { register struct user *up; - vm_offset_t addr, ptaddr, ptpa; int error, i; - vm_map_t map; pmap_t pvp; - vm_page_t stkm; + vm_object_t upobj; while ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min) { VM_WAIT; } +#if 0 /* * avoid copying any of the parent's pagetables or other per-process * objects that reside in the map by marking all of them @@ -213,6 +212,7 @@ vm_fork(p1, p2) */ (void) vm_map_inherit(&p1->p_vmspace->vm_map, UPT_MIN_ADDRESS - UPAGES * PAGE_SIZE, VM_MAX_ADDRESS, VM_INHERIT_NONE); +#endif p2->p_vmspace = vmspace_fork(p1->p_vmspace); if (p1->p_vmspace->vm_shm) @@ -223,61 +223,26 @@ vm_fork(p1, p2) * process */ - addr = (vm_offset_t) kstack; - - map = &p2->p_vmspace->vm_map; pvp = &p2->p_vmspace->vm_pmap; /* * allocate object for the upages */ - p2->p_vmspace->vm_upages_obj = vm_object_allocate( OBJT_DEFAULT, + p2->p_vmspace->vm_upages_obj = upobj = vm_object_allocate( OBJT_DEFAULT, UPAGES); - /* - * put upages into the address space - */ - error = vm_map_find(map, p2->p_vmspace->vm_upages_obj, 0, - &addr, UPT_MIN_ADDRESS - addr, FALSE, VM_PROT_ALL, - VM_PROT_ALL, 0); - if (error != KERN_SUCCESS) - panic("vm_fork: vm_map_find (UPAGES) failed, addr=0x%x, error=%d", addr, error); - - addr += UPAGES * PAGE_SIZE; - /* allocate space for page tables */ - error = vm_map_find(map, NULL, 0, &addr, UPT_MAX_ADDRESS - addr, FALSE, - VM_PROT_ALL, VM_PROT_ALL, 0); - if (error != KERN_SUCCESS) - panic("vm_fork: vm_map_find (PTES) failed, addr=0x%x, error=%d", addr, error); - /* get a kernel virtual address for the UPAGES for this proc */ up = (struct user *) kmem_alloc_pageable(u_map, UPAGES * PAGE_SIZE); if (up == NULL) panic("vm_fork: u_map allocation failed"); - /* - * create a pagetable page for the UPAGES in the process address space - */ - ptaddr = trunc_page((u_int) vtopte(kstack)); - (void) vm_fault(map, ptaddr, VM_PROT_READ|VM_PROT_WRITE, FALSE); - ptpa = pmap_extract(pvp, ptaddr); - if (ptpa == 0) { - panic("vm_fork: no pte for UPAGES"); - } - - /* - * hold the page table page for the kernel stack, and fault them in - */ - stkm = PHYS_TO_VM_PAGE(ptpa); - vm_page_hold(stkm); - for(i=0;ip_vmspace->vm_upages_obj, + while ((m = vm_page_alloc(upobj, i, VM_ALLOC_NORMAL)) == NULL) { VM_WAIT; } @@ -286,24 +251,20 @@ vm_fork(p1, p2) * Wire the page */ vm_page_wire(m); - m->flags &= ~PG_BUSY; + PAGE_WAKEUP(m); /* * Enter the page into both the kernel and the process * address space. */ pmap_enter( pvp, (vm_offset_t) kstack + i * PAGE_SIZE, - VM_PAGE_TO_PHYS(m), VM_PROT_READ|VM_PROT_WRITE, 1); + VM_PAGE_TO_PHYS(m), VM_PROT_READ|VM_PROT_WRITE, TRUE); pmap_kenter(((vm_offset_t) up) + i * PAGE_SIZE, VM_PAGE_TO_PHYS(m)); m->flags &= ~PG_ZERO; + m->flags |= PG_MAPPED; m->valid = VM_PAGE_BITS_ALL; } - /* - * The page table page for the kernel stack should be held in memory - * now. 
- */ - vm_page_unhold(stkm); p2->p_addr = up; @@ -371,33 +332,22 @@ faultin(p) int s; if ((p->p_flag & P_INMEM) == 0) { - vm_map_t map = &p->p_vmspace->vm_map; pmap_t pmap = &p->p_vmspace->vm_pmap; vm_page_t stkm, m; - vm_offset_t ptpa; int error; + vm_object_t upobj = p->p_vmspace->vm_upages_obj; ++p->p_lock; #if defined(SWAP_DEBUG) printf("swapping in %d\n", p->p_pid); #endif - ptaddr = trunc_page((u_int) vtopte(kstack)); - (void) vm_fault(map, ptaddr, VM_PROT_READ|VM_PROT_WRITE, FALSE); - ptpa = pmap_extract(&p->p_vmspace->vm_pmap, ptaddr); - if (ptpa == 0) { - panic("vm_fork: no pte for UPAGES"); - } - stkm = PHYS_TO_VM_PAGE(ptpa); - vm_page_hold(stkm); - for(i=0;ip_vmspace->vm_upages_obj, i)) == NULL) { - if ((m = vm_page_alloc(p->p_vmspace->vm_upages_obj, i, VM_ALLOC_NORMAL)) == NULL) { + if ((m = vm_page_lookup(upobj, i)) == NULL) { + if ((m = vm_page_alloc(upobj, i, VM_ALLOC_NORMAL)) == NULL) { VM_WAIT; goto retry; } @@ -407,10 +357,9 @@ retry: tsleep(m, PVM, "swinuw",0); goto retry; } + m->flags |= PG_BUSY; } vm_page_wire(m); - if (m->valid == VM_PAGE_BITS_ALL) - m->flags &= ~PG_BUSY; splx(s); pmap_enter( pmap, (vm_offset_t) kstack + i * PAGE_SIZE, @@ -419,16 +368,15 @@ retry: VM_PAGE_TO_PHYS(m)); if (m->valid != VM_PAGE_BITS_ALL) { int rv; - rv = vm_pager_get_pages(p->p_vmspace->vm_upages_obj, + rv = vm_pager_get_pages(upobj, &m, 1, 0); if (rv != VM_PAGER_OK) panic("faultin: cannot get upages for proc: %d\n", p->p_pid); m->valid = VM_PAGE_BITS_ALL; - m->flags &= ~PG_BUSY; } + PAGE_WAKEUP(m); + m->flags |= PG_MAPPED; } - vm_page_unhold(stkm); - s = splhigh(); @@ -527,8 +475,12 @@ swapout_procs() outpri = outpri2 = INT_MIN; retry: for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { + struct vmspace *vm; if (!swappable(p)) continue; + + vm = p->p_vmspace; + switch (p->p_stat) { default: continue; @@ -549,22 +501,25 @@ retry: (p->p_slptime <= 4)) continue; - vm_map_reference(&p->p_vmspace->vm_map); + ++vm->vm_refcnt; + vm_map_reference(&vm->vm_map); /* * do not swapout a process that is waiting for VM * datastructures there is a possible deadlock. */ - if (!lock_try_write(&p->p_vmspace->vm_map.lock)) { - vm_map_deallocate(&p->p_vmspace->vm_map); + if (!lock_try_write(&vm->vm_map.lock)) { + vm_map_deallocate(&vm->vm_map); + vmspace_free(vm); continue; } - vm_map_unlock(&p->p_vmspace->vm_map); + vm_map_unlock(&vm->vm_map); /* * If the process has been asleep for awhile and had * most of its pages taken away already, swap it out. */ swapout(p); - vm_map_deallocate(&p->p_vmspace->vm_map); + vm_map_deallocate(&vm->vm_map); + vmspace_free(vm); didswap++; goto retry; } @@ -612,6 +567,7 @@ swapout(p) panic("swapout: upage already missing???"); m->dirty = VM_PAGE_BITS_ALL; vm_page_unwire(m); + vm_page_deactivate(m); pmap_kremove( (vm_offset_t) p->p_addr + PAGE_SIZE * i); } pmap_remove(pmap, (vm_offset_t) kstack, diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index a820f9dcf055..fbad3f1f1b26 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. 
* - * $Id: vm_kern.c,v 1.23 1996/04/24 04:16:44 dyson Exp $ + * $Id: vm_kern.c,v 1.24 1996/05/10 19:28:54 wollman Exp $ */ /* @@ -100,6 +100,7 @@ vm_map_t io_map; vm_map_t clean_map; vm_map_t phys_map; vm_map_t exec_map; +vm_map_t exech_map; vm_map_t u_map; /* @@ -327,22 +328,8 @@ kmem_malloc(map, size, waitflag) vm_map_insert(map, kmem_object, offset, addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0); - /* - * If we can wait, just mark the range as wired (will fault pages as - * necessary). - */ - if (waitflag == M_WAITOK) { - vm_map_unlock(map); - (void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, - FALSE); - vm_map_simplify(map, addr); - return (addr); - } - /* - * If we cannot wait then we must allocate all memory up front, - * pulling it off the active queue to prevent pageout. - */ for (i = 0; i < size; i += PAGE_SIZE) { +retry: m = vm_page_alloc(kmem_object, OFF_TO_IDX(offset + i), (waitflag == M_NOWAIT) ? VM_ALLOC_INTERRUPT : VM_ALLOC_SYSTEM); @@ -352,6 +339,10 @@ kmem_malloc(map, size, waitflag) * aren't on any queues. */ if (m == NULL) { + if (waitflag == M_WAITOK) { + VM_WAIT; + goto retry; + } while (i != 0) { i -= PAGE_SIZE; m = vm_page_lookup(kmem_object, @@ -362,7 +353,7 @@ kmem_malloc(map, size, waitflag) vm_map_unlock(map); return (0); } - m->flags &= ~(PG_BUSY|PG_ZERO); + m->flags &= ~PG_ZERO; m->valid = VM_PAGE_BITS_ALL; } @@ -386,7 +377,9 @@ kmem_malloc(map, size, waitflag) for (i = 0; i < size; i += PAGE_SIZE) { m = vm_page_lookup(kmem_object, OFF_TO_IDX(offset + i)); vm_page_wire(m); - pmap_kenter(addr + i, VM_PAGE_TO_PHYS(m)); + PAGE_WAKEUP(m); + pmap_enter(kernel_pmap, addr + i, VM_PAGE_TO_PHYS(m), + VM_PROT_ALL, 1); } vm_map_unlock(map); diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 619530963283..e0948e49f4d5 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. 
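The kmem_malloc() hunk above stops relying on vm_map_pageable() for M_WAITOK callers and allocates every page up front; only the M_WAITOK path may sleep and retry. A sketch of that loop under the same assumptions (kmem_object, OFF_TO_IDX, VM_WAIT); kmem_backfill is a hypothetical name and the M_NOWAIT back-out path of the real routine is reduced here to a simple failure return:

static int
kmem_backfill(offset, size, waitflag)
	vm_ooffset_t offset;
	vm_size_t size;
	int waitflag;
{
	vm_page_t m;
	vm_size_t i;

	for (i = 0; i < size; i += PAGE_SIZE) {
retry:
		m = vm_page_alloc(kmem_object, OFF_TO_IDX(offset + i),
		    (waitflag == M_NOWAIT) ? VM_ALLOC_INTERRUPT : VM_ALLOC_SYSTEM);
		if (m == NULL) {
			if (waitflag == M_WAITOK) {
				VM_WAIT;	/* let the pagedaemon make progress */
				goto retry;	/* then retry this page index */
			}
			return (0);		/* M_NOWAIT: caller must back out */
		}
		m->flags &= ~PG_ZERO;
		m->valid = VM_PAGE_BITS_ALL;
	}
	return (1);
}

The retry sits inside the loop, so a partially filled range keeps the pages it has already taken until the failure path (not shown) releases them.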
* - * $Id: vm_map.c,v 1.43 1996/04/29 22:04:57 dyson Exp $ + * $Id: vm_map.c,v 1.44 1996/05/03 21:01:49 phk Exp $ */ /* @@ -157,11 +157,15 @@ static int kentry_count; static vm_offset_t mapvm_start, mapvm, mapvmmax; static int mapvmpgcnt; +static struct vm_map_entry *mappool; +static int mappoolcnt; +#define KENTRY_LOW_WATER 128 + static void _vm_map_clip_end __P((vm_map_t, vm_map_entry_t, vm_offset_t)); static void _vm_map_clip_start __P((vm_map_t, vm_map_entry_t, vm_offset_t)); static vm_map_entry_t vm_map_entry_create __P((vm_map_t)); static void vm_map_entry_delete __P((vm_map_t, vm_map_entry_t)); -static void vm_map_entry_dispose __P((vm_map_t, vm_map_entry_t)); +static __inline void vm_map_entry_dispose __P((vm_map_t, vm_map_entry_t)); static void vm_map_entry_unwire __P((vm_map_t, vm_map_entry_t)); static void vm_map_copy_entry __P((vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t)); @@ -214,11 +218,10 @@ vmspace_alloc(min, max, pageable) if (mapvmpgcnt == 0 && mapvm == 0) { int s; - mapvmpgcnt = btoc(cnt.v_page_count * sizeof(struct vm_map_entry)); - s = splhigh(); - mapvm_start = mapvm = kmem_alloc_pageable(kernel_map, mapvmpgcnt * PAGE_SIZE); + mapvmpgcnt = (cnt.v_page_count * sizeof(struct vm_map_entry) + PAGE_SIZE - 1) / PAGE_SIZE; + mapvm_start = mapvm = kmem_alloc_pageable(kernel_map, + mapvmpgcnt * PAGE_SIZE); mapvmmax = mapvm_start + mapvmpgcnt * PAGE_SIZE; - splx(s); if (!mapvm) mapvmpgcnt = 0; } @@ -241,7 +244,6 @@ vmspace_free(vm) panic("vmspace_free: attempt to free already freed vmspace"); if (--vm->vm_refcnt == 0) { - int s, i; /* * Lock the map, to wait out all other references to it. @@ -252,11 +254,17 @@ vmspace_free(vm) (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset, vm->vm_map.max_offset); vm_map_unlock(&vm->vm_map); + while( vm->vm_map.ref_count != 1) tsleep(&vm->vm_map.ref_count, PVM, "vmsfre", 0); --vm->vm_map.ref_count; + vm_object_pmap_remove(vm->vm_upages_obj, + 0, vm->vm_upages_obj->size); + vm_object_deallocate(vm->vm_upages_obj); pmap_release(&vm->vm_pmap); FREE(vm, M_VMMAP); + } else { + wakeup(&vm->vm_map.ref_count); } } @@ -314,45 +322,66 @@ vm_map_init(map, min, max, pageable) lock_init(&map->lock, TRUE); } +/* + * vm_map_entry_dispose: [ internal use only ] + * + * Inverse of vm_map_entry_create. + */ +static __inline void +vm_map_entry_dispose(map, entry) + vm_map_t map; + vm_map_entry_t entry; +{ + int s; + + if (kentry_count < KENTRY_LOW_WATER) { + s = splvm(); + entry->next = kentry_free; + kentry_free = entry; + ++kentry_count; + splx(s); + } else { + entry->next = mappool; + mappool = entry; + ++mappoolcnt; + } +} + /* * vm_map_entry_create: [ internal use only ] * * Allocates a VM map entry for insertion. * No entry fields are filled in. This routine is */ -static struct vm_map_entry *mappool; -static int mappoolcnt; - static vm_map_entry_t vm_map_entry_create(map) vm_map_t map; { vm_map_entry_t entry; int i; - -#define KENTRY_LOW_WATER 64 -#define MAPENTRY_LOW_WATER 128 + int s; /* * This is a *very* nasty (and sort of incomplete) hack!!!! */ if (kentry_count < KENTRY_LOW_WATER) { + s = splvm(); if (mapvmpgcnt && mapvm) { vm_page_t m; m = vm_page_alloc(kernel_object, - OFF_TO_IDX(mapvm - vm_map_min(kernel_map)), + OFF_TO_IDX(mapvm - VM_MIN_KERNEL_ADDRESS), (map == kmem_map) ? 
VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL); + if (m) { int newentries; newentries = (PAGE_SIZE / sizeof(struct vm_map_entry)); vm_page_wire(m); - m->flags &= ~PG_BUSY; + PAGE_WAKEUP(m); m->valid = VM_PAGE_BITS_ALL; - pmap_enter(vm_map_pmap(kmem_map), mapvm, - VM_PAGE_TO_PHYS(m), VM_PROT_DEFAULT, 1); - m->flags |= PG_WRITEABLE|PG_MAPPED; + pmap_kenter(mapvm, VM_PAGE_TO_PHYS(m)); + m->flags |= PG_WRITEABLE; entry = (vm_map_entry_t) mapvm; mapvm += PAGE_SIZE; @@ -364,65 +393,33 @@ vm_map_entry_create(map) } } } + splx(s); } - if (map == kernel_map || map == kmem_map || map == pager_map) { + if (map == kernel_map || map == kmem_map || map == pager_map) { + s = splvm(); entry = kentry_free; if (entry) { kentry_free = entry->next; --kentry_count; - return entry; - } - entry = mappool; - if (entry) { - mappool = entry->next; - --mappoolcnt; - return entry; + } else { + panic("vm_map_entry_create: out of map entries for kernel"); } + splx(s); } else { entry = mappool; if (entry) { mappool = entry->next; --mappoolcnt; - return entry; + } else { + MALLOC(entry, vm_map_entry_t, sizeof(struct vm_map_entry), + M_VMMAPENT, M_WAITOK); } - MALLOC(entry, vm_map_entry_t, sizeof(struct vm_map_entry), - M_VMMAPENT, M_WAITOK); } - if (entry == NULL) - panic("vm_map_entry_create: out of map entries"); return (entry); } -/* - * vm_map_entry_dispose: [ internal use only ] - * - * Inverse of vm_map_entry_create. - */ -static void -vm_map_entry_dispose(map, entry) - vm_map_t map; - vm_map_entry_t entry; -{ - if ((kentry_count < KENTRY_LOW_WATER) || - ((vm_offset_t) entry >= kentry_data && (vm_offset_t) entry < (kentry_data + kentry_data_size)) || - ((vm_offset_t) entry >= mapvm_start && (vm_offset_t) entry < mapvmmax)) { - entry->next = kentry_free; - kentry_free = entry; - ++kentry_count; - return; - } else { - if (mappoolcnt < MAPENTRY_LOW_WATER) { - entry->next = mappool; - mappool = entry; - ++mappoolcnt; - return; - } - FREE(entry, M_VMMAPENT); - } -} - /* * vm_map_entry_{un,}link: * @@ -637,9 +634,9 @@ vm_map_insert(map, object, offset, start, end, prot, max, cow) if ((prev_entry != &map->header) && (prev_entry->end == start) && + ((object == NULL) || (prev_entry->object.vm_object == object)) && (prev_entry->is_a_map == FALSE) && (prev_entry->is_sub_map == FALSE) && - ((object == NULL) || (prev_entry->object.vm_object == object)) && (prev_entry->inheritance == VM_INHERIT_DEFAULT) && (prev_entry->protection == prot) && (prev_entry->max_protection == max) && @@ -664,13 +661,7 @@ vm_map_insert(map, object, offset, start, end, prot, max, cow) prev_entry->end = end; return (KERN_SUCCESS); } - } /* else if ((object == prev_entry->object.vm_object) && - (prev_entry->offset + (prev_entry->end - prev_entry->start) == offset)) { - map->size += (end - prev_entry->end); - prev_entry->end = end; - printf("map optim 1\n"); - return (KERN_SUCCESS); - } */ + } } /* * Create a new entry @@ -711,7 +702,6 @@ vm_map_insert(map, object, offset, start, end, prot, max, cow) /* * Update the free space hint */ - if ((map->first_free == prev_entry) && (prev_entry->end >= new_entry->start)) map->first_free = new_entry; @@ -803,7 +793,7 @@ vm_map_find(map, object, offset, addr, length, find_space, prot, max, cow) start = *addr; if (map == kmem_map) - s = splhigh(); + s = splvm(); vm_map_lock(map); if (find_space) { @@ -866,10 +856,13 @@ vm_map_simplify_entry(map, entry) (prev->wired_count == 0)) { if (map->first_free == prev) map->first_free = entry; + if (map->hint == prev) + map->hint = entry; vm_map_entry_unlink(map, prev); entry->start = 
prev->start; entry->offset = prev->offset; - vm_object_deallocate(prev->object.vm_object); + if (prev->object.vm_object) + vm_object_deallocate(prev->object.vm_object); vm_map_entry_dispose(map, prev); } } @@ -891,9 +884,12 @@ vm_map_simplify_entry(map, entry) (next->wired_count == 0)) { if (map->first_free == next) map->first_free = entry; + if (map->hint == next) + map->hint = entry; vm_map_entry_unlink(map, next); entry->end = next->end; - vm_object_deallocate(next->object.vm_object); + if (next->object.vm_object) + vm_object_deallocate(next->object.vm_object); vm_map_entry_dispose(map, next); } } @@ -1131,7 +1127,6 @@ vm_map_protect(map, start, end, new_prot, set_max) */ if (current->protection != old_prot) { - #define MASK(entry) ((entry)->copy_on_write ? ~VM_PROT_WRITE : \ VM_PROT_ALL) #define max(a,b) ((a) > (b) ? (a) : (b)) @@ -1585,7 +1580,7 @@ vm_map_clean(map, start, end, syncio, invalidate) * The map in question should be locked. * [This is the reason for this routine's existence.] */ -static void +static __inline void vm_map_entry_unwire(map, entry) vm_map_t map; register vm_map_entry_t entry; @@ -1599,7 +1594,7 @@ vm_map_entry_unwire(map, entry) * * Deallocate the given entry from the target map. */ -static void +static __inline void vm_map_entry_delete(map, entry) register vm_map_t map; register vm_map_entry_t entry; @@ -1658,7 +1653,9 @@ vm_map_delete(map, start, end) * Save the free space hint */ - if (map->first_free->start >= start) + if (entry == &map->header) { + map->first_free = &map->header; + } else if (map->first_free->start >= start) map->first_free = entry->prev; /* @@ -1667,14 +1664,16 @@ vm_map_delete(map, start, end) while ((entry != &map->header) && (entry->start < end)) { vm_map_entry_t next; - register vm_offset_t s, e; - register vm_object_t object; + vm_offset_t s, e; + vm_object_t object; + vm_ooffset_t offset; vm_map_clip_end(map, entry, end); next = entry->next; s = entry->start; e = entry->end; + offset = entry->offset; /* * Unwire before removing addresses from the pmap; otherwise, @@ -1691,15 +1690,16 @@ vm_map_delete(map, start, end) * which are sharing it. */ - if (object == kernel_object || object == kmem_object) - vm_object_page_remove(object, OFF_TO_IDX(entry->offset), - OFF_TO_IDX(entry->offset + (e - s)), FALSE); - else if (!map->is_main_map) + if (object == kernel_object || object == kmem_object) { + vm_object_page_remove(object, OFF_TO_IDX(offset), + OFF_TO_IDX(offset + (e - s)), FALSE); + } else if (!map->is_main_map) { vm_object_pmap_remove(object, - OFF_TO_IDX(entry->offset), - OFF_TO_IDX(entry->offset + (e - s))); - else + OFF_TO_IDX(offset), + OFF_TO_IDX(offset + (e - s))); + } else { pmap_remove(map->pmap, s, e); + } /* * Delete the entry (which may delete the object) only after @@ -1729,7 +1729,7 @@ vm_map_remove(map, start, end) register int result, s = 0; if (map == kmem_map) - s = splhigh(); + s = splvm(); vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); @@ -1806,16 +1806,6 @@ vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry) if (src_entry->is_sub_map || dst_entry->is_sub_map) return; - if (dst_entry->object.vm_object != NULL) - printf("vm_map_copy_entry: dst_entry object not NULL!\n"); - - /* - * If our destination map was wired down, unwire it now. 
- */ - - if (dst_entry->wired_count != 0) - vm_map_entry_unwire(dst_map, dst_entry); - if (src_entry->wired_count == 0) { boolean_t src_needs_copy; @@ -1847,35 +1837,28 @@ vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry) - src_entry->start))); } } + /* * Make a copy of the object. */ - temp_pindex = OFF_TO_IDX(dst_entry->offset); - vm_object_copy(src_entry->object.vm_object, - OFF_TO_IDX(src_entry->offset), - &dst_entry->object.vm_object, - &temp_pindex, - &src_needs_copy); - dst_entry->offset = IDX_TO_OFF(temp_pindex); - /* - * If we didn't get a copy-object now, mark the source map - * entry so that a shadow will be created to hold its changed - * pages. - */ - if (src_needs_copy) + if (src_entry->object.vm_object) { + if ((src_entry->object.vm_object->handle == NULL) && + (src_entry->object.vm_object->type == OBJT_DEFAULT || + src_entry->object.vm_object->type == OBJT_SWAP)) + vm_object_collapse(src_entry->object.vm_object); + ++src_entry->object.vm_object->ref_count; + src_entry->copy_on_write = TRUE; src_entry->needs_copy = TRUE; - /* - * The destination always needs to have a shadow created. - */ - dst_entry->needs_copy = TRUE; - - /* - * Mark the entries copy-on-write, so that write-enabling the - * entry won't make copy-on-write pages writable. - */ - src_entry->copy_on_write = TRUE; - dst_entry->copy_on_write = TRUE; + dst_entry->needs_copy = TRUE; + dst_entry->copy_on_write = TRUE; + dst_entry->object.vm_object = + src_entry->object.vm_object; + dst_entry->offset = src_entry->offset; + } else { + dst_entry->object.vm_object = NULL; + dst_entry->offset = 0; + } pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start, dst_entry->end - dst_entry->start, src_entry->start); @@ -1962,7 +1945,6 @@ vmspace_fork(vm1) /* * Clone the entry and link into the map. */ - new_entry = vm_map_entry_create(new_map); *new_entry = *old_entry; new_entry->wired_count = 0; @@ -2251,11 +2233,13 @@ vm_map_simplify(map, start) vm_map_entry_t prev_entry; vm_map_lock(map); - if ( - (vm_map_lookup_entry(map, start, &this_entry)) && + if ((vm_map_lookup_entry(map, start, &this_entry)) && ((prev_entry = this_entry->prev) != &map->header) && - (prev_entry->end == start) && + (prev_entry->object.vm_object == this_entry->object.vm_object) && + ((prev_entry->offset + (prev_entry->end - prev_entry->start)) + == this_entry->offset) && + (map->is_main_map) && (prev_entry->is_a_map == FALSE) && @@ -2270,18 +2254,15 @@ vm_map_simplify(map, start) (prev_entry->wired_count == this_entry->wired_count) && (prev_entry->copy_on_write == this_entry->copy_on_write) && - (prev_entry->needs_copy == this_entry->needs_copy) && - - (prev_entry->object.vm_object == this_entry->object.vm_object) && - ((prev_entry->offset + (prev_entry->end - prev_entry->start)) - == this_entry->offset) - ) { + (prev_entry->needs_copy == this_entry->needs_copy)) { if (map->first_free == this_entry) map->first_free = prev_entry; - SAVE_HINT(map, prev_entry); + if (map->hint == this_entry) + SAVE_HINT(map, prev_entry); vm_map_entry_unlink(map, this_entry); prev_entry->end = this_entry->end; - vm_object_deallocate(this_entry->object.vm_object); + if (this_entry->object.vm_object) + vm_object_deallocate(this_entry->object.vm_object); vm_map_entry_dispose(map, this_entry); } vm_map_unlock(map); diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c index ade41bc40d2e..648130904414 100644 --- a/sys/vm/vm_meter.c +++ b/sys/vm/vm_meter.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
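For the vm_map.c changes above, the heart of the new entry management is the two-level recycling in vm_map_entry_dispose(): the interrupt-safe kentry free list is refilled only while it is below KENTRY_LOW_WATER, and everything else goes back to the ordinary pool. Restated as a sketch with the same globals as the patch; map_entry_recycle is a hypothetical name for what the patch does in place:

static __inline void
map_entry_recycle(entry)
	vm_map_entry_t entry;
{
	int s;

	if (kentry_count < KENTRY_LOW_WATER) {
		s = splvm();			/* kentry list is used by kernel maps */
		entry->next = kentry_free;
		kentry_free = entry;
		++kentry_count;
		splx(s);
	} else {
		entry->next = mappool;		/* ordinary pool, process context */
		mappool = entry;
		++mappoolcnt;
	}
}

vm_map_entry_create() is the mirror image: kernel maps (kernel_map, kmem_map, pager_map) may only take entries from kentry_free at splvm(), while user maps fall back to the pool and then to MALLOC().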
* * @(#)vm_meter.c 8.4 (Berkeley) 1/4/94 - * $Id: vm_meter.c,v 1.13 1995/12/14 09:55:02 phk Exp $ + * $Id: vm_meter.c,v 1.14 1996/03/11 06:11:40 hsu Exp $ */ #include @@ -136,9 +136,9 @@ vmtotal SYSCTL_HANDLER_ARGS /* * Mark all objects as inactive. */ - for (object = vm_object_list.tqh_first; + for (object = TAILQ_FIRST(&vm_object_list); object != NULL; - object = object->object_list.tqe_next) + object = TAILQ_NEXT(object,object_list)) object->flags &= ~OBJ_ACTIVE; /* * Calculate process statistics. @@ -191,9 +191,9 @@ vmtotal SYSCTL_HANDLER_ARGS /* * Calculate object memory usage statistics. */ - for (object = vm_object_list.tqh_first; + for (object = TAILQ_FIRST(&vm_object_list); object != NULL; - object = object->object_list.tqe_next) { + object = TAILQ_NEXT(object, object_list)) { totalp->t_vm += num_pages(object->size); totalp->t_rm += object->resident_page_count; if (object->flags & OBJ_ACTIVE) { diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index ede01dc19b68..16f8ebec2b74 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -38,7 +38,7 @@ * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$ * * @(#)vm_mmap.c 8.4 (Berkeley) 1/12/94 - * $Id: vm_mmap.c,v 1.40 1996/03/16 15:00:05 davidg Exp $ + * $Id: vm_mmap.c,v 1.41 1996/05/03 21:01:51 phk Exp $ */ /* @@ -802,8 +802,7 @@ vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) /* * "Pre-fault" resident pages. */ - if ((map != kernel_map) && - (type == OBJT_VNODE) && (map->pmap != NULL)) { + if ((type == OBJT_VNODE) && (map->pmap != NULL)) { pmap_object_init_pt(map->pmap, *addr, object, (vm_pindex_t) OFF_TO_IDX(foff), size); } diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 6b180ae013ff..187e7773b2db 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_object.c,v 1.67 1996/03/29 06:28:48 davidg Exp $ + * $Id: vm_object.c,v 1.68 1996/04/24 04:16:45 dyson Exp $ */ /* @@ -278,7 +278,7 @@ vm_object_deallocate(object) (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) { vm_object_t robject; - robject = object->shadow_head.tqh_first; + robject = TAILQ_FIRST(&object->shadow_head); if ((robject != NULL) && (robject->handle == NULL) && (robject->type == OBJT_DEFAULT || @@ -288,7 +288,7 @@ vm_object_deallocate(object) object->ref_count += 2; do { - s = splhigh(); + s = splvm(); while (robject->paging_in_progress) { robject->flags |= OBJ_PIPWNT; tsleep(robject, PVM, "objde1", 0); @@ -375,7 +375,7 @@ vm_object_terminate(object) /* * wait for the pageout daemon to be done with the object */ - s = splhigh(); + s = splvm(); while (object->paging_in_progress) { object->flags |= OBJ_PIPWNT; tsleep(object, PVM, "objtrm", 0); @@ -402,9 +402,10 @@ vm_object_terminate(object) * Now free the pages. For internal objects, this also removes them * from paging queues. 
*/ - while ((p = object->memq.tqh_first) != NULL) { + while ((p = TAILQ_FIRST(&object->memq)) != NULL) { if (p->flags & PG_BUSY) printf("vm_object_terminate: freeing busy page\n"); + vm_page_protect(p, VM_PROT_NONE); PAGE_WAKEUP(p); vm_page_free(p); cnt.v_pfree++; @@ -478,12 +479,12 @@ vm_object_page_clean(object, start, end, syncio, lockflag) if ((tstart == 0) && (tend == object->size)) { object->flags &= ~(OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY); } - for(p = object->memq.tqh_first; p; p = p->listq.tqe_next) + for(p = TAILQ_FIRST(&object->memq); p; p = TAILQ_NEXT(p, listq)) p->flags |= PG_CLEANCHK; rescan: - for(p = object->memq.tqh_first; p; p = np) { - np = p->listq.tqe_next; + for(p = TAILQ_FIRST(&object->memq); p; p = np) { + np = TAILQ_NEXT(p, listq); pi = p->pindex; if (((p->flags & PG_CLEANCHK) == 0) || @@ -499,7 +500,7 @@ rescan: continue; } - s = splhigh(); + s = splvm(); if ((p->flags & PG_BUSY) || p->busy) { p->flags |= PG_WANTED|PG_REFERENCED; tsleep(p, PVM, "vpcwai", 0); @@ -597,8 +598,8 @@ vm_object_deactivate_pages(object) { register vm_page_t p, next; - for (p = object->memq.tqh_first; p != NULL; p = next) { - next = p->listq.tqe_next; + for (p = TAILQ_FIRST(&object->memq); p != NULL; p = next) { + next = TAILQ_NEXT(p, listq); vm_page_deactivate(p); } } @@ -613,7 +614,7 @@ vm_object_cache_trim() register vm_object_t object; while (vm_object_cached > vm_object_cache_max) { - object = vm_object_cached_list.tqh_first; + object = TAILQ_FIRST(&vm_object_cached_list); vm_object_reference(object); pager_cache(object, FALSE); @@ -641,7 +642,7 @@ vm_object_pmap_copy(object, start, end) if (object == NULL || (object->flags & OBJ_WRITEABLE) == 0) return; - for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) { + for (p = TAILQ_FIRST(&object->memq); p != NULL; p = TAILQ_NEXT(p, listq)) { vm_page_protect(p, VM_PROT_READ); } @@ -665,7 +666,7 @@ vm_object_pmap_remove(object, start, end) register vm_page_t p; if (object == NULL) return; - for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) { + for (p = TAILQ_FIRST(&object->memq); p != NULL; p = TAILQ_NEXT(p, listq)) { if (p->pindex >= start && p->pindex < end) vm_page_protect(p, VM_PROT_NONE); } @@ -808,17 +809,16 @@ vm_object_qcollapse(object) backing_object_paging_offset_index = OFF_TO_IDX(backing_object->paging_offset); paging_offset_index = OFF_TO_IDX(object->paging_offset); size = object->size; - p = backing_object->memq.tqh_first; + p = TAILQ_FIRST(&backing_object->memq); while (p) { vm_page_t next; - next = p->listq.tqe_next; + next = TAILQ_NEXT(p, listq); if ((p->flags & (PG_BUSY | PG_FICTITIOUS)) || (p->queue == PQ_CACHE) || !p->valid || p->hold_count || p->wire_count || p->busy) { p = next; continue; } - vm_page_protect(p, VM_PROT_NONE); new_pindex = p->pindex - backing_offset_index; if (p->pindex < backing_offset_index || new_pindex >= size) { @@ -826,6 +826,7 @@ vm_object_qcollapse(object) swap_pager_freespace(backing_object, backing_object_paging_offset_index+p->pindex, 1); + vm_page_protect(p, VM_PROT_NONE); vm_page_free(p); } else { pp = vm_page_lookup(object, new_pindex); @@ -834,6 +835,7 @@ vm_object_qcollapse(object) if (backing_object->type == OBJT_SWAP) swap_pager_freespace(backing_object, backing_object_paging_offset_index + p->pindex, 1); + vm_page_protect(p, VM_PROT_NONE); vm_page_free(p); } else { if (backing_object->type == OBJT_SWAP) @@ -930,7 +932,7 @@ vm_object_collapse(object) * shadow them. 
*/ - while ((p = backing_object->memq.tqh_first) != 0) { + while ((p = TAILQ_FIRST(&backing_object->memq)) != 0) { new_pindex = p->pindex - backing_offset_index; @@ -1071,7 +1073,7 @@ vm_object_collapse(object) * here. */ - for (p = backing_object->memq.tqh_first; p; p = p->listq.tqe_next) { + for (p = TAILQ_FIRST(&backing_object->memq); p; p = TAILQ_NEXT(p, listq)) { new_pindex = p->pindex - backing_offset_index; /* @@ -1160,24 +1162,29 @@ vm_object_page_remove(object, start, end, clean_only) again: size = end - start; if (size > 4 || size >= object->size / 4) { - for (p = object->memq.tqh_first; p != NULL; p = next) { - next = p->listq.tqe_next; + for (p = TAILQ_FIRST(&object->memq); p != NULL; p = next) { + next = TAILQ_NEXT(p, listq); if ((start <= p->pindex) && (p->pindex < end)) { - if (p->wire_count != 0) { vm_page_protect(p, VM_PROT_NONE); p->valid = 0; continue; } - s = splhigh(); + /* + * The busy flags are only cleared at + * interrupt -- minimize the spl transitions + */ if ((p->flags & PG_BUSY) || p->busy) { - p->flags |= PG_WANTED; - tsleep(p, PVM, "vmopar", 0); + s = splvm(); + if ((p->flags & PG_BUSY) || p->busy) { + p->flags |= PG_WANTED; + tsleep(p, PVM, "vmopar", 0); + splx(s); + goto again; + } splx(s); - goto again; } - splx(s); if (clean_only) { vm_page_test_dirty(p); @@ -1199,14 +1206,20 @@ again: size -= 1; continue; } - s = splhigh(); + /* + * The busy flags are only cleared at + * interrupt -- minimize the spl transitions + */ if ((p->flags & PG_BUSY) || p->busy) { - p->flags |= PG_WANTED; - tsleep(p, PVM, "vmopar", 0); + s = splvm(); + if ((p->flags & PG_BUSY) || p->busy) { + p->flags |= PG_WANTED; + tsleep(p, PVM, "vmopar", 0); + splx(s); + goto again; + } splx(s); - goto again; } - splx(s); if (clean_only) { vm_page_test_dirty(p); if (p->valid & p->dirty) { @@ -1391,9 +1404,9 @@ DDB_vm_object_check() * make sure that internal objs are in a map somewhere * and none have zero ref counts. */ - for (object = vm_object_list.tqh_first; + for (object = TAILQ_FIRST(&vm_object_list); object != NULL; - object = object->object_list.tqe_next) { + object = TAILQ_NEXT(object, object_list)) { if (object->handle == NULL && (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) { if (object->ref_count == 0) { @@ -1436,14 +1449,14 @@ vm_object_print(iobject, full, dummy3, dummy4) (int) object->paging_offset, (int) object->backing_object, (int) object->backing_object_offset); printf("cache: next=%p, prev=%p\n", - object->cached_list.tqe_next, object->cached_list.tqe_prev); + TAILQ_NEXT(object, cached_list), TAILQ_PREV(object, cached_list)); if (!full) return; indent += 2; count = 0; - for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) { + for (p = TAILQ_FIRST(&object->memq); p != NULL; p = TAILQ_NEXT(p, listq)) { if (count == 0) iprintf("memory:="); else if (count == 6) { diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 6f10cea516f6..7a95941a2b6b 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. 
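Several vm_object.c hunks above replace an unconditional splhigh() with a cheaper pattern: test the busy bits with the spl untouched, and only if they are set raise to splvm() and test again before sleeping, since (per the patch's own comment) the busy flags are only cleared at interrupt level. A sketch of that idiom as a hypothetical helper; the committed code open-codes it in vm_object_page_remove():

static int
page_wait_if_busy(p)
	vm_page_t p;
{
	int s;

	if ((p->flags & PG_BUSY) || p->busy) {
		s = splvm();
		/* re-check under spl: an interrupt may have cleared it already */
		if ((p->flags & PG_BUSY) || p->busy) {
			p->flags |= PG_WANTED;
			tsleep(p, PVM, "vmopar", 0);
			splx(s);
			return (1);	/* slept; caller should rescan */
		}
		splx(s);
	}
	return (0);			/* page was not busy */
}

The unprotected first test keeps the common (not busy) case out of the spl window entirely, which is the point of the change.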
* * from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91 - * $Id: vm_page.c,v 1.49 1996/03/09 06:56:39 dyson Exp $ + * $Id: vm_page.c,v 1.50 1996/03/28 04:53:27 dyson Exp $ */ /* @@ -140,7 +140,6 @@ static u_short vm_page_dev_bsize_chunks[] = { static inline __pure int vm_page_hash __P((vm_object_t object, vm_pindex_t pindex)) __pure2; -static void vm_page_unqueue __P((vm_page_t )); /* * vm_set_page_size: @@ -244,7 +243,7 @@ vm_page_startup(starta, enda, vaddr) vm_page_buckets = (struct pglist *) vaddr; bucket = vm_page_buckets; if (vm_page_bucket_count == 0) { - vm_page_bucket_count = 2; + vm_page_bucket_count = 1; while (vm_page_bucket_count < atop(total)) vm_page_bucket_count <<= 1; } @@ -383,7 +382,7 @@ vm_page_hash(object, pindex) * The object and page must be locked, and must be splhigh. */ -inline void +__inline void vm_page_insert(m, object, pindex) register vm_page_t m; register vm_object_t object; @@ -432,7 +431,7 @@ vm_page_insert(m, object, pindex) * The object and page must be locked, and at splhigh. */ -inline void +__inline void vm_page_remove(m) register vm_page_t m; { @@ -487,14 +486,13 @@ vm_page_lookup(object, pindex) bucket = &vm_page_buckets[vm_page_hash(object, pindex)]; - s = splhigh(); - for (m = bucket->tqh_first; m != NULL; m = m->hashq.tqe_next) { + s = splvm(); + for (m = TAILQ_FIRST(bucket); m != NULL; m = TAILQ_NEXT(m,hashq)) { if ((m->object == object) && (m->pindex == pindex)) { splx(s); return (m); } } - splx(s); return (NULL); } @@ -515,7 +513,7 @@ vm_page_rename(m, new_object, new_pindex) { int s; - s = splhigh(); + s = splvm(); vm_page_remove(m); vm_page_insert(m, new_object, new_pindex); splx(s); @@ -524,7 +522,7 @@ vm_page_rename(m, new_object, new_pindex) /* * vm_page_unqueue must be called at splhigh(); */ -static inline void +__inline void vm_page_unqueue(vm_page_t m) { int queue = m->queue; @@ -575,19 +573,19 @@ vm_page_alloc(object, pindex, page_req) page_req = VM_ALLOC_SYSTEM; }; - s = splhigh(); + s = splvm(); switch (page_req) { case VM_ALLOC_NORMAL: if (cnt.v_free_count >= cnt.v_free_reserved) { - m = vm_page_queue_free.tqh_first; + m = TAILQ_FIRST(&vm_page_queue_free); if (m == NULL) { --vm_page_zero_count; - m = vm_page_queue_zero.tqh_first; + m = TAILQ_FIRST(&vm_page_queue_zero); } } else { - m = vm_page_queue_cache.tqh_first; + m = TAILQ_FIRST(&vm_page_queue_cache); if (m == NULL) { splx(s); pagedaemon_wakeup(); @@ -598,14 +596,14 @@ vm_page_alloc(object, pindex, page_req) case VM_ALLOC_ZERO: if (cnt.v_free_count >= cnt.v_free_reserved) { - m = vm_page_queue_zero.tqh_first; + m = TAILQ_FIRST(&vm_page_queue_zero); if (m) { --vm_page_zero_count; } else { - m = vm_page_queue_free.tqh_first; + m = TAILQ_FIRST(&vm_page_queue_free); } } else { - m = vm_page_queue_cache.tqh_first; + m = TAILQ_FIRST(&vm_page_queue_cache); if (m == NULL) { splx(s); pagedaemon_wakeup(); @@ -618,13 +616,13 @@ vm_page_alloc(object, pindex, page_req) if ((cnt.v_free_count >= cnt.v_free_reserved) || ((cnt.v_cache_count == 0) && (cnt.v_free_count >= cnt.v_interrupt_free_min))) { - m = vm_page_queue_free.tqh_first; + m = TAILQ_FIRST(&vm_page_queue_free); if (m == NULL) { --vm_page_zero_count; - m = vm_page_queue_zero.tqh_first; + m = TAILQ_FIRST(&vm_page_queue_zero); } } else { - m = vm_page_queue_cache.tqh_first; + m = TAILQ_FIRST(&vm_page_queue_cache); if (m == NULL) { splx(s); pagedaemon_wakeup(); @@ -635,10 +633,10 @@ vm_page_alloc(object, pindex, page_req) case VM_ALLOC_INTERRUPT: if (cnt.v_free_count > 0) { - m = vm_page_queue_free.tqh_first; + m = 
TAILQ_FIRST(&vm_page_queue_free); if (m == NULL) { --vm_page_zero_count; - m = vm_page_queue_zero.tqh_first; + m = TAILQ_FIRST(&vm_page_queue_zero); } } else { splx(s); @@ -663,8 +661,8 @@ vm_page_alloc(object, pindex, page_req) m->flags = PG_BUSY; } m->wire_count = 0; - m->hold_count = 0; m->act_count = 0; + m->hold_count = 0; m->busy = 0; m->valid = 0; m->dirty = 0; @@ -688,114 +686,35 @@ vm_page_alloc(object, pindex, page_req) } /* - * This interface is for merging with malloc() someday. - * Even if we never implement compaction so that contiguous allocation - * works after initialization time, malloc()'s data structures are good - * for statistics and for allocations of less than a page. + * vm_page_activate: + * + * Put the specified page on the active list (if appropriate). + * + * The page queues must be locked. */ -void * -contigmalloc(size, type, flags, low, high, alignment, boundary) - unsigned long size; /* should be size_t here and for malloc() */ - int type; - int flags; - unsigned long low; - unsigned long high; - unsigned long alignment; - unsigned long boundary; +void +vm_page_activate(m) + register vm_page_t m; { - int i, s, start; - vm_offset_t addr, phys, tmp_addr; - vm_page_t pga = vm_page_array; + int s; - size = round_page(size); - if (size == 0) - panic("vm_page_alloc_contig: size must not be 0"); - if ((alignment & (alignment - 1)) != 0) - panic("vm_page_alloc_contig: alignment must be a power of 2"); - if ((boundary & (boundary - 1)) != 0) - panic("vm_page_alloc_contig: boundary must be a power of 2"); + s = splvm(); + if (m->queue == PQ_ACTIVE) + panic("vm_page_activate: already active"); - start = 0; - s = splhigh(); -again: - /* - * Find first page in array that is free, within range, aligned, and - * such that the boundary won't be crossed. - */ - for (i = start; i < cnt.v_page_count; i++) { - phys = VM_PAGE_TO_PHYS(&pga[i]); - if ((pga[i].queue == PQ_FREE) && - (phys >= low) && (phys < high) && - ((phys & (alignment - 1)) == 0) && - (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0)) - break; + if (m->queue == PQ_CACHE) + cnt.v_reactivated++; + + vm_page_unqueue(m); + + if (m->wire_count == 0) { + if (m->act_count < 5) + m->act_count = 5; + TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq); + m->queue = PQ_ACTIVE; + cnt.v_active_count++; } - - /* - * If the above failed or we will exceed the upper bound, fail. - */ - if ((i == cnt.v_page_count) || - ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) { - splx(s); - return (NULL); - } - start = i; - - /* - * Check successive pages for contiguous and free. - */ - for (i = start + 1; i < (start + size / PAGE_SIZE); i++) { - if ((VM_PAGE_TO_PHYS(&pga[i]) != - (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) || - (pga[i].queue != PQ_FREE)) { - start++; - goto again; - } - } - - /* - * We've found a contiguous chunk that meets are requirements. - * Allocate kernel VM, unfree and assign the physical pages to it and - * return kernel VM pointer. 
- */ - tmp_addr = addr = kmem_alloc_pageable(kernel_map, size); - if (addr == 0) { - splx(s); - return (NULL); - } - - for (i = start; i < (start + size / PAGE_SIZE); i++) { - vm_page_t m = &pga[i]; - - TAILQ_REMOVE(&vm_page_queue_free, m, pageq); - cnt.v_free_count--; - m->valid = VM_PAGE_BITS_ALL; - m->flags = 0; - m->dirty = 0; - m->wire_count = 0; - m->act_count = 0; - m->busy = 0; - m->queue = PQ_NONE; - vm_page_insert(m, kernel_object, - OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS)); - vm_page_wire(m); - pmap_kenter(tmp_addr, VM_PAGE_TO_PHYS(m)); - tmp_addr += PAGE_SIZE; - } - splx(s); - return ((void *)addr); -} - -vm_offset_t -vm_page_alloc_contig(size, low, high, alignment) - vm_offset_t size; - vm_offset_t low; - vm_offset_t high; - vm_offset_t alignment; -{ - return ((vm_offset_t)contigmalloc(size, M_DEVBUF, M_NOWAIT, low, high, - alignment, 0ul)); } /* @@ -813,7 +732,7 @@ vm_page_free(m) int s; int flags = m->flags; - s = splhigh(); + s = splvm(); if (m->busy || (flags & PG_BUSY) || (m->queue == PQ_FREE)) { printf("vm_page_free: pindex(%ld), busy(%d), PG_BUSY(%d)\n", m->pindex, m->busy, (flags & PG_BUSY) ? 1 : 0); @@ -824,7 +743,8 @@ vm_page_free(m) } if (m->hold_count) { - panic("freeing held page, count=%d", m->hold_count); + panic("freeing held page, count=%d, pindex=%d(0x%x)", + m->hold_count, m->pindex, m->pindex); } vm_page_remove(m); @@ -840,7 +760,19 @@ vm_page_free(m) m->wire_count = 0; } m->queue = PQ_FREE; - TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq); + + /* + * If the pageout process is grabbing the page, it is likely + * that the page is NOT in the cache. It is more likely that + * the page will be partially in the cache if it is being + * explicitly freed. + */ + if (curproc == pageproc) { + TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq); + } else { + TAILQ_INSERT_HEAD(&vm_page_queue_free, m, pageq); + } + splx(s); /* * if pageout daemon needs pages, then tell it that there are @@ -859,7 +791,6 @@ vm_page_free(m) */ if ((cnt.v_free_count + cnt.v_cache_count) == cnt.v_free_min) { wakeup(&cnt.v_free_count); - wakeup(&proc0); } } else { splx(s); @@ -884,7 +815,7 @@ vm_page_wire(m) int s; if (m->wire_count == 0) { - s = splhigh(); + s = splvm(); vm_page_unqueue(m); splx(s); cnt.v_wire_count++; @@ -907,56 +838,23 @@ vm_page_unwire(m) { int s; - s = splhigh(); + s = splvm(); if (m->wire_count > 0) m->wire_count--; if (m->wire_count == 0) { cnt.v_wire_count--; - TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq); - m->queue = PQ_ACTIVE; - if( m->act_count < ACT_MAX) - m->act_count += 1; - cnt.v_active_count++; - } - splx(s); -} - -/* - * vm_page_activate: - * - * Put the specified page on the active list (if appropriate). - * - * The page queues must be locked. 
- */ -void -vm_page_activate(m) - register vm_page_t m; -{ - int s; - - s = splhigh(); - if (m->queue == PQ_ACTIVE) - panic("vm_page_activate: already active"); - - if (m->queue == PQ_CACHE) - cnt.v_reactivated++; - - vm_page_unqueue(m); - - if (m->wire_count == 0) { TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq); m->queue = PQ_ACTIVE; if (m->act_count < 5) m->act_count = 5; - else if( m->act_count < ACT_MAX) - m->act_count += 1; cnt.v_active_count++; } splx(s); } + /* * vm_page_deactivate: * @@ -982,7 +880,7 @@ vm_page_deactivate(m) if (m->queue == PQ_INACTIVE) return; - spl = splhigh(); + spl = splvm(); if (m->wire_count == 0 && m->hold_count == 0) { if (m->queue == PQ_CACHE) cnt.v_reactivated++; @@ -990,7 +888,6 @@ vm_page_deactivate(m) TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq); m->queue = PQ_INACTIVE; cnt.v_inactive_count++; - m->act_count = 0; } splx(spl); } @@ -1014,7 +911,7 @@ vm_page_cache(m) return; vm_page_protect(m, VM_PROT_NONE); - s = splhigh(); + s = splvm(); vm_page_unqueue(m); TAILQ_INSERT_TAIL(&vm_page_queue_cache, m, pageq); m->queue = PQ_CACHE; @@ -1030,35 +927,6 @@ vm_page_cache(m) splx(s); } -/* - * vm_page_zero_fill: - * - * Zero-fill the specified page. - * Written as a standard pagein routine, to - * be used by the zero-fill object. - */ -boolean_t -vm_page_zero_fill(m) - vm_page_t m; -{ - pmap_zero_page(VM_PAGE_TO_PHYS(m)); - return (TRUE); -} - -/* - * vm_page_copy: - * - * Copy one page to another - */ -void -vm_page_copy(src_m, dest_m) - vm_page_t src_m; - vm_page_t dest_m; -{ - pmap_copy_page(VM_PAGE_TO_PHYS(src_m), VM_PAGE_TO_PHYS(dest_m)); - dest_m->valid = VM_PAGE_BITS_ALL; -} - /* * mapping function for valid bits or for dirty bits in @@ -1126,8 +994,6 @@ vm_page_is_valid(m, base, size) return 0; } - - void vm_page_test_dirty(m) vm_page_t m; @@ -1138,6 +1004,115 @@ vm_page_test_dirty(m) } } +/* + * This interface is for merging with malloc() someday. + * Even if we never implement compaction so that contiguous allocation + * works after initialization time, malloc()'s data structures are good + * for statistics and for allocations of less than a page. + */ +void * +contigmalloc(size, type, flags, low, high, alignment, boundary) + unsigned long size; /* should be size_t here and for malloc() */ + int type; + int flags; + unsigned long low; + unsigned long high; + unsigned long alignment; + unsigned long boundary; +{ + int i, s, start; + vm_offset_t addr, phys, tmp_addr; + vm_page_t pga = vm_page_array; + + size = round_page(size); + if (size == 0) + panic("vm_page_alloc_contig: size must not be 0"); + if ((alignment & (alignment - 1)) != 0) + panic("vm_page_alloc_contig: alignment must be a power of 2"); + if ((boundary & (boundary - 1)) != 0) + panic("vm_page_alloc_contig: boundary must be a power of 2"); + + start = 0; + s = splvm(); +again: + /* + * Find first page in array that is free, within range, aligned, and + * such that the boundary won't be crossed. + */ + for (i = start; i < cnt.v_page_count; i++) { + phys = VM_PAGE_TO_PHYS(&pga[i]); + if ((pga[i].queue == PQ_FREE) && + (phys >= low) && (phys < high) && + ((phys & (alignment - 1)) == 0) && + (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0)) + break; + } + + /* + * If the above failed or we will exceed the upper bound, fail. + */ + if ((i == cnt.v_page_count) || + ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) { + splx(s); + return (NULL); + } + start = i; + + /* + * Check successive pages for contiguous and free. 
+ */ + for (i = start + 1; i < (start + size / PAGE_SIZE); i++) { + if ((VM_PAGE_TO_PHYS(&pga[i]) != + (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) || + (pga[i].queue != PQ_FREE)) { + start++; + goto again; + } + } + + /* + * We've found a contiguous chunk that meets are requirements. + * Allocate kernel VM, unfree and assign the physical pages to it and + * return kernel VM pointer. + */ + tmp_addr = addr = kmem_alloc_pageable(kernel_map, size); + if (addr == 0) { + splx(s); + return (NULL); + } + + for (i = start; i < (start + size / PAGE_SIZE); i++) { + vm_page_t m = &pga[i]; + + TAILQ_REMOVE(&vm_page_queue_free, m, pageq); + cnt.v_free_count--; + m->valid = VM_PAGE_BITS_ALL; + m->flags = 0; + m->dirty = 0; + m->wire_count = 0; + m->busy = 0; + m->queue = PQ_NONE; + vm_page_insert(m, kernel_object, + OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS)); + vm_page_wire(m); + pmap_kenter(tmp_addr, VM_PAGE_TO_PHYS(m)); + tmp_addr += PAGE_SIZE; + } + + splx(s); + return ((void *)addr); +} + +vm_offset_t +vm_page_alloc_contig(size, low, high, alignment) + vm_offset_t size; + vm_offset_t low; + vm_offset_t high; + vm_offset_t alignment; +{ + return ((vm_offset_t)contigmalloc(size, M_DEVBUF, M_NOWAIT, low, high, + alignment, 0ul)); +} #ifdef DDB void DDB_print_page_info(void) diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index e10c1063199e..950ebe16afd6 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -65,7 +65,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_pageout.c,v 1.69 1996/03/28 04:53:28 dyson Exp $ + * $Id: vm_pageout.c,v 1.70 1996/04/11 21:05:25 bde Exp $ */ /* @@ -138,8 +138,6 @@ extern int nswiodone; extern int vm_swap_size; extern int vfs_update_wakeup; -#define MAXSCAN 1024 /* maximum number of pages to scan in queues */ - #define MAXLAUNDER (cnt.v_page_count > 1800 ? 32 : 16) #define VM_PAGEOUT_PAGE_COUNT 16 @@ -415,9 +413,9 @@ vm_pageout_object_deactivate_pages(map, object, count, map_remove_only) * scan the objects entire memory queue */ rcount = object->resident_page_count; - p = object->memq.tqh_first; + p = TAILQ_FIRST(&object->memq); while (p && (rcount-- > 0)) { - next = p->listq.tqe_next; + next = TAILQ_NEXT(p, listq); cnt.v_pdpages++; if (p->wire_count != 0 || p->hold_count != 0 || @@ -434,26 +432,9 @@ vm_pageout_object_deactivate_pages(map, object, count, map_remove_only) if (p->queue == PQ_ACTIVE) { if (!pmap_is_referenced(VM_PAGE_TO_PHYS(p)) && (p->flags & PG_REFERENCED) == 0) { - p->act_count -= min(p->act_count, ACT_DECLINE); - /* - * if the page act_count is zero -- then we - * deactivate - */ - if (!p->act_count) { - if (!map_remove_only) - vm_page_deactivate(p); - vm_page_protect(p, VM_PROT_NONE); - /* - * else if on the next go-around we - * will deactivate the page we need to - * place the page on the end of the - * queue to age the other pages in - * memory. 
- */ - } else { - TAILQ_REMOVE(&vm_page_queue_active, p, pageq); - TAILQ_INSERT_TAIL(&vm_page_queue_active, p, pageq); - } + vm_page_protect(p, VM_PROT_NONE); + if (!map_remove_only) + vm_page_deactivate(p); /* * see if we are done yet */ @@ -471,8 +452,6 @@ vm_pageout_object_deactivate_pages(map, object, count, map_remove_only) */ pmap_clear_reference(VM_PAGE_TO_PHYS(p)); p->flags &= ~PG_REFERENCED; - if (p->act_count < ACT_MAX) - p->act_count += ACT_ADVANCE; TAILQ_REMOVE(&vm_page_queue_active, p, pageq); TAILQ_INSERT_TAIL(&vm_page_queue_active, p, pageq); @@ -544,9 +523,12 @@ vm_pageout_scan() vm_object_t object; int force_wakeup = 0; int vnodes_skipped = 0; + int usagefloor; + int i; pages_freed = 0; + /* * Start scanning the inactive queue for pages we can free. We keep * scanning until we have enough free pages or we have scanned through @@ -559,13 +541,14 @@ vm_pageout_scan() rescan1: maxscan = cnt.v_inactive_count; - m = vm_page_queue_inactive.tqh_first; + m = TAILQ_FIRST(&vm_page_queue_inactive); while ((m != NULL) && (maxscan-- > 0) && - ((cnt.v_cache_count + cnt.v_free_count) < (cnt.v_cache_min + cnt.v_free_target))) { + ((cnt.v_cache_count + cnt.v_free_count) < + (cnt.v_cache_min + cnt.v_free_target))) { vm_page_t next; cnt.v_pdpages++; - next = m->pageq.tqe_next; + next = TAILQ_NEXT(m, pageq); #if defined(VM_DIAGNOSE) if (m->queue != PQ_INACTIVE) { @@ -575,7 +558,8 @@ rescan1: #endif /* - * dont mess with busy pages + * Dont mess with busy pages, keep in the front of the + * queue, most likely are being paged out. */ if (m->busy || (m->flags & PG_BUSY)) { m = next; @@ -600,8 +584,6 @@ rescan1: m->flags &= ~PG_REFERENCED; pmap_clear_reference(VM_PAGE_TO_PHYS(m)); vm_page_activate(m); - if (m->act_count < ACT_MAX) - m->act_count += ACT_ADVANCE; m = next; continue; } @@ -681,14 +663,11 @@ rescan1: page_shortage = 1; } } - maxscan = MAXSCAN; - pcount = cnt.v_active_count; - m = vm_page_queue_active.tqh_first; - while ((m != NULL) && (maxscan > 0) && - (pcount-- > 0) && (page_shortage > 0)) { - cnt.v_pdpages++; - next = m->pageq.tqe_next; + pcount = cnt.v_active_count; + m = TAILQ_FIRST(&vm_page_queue_active); + while ((m != NULL) && (pcount-- > 0) && (page_shortage > 0)) { + next = TAILQ_NEXT(m, pageq); /* * Don't deactivate pages that are busy. @@ -701,54 +680,47 @@ rescan1: m = next; continue; } - if (m->object->ref_count && - ((m->flags & PG_REFERENCED) || - pmap_is_referenced(VM_PAGE_TO_PHYS(m))) ) { - pmap_clear_reference(VM_PAGE_TO_PHYS(m)); - m->flags &= ~PG_REFERENCED; - if (m->act_count < ACT_MAX) { - m->act_count += ACT_ADVANCE; + + /* + * The count for pagedaemon pages is done after checking the + * page for eligbility... 
+ */ + cnt.v_pdpages++; + if ((m->flags & PG_REFERENCED) == 0) { + if (pmap_is_referenced(VM_PAGE_TO_PHYS(m))) { + pmap_clear_reference(VM_PAGE_TO_PHYS(m)); + m->flags |= PG_REFERENCED; } + } else { + pmap_clear_reference(VM_PAGE_TO_PHYS(m)); + } + if ( (m->object->ref_count != 0) && + (m->flags & PG_REFERENCED) ) { + m->flags &= ~PG_REFERENCED; TAILQ_REMOVE(&vm_page_queue_active, m, pageq); TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq); } else { m->flags &= ~PG_REFERENCED; - pmap_clear_reference(VM_PAGE_TO_PHYS(m)); - m->act_count -= min(m->act_count, ACT_DECLINE); - - /* - * if the page act_count is zero -- then we deactivate - */ - if (!m->act_count && (page_shortage > 0)) { - if (m->object->ref_count == 0) { - --page_shortage; - vm_page_test_dirty(m); - if (m->dirty == 0) { - m->act_count = 0; - vm_page_cache(m); - } else { - vm_page_deactivate(m); - } + if (page_shortage > 0) { + --page_shortage; + vm_page_test_dirty(m); + if (m->dirty == 0) { + vm_page_cache(m); } else { vm_page_protect(m, VM_PROT_NONE); vm_page_deactivate(m); - --page_shortage; } - } else if (m->act_count) { - TAILQ_REMOVE(&vm_page_queue_active, m, pageq); - TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq); } } - maxscan--; m = next; } - + /* * We try to maintain some *really* free pages, this allows interrupt * code to be guaranteed space. */ while (cnt.v_free_count < cnt.v_free_reserved) { - m = vm_page_queue_cache.tqh_first; + m = TAILQ_FIRST(&vm_page_queue_cache); if (!m) break; vm_page_free(m); @@ -770,23 +742,13 @@ rescan1: } } #ifndef NO_SWAPPING - /* - * now swap processes out if we are in low memory conditions - */ - if (!swap_pager_full && vm_swap_size && - vm_pageout_req_swapout == 0) { - vm_pageout_req_swapout = 1; + if (cnt.v_free_count + cnt.v_cache_count < cnt.v_free_target) { vm_req_vmdaemon(); + vm_pageout_req_swapout = 1; } #endif } -#ifndef NO_SWAPPING - if ((cnt.v_inactive_count + cnt.v_free_count + cnt.v_cache_count) < - (cnt.v_inactive_target + cnt.v_free_min)) { - vm_req_vmdaemon(); - } -#endif /* * make sure that we have swap space -- if we are low on memory and @@ -883,22 +845,23 @@ vm_pageout() * The pageout daemon is never done, so loop forever. */ while (TRUE) { - int s = splhigh(); - + int s = splvm(); if (!vm_pages_needed || ((cnt.v_free_count >= cnt.v_free_reserved) && (cnt.v_free_count + cnt.v_cache_count >= cnt.v_free_min))) { vm_pages_needed = 0; tsleep(&vm_pages_needed, PVM, "psleep", 0); + } else if (!vm_pages_needed) { + tsleep(&vm_pages_needed, PVM, "psleep", hz/3); } + if (vm_pages_needed) + cnt.v_pdwakeups++; vm_pages_needed = 0; splx(s); - cnt.v_pdwakeups++; vm_pager_sync(); vm_pageout_scan(); vm_pager_sync(); wakeup(&cnt.v_free_count); - wakeup(kmem_map); } } @@ -908,7 +871,7 @@ vm_req_vmdaemon() { static int lastrun = 0; - if ((ticks > (lastrun + hz / 10)) || (ticks < lastrun)) { + if ((ticks > (lastrun + hz)) || (ticks < lastrun)) { wakeup(&vm_daemon_needed); lastrun = ticks; } @@ -978,7 +941,7 @@ vm_daemon() * we remove cached objects that have no RSS... 
*/ restart: - object = vm_object_cached_list.tqh_first; + object = TAILQ_FIRST(&vm_object_cached_list); while (object) { /* * if there are no resident pages -- get rid of the object @@ -988,7 +951,7 @@ restart: pager_cache(object, FALSE); goto restart; } - object = object->cached_list.tqe_next; + object = TAILQ_NEXT(object, cached_list); } } } diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c index 63ebdd9a57e9..c7c9964bd0e4 100644 --- a/sys/vm/vm_pager.c +++ b/sys/vm/vm_pager.c @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_pager.c,v 1.21 1995/12/14 09:55:11 phk Exp $ + * $Id: vm_pager.c,v 1.22 1996/05/03 21:01:53 phk Exp $ */ /* @@ -249,7 +249,7 @@ vm_pager_object_lookup(pg_list, handle) { register vm_object_t object; - for (object = pg_list->tqh_first; object != NULL; object = object->pager_object_list.tqe_next) + for (object = TAILQ_FIRST(pg_list); object != NULL; object = TAILQ_NEXT(object,pager_object_list)) if (object->handle == handle) return (object); return (NULL); @@ -288,7 +288,7 @@ getpbuf() s = splbio(); /* get a bp from the swap buffer header pool */ - while ((bp = bswlist.tqh_first) == NULL) { + while ((bp = TAILQ_FIRST(&bswlist)) == NULL) { bswneeded = 1; tsleep(&bswneeded, PVM, "wswbuf", 0); } @@ -313,7 +313,7 @@ trypbuf() struct buf *bp; s = splbio(); - if ((bp = bswlist.tqh_first) == NULL) { + if ((bp = TAILQ_FIRST(&bswlist)) == NULL) { splx(s); return NULL; }
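Finally, the contigmalloc() scan moved to the bottom of vm_page.c selects candidate runs with two bit tests whose correctness depends on alignment and boundary being powers of two (checked at entry). A standalone restatement with a small worked example; contig_run_ok is a hypothetical helper, the committed code performs both tests inline:

/*
 * Example: phys = 0x0009f000, size = 0x2000, boundary = 0x10000.
 * phys ^ (phys + size - 1) = 0x0009f000 ^ 0x000a0fff = 0x0003ffff, and
 * 0x0003ffff & ~0xffff = 0x00030000 != 0, so the run would cross a 64K
 * boundary and is rejected.
 */
static int
contig_run_ok(phys, size, alignment, boundary)
	vm_offset_t phys, size, alignment, boundary;
{
	/* the first byte must sit on the requested alignment */
	if ((phys & (alignment - 1)) != 0)
		return (0);
	/* first and last byte must agree in every bit above the boundary */
	if (((phys ^ (phys + size - 1)) & ~(boundary - 1)) != 0)
		return (0);
	return (1);
}

A boundary of 0 makes ~(boundary - 1) collapse to 0, so the second test always passes; that is how vm_page_alloc_contig() expresses "no boundary restriction" when it forwards 0ul to contigmalloc().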