From b18bfc3da782094cb09a9d0e9d387614aefa423e Mon Sep 17 00:00:00 2001
From: John Dyson
Date: Sat, 18 May 1996 03:38:05 +0000
Subject: [PATCH] This set of commits to the VM system does the following, and
 contains contributions or ideas from Stephen McKay, Alan Cox, David Greenman
 and me:

More usage of the TAILQ macros.  Additional minor fix to queue.h.
Performance enhancements to the pageout daemon.  Addition of a wait in the
case that the pageout daemon has to run immediately.  Slightly modify the
pageout algorithm.

Significant revamp of the pmap/fork code:
1) PTE's and UPAGES's are NO LONGER in the process's map.
2) PTE's and UPAGES's reside in their own objects.
3) TOTAL elimination of recursive page table pagefaults.
4) The page directory now resides in the PTE object.
5) Implemented pmap_copy, thereby speeding up fork time.
6) Changed the pv entries so that the head is a pointer and not an entire
   entry.
7) Significant cleanup of pmap_protect and pmap_remove.
8) Removed significant amounts of machine dependent fork code from vm_glue.
   Pushed much of that code into the machine dependent pmap module.
9) Support more completely the reuse of already zeroed pages (Page table
   pages and page directories) as being already zeroed.

Performance and code cleanups in vm_map:
1) Improved and simplified allocation of map entries.
2) Improved vm_map_copy code.
3) Corrected some minor problems in the simplify code.

Implemented splvm (a combo of splbio and splimp).  The VM code now seldom
uses splhigh.

Improved the speed of and simplified kmem_malloc.

Minor mod to vm_fault to avoid using pre-zeroed pages in the case of objects
with backing objects along with the already existent condition of having a
vnode.  (If there is a backing object, there will likely be a COW...  With a
COW, it isn't necessary to start with a pre-zeroed page.)

Minor reorg of source to perhaps improve locality of ref.
---
 sys/amd64/amd64/machdep.c | 4 +-
 sys/amd64/amd64/pmap.c | 1379 +++++++++++++++++++---------------
 sys/amd64/amd64/trap.c | 16 +-
 sys/amd64/amd64/vm_machdep.c | 4 +-
 sys/amd64/include/pmap.h | 15 +-
 sys/i386/i386/machdep.c | 4 +-
 sys/i386/i386/pmap.c | 1379 +++++++++++++++++++---------------
 sys/i386/i386/trap.c | 16 +-
 sys/i386/i386/vm_machdep.c | 4 +-
 sys/i386/include/pmap.h | 15 +-
 sys/i386/include/spl.h | 3 +-
 sys/kern/kern_exec.c | 10 +-
 sys/kern/subr_trap.c | 16 +-
 sys/kern/vfs_bio.c | 21 +-
 sys/sys/queue.h | 6 +-
 sys/vm/device_pager.c | 8 +-
 sys/vm/swap_pager.c | 44 +-
 sys/vm/vm_fault.c | 24 +-
 sys/vm/vm_glue.c | 104 +--
 sys/vm/vm_kern.c | 29 +-
 sys/vm/vm_map.c | 243 +++---
 sys/vm/vm_meter.c | 10 +-
 sys/vm/vm_mmap.c | 5 +-
 sys/vm/vm_object.c | 85 ++-
 sys/vm/vm_page.c | 373 +++++----
 sys/vm/vm_pageout.c | 141 ++--
 sys/vm/vm_pager.c | 8 +-
 27 files changed, 2106 insertions(+), 1860 deletions(-)

diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index 7fa891f6493f..a0fcd5828df7 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -35,7 +35,7 @@
 * SUCH DAMAGE.
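
[Editor's note: the commit message above calls out heavier use of the TAILQ macros, and the diffs below replace direct tqh_first/tqe_next accesses with TAILQ_FIRST/TAILQ_NEXT. As a reference point only, here is a minimal userland sketch of a <sys/queue.h> tail queue; struct page and its fields are made-up stand-ins, not the kernel's vm_page.]

#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

struct page {
	int id;
	TAILQ_ENTRY(page) pageq;	/* linkage, analogous to vm_page's pageq */
};

TAILQ_HEAD(pglist, page);

int
main(void)
{
	struct pglist freeq;
	struct page *p;
	int i;

	TAILQ_INIT(&freeq);
	for (i = 0; i < 3; i++) {
		p = malloc(sizeof(*p));
		p->id = i;
		TAILQ_INSERT_TAIL(&freeq, p, pageq);
	}

	/* TAILQ_FIRST/TAILQ_NEXT replace direct tqh_first/tqe_next access */
	for (p = TAILQ_FIRST(&freeq); p != NULL; p = TAILQ_NEXT(p, pageq))
		printf("page %d\n", p->id);

	while ((p = TAILQ_FIRST(&freeq)) != NULL) {
		TAILQ_REMOVE(&freeq, p, pageq);
		free(p);
	}
	return (0);
}

[The macros hide the head/entry bookkeeping, which is why the patch can touch queue internals without rewriting most call sites.]
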
* * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 - * $Id: machdep.c,v 1.189 1996/05/03 21:00:53 phk Exp $ + * $Id: machdep.c,v 1.190 1996/05/10 19:28:44 wollman Exp $ */ #include "npx.h" @@ -378,6 +378,8 @@ again: (nswbuf*MAXPHYS) + pager_map_size, TRUE); exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, (16*ARG_MAX), TRUE); + exech_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, + (32*ARG_MAX), TRUE); u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, (maxproc*UPAGES*PAGE_SIZE), FALSE); diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 04408acb8aa3..f25d932c0a48 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -39,7 +39,7 @@ * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 - * $Id: pmap.c,v 1.88 1996/05/02 22:24:58 phk Exp $ + * $Id: pmap.c,v 1.89 1996/05/03 21:00:57 phk Exp $ */ /* @@ -95,6 +95,7 @@ #include #include #include +#include #include #include @@ -107,16 +108,15 @@ #if defined(DIAGNOSTIC) #define PMAP_DIAGNOSTIC #endif +/* #define OLDREMOVE */ static void init_pv_entries __P((int)); /* * Get PDEs and PTEs for user/kernel address space */ -#define pmap_pde(m, v) (&((m)->pm_pdir[((vm_offset_t)(v) >> PDRSHIFT)&(NPDEPG-1)])) -#define pdir_pde(m, v) (m[((vm_offset_t)(v) >> PDRSHIFT)&(NPDEPG-1)]) - -#define pmap_pte_pa(pte) (*(int *)(pte) & PG_FRAME) +#define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) +#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) #define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) @@ -145,26 +145,36 @@ static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */ static vm_offset_t vm_first_phys; static int nkpt; +static vm_page_t nkpg; +vm_offset_t kernel_vm_end; extern vm_offset_t clean_sva, clean_eva; extern int cpu_class; +#define PV_FREELIST_MIN ((PAGE_SIZE / sizeof (struct pv_entry)) / 2) + +/* + * Data for the pv entry allocation mechanism + */ +static int pv_freelistcnt; +static pv_entry_t pv_freelist; +static vm_offset_t pvva; +static int npvvapg; + /* * All those kernel PT submaps that BSD is so fond of */ pt_entry_t *CMAP1; static pt_entry_t *CMAP2, *ptmmap; -static pv_entry_t pv_table; +static pv_entry_t *pv_table; caddr_t CADDR1, ptvmmap; static caddr_t CADDR2; static pt_entry_t *msgbufmap; struct msgbuf *msgbufp; static void free_pv_entry __P((pv_entry_t pv)); -pt_entry_t * - get_ptbase __P((pmap_t pmap)); -static pv_entry_t - get_pv_entry __P((void)); +static __inline unsigned * get_ptbase __P((pmap_t pmap)); +static pv_entry_t get_pv_entry __P((void)); static void i386_protection_init __P((void)); static void pmap_alloc_pv_entry __P((void)); static void pmap_changebit __P((vm_offset_t pa, int bit, boolean_t setem)); @@ -173,14 +183,25 @@ static void pmap_enter_quick __P((pmap_t pmap, vm_offset_t va, static int pmap_is_managed __P((vm_offset_t pa)); static void pmap_remove_all __P((vm_offset_t pa)); static void pmap_remove_page __P((struct pmap *pmap, vm_offset_t va)); -static __inline void pmap_remove_entry __P((struct pmap *pmap, pv_entry_t pv, +static __inline int pmap_remove_entry __P((struct pmap *pmap, pv_entry_t *pv, vm_offset_t va)); -static void pmap_remove_pte __P((struct pmap *pmap, pt_entry_t *ptq, +static int pmap_remove_pte __P((struct pmap *pmap, unsigned *ptq, vm_offset_t sva)); +static vm_page_t + pmap_pte_vm_page __P((pmap_t pmap, vm_offset_t pt)); static boolean_t pmap_testbit __P((vm_offset_t pa, int bit)); -static void * pmap_getpdir __P((void)); +static __inline void 
pmap_insert_entry __P((pmap_t pmap, vm_offset_t va, + vm_page_t mpte, vm_offset_t pa)); +static __inline vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va)); +static void pmap_remove_pte_mapping __P((vm_offset_t pa)); +static __inline int pmap_release_free_page __P((pmap_t pmap, vm_page_t p)); +static vm_page_t _pmap_allocpte __P((pmap_t pmap, vm_offset_t va, int ptepindex)); + +#define PDSTACKMAX 16 +static vm_offset_t pdstack[PDSTACKMAX]; +static int pdstackptr; #if defined(PMAP_DIAGNOSTIC) @@ -228,34 +249,38 @@ pmap_update_2pg( vm_offset_t va1, vm_offset_t va2) { } } +static __inline __pure unsigned * +get_ptbase(pmap) + pmap_t pmap; +{ + unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME; + + /* are we current address space or kernel? */ + if (pmap == kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) { + return (unsigned *) PTmap; + } + /* otherwise, we are alternate address space */ + if (frame != (((unsigned) APTDpde) & PG_FRAME)) { + APTDpde = (pd_entry_t) (frame | PG_RW | PG_V); + pmap_update(); + } + return (unsigned *) APTmap; +} + /* * Routine: pmap_pte * Function: * Extract the page table entry associated * with the given map/virtual_address pair. - * [ what about induced faults -wfj] */ -__inline pt_entry_t * __pure +__inline unsigned * __pure pmap_pte(pmap, va) register pmap_t pmap; vm_offset_t va; { - if (pmap && *pmap_pde(pmap, va)) { - vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME; - - /* are we current address space or kernel? */ - if ((pmap == kernel_pmap) || (frame == ((int) PTDpde & PG_FRAME))) - return ((pt_entry_t *) vtopte(va)); - /* otherwise, we are alternate address space */ - else { - if (frame != ((int) APTDpde & PG_FRAME)) { - APTDpde = pmap->pm_pdir[PTDPTDI]; - pmap_update(); - } - return ((pt_entry_t *) avtopte(va)); - } + return get_ptbase(pmap) + i386_btop(va); } return (0); } @@ -266,39 +291,108 @@ pmap_pte(pmap, va) * Extract the physical page address associated * with the given map/virtual_address pair. */ - -vm_offset_t +vm_offset_t __pure pmap_extract(pmap, va) register pmap_t pmap; vm_offset_t va; { - vm_offset_t pa; - if (pmap && *pmap_pde(pmap, va)) { - vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME; - - /* are we current address space or kernel? */ - if ((pmap == kernel_pmap) - || (frame == ((int) PTDpde & PG_FRAME))) { - pa = *(int *) vtopte(va); - /* otherwise, we are alternate address space */ - } else { - if (frame != ((int) APTDpde & PG_FRAME)) { - APTDpde = pmap->pm_pdir[PTDPTDI]; - pmap_update(); - } - pa = *(int *) avtopte(va); - } - return ((pa & PG_FRAME) | (va & ~PG_FRAME)); + unsigned *pte; + pte = get_ptbase(pmap) + i386_btop(va); + return ((*pte & PG_FRAME) | (va & PAGE_MASK)); } return 0; } +/* + * Add a list of wired pages to the kva + * this routine is only used for temporary + * kernel mappings that do not need to have + * page modification or references recorded. + * Note that old mappings are simply written + * over. The page *must* be wired. + */ +void +pmap_qenter(va, m, count) + vm_offset_t va; + vm_page_t *m; + int count; +{ + int i; + int anyvalid = 0; + register unsigned *pte; + + for (i = 0; i < count; i++) { + vm_offset_t tva = va + i * PAGE_SIZE; + unsigned npte = VM_PAGE_TO_PHYS(m[i]) | PG_RW | PG_V; + unsigned opte; + pte = (unsigned *)vtopte(tva); + opte = *pte; + *pte = npte; + if (opte) + pmap_update_1pg(tva); + } +} +/* + * this routine jerks page mappings from the + * kernel -- it is meant only for temporary mappings. 
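
[Editor's note: get_ptbase() above returns either PTmap or APTmap, the windows created by the self-referential page directory entry at PTDPTDI, and pmap_pte() then just indexes into that window. The sketch below only shows the address arithmetic this relies on; the i386 constants are assumed and the PTDPTDI value is hypothetical.]

#include <stdio.h>

/* i386 constants assumed for illustration */
#define PAGE_SHIFT	12
#define PDRSHIFT	22
#define PTDPTDI		0x3bf	/* hypothetical slot of the self-referential PDE */

/*
 * Because pm_pdir[PTDPTDI] points back at the page directory itself,
 * every PTE shows up in one 4MB virtual window (PTmap), and the PTE
 * for a given va is found by plain index arithmetic -- the same idea
 * vtopte()/get_ptbase() depend on.
 */
static unsigned long
pte_address(unsigned long va)
{
	unsigned long ptmap = (unsigned long)PTDPTDI << PDRSHIFT;

	return (ptmap + (va >> PAGE_SHIFT) * 4);	/* 4-byte PTEs */
}

int
main(void)
{
	unsigned long va = 0x0806a123UL;

	printf("PTE for 0x%08lx lives at 0x%08lx\n", va, pte_address(va));
	return (0);
}
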
+ */ +void +pmap_qremove(va, count) + vm_offset_t va; + int count; +{ + int i; + register unsigned *pte; + + for (i = 0; i < count; i++) { + pte = (unsigned *)vtopte(va); + *pte = 0; + pmap_update_1pg(va); + va += PAGE_SIZE; + } +} + +/* + * add a wired page to the kva + * note that in order for the mapping to take effect -- you + * should do a pmap_update after doing the pmap_kenter... + */ +__inline void +pmap_kenter(va, pa) + vm_offset_t va; + register vm_offset_t pa; +{ + register unsigned *pte; + unsigned npte, opte; + + npte = pa | PG_RW | PG_V; + pte = (unsigned *)vtopte(va); + opte = *pte; + *pte = npte; + if (opte) + pmap_update_1pg(va); +} + +/* + * remove a page from the kernel pagetables + */ +__inline void +pmap_kremove(va) + vm_offset_t va; +{ + register unsigned *pte; + + pte = (unsigned *)vtopte(va); + *pte = 0; + pmap_update_1pg(va); +} + /* * determine if a page is managed (memory vs. device) */ -static __inline int +static __inline __pure int pmap_is_managed(pa) vm_offset_t pa; { @@ -314,43 +408,21 @@ pmap_is_managed(pa) return 0; } -vm_page_t -pmap_use_pt(pmap, va) - pmap_t pmap; - vm_offset_t va; -{ - vm_offset_t ptepa; - vm_page_t mpte; - - if (va >= UPT_MIN_ADDRESS) - return NULL; - - ptepa = ((vm_offset_t) *pmap_pde(pmap, va)) & PG_FRAME; -#if defined(PMAP_DIAGNOSTIC) - if (!ptepa) - panic("pmap_use_pt: pagetable page missing, va: 0x%x", va); -#endif - - mpte = PHYS_TO_VM_PAGE(ptepa); - ++mpte->hold_count; - return mpte; -} - #if !defined(PMAP_DIAGNOSTIC) __inline #endif -void +int pmap_unuse_pt(pmap, va, mpte) pmap_t pmap; vm_offset_t va; vm_page_t mpte; { if (va >= UPT_MIN_ADDRESS) - return; + return 0; if (mpte == NULL) { vm_offset_t ptepa; - ptepa = ((vm_offset_t) *pmap_pde(pmap, va)) & PG_FRAME; + ptepa = ((vm_offset_t) *pmap_pde(pmap, va)) /* & PG_FRAME */; #if defined(PMAP_DIAGNOSTIC) if (!ptepa) panic("pmap_unuse_pt: pagetable page missing, va: 0x%x", va); @@ -367,23 +439,19 @@ pmap_unuse_pt(pmap, va, mpte) vm_page_unhold(mpte); if ((mpte->hold_count == 0) && - (mpte->wire_count == 0) && - (pmap != kernel_pmap) && - (va < KPT_MIN_ADDRESS)) { + (mpte->wire_count == 0)) { /* * We don't free page-table-pages anymore because it can have a negative * impact on perf at times. Now we just deactivate, and it'll get cleaned - * up if needed... Also, if the page ends up getting used, it will fault - * back into the process address space and be reactivated. + * up if needed... Also, if the page ends up getting used, it will be + * brought back into the process address space by pmap_allocpte and be + * reactivated. */ -#if defined(PMAP_FREE_OLD_PTES) - pmap_page_protect(VM_PAGE_TO_PHYS(mpte), VM_PROT_NONE); - vm_page_free(mpte); -#else mpte->dirty = 0; vm_page_deactivate(mpte); -#endif + return 1; } + return 0; } /* @@ -442,7 +510,7 @@ pmap_bootstrap(firstaddr, loadaddr) v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); va = virtual_avail; - pte = pmap_pte(kernel_pmap, va); + pte = (pt_entry_t *) pmap_pte(kernel_pmap, va); /* * CMAP1/CMAP2 are used for zeroing and copying pages. @@ -464,6 +532,7 @@ pmap_bootstrap(firstaddr, loadaddr) *(int *) CMAP1 = *(int *) CMAP2 = *(int *) PTD = 0; pmap_update(); + } /* @@ -492,10 +561,10 @@ pmap_init(phys_start, phys_end) * Allocate memory for random pmap data structures. Includes the * pv_head_table. 
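
[Editor's note: pmap_kenter() above composes a PTE as pa | PG_RW | PG_V, and pmap_extract() undoes it with PG_FRAME and PAGE_MASK. A small self-contained sketch of that bit arithmetic, using the standard i386 bit values (assumed here rather than taken from pmap.h):]

#include <stdio.h>

/* standard i386 PTE bits, assumed for illustration */
#define PG_V		0x001
#define PG_RW		0x002
#define PAGE_MASK	0x00000fffu
#define PG_FRAME	(~PAGE_MASK)

int
main(void)
{
	unsigned pa = 0x00345000;		/* physical page frame */
	unsigned pte = pa | PG_RW | PG_V;	/* what pmap_kenter() stores */
	unsigned off = 0x7b4;			/* offset within the page */

	/* pmap_extract() style recovery of the physical address */
	printf("pte      = %#010x\n", pte);
	printf("physical = %#010x\n", (pte & PG_FRAME) | off);
	return (0);
}
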
*/ - s = (vm_size_t) (sizeof(struct pv_entry) * npg); + s = (vm_size_t) (sizeof(struct pv_entry *) * npg); s = round_page(s); addr = (vm_offset_t) kmem_alloc(kernel_map, s); - pv_table = (pv_entry_t) addr; + pv_table = (pv_entry_t *) addr; /* * init the pv free list @@ -529,59 +598,6 @@ pmap_map(virt, start, end, prot) return (virt); } -#if defined(PMAP_KEEP_PDIRS) -int nfreepdir; -caddr_t *pdirlist; -#define NFREEPDIR 3 - -static void * -pmap_getpdir() { - caddr_t *pdir; - if (pdirlist) { - --nfreepdir; - pdir = pdirlist; - pdirlist = (caddr_t *) *pdir; - *pdir = 0; -#if 0 /* Not needed anymore */ - bzero( (caddr_t) pdir, PAGE_SIZE); -#endif - } else { - pdir = (caddr_t *) kmem_alloc(kernel_map, PAGE_SIZE); - } - - return (void *) pdir; -} - -static void -pmap_freepdir(void *pdir) { - if (nfreepdir > NFREEPDIR) { - kmem_free(kernel_map, (vm_offset_t) pdir, PAGE_SIZE); - } else { - int i; - pt_entry_t *s; - s = (pt_entry_t *) pdir; - - /* - * remove wired in kernel mappings - */ - bzero(s + KPTDI, nkpt * PTESIZE); - s[APTDPTDI] = 0; - s[PTDPTDI] = 0; - -#if defined(PMAP_DIAGNOSTIC) - for(i=0;ipm_pdir = pmap_getpdir(); -#else - pmap->pm_pdir = (pd_entry_t *) kmem_alloc(kernel_map, PAGE_SIZE); -#endif + if (pdstackptr > 0) { + --pdstackptr; + pmap->pm_pdir = + (pd_entry_t *)pdstack[pdstackptr]; + } else { + pmap->pm_pdir = + (pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE); + } + + /* + * allocate object for the ptes + */ + pmap->pm_pteobj = vm_object_allocate( OBJT_DEFAULT, + OFF_TO_IDX((KPT_MIN_ADDRESS + 1) - UPT_MIN_ADDRESS)); + + /* + * allocate the page directory page + */ +retry: + ptdpg = vm_page_alloc( pmap->pm_pteobj, OFF_TO_IDX(KPT_MIN_ADDRESS), + VM_ALLOC_ZERO); + if (ptdpg == NULL) { + VM_WAIT; + goto retry; + } + vm_page_wire(ptdpg); + ptdpg->flags &= ~(PG_MAPPED|PG_BUSY); /* not mapped normally */ + ptdpg->valid = VM_PAGE_BITS_ALL; + + pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg)); + if ((ptdpg->flags & PG_ZERO) == 0) + bzero(pmap->pm_pdir, PAGE_SIZE); /* wire in kernel global address entries */ bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE); /* install self-referential address mapping entry */ - *(int *) (pmap->pm_pdir + PTDPTDI) = - ((int) pmap_kextract((vm_offset_t) pmap->pm_pdir)) | PG_V | PG_RW; + *(unsigned *) (pmap->pm_pdir + PTDPTDI) = + VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW | PG_U; pmap->pm_count = 1; } +static __inline int +pmap_release_free_page(pmap, p) + struct pmap *pmap; + vm_page_t p; +{ + int s; + /* + * This code optimizes the case of freeing non-busy + * page-table pages. Those pages are zero now, and + * might as well be placed directly into the zero queue. + */ + s = splvm(); + if (p->flags & PG_BUSY) { + p->flags |= PG_WANTED; + tsleep(p, PVM, "pmaprl", 0); + splx(s); + return 0; + } + + if (p->flags & PG_MAPPED) { + pmap_remove_pte_mapping(VM_PAGE_TO_PHYS(p)); + p->flags &= ~PG_MAPPED; + } + +#if defined(PMAP_DIAGNOSTIC) + if (p->hold_count) + panic("pmap_release: freeing held page table page"); +#endif + /* + * Page directory pages need to have the kernel + * stuff cleared, so they can go into the zero queue also. 
+ */ + if (p->pindex == OFF_TO_IDX(KPT_MIN_ADDRESS)) { + unsigned *pde = (unsigned *) pmap->pm_pdir; + bzero(pde + KPTDI, nkpt * PTESIZE); + pde[APTDPTDI] = 0; + pde[PTDPTDI] = 0; + pmap_kremove((vm_offset_t) pmap->pm_pdir); + } + + vm_page_free(p); + TAILQ_REMOVE(&vm_page_queue_free, p, pageq); + TAILQ_INSERT_HEAD(&vm_page_queue_zero, p, pageq); + p->queue = PQ_ZERO; + splx(s); + ++vm_page_zero_count; + return 1; +} + +/* + * Release any resources held by the given physical map. + * Called when a pmap initialized by pmap_pinit is being released. + * Should only be called if the map contains no valid mappings. + */ +void +pmap_release(pmap) + register struct pmap *pmap; +{ + vm_page_t p,n,ptdpg; + vm_object_t object = pmap->pm_pteobj; + + ptdpg = NULL; +retry: + for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) { + n = TAILQ_NEXT(p, listq); + if (p->pindex == OFF_TO_IDX(KPT_MIN_ADDRESS)) { + ptdpg = p; + continue; + } + if (!pmap_release_free_page(pmap, p)) + goto retry; + } + pmap_release_free_page(pmap, ptdpg); + + vm_object_deallocate(object); + if (pdstackptr < PDSTACKMAX) { + pdstack[pdstackptr] = (vm_offset_t) pmap->pm_pdir; + ++pdstackptr; + } else { + kmem_free(kernel_map, (vm_offset_t) pmap->pm_pdir, PAGE_SIZE); + } +} + /* * grow the number of kernel page table entries, if needed */ -static vm_page_t nkpg; -vm_offset_t kernel_vm_end; - void pmap_growkernel(vm_offset_t addr) { @@ -630,14 +754,14 @@ pmap_growkernel(vm_offset_t addr) kernel_vm_end = KERNBASE; nkpt = 0; while (pdir_pde(PTD, kernel_vm_end)) { - kernel_vm_end = (kernel_vm_end + NBPDR) & ~(NBPDR-1); + kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); ++nkpt; } } - addr = (addr + NBPDR) & ~(NBPDR - 1); + addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); while (kernel_vm_end < addr) { if (pdir_pde(PTD, kernel_vm_end)) { - kernel_vm_end = (kernel_vm_end + NBPDR) & ~(NBPDR-1); + kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); continue; } ++nkpt; @@ -659,7 +783,7 @@ pmap_growkernel(vm_offset_t addr) } } *pmap_pde(kernel_pmap, kernel_vm_end) = pdir_pde(PTD, kernel_vm_end); - kernel_vm_end = (kernel_vm_end + NBPDR) & ~(NBPDR-1); + kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); } splx(s); } @@ -685,22 +809,6 @@ pmap_destroy(pmap) } } -/* - * Release any resources held by the given physical map. - * Called when a pmap initialized by pmap_pinit is being released. - * Should only be called if the map contains no valid mappings. - */ -void -pmap_release(pmap) - register struct pmap *pmap; -{ -#if defined(PMAP_KEEP_PDIRS) - pmap_freepdir( (void *)pmap->pm_pdir); -#else - kmem_free(kernel_map, (vm_offset_t) pmap->pm_pdir, PAGE_SIZE); -#endif -} - /* * Add a reference to the specified pmap. 
*/ @@ -713,16 +821,6 @@ pmap_reference(pmap) } } -#define PV_FREELIST_MIN ((PAGE_SIZE / sizeof (struct pv_entry)) / 2) - -/* - * Data for the pv entry allocation mechanism - */ -static int pv_freelistcnt; -static pv_entry_t pv_freelist; -static vm_offset_t pvva; -static int npvvapg; - /* * free the pv_entry back to the free list */ @@ -730,8 +828,6 @@ static __inline void free_pv_entry(pv) pv_entry_t pv; { - if (!pv) - return; ++pv_freelistcnt; pv->pv_next = pv_freelist; pv_freelist = pv; @@ -777,10 +873,6 @@ pmap_alloc_pv_entry() if (npvvapg) { vm_page_t m; - /* - * we do this to keep recursion away - */ - pv_freelistcnt += PV_FREELIST_MIN; /* * allocate a physical page out of the vm system */ @@ -818,14 +910,11 @@ pmap_alloc_pv_entry() entry++; } } - pv_freelistcnt -= PV_FREELIST_MIN; } if (!pv_freelist) panic("get_pv_entry: cannot get a pv_entry_t"); } - - /* * init the pv_entry allocation system */ @@ -839,30 +928,13 @@ init_pv_entries(npg) * kvm space is fairly cheap, be generous!!! (the system can panic if * this is too small.) */ - npvvapg = btoc((npg * PVSPERPAGE) * sizeof(struct pv_entry)); + npvvapg = ((npg * PVSPERPAGE) * sizeof(struct pv_entry) + + PAGE_SIZE - 1) / PAGE_SIZE; pvva = kmem_alloc_pageable(kernel_map, npvvapg * PAGE_SIZE); /* * get the first batch of entries */ - free_pv_entry(get_pv_entry()); -} - -__inline pt_entry_t * -get_ptbase(pmap) - pmap_t pmap; -{ - vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME; - - /* are we current address space or kernel? */ - if (pmap == kernel_pmap || frame == ((int) PTDpde & PG_FRAME)) { - return PTmap; - } - /* otherwise, we are alternate address space */ - if (frame != ((int) APTDpde & PG_FRAME)) { - APTDpde = pmap->pm_pdir[PTDPTDI]; - pmap_update(); - } - return APTmap; + pmap_alloc_pv_entry(); } /* @@ -871,77 +943,70 @@ get_ptbase(pmap) * to the header. Otherwise we must search the list for * the entry. In either case we free the now unused entry. 
*/ -static __inline void -pmap_remove_entry(pmap, pv, va) +static __inline int +pmap_remove_entry(pmap, ppv, va) struct pmap *pmap; - pv_entry_t pv; + pv_entry_t *ppv; vm_offset_t va; { pv_entry_t npv; int s; - s = splhigh(); - if (pmap == pv->pv_pmap && va == pv->pv_va) { - pmap_unuse_pt(pmap, va, pv->pv_ptem); - npv = pv->pv_next; - if (npv) { - *pv = *npv; + + s = splvm(); + for (npv = *ppv; npv; (ppv = &npv->pv_next, npv = *ppv)) { + if (pmap == npv->pv_pmap && va == npv->pv_va) { + int rtval = pmap_unuse_pt(pmap, va, npv->pv_ptem); + *ppv = npv->pv_next; free_pv_entry(npv); - } else { - pv->pv_pmap = NULL; - } - } else { - for (npv = pv->pv_next; npv; (pv = npv, npv = pv->pv_next)) { - if (pmap == npv->pv_pmap && va == npv->pv_va) { - pmap_unuse_pt(pmap, va, npv->pv_ptem); - pv->pv_next = npv->pv_next; - free_pv_entry(npv); - break; - } + splx(s); + return rtval; } } splx(s); + return 0; } /* * pmap_remove_pte: do the things to unmap a page in a process */ -static void -pmap_remove_pte(pmap, ptq, sva) +static +#if !defined(PMAP_DIAGNOSTIC) +__inline +#endif +int +pmap_remove_pte(pmap, ptq, va) struct pmap *pmap; - pt_entry_t *ptq; - vm_offset_t sva; + unsigned *ptq; + vm_offset_t va; { - pt_entry_t oldpte; - vm_offset_t pa; - pv_entry_t pv; + unsigned oldpte; + pv_entry_t *ppv; + int i; + int s; oldpte = *ptq; - if (((int)oldpte) & PG_W) - pmap->pm_stats.wired_count--; - pmap->pm_stats.resident_count--; - - pa = ((vm_offset_t)oldpte) & PG_FRAME; - if (pmap_is_managed(pa)) { - if ((int) oldpte & PG_M) { + *ptq = 0; + if (oldpte & PG_W) + pmap->pm_stats.wired_count -= 1; + pmap->pm_stats.resident_count -= 1; + if (oldpte & PG_MANAGED) { + if (oldpte & PG_M) { #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified(oldpte)) { - printf("pmap_remove: modified page not writable: va: 0x%lx, pte: 0x%lx\n", sva, (int) oldpte); + printf("pmap_remove: modified page not writable: va: 0x%lx, pte: 0x%lx\n", va, (int) oldpte); } #endif - - if (sva < USRSTACK + (UPAGES * PAGE_SIZE) || - (sva >= KERNBASE && (sva < clean_sva || sva >= clean_eva))) { - PHYS_TO_VM_PAGE(pa)->dirty = VM_PAGE_BITS_ALL; + if (va < clean_sva || va >= clean_eva) { + PHYS_TO_VM_PAGE(oldpte)->dirty = VM_PAGE_BITS_ALL; } } - pv = pa_to_pvh(pa); - pmap_remove_entry(pmap, pv, sva); + ppv = pa_to_pvh(oldpte); + return pmap_remove_entry(pmap, ppv, va); } else { - pmap_unuse_pt(pmap, sva, NULL); + return pmap_unuse_pt(pmap, va, NULL); } - *ptq = 0; - return; + return 0; } /* @@ -952,24 +1017,26 @@ pmap_remove_page(pmap, va) struct pmap *pmap; register vm_offset_t va; { - register pt_entry_t *ptbase, *ptq; + register unsigned *ptq; + /* * if there is no pte for this address, just skip it!!! */ - if (*pmap_pde(pmap, va) == 0) + if (*pmap_pde(pmap, va) == 0) { return; + } + /* * get a local va for mappings for this pmap. */ - ptbase = get_ptbase(pmap); - ptq = ptbase + i386_btop(va); + ptq = get_ptbase(pmap) + i386_btop(va); if (*ptq) { - pmap_remove_pte(pmap, ptq, va); + (void) pmap_remove_pte(pmap, ptq, va); pmap_update_1pg(va); } return; } - + /* * Remove the given range of addresses from the specified map. 
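
[Editor's note: commit item 6 changes the pv list head from an embedded entry to a plain pointer, so pmap_remove_entry() above can unlink with a single pointer-to-pointer walk instead of special-casing the head. A generic sketch of that removal pattern, with a simplified stand-in for struct pv_entry:]

#include <stdio.h>
#include <stdlib.h>

/* simplified stand-in for struct pv_entry: just a va and a next link */
struct pv {
	unsigned long va;
	struct pv *next;
};

/*
 * With the list head stored as a plain pointer, removal can walk a
 * pointer-to-pointer, so head and interior entries are unlinked by
 * the same assignment -- the shape pmap_remove_entry() takes above.
 */
static int
pv_remove(struct pv **head, unsigned long va)
{
	struct pv **pp, *p;

	for (pp = head; (p = *pp) != NULL; pp = &p->next) {
		if (p->va == va) {
			*pp = p->next;		/* unlink, no head special case */
			free(p);
			return (1);
		}
	}
	return (0);
}

int
main(void)
{
	struct pv *head = NULL;
	struct pv *p;
	unsigned long vas[3] = { 0x1000, 0x2000, 0x3000 };
	int i;

	for (i = 0; i < 3; i++) {
		p = malloc(sizeof(*p));
		p->va = vas[i];
		p->next = head;
		head = p;
	}
	printf("removed 0x2000: %d\n", pv_remove(&head, 0x2000));
	printf("removed 0x9000: %d\n", pv_remove(&head, 0x9000));
	return (0);
}
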
* @@ -982,11 +1049,18 @@ pmap_remove(pmap, sva, eva) register vm_offset_t sva; register vm_offset_t eva; { - register pt_entry_t *ptbase; + register unsigned *ptbase; + vm_offset_t va; vm_offset_t pdnxt; vm_offset_t ptpaddr; vm_offset_t sindex, eindex; vm_page_t mpte; + int s; +#if defined(OLDREMOVE) || defined(I386_CPU) + int anyvalid; +#else + int mustremove; +#endif if (pmap == NULL) return; @@ -1001,6 +1075,16 @@ pmap_remove(pmap, sva, eva) return; } +#if !defined(OLDREMOVE) && !defined(I386_CPU) + if ((pmap == kernel_pmap) || + (pmap->pm_pdir[PTDPTDI] == PTDpde)) + mustremove = 1; + else + mustremove = 0; +#else + anyvalid = 0; +#endif + /* * Get a local virtual address for the mappings that are being * worked with. @@ -1010,6 +1094,7 @@ pmap_remove(pmap, sva, eva) sindex = i386_btop(sva); eindex = i386_btop(eva); + for (; sindex < eindex; sindex = pdnxt) { /* @@ -1046,14 +1131,70 @@ pmap_remove(pmap, sva, eva) } for ( ;sindex != pdnxt; sindex++) { - if (ptbase[sindex] == 0) + vm_offset_t va; + if (ptbase[sindex] == 0) { continue; - pmap_remove_pte(pmap, ptbase + sindex, i386_ptob(sindex)); - if (mpte->hold_count == 0 && mpte->wire_count == 0) + } + va = i386_ptob(sindex); +#if defined(OLDREMOVE) || defined(I386_CPU) + anyvalid = 1; +#else + if (mustremove) + pmap_update_1pg(va); +#endif + if (pmap_remove_pte(pmap, + ptbase + sindex, va)) break; } } - pmap_update(); + +#if defined(OLDREMOVE) || defined(I386_CPU) + if (anyvalid) { + /* are we current address space or kernel? */ + if (pmap == kernel_pmap) { + pmap_update(); + } else if (pmap->pm_pdir[PTDPTDI] == PTDpde) { + pmap_update(); + } + } +#endif +} + + +void +pmap_remove_pte_mapping(pa) + vm_offset_t pa; +{ + register pv_entry_t pv, *ppv, npv; + register unsigned *pte, *ptbase; + vm_offset_t va; + int s; + int anyvalid = 0; + + ppv = pa_to_pvh(pa); + + for (pv = *ppv; pv; pv=pv->pv_next) { + unsigned tpte; + struct pmap *pmap; + + anyvalid = 1; + pmap = pv->pv_pmap; + pte = get_ptbase(pmap) + i386_btop(pv->pv_va); + if (tpte = *pte) { + pmap->pm_stats.resident_count--; + *pte = 0; + if (tpte & PG_W) + pmap->pm_stats.wired_count--; + } + } + + if (anyvalid) { + for (pv = *ppv; pv; pv = npv) { + npv = pv->pv_next; + free_pv_entry(pv); + } + *ppv = NULL; + } } /* @@ -1068,17 +1209,15 @@ pmap_remove(pmap, sva, eva) * inefficient because they iteratively called * pmap_remove (slow...) */ -static void +static __inline void pmap_remove_all(pa) vm_offset_t pa; { - register pv_entry_t pv, opv, npv; - register pt_entry_t *pte, *ptbase; + register pv_entry_t pv, *ppv, npv; + register unsigned *pte, *ptbase; vm_offset_t va; - struct pmap *pmap; vm_page_t m; int s; - int anyvalid = 0; #if defined(PMAP_DIAGNOSTIC) /* @@ -1090,62 +1229,49 @@ pmap_remove_all(pa) } #endif - pa = pa & PG_FRAME; - opv = pa_to_pvh(pa); - if (opv->pv_pmap == NULL) - return; - m = PHYS_TO_VM_PAGE(pa); - s = splhigh(); - pv = opv; - while (pv && ((pmap = pv->pv_pmap) != NULL)) { + ppv = pa_to_pvh(pa); + + s = splvm(); + for (pv = *ppv; pv; pv=pv->pv_next) { int tpte; + struct pmap *pmap; + + pmap = pv->pv_pmap; ptbase = get_ptbase(pmap); va = pv->pv_va; pte = ptbase + i386_btop(va); if (tpte = ((int) *pte)) { + pmap->pm_stats.resident_count--; *pte = 0; if (tpte & PG_W) pmap->pm_stats.wired_count--; - pmap->pm_stats.resident_count--; - anyvalid = 1; - /* * Update the vm_page_t clean and reference bits. 
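
[Editor's note: the remove paths above drop from splhigh() to the new splvm(), which the commit message describes as a combination of splbio and splimp. The sketch below only illustrates the general spl idea of OR-ing interrupt masks into a current priority level; the mask names and values are invented, not the real spl.h symbols.]

#include <stdio.h>

static unsigned cpl;			/* currently blocked interrupt sources */
static unsigned bio_mask = 0x0f;	/* disk interrupts (assumed) */
static unsigned imp_mask = 0xf0;	/* network/interrupt-time malloc (assumed) */

static unsigned
splraise(unsigned mask)
{
	unsigned ocpl = cpl;

	cpl |= mask;
	return (ocpl);
}

static void splx(unsigned ocpl) { cpl = ocpl; }
static unsigned splbio(void) { return (splraise(bio_mask)); }
static unsigned splimp(void) { return (splraise(imp_mask)); }
static unsigned splvm(void)  { return (splraise(bio_mask | imp_mask)); }

int
main(void)
{
	unsigned s;

	s = splbio(); printf("splbio blocks %#04x\n", cpl); splx(s);
	s = splimp(); printf("splimp blocks %#04x\n", cpl); splx(s);
	s = splvm();  printf("splvm  blocks %#04x\n", cpl); splx(s);
	return (0);
}
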
*/ - if ((tpte & PG_M) != 0) { + if (tpte & PG_M) { #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) tpte)) { printf("pmap_remove_all: modified page not writable: va: 0x%lx, pte: 0x%lx\n", va, tpte); } #endif - if (va < USRSTACK + (UPAGES * PAGE_SIZE) || - (va >= KERNBASE && (va < clean_sva || va >= clean_eva))) { + if (va < clean_sva || va >= clean_eva) { m->dirty = VM_PAGE_BITS_ALL; } } } - pv = pv->pv_next; } - if (opv->pv_pmap != NULL) { - pmap_unuse_pt(opv->pv_pmap, opv->pv_va, opv->pv_ptem); - for (pv = opv->pv_next; pv; pv = npv) { - npv = pv->pv_next; - pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); - free_pv_entry(pv); - } + for (pv = *ppv; pv; pv = npv) { + npv = pv->pv_next; + pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); + free_pv_entry(pv); } - - opv->pv_pmap = NULL; - opv->pv_next = NULL; + *ppv = NULL; splx(s); - if (anyvalid) - pmap_update(); } - /* * Set the physical protection on the * specified range of this map as requested. @@ -1156,13 +1282,14 @@ pmap_protect(pmap, sva, eva, prot) vm_offset_t sva, eva; vm_prot_t prot; { - register pt_entry_t *pte; - register pt_entry_t *ptbase; + register unsigned *pte; + register vm_offset_t va; + register unsigned *ptbase; vm_offset_t pdnxt; vm_offset_t ptpaddr; vm_offset_t sindex, eindex; vm_page_t mpte; - int anychanged; + int anyvalid; if (pmap == NULL) @@ -1175,7 +1302,7 @@ pmap_protect(pmap, sva, eva, prot) if (prot & VM_PROT_WRITE) return; - anychanged = 0; + anyvalid = 0; ptbase = get_ptbase(pmap); @@ -1205,26 +1332,177 @@ pmap_protect(pmap, sva, eva, prot) } for (; sindex != pdnxt; sindex++) { - if (ptbase[sindex] == 0) - continue; - pte = ptbase + sindex; - pbits = *(int *)pte; + + unsigned pbits = ptbase[sindex]; + if (pbits & PG_RW) { if (pbits & PG_M) { - vm_page_t m; - vm_offset_t pa = pbits & PG_FRAME; - m = PHYS_TO_VM_PAGE(pa); + vm_page_t m = PHYS_TO_VM_PAGE(pbits); m->dirty = VM_PAGE_BITS_ALL; } - *(int *)pte &= ~(PG_M|PG_RW); - anychanged=1; + ptbase[sindex] = pbits & ~(PG_M|PG_RW); + anyvalid = 1; } } } - if (anychanged) + if (anyvalid) pmap_update(); } +/* + * Create a pv entry for page at pa for + * (pmap, va). + */ +static __inline void +pmap_insert_entry(pmap, va, mpte, pa) + pmap_t pmap; + vm_offset_t va; + vm_page_t mpte; + vm_offset_t pa; +{ + + int s; + pv_entry_t *ppv, pv; + + s = splvm(); + pv = get_pv_entry(); + pv->pv_va = va; + pv->pv_pmap = pmap; + pv->pv_ptem = mpte; + + ppv = pa_to_pvh(pa); + if (*ppv) + pv->pv_next = *ppv; + else + pv->pv_next = NULL; + *ppv = pv; + splx(s); +} + +/* + * this routine is called if the page table page is not + * mapped correctly. + */ +static vm_page_t +_pmap_allocpte(pmap, va, ptepindex) + pmap_t pmap; + vm_offset_t va; + int ptepindex; +{ + vm_offset_t pteva, ptepa; + vm_page_t m; + + /* + * Find or fabricate a new pagetable page + */ +retry: + m = vm_page_lookup(pmap->pm_pteobj, ptepindex); + if (m == NULL) { + m = vm_page_alloc(pmap->pm_pteobj, ptepindex, VM_ALLOC_ZERO); + if (m == NULL) { + VM_WAIT; + goto retry; + } + if ((m->flags & PG_ZERO) == 0) + pmap_zero_page(VM_PAGE_TO_PHYS(m)); + m->flags &= ~(PG_ZERO|PG_BUSY); + m->valid = VM_PAGE_BITS_ALL; + } + + /* + * mark the object writeable + */ + pmap->pm_pteobj->flags |= OBJ_WRITEABLE; + + /* + * Increment the hold count for the page table page + * (denoting a new mapping.) 
+ */ + ++m->hold_count; + + /* + * Activate the pagetable page, if it isn't already + */ + if (m->queue != PQ_ACTIVE) + vm_page_activate(m); + + /* + * Map the pagetable page into the process address space, if + * it isn't already there. + */ + pteva = ((vm_offset_t) vtopte(va)) & PG_FRAME; + ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; + if (ptepa == 0) { + int s; + pv_entry_t pv, *ppv; + + pmap->pm_stats.resident_count++; + + s = splvm(); + pv = get_pv_entry(); + + pv->pv_va = pteva; + pv->pv_pmap = pmap; + pv->pv_next = NULL; + pv->pv_ptem = NULL; + + ptepa = VM_PAGE_TO_PHYS(m); + pmap->pm_pdir[ptepindex] = + (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_MANAGED); + ppv = pa_to_pvh(ptepa); +#if defined(PMAP_DIAGNOSTIC) + if (*ppv) + panic("pmap_allocpte: page is already mapped"); +#endif + *ppv = pv; + splx(s); + m->flags |= PG_MAPPED; + } else { +#if defined(PMAP_DIAGNOSTIC) + if (VM_PAGE_TO_PHYS(m) != (ptepa & PG_FRAME)) + panic("pmap_allocpte: mismatch"); +#endif + pmap->pm_pdir[ptepindex] = + (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_MANAGED); + pmap_update_1pg(pteva); + m->flags |= PG_MAPPED; + } + return m; +} + +static __inline vm_page_t +pmap_allocpte(pmap, va) + pmap_t pmap; + vm_offset_t va; +{ + int ptepindex; + vm_offset_t ptepa; + vm_page_t m; + + /* + * Calculate pagetable page index + */ + ptepindex = va >> PDRSHIFT; + + /* + * Get the page directory entry + */ + ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; + + /* + * If the page table page is mapped, we just increment the + * hold count, and activate it. + */ + if ((ptepa & (PG_RW|PG_U|PG_V)) == (PG_RW|PG_U|PG_V)) { + m = PHYS_TO_VM_PAGE(ptepa); + ++m->hold_count; + if (m->queue != PQ_ACTIVE) + vm_page_activate(m); + return m; + } + return _pmap_allocpte(pmap, va, ptepindex); +} + /* * Insert the given physical page (p) at * the specified virtual address (v) in the @@ -1245,40 +1523,32 @@ pmap_enter(pmap, va, pa, prot, wired) vm_prot_t prot; boolean_t wired; { - register pt_entry_t *pte; + register unsigned *pte; vm_offset_t opa; - register pv_entry_t pv, npv; vm_offset_t origpte, newpte; + vm_offset_t ptepa; + vm_page_t mpte; + int s; if (pmap == NULL) return; - pv = NULL; - - va = va & PG_FRAME; + va &= PG_FRAME; if (va > VM_MAX_KERNEL_ADDRESS) panic("pmap_enter: toobig"); + mpte = NULL; /* * In the case that a page table page is not * resident, we are creating it here. */ - if ((va < VM_MIN_KERNEL_ADDRESS) && - (curproc != NULL) && - (pmap->pm_map->pmap == pmap)) { - vm_offset_t v; - - v = (vm_offset_t) vtopte(va); - /* Fault the pte only if needed: */ - if (*((int *)vtopte(v)) == 0) - (void) vm_fault(pmap->pm_map, - trunc_page(v), VM_PROT_WRITE, FALSE); - } + if (va < UPT_MIN_ADDRESS) + mpte = pmap_allocpte(pmap, va); + pte = pmap_pte(pmap, va); /* * Page Directory table entry not valid, we need a new PT page */ - pte = pmap_pte(pmap, va); if (pte == NULL) { printf("kernel page directory invalid pdir=%p, va=0x%lx\n", pmap->pm_pdir[PTDPTDI], va); @@ -1286,10 +1556,9 @@ pmap_enter(pmap, va, pa, prot, wired) } origpte = *(vm_offset_t *)pte; + pa &= PG_FRAME; opa = origpte & PG_FRAME; - pa = pa & PG_FRAME; - /* * Mapping has not changed, must be protection or wiring change. */ @@ -1315,55 +1584,35 @@ pmap_enter(pmap, va, pa, prot, wired) * We might be turning off write access to the page, * so we go ahead and sense modify status. 
*/ - if (origpte & PG_M) { + if (origpte & PG_MANAGED) { vm_page_t m; - m = PHYS_TO_VM_PAGE(pa); - m->dirty = VM_PAGE_BITS_ALL; + if (origpte & PG_M) { + m = PHYS_TO_VM_PAGE(pa); + m->dirty = VM_PAGE_BITS_ALL; + } + pa |= PG_MANAGED; } + + if (mpte) + --mpte->hold_count; + goto validate; } /* * Mapping has changed, invalidate old range and fall through to * handle validating new mapping. */ - if (opa) { - pmap_remove_page(pmap, va); - opa = 0; - origpte = 0; - } + if (opa) + (void) pmap_remove_pte(pmap, pte, va); + /* * Enter on the PV list if part of our managed memory Note that we * raise IPL while manipulating pv_table since pmap_enter can be * called at interrupt time. */ if (pmap_is_managed(pa)) { - int s; - - pv = pa_to_pvh(pa); - s = splhigh(); - /* - * No entries yet, use header as the first entry - */ - if (pv->pv_pmap == NULL) { - pv->pv_va = va; - pv->pv_pmap = pmap; - pv->pv_next = NULL; - pv->pv_ptem = NULL; - } - /* - * There is at least one other VA mapping this page. Place - * this entry after the header. - */ - else { - npv = get_pv_entry(); - npv->pv_va = va; - npv->pv_pmap = pmap; - npv->pv_next = pv->pv_next; - pv->pv_next = npv; - pv = npv; - pv->pv_ptem = NULL; - } - splx(s); + pmap_insert_entry(pmap, va, mpte, pa); + pa |= PG_MANAGED; } /* @@ -1383,106 +1632,16 @@ validate: newpte |= PG_W; if (va < UPT_MIN_ADDRESS) newpte |= PG_U; - else if (va < UPT_MAX_ADDRESS) - newpte |= PG_U | PG_RW; /* * if the mapping or permission bits are different, we need * to update the pte. */ if ((origpte & ~(PG_M|PG_A)) != newpte) { - *pte = (pt_entry_t) newpte; + *pte = newpte; if (origpte) pmap_update_1pg(va); } - - if (origpte == 0) { - vm_page_t mpte; - mpte = pmap_use_pt(pmap, va); - if (pv) - pv->pv_ptem = mpte; - } -} - -/* - * Add a list of wired pages to the kva - * this routine is only used for temporary - * kernel mappings that do not need to have - * page modification or references recorded. - * Note that old mappings are simply written - * over. The page *must* be wired. - */ -void -pmap_qenter(va, m, count) - vm_offset_t va; - vm_page_t *m; - int count; -{ - int i; - register pt_entry_t *pte; - - for (i = 0; i < count; i++) { - vm_offset_t tva = va + i * PAGE_SIZE; - pt_entry_t npte = (pt_entry_t) ((int) (VM_PAGE_TO_PHYS(m[i]) | PG_RW | PG_V)); - pt_entry_t opte; - pte = vtopte(tva); - opte = *pte; - *pte = npte; - if (opte) pmap_update_1pg(tva); - } -} -/* - * this routine jerks page mappings from the - * kernel -- it is meant only for temporary mappings. - */ -void -pmap_qremove(va, count) - vm_offset_t va; - int count; -{ - int i; - register pt_entry_t *pte; - - for (i = 0; i < count; i++) { - vm_offset_t tva = va + i * PAGE_SIZE; - pte = vtopte(tva); - *pte = 0; - pmap_update_1pg(tva); - } -} - -/* - * add a wired page to the kva - * note that in order for the mapping to take effect -- you - * should do a pmap_update after doing the pmap_kenter... 
- */ -void -pmap_kenter(va, pa) - vm_offset_t va; - register vm_offset_t pa; -{ - register pt_entry_t *pte; - pt_entry_t npte, opte; - - npte = (pt_entry_t) ((int) (pa | PG_RW | PG_V)); - pte = vtopte(va); - opte = *pte; - *pte = npte; - if (opte) pmap_update_1pg(va); -} - -/* - * remove a page from the kernel pagetables - */ -void -pmap_kremove(va) - vm_offset_t va; -{ - register pt_entry_t *pte; - - pte = vtopte(va); - *pte = (pt_entry_t) 0; - pmap_update_1pg(va); } /* @@ -1502,46 +1661,27 @@ pmap_enter_quick(pmap, va, pa) vm_offset_t va; register vm_offset_t pa; { - register pt_entry_t *pte; - register pv_entry_t pv, npv; - int s; + register unsigned *pte; + vm_page_t mpte; + + mpte = NULL; + /* + * In the case that a page table page is not + * resident, we are creating it here. + */ + if (va < UPT_MIN_ADDRESS) + mpte = pmap_allocpte(pmap, va); + + pte = (unsigned *)vtopte(va); + if (*pte) + (void) pmap_remove_pte(pmap, pte, va); /* * Enter on the PV list if part of our managed memory Note that we * raise IPL while manipulating pv_table since pmap_enter can be * called at interrupt time. */ - - pte = vtopte(va); - /* a fault on the page table might occur here */ - if (*pte) { - pmap_remove_page(pmap, va); - } - - pv = pa_to_pvh(pa); - s = splhigh(); - /* - * No entries yet, use header as the first entry - */ - if (pv->pv_pmap == NULL) { - pv->pv_pmap = pmap; - pv->pv_va = va; - pv->pv_next = NULL; - } - /* - * There is at least one other VA mapping this page. Place this entry - * after the header. - */ - else { - npv = get_pv_entry(); - npv->pv_va = va; - npv->pv_pmap = pmap; - npv->pv_next = pv->pv_next; - pv->pv_next = npv; - pv = npv; - } - splx(s); - pv->pv_ptem = pmap_use_pt(pmap, va); + pmap_insert_entry(pmap, va, mpte, pa); /* * Increment counters @@ -1551,7 +1691,7 @@ pmap_enter_quick(pmap, va, pa) /* * Now validate mapping with RO protection */ - *pte = (pt_entry_t) ((int) (pa | PG_V | PG_U)); + *pte = pa | PG_V | PG_U | PG_MANAGED; return; } @@ -1583,11 +1723,6 @@ pmap_object_init_pt(pmap, addr, object, pindex, size) return; } - /* - * remove any already used mappings - */ - pmap_remove( pmap, trunc_page(addr), round_page(addr + size)); - /* * if we are processing a major portion of the object, then scan the * entire thing. 
@@ -1595,9 +1730,9 @@ pmap_object_init_pt(pmap, addr, object, pindex, size) if (psize > (object->size >> 2)) { objpgs = psize; - for (p = object->memq.tqh_first; + for (p = TAILQ_FIRST(&object->memq); ((objpgs > 0) && (p != NULL)); - p = p->listq.tqe_next) { + p = TAILQ_NEXT(p, listq)) { tmpidx = p->pindex; if (tmpidx < pindex) { @@ -1614,7 +1749,7 @@ pmap_object_init_pt(pmap, addr, object, pindex, size) vm_page_deactivate(p); vm_page_hold(p); p->flags |= PG_MAPPED; - pmap_enter_quick(pmap, + pmap_enter_quick(pmap, addr + (tmpidx << PAGE_SHIFT), VM_PAGE_TO_PHYS(p)); vm_page_unhold(p); @@ -1627,14 +1762,13 @@ pmap_object_init_pt(pmap, addr, object, pindex, size) */ for (tmpidx = 0; tmpidx < psize; tmpidx += 1) { p = vm_page_lookup(object, tmpidx + pindex); - if (p && (p->busy == 0) && + if (p && ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && + (p->busy == 0) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { - if (p->queue == PQ_CACHE) - vm_page_deactivate(p); vm_page_hold(p); p->flags |= PG_MAPPED; - pmap_enter_quick(pmap, + pmap_enter_quick(pmap, addr + (tmpidx << PAGE_SHIFT), VM_PAGE_TO_PHYS(p)); vm_page_unhold(p); @@ -1670,6 +1804,7 @@ pmap_prefault(pmap, addra, entry, object) vm_offset_t addr; vm_pindex_t pindex; vm_page_t m; + int pageorder_index; if (entry->object.vm_object != object) return; @@ -1686,13 +1821,16 @@ pmap_prefault(pmap, addra, entry, object) for (i = 0; i < PAGEORDER_SIZE; i++) { vm_object_t lobject; - pt_entry_t *pte; + unsigned *pte; addr = addra + pmap_prefault_pageorder[i]; if (addr < starta || addr >= entry->end) continue; - pte = vtopte(addr); + if ((*pmap_pde(pmap, addr)) == NULL) + continue; + + pte = (unsigned *) vtopte(addr); if (*pte) continue; @@ -1718,16 +1856,12 @@ pmap_prefault(pmap, addra, entry, object) (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if (m->queue == PQ_CACHE) { - if ((cnt.v_free_count + cnt.v_cache_count) < - cnt.v_free_min) - break; vm_page_deactivate(m); } vm_page_hold(m); m->flags |= PG_MAPPED; pmap_enter_quick(pmap, addr, VM_PAGE_TO_PHYS(m)); vm_page_unhold(m); - } } } @@ -1745,7 +1879,7 @@ pmap_change_wiring(pmap, va, wired) vm_offset_t va; boolean_t wired; { - register pt_entry_t *pte; + register unsigned *pte; if (pmap == NULL) return; @@ -1780,7 +1914,73 @@ pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) vm_size_t len; vm_offset_t src_addr; { -} + vm_offset_t addr; + vm_offset_t end_addr = src_addr + len; + vm_offset_t pdnxt; + unsigned src_frame, dst_frame; + pd_entry_t pde; + + if (dst_addr != src_addr) + return; + + src_frame = ((unsigned) src_pmap->pm_pdir[PTDPTDI]) & PG_FRAME; + dst_frame = ((unsigned) dst_pmap->pm_pdir[PTDPTDI]) & PG_FRAME; + + if (src_frame != (((unsigned) PTDpde) & PG_FRAME)) + return; + + if (dst_frame != (((unsigned) APTDpde) & PG_FRAME)) { + APTDpde = (pd_entry_t) (dst_frame | PG_RW | PG_V); + pmap_update(); + } + + for(addr = src_addr; addr < end_addr; addr = pdnxt) { + unsigned *src_pte, *dst_pte; + vm_page_t dstmpte, srcmpte; + vm_offset_t srcptepaddr; + + pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1)); + srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[addr >> PDRSHIFT]; + if (srcptepaddr) { + continue; + } + + srcmpte = PHYS_TO_VM_PAGE(srcptepaddr); + if (srcmpte->hold_count == 0) + continue; + + if (pdnxt > end_addr) + pdnxt = end_addr; + + src_pte = (unsigned *) vtopte(addr); + dst_pte = (unsigned *) avtopte(addr); + while (addr < pdnxt) { + unsigned ptetemp; + ptetemp = *src_pte; + if (ptetemp) { + /* + * We have to check after allocpte for the + * pte still being 
around... allocpte can + * block. + */ + dstmpte = pmap_allocpte(dst_pmap, addr); + if (ptetemp = *src_pte) { + *dst_pte = ptetemp; + dst_pmap->pm_stats.resident_count++; + pmap_insert_entry(dst_pmap, addr, dstmpte, + (ptetemp & PG_FRAME)); + } else { + --dstmpte->hold_count; + } + if (dstmpte->hold_count >= srcmpte->hold_count) + break; + } + addr += PAGE_SIZE; + ++src_pte; + ++dst_pte; + } + } +} /* * Routine: pmap_kernel @@ -1808,7 +2008,6 @@ pmap_zero_page(phys) *(int *) CMAP2 = PG_V | PG_RW | (phys & PG_FRAME); bzero(CADDR2, PAGE_SIZE); - *(int *) CMAP2 = 0; pmap_update_1pg((vm_offset_t) CADDR2); } @@ -1872,24 +2071,22 @@ pmap_page_exists(pmap, pa) pmap_t pmap; vm_offset_t pa; { - register pv_entry_t pv; + register pv_entry_t *ppv, pv; int s; if (!pmap_is_managed(pa)) return FALSE; - pv = pa_to_pvh(pa); - s = splhigh(); + s = splvm(); + ppv = pa_to_pvh(pa); /* * Not found, check current mappings returning immediately if found. */ - if (pv->pv_pmap != NULL) { - for (; pv; pv = pv->pv_next) { - if (pv->pv_pmap == pmap) { - splx(s); - return TRUE; - } + for (pv = *ppv; pv; pv = pv->pv_next) { + if (pv->pv_pmap == pmap) { + splx(s); + return TRUE; } } splx(s); @@ -1906,42 +2103,40 @@ pmap_testbit(pa, bit) register vm_offset_t pa; int bit; { - register pv_entry_t pv; - pt_entry_t *pte; + register pv_entry_t *ppv, pv; + unsigned *pte; int s; if (!pmap_is_managed(pa)) return FALSE; - pv = pa_to_pvh(pa); - s = splhigh(); + s = splvm(); + ppv = pa_to_pvh(pa); /* * Not found, check current mappings returning immediately if found. */ - if (pv->pv_pmap != NULL) { - for (; pv; pv = pv->pv_next) { - /* - * if the bit being tested is the modified bit, then - * mark UPAGES as always modified, and ptes as never - * modified. - */ - if (bit & (PG_A|PG_M)) { - if ((pv->pv_va >= clean_sva) && (pv->pv_va < clean_eva)) { - continue; - } - } - if (!pv->pv_pmap) { -#if defined(PMAP_DIAGNOSTIC) - printf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va); -#endif + for (pv = *ppv ;pv; pv = pv->pv_next) { + /* + * if the bit being tested is the modified bit, then + * mark UPAGES as always modified, and ptes as never + * modified. + */ + if (bit & (PG_A|PG_M)) { + if ((pv->pv_va >= clean_sva) && (pv->pv_va < clean_eva)) { continue; } - pte = pmap_pte(pv->pv_pmap, pv->pv_va); - if ((int) *pte & bit) { - splx(s); - return TRUE; - } + } + if (!pv->pv_pmap) { +#if defined(PMAP_DIAGNOSTIC) + printf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va); +#endif + continue; + } + pte = pmap_pte(pv->pv_pmap, pv->pv_va); + if ((int) *pte & bit) { + splx(s); + return TRUE; } } splx(s); @@ -1957,60 +2152,64 @@ pmap_changebit(pa, bit, setem) int bit; boolean_t setem; { - register pv_entry_t pv; - register pt_entry_t *pte; + register pv_entry_t pv, *ppv; + register unsigned *pte, npte; vm_offset_t va; + int changed; int s; if (!pmap_is_managed(pa)) return; - pv = pa_to_pvh(pa); - s = splhigh(); + s = splvm(); + changed = 0; + ppv = pa_to_pvh(pa); /* * Loop over all current mappings setting/clearing as appropos If * setting RO do we need to clear the VAC? 
*/ - if (pv->pv_pmap != NULL) { - for (; pv; pv = pv->pv_next) { - va = pv->pv_va; + for ( pv = *ppv; pv; pv = pv->pv_next) { + va = pv->pv_va; - /* - * don't write protect pager mappings - */ - if (!setem && (bit == PG_RW)) { - if (va >= clean_sva && va < clean_eva) - continue; - } - if (!pv->pv_pmap) { -#if defined(PMAP_DIAGNOSTIC) - printf("Null pmap (cb) at va: 0x%lx\n", va); -#endif + /* + * don't write protect pager mappings + */ + if (!setem && (bit == PG_RW)) { + if (va >= clean_sva && va < clean_eva) continue; - } + } + if (!pv->pv_pmap) { +#if defined(PMAP_DIAGNOSTIC) + printf("Null pmap (cb) at va: 0x%lx\n", va); +#endif + continue; + } - pte = pmap_pte(pv->pv_pmap, va); - if (setem) { - *(int *)pte |= bit; - } else { - if (bit == PG_RW) { - vm_offset_t pbits = *(vm_offset_t *)pte; - if (pbits & PG_M) { - vm_page_t m; - vm_offset_t pa = pbits & PG_FRAME; - m = PHYS_TO_VM_PAGE(pa); - m->dirty = VM_PAGE_BITS_ALL; - } - *(int *)pte &= ~(PG_M|PG_RW); - } else { - *(int *)pte &= ~bit; + pte = pmap_pte(pv->pv_pmap, va); + if (setem) { + *(int *)pte |= bit; + changed = 1; + } else { + vm_offset_t pbits = *(vm_offset_t *)pte; + if (pbits & bit) + changed = 1; + if (bit == PG_RW) { + if (pbits & PG_M) { + vm_page_t m; + vm_offset_t pa = pbits & PG_FRAME; + m = PHYS_TO_VM_PAGE(pa); + m->dirty = VM_PAGE_BITS_ALL; } + *(int *)pte = pbits & ~(PG_M|PG_RW); + } else { + *(int *)pte = pbits & ~bit; } } } splx(s); - pmap_update(); + if (changed) + pmap_update(); } /* @@ -2026,8 +2225,10 @@ pmap_page_protect(phys, prot) if ((prot & VM_PROT_WRITE) == 0) { if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) pmap_changebit(phys, PG_RW, FALSE); - else + else { pmap_remove_all(phys); + pmap_update(); + } } } @@ -2127,7 +2328,7 @@ pmap_mapdev(pa, size) vm_size_t size; { vm_offset_t va, tmpva; - pt_entry_t *pte; + unsigned *pte; size = roundup(size, PAGE_SIZE); @@ -2137,8 +2338,8 @@ pmap_mapdev(pa, size) pa = pa & PG_FRAME; for (tmpva = va; size > 0;) { - pte = vtopte(tmpva); - *pte = (pt_entry_t) ((int) (pa | PG_RW | PG_V | PG_N)); + pte = (unsigned *)vtopte(tmpva); + *pte = pa | PG_RW | PG_V | PG_N; size -= PAGE_SIZE; tmpva += PAGE_SIZE; pa += PAGE_SIZE; @@ -2164,8 +2365,8 @@ pmap_pid_dump(int pid) { pmap = &p->p_vmspace->vm_pmap; for(i=0;i<1024;i++) { pd_entry_t *pde; - pt_entry_t *pte; - unsigned base = i << PD_SHIFT; + unsigned *pte; + unsigned base = i << PDRSHIFT; pde = &pmap->pm_pdir[i]; if (pde && pmap_pde_v(pde)) { @@ -2215,7 +2416,7 @@ pads(pm) pmap_t pm; { unsigned va, i, j; - pt_entry_t *ptep; + unsigned *ptep; if (pm == kernel_pmap) return; @@ -2253,3 +2454,5 @@ pmap_pvdump(pa) printf(" "); } #endif + + diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index c97e50aca6c1..b81cfc10601b 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. 
* * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 - * $Id: trap.c,v 1.74 1996/03/27 17:33:39 bde Exp $ + * $Id: trap.c,v 1.75 1996/03/28 05:40:57 dyson Exp $ */ /* @@ -805,25 +805,11 @@ int trapwrite(addr) v = trunc_page(vtopte(va)); - /* - * wire the pte page - */ - if (va < USRSTACK) { - vm_map_pageable(&vm->vm_map, v, round_page(v+1), FALSE); - } - /* * fault the data page */ rv = vm_fault(&vm->vm_map, va, VM_PROT_READ|VM_PROT_WRITE, FALSE); - /* - * unwire the pte page - */ - if (va < USRSTACK) { - vm_map_pageable(&vm->vm_map, v, round_page(v+1), TRUE); - } - --p->p_lock; if (rv != KERN_SUCCESS) diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index 4c1823e40360..e764b2f7def5 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -38,7 +38,7 @@ * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ - * $Id: vm_machdep.c,v 1.61 1996/05/02 10:43:06 phk Exp $ + * $Id: vm_machdep.c,v 1.62 1996/05/02 14:19:55 phk Exp $ */ #include "npx.h" @@ -862,7 +862,7 @@ int vm_page_zero_idle() { vm_page_t m; if ((cnt.v_free_count > cnt.v_interrupt_free_min) && - (m = vm_page_queue_free.tqh_first)) { + (m = TAILQ_FIRST(&vm_page_queue_free))) { TAILQ_REMOVE(&vm_page_queue_free, m, pageq); enable_intr(); pmap_zero_page(VM_PAGE_TO_PHYS(m)); diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h index a843fbfcbddc..f0d7fe695214 100644 --- a/sys/amd64/include/pmap.h +++ b/sys/amd64/include/pmap.h @@ -42,7 +42,7 @@ * * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91 - * $Id: pmap.h,v 1.37 1996/05/02 14:20:04 phk Exp $ + * $Id: pmap.h,v 1.38 1996/05/02 22:25:18 phk Exp $ */ #ifndef _MACHINE_PMAP_H_ @@ -69,6 +69,7 @@ /* Our various interpretations of the above */ #define PG_W PG_AVAIL1 /* "Wired" pseudoflag */ +#define PG_MANAGED PG_AVAIL2 #define PG_FRAME (~PAGE_MASK) #define PG_PROT (PG_RW|PG_U) /* all protection bits . */ #define PG_N (PG_NC_PWT|PG_NC_PCD) /* Non-cacheable */ @@ -87,12 +88,8 @@ #define VADDR(pdi, pti) ((vm_offset_t)(((pdi)< #include #include +#include #include #include @@ -107,16 +108,15 @@ #if defined(DIAGNOSTIC) #define PMAP_DIAGNOSTIC #endif +/* #define OLDREMOVE */ static void init_pv_entries __P((int)); /* * Get PDEs and PTEs for user/kernel address space */ -#define pmap_pde(m, v) (&((m)->pm_pdir[((vm_offset_t)(v) >> PDRSHIFT)&(NPDEPG-1)])) -#define pdir_pde(m, v) (m[((vm_offset_t)(v) >> PDRSHIFT)&(NPDEPG-1)]) - -#define pmap_pte_pa(pte) (*(int *)(pte) & PG_FRAME) +#define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) +#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) #define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) @@ -145,26 +145,36 @@ static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? 
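
[Editor's note: vm_page_zero_idle() above pre-zeroes free pages at idle time, and commit item 9 plus the VM_ALLOC_ZERO/PG_ZERO checks in pmap_pinit() and _pmap_allocpte() let allocations skip the bzero when a page is already known to be zero. A userland sketch of that check-the-flag-before-zeroing idea; the flag and structure here are stand-ins, not the kernel code paths.]

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE	4096
#define PG_ZERO		0x01		/* stand-in for the vm_page flag */

struct fakepage {
	int flags;
	char data[PAGE_SIZE];
};

/*
 * A page already marked PG_ZERO (e.g. by an idle-time zeroing loop)
 * lets a VM_ALLOC_ZERO-style allocation skip the memset.
 */
static void
get_zeroed(struct fakepage *p)
{
	if ((p->flags & PG_ZERO) == 0) {
		memset(p->data, 0, PAGE_SIZE);	/* slow path */
		p->flags |= PG_ZERO;
	}
}

int
main(void)
{
	struct fakepage *p = malloc(sizeof(*p));

	p->flags = 0;
	get_zeroed(p);		/* pays for the memset */
	get_zeroed(p);		/* free the second time */
	printf("flags = %#x\n", p->flags);
	free(p);
	return (0);
}
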
*/ static vm_offset_t vm_first_phys; static int nkpt; +static vm_page_t nkpg; +vm_offset_t kernel_vm_end; extern vm_offset_t clean_sva, clean_eva; extern int cpu_class; +#define PV_FREELIST_MIN ((PAGE_SIZE / sizeof (struct pv_entry)) / 2) + +/* + * Data for the pv entry allocation mechanism + */ +static int pv_freelistcnt; +static pv_entry_t pv_freelist; +static vm_offset_t pvva; +static int npvvapg; + /* * All those kernel PT submaps that BSD is so fond of */ pt_entry_t *CMAP1; static pt_entry_t *CMAP2, *ptmmap; -static pv_entry_t pv_table; +static pv_entry_t *pv_table; caddr_t CADDR1, ptvmmap; static caddr_t CADDR2; static pt_entry_t *msgbufmap; struct msgbuf *msgbufp; static void free_pv_entry __P((pv_entry_t pv)); -pt_entry_t * - get_ptbase __P((pmap_t pmap)); -static pv_entry_t - get_pv_entry __P((void)); +static __inline unsigned * get_ptbase __P((pmap_t pmap)); +static pv_entry_t get_pv_entry __P((void)); static void i386_protection_init __P((void)); static void pmap_alloc_pv_entry __P((void)); static void pmap_changebit __P((vm_offset_t pa, int bit, boolean_t setem)); @@ -173,14 +183,25 @@ static void pmap_enter_quick __P((pmap_t pmap, vm_offset_t va, static int pmap_is_managed __P((vm_offset_t pa)); static void pmap_remove_all __P((vm_offset_t pa)); static void pmap_remove_page __P((struct pmap *pmap, vm_offset_t va)); -static __inline void pmap_remove_entry __P((struct pmap *pmap, pv_entry_t pv, +static __inline int pmap_remove_entry __P((struct pmap *pmap, pv_entry_t *pv, vm_offset_t va)); -static void pmap_remove_pte __P((struct pmap *pmap, pt_entry_t *ptq, +static int pmap_remove_pte __P((struct pmap *pmap, unsigned *ptq, vm_offset_t sva)); +static vm_page_t + pmap_pte_vm_page __P((pmap_t pmap, vm_offset_t pt)); static boolean_t pmap_testbit __P((vm_offset_t pa, int bit)); -static void * pmap_getpdir __P((void)); +static __inline void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va, + vm_page_t mpte, vm_offset_t pa)); +static __inline vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va)); +static void pmap_remove_pte_mapping __P((vm_offset_t pa)); +static __inline int pmap_release_free_page __P((pmap_t pmap, vm_page_t p)); +static vm_page_t _pmap_allocpte __P((pmap_t pmap, vm_offset_t va, int ptepindex)); + +#define PDSTACKMAX 16 +static vm_offset_t pdstack[PDSTACKMAX]; +static int pdstackptr; #if defined(PMAP_DIAGNOSTIC) @@ -228,34 +249,38 @@ pmap_update_2pg( vm_offset_t va1, vm_offset_t va2) { } } +static __inline __pure unsigned * +get_ptbase(pmap) + pmap_t pmap; +{ + unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME; + + /* are we current address space or kernel? */ + if (pmap == kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) { + return (unsigned *) PTmap; + } + /* otherwise, we are alternate address space */ + if (frame != (((unsigned) APTDpde) & PG_FRAME)) { + APTDpde = (pd_entry_t) (frame | PG_RW | PG_V); + pmap_update(); + } + return (unsigned *) APTmap; +} + /* * Routine: pmap_pte * Function: * Extract the page table entry associated * with the given map/virtual_address pair. - * [ what about induced faults -wfj] */ -__inline pt_entry_t * __pure +__inline unsigned * __pure pmap_pte(pmap, va) register pmap_t pmap; vm_offset_t va; { - if (pmap && *pmap_pde(pmap, va)) { - vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME; - - /* are we current address space or kernel? 
*/ - if ((pmap == kernel_pmap) || (frame == ((int) PTDpde & PG_FRAME))) - return ((pt_entry_t *) vtopte(va)); - /* otherwise, we are alternate address space */ - else { - if (frame != ((int) APTDpde & PG_FRAME)) { - APTDpde = pmap->pm_pdir[PTDPTDI]; - pmap_update(); - } - return ((pt_entry_t *) avtopte(va)); - } + return get_ptbase(pmap) + i386_btop(va); } return (0); } @@ -266,39 +291,108 @@ pmap_pte(pmap, va) * Extract the physical page address associated * with the given map/virtual_address pair. */ - -vm_offset_t +vm_offset_t __pure pmap_extract(pmap, va) register pmap_t pmap; vm_offset_t va; { - vm_offset_t pa; - if (pmap && *pmap_pde(pmap, va)) { - vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME; - - /* are we current address space or kernel? */ - if ((pmap == kernel_pmap) - || (frame == ((int) PTDpde & PG_FRAME))) { - pa = *(int *) vtopte(va); - /* otherwise, we are alternate address space */ - } else { - if (frame != ((int) APTDpde & PG_FRAME)) { - APTDpde = pmap->pm_pdir[PTDPTDI]; - pmap_update(); - } - pa = *(int *) avtopte(va); - } - return ((pa & PG_FRAME) | (va & ~PG_FRAME)); + unsigned *pte; + pte = get_ptbase(pmap) + i386_btop(va); + return ((*pte & PG_FRAME) | (va & PAGE_MASK)); } return 0; } +/* + * Add a list of wired pages to the kva + * this routine is only used for temporary + * kernel mappings that do not need to have + * page modification or references recorded. + * Note that old mappings are simply written + * over. The page *must* be wired. + */ +void +pmap_qenter(va, m, count) + vm_offset_t va; + vm_page_t *m; + int count; +{ + int i; + int anyvalid = 0; + register unsigned *pte; + + for (i = 0; i < count; i++) { + vm_offset_t tva = va + i * PAGE_SIZE; + unsigned npte = VM_PAGE_TO_PHYS(m[i]) | PG_RW | PG_V; + unsigned opte; + pte = (unsigned *)vtopte(tva); + opte = *pte; + *pte = npte; + if (opte) + pmap_update_1pg(tva); + } +} +/* + * this routine jerks page mappings from the + * kernel -- it is meant only for temporary mappings. + */ +void +pmap_qremove(va, count) + vm_offset_t va; + int count; +{ + int i; + register unsigned *pte; + + for (i = 0; i < count; i++) { + pte = (unsigned *)vtopte(va); + *pte = 0; + pmap_update_1pg(va); + va += PAGE_SIZE; + } +} + +/* + * add a wired page to the kva + * note that in order for the mapping to take effect -- you + * should do a pmap_update after doing the pmap_kenter... + */ +__inline void +pmap_kenter(va, pa) + vm_offset_t va; + register vm_offset_t pa; +{ + register unsigned *pte; + unsigned npte, opte; + + npte = pa | PG_RW | PG_V; + pte = (unsigned *)vtopte(va); + opte = *pte; + *pte = npte; + if (opte) + pmap_update_1pg(va); +} + +/* + * remove a page from the kernel pagetables + */ +__inline void +pmap_kremove(va) + vm_offset_t va; +{ + register unsigned *pte; + + pte = (unsigned *)vtopte(va); + *pte = 0; + pmap_update_1pg(va); +} + /* * determine if a page is managed (memory vs. 
device) */ -static __inline int +static __inline __pure int pmap_is_managed(pa) vm_offset_t pa; { @@ -314,43 +408,21 @@ pmap_is_managed(pa) return 0; } -vm_page_t -pmap_use_pt(pmap, va) - pmap_t pmap; - vm_offset_t va; -{ - vm_offset_t ptepa; - vm_page_t mpte; - - if (va >= UPT_MIN_ADDRESS) - return NULL; - - ptepa = ((vm_offset_t) *pmap_pde(pmap, va)) & PG_FRAME; -#if defined(PMAP_DIAGNOSTIC) - if (!ptepa) - panic("pmap_use_pt: pagetable page missing, va: 0x%x", va); -#endif - - mpte = PHYS_TO_VM_PAGE(ptepa); - ++mpte->hold_count; - return mpte; -} - #if !defined(PMAP_DIAGNOSTIC) __inline #endif -void +int pmap_unuse_pt(pmap, va, mpte) pmap_t pmap; vm_offset_t va; vm_page_t mpte; { if (va >= UPT_MIN_ADDRESS) - return; + return 0; if (mpte == NULL) { vm_offset_t ptepa; - ptepa = ((vm_offset_t) *pmap_pde(pmap, va)) & PG_FRAME; + ptepa = ((vm_offset_t) *pmap_pde(pmap, va)) /* & PG_FRAME */; #if defined(PMAP_DIAGNOSTIC) if (!ptepa) panic("pmap_unuse_pt: pagetable page missing, va: 0x%x", va); @@ -367,23 +439,19 @@ pmap_unuse_pt(pmap, va, mpte) vm_page_unhold(mpte); if ((mpte->hold_count == 0) && - (mpte->wire_count == 0) && - (pmap != kernel_pmap) && - (va < KPT_MIN_ADDRESS)) { + (mpte->wire_count == 0)) { /* * We don't free page-table-pages anymore because it can have a negative * impact on perf at times. Now we just deactivate, and it'll get cleaned - * up if needed... Also, if the page ends up getting used, it will fault - * back into the process address space and be reactivated. + * up if needed... Also, if the page ends up getting used, it will be + * brought back into the process address space by pmap_allocpte and be + * reactivated. */ -#if defined(PMAP_FREE_OLD_PTES) - pmap_page_protect(VM_PAGE_TO_PHYS(mpte), VM_PROT_NONE); - vm_page_free(mpte); -#else mpte->dirty = 0; vm_page_deactivate(mpte); -#endif + return 1; } + return 0; } /* @@ -442,7 +510,7 @@ pmap_bootstrap(firstaddr, loadaddr) v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); va = virtual_avail; - pte = pmap_pte(kernel_pmap, va); + pte = (pt_entry_t *) pmap_pte(kernel_pmap, va); /* * CMAP1/CMAP2 are used for zeroing and copying pages. @@ -464,6 +532,7 @@ pmap_bootstrap(firstaddr, loadaddr) *(int *) CMAP1 = *(int *) CMAP2 = *(int *) PTD = 0; pmap_update(); + } /* @@ -492,10 +561,10 @@ pmap_init(phys_start, phys_end) * Allocate memory for random pmap data structures. Includes the * pv_head_table. 
*/ - s = (vm_size_t) (sizeof(struct pv_entry) * npg); + s = (vm_size_t) (sizeof(struct pv_entry *) * npg); s = round_page(s); addr = (vm_offset_t) kmem_alloc(kernel_map, s); - pv_table = (pv_entry_t) addr; + pv_table = (pv_entry_t *) addr; /* * init the pv free list @@ -529,59 +598,6 @@ pmap_map(virt, start, end, prot) return (virt); } -#if defined(PMAP_KEEP_PDIRS) -int nfreepdir; -caddr_t *pdirlist; -#define NFREEPDIR 3 - -static void * -pmap_getpdir() { - caddr_t *pdir; - if (pdirlist) { - --nfreepdir; - pdir = pdirlist; - pdirlist = (caddr_t *) *pdir; - *pdir = 0; -#if 0 /* Not needed anymore */ - bzero( (caddr_t) pdir, PAGE_SIZE); -#endif - } else { - pdir = (caddr_t *) kmem_alloc(kernel_map, PAGE_SIZE); - } - - return (void *) pdir; -} - -static void -pmap_freepdir(void *pdir) { - if (nfreepdir > NFREEPDIR) { - kmem_free(kernel_map, (vm_offset_t) pdir, PAGE_SIZE); - } else { - int i; - pt_entry_t *s; - s = (pt_entry_t *) pdir; - - /* - * remove wired in kernel mappings - */ - bzero(s + KPTDI, nkpt * PTESIZE); - s[APTDPTDI] = 0; - s[PTDPTDI] = 0; - -#if defined(PMAP_DIAGNOSTIC) - for(i=0;ipm_pdir = pmap_getpdir(); -#else - pmap->pm_pdir = (pd_entry_t *) kmem_alloc(kernel_map, PAGE_SIZE); -#endif + if (pdstackptr > 0) { + --pdstackptr; + pmap->pm_pdir = + (pd_entry_t *)pdstack[pdstackptr]; + } else { + pmap->pm_pdir = + (pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE); + } + + /* + * allocate object for the ptes + */ + pmap->pm_pteobj = vm_object_allocate( OBJT_DEFAULT, + OFF_TO_IDX((KPT_MIN_ADDRESS + 1) - UPT_MIN_ADDRESS)); + + /* + * allocate the page directory page + */ +retry: + ptdpg = vm_page_alloc( pmap->pm_pteobj, OFF_TO_IDX(KPT_MIN_ADDRESS), + VM_ALLOC_ZERO); + if (ptdpg == NULL) { + VM_WAIT; + goto retry; + } + vm_page_wire(ptdpg); + ptdpg->flags &= ~(PG_MAPPED|PG_BUSY); /* not mapped normally */ + ptdpg->valid = VM_PAGE_BITS_ALL; + + pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg)); + if ((ptdpg->flags & PG_ZERO) == 0) + bzero(pmap->pm_pdir, PAGE_SIZE); /* wire in kernel global address entries */ bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE); /* install self-referential address mapping entry */ - *(int *) (pmap->pm_pdir + PTDPTDI) = - ((int) pmap_kextract((vm_offset_t) pmap->pm_pdir)) | PG_V | PG_RW; + *(unsigned *) (pmap->pm_pdir + PTDPTDI) = + VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW | PG_U; pmap->pm_count = 1; } +static __inline int +pmap_release_free_page(pmap, p) + struct pmap *pmap; + vm_page_t p; +{ + int s; + /* + * This code optimizes the case of freeing non-busy + * page-table pages. Those pages are zero now, and + * might as well be placed directly into the zero queue. + */ + s = splvm(); + if (p->flags & PG_BUSY) { + p->flags |= PG_WANTED; + tsleep(p, PVM, "pmaprl", 0); + splx(s); + return 0; + } + + if (p->flags & PG_MAPPED) { + pmap_remove_pte_mapping(VM_PAGE_TO_PHYS(p)); + p->flags &= ~PG_MAPPED; + } + +#if defined(PMAP_DIAGNOSTIC) + if (p->hold_count) + panic("pmap_release: freeing held page table page"); +#endif + /* + * Page directory pages need to have the kernel + * stuff cleared, so they can go into the zero queue also. 
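The pmap_release_free_page() logic above relies on a small invariant: once its mappings are torn down, a released page directory differs from a zero-filled page only in the wired-in kernel PDEs and the two self-referential slots, so clearing just those lets the page go straight onto the zero queue. A user-space sketch of that invariant follows; the slot numbers and structure sizes are assumptions for illustration, not the kernel's definitions.

	#include <assert.h>
	#include <stdint.h>
	#include <string.h>

	#define NPDEPG   1024	/* 4 KB page directory of 4-byte entries */
	#define KPTDI     960	/* first wired-in kernel PDE (assumed value) */
	#define PTDPTDI   959	/* recursive page-table slot (assumed value) */
	#define APTDPTDI 1023	/* alternate-space slot (assumed value) */

	static int
	page_is_zero(const uint32_t *pd)
	{
		int i;

		for (i = 0; i < NPDEPG; i++)
			if (pd[i] != 0)
				return (0);
		return (1);
	}

	/*
	 * Clear the kernel PDEs and the self-referential slots; assuming the
	 * user part was already torn down, the page is now all zeroes and can
	 * be recycled as "pre-zeroed" without another bzero().
	 */
	static void
	scrub_page_directory(uint32_t *pd, int nkpt)
	{
		memset(pd + KPTDI, 0, nkpt * sizeof(pd[0]));
		pd[PTDPTDI] = 0;
		pd[APTDPTDI] = 0;
		assert(page_is_zero(pd));
	}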
+ */ + if (p->pindex == OFF_TO_IDX(KPT_MIN_ADDRESS)) { + unsigned *pde = (unsigned *) pmap->pm_pdir; + bzero(pde + KPTDI, nkpt * PTESIZE); + pde[APTDPTDI] = 0; + pde[PTDPTDI] = 0; + pmap_kremove((vm_offset_t) pmap->pm_pdir); + } + + vm_page_free(p); + TAILQ_REMOVE(&vm_page_queue_free, p, pageq); + TAILQ_INSERT_HEAD(&vm_page_queue_zero, p, pageq); + p->queue = PQ_ZERO; + splx(s); + ++vm_page_zero_count; + return 1; +} + +/* + * Release any resources held by the given physical map. + * Called when a pmap initialized by pmap_pinit is being released. + * Should only be called if the map contains no valid mappings. + */ +void +pmap_release(pmap) + register struct pmap *pmap; +{ + vm_page_t p,n,ptdpg; + vm_object_t object = pmap->pm_pteobj; + + ptdpg = NULL; +retry: + for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) { + n = TAILQ_NEXT(p, listq); + if (p->pindex == OFF_TO_IDX(KPT_MIN_ADDRESS)) { + ptdpg = p; + continue; + } + if (!pmap_release_free_page(pmap, p)) + goto retry; + } + pmap_release_free_page(pmap, ptdpg); + + vm_object_deallocate(object); + if (pdstackptr < PDSTACKMAX) { + pdstack[pdstackptr] = (vm_offset_t) pmap->pm_pdir; + ++pdstackptr; + } else { + kmem_free(kernel_map, (vm_offset_t) pmap->pm_pdir, PAGE_SIZE); + } +} + /* * grow the number of kernel page table entries, if needed */ -static vm_page_t nkpg; -vm_offset_t kernel_vm_end; - void pmap_growkernel(vm_offset_t addr) { @@ -630,14 +754,14 @@ pmap_growkernel(vm_offset_t addr) kernel_vm_end = KERNBASE; nkpt = 0; while (pdir_pde(PTD, kernel_vm_end)) { - kernel_vm_end = (kernel_vm_end + NBPDR) & ~(NBPDR-1); + kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); ++nkpt; } } - addr = (addr + NBPDR) & ~(NBPDR - 1); + addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); while (kernel_vm_end < addr) { if (pdir_pde(PTD, kernel_vm_end)) { - kernel_vm_end = (kernel_vm_end + NBPDR) & ~(NBPDR-1); + kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); continue; } ++nkpt; @@ -659,7 +783,7 @@ pmap_growkernel(vm_offset_t addr) } } *pmap_pde(kernel_pmap, kernel_vm_end) = pdir_pde(PTD, kernel_vm_end); - kernel_vm_end = (kernel_vm_end + NBPDR) & ~(NBPDR-1); + kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); } splx(s); } @@ -685,22 +809,6 @@ pmap_destroy(pmap) } } -/* - * Release any resources held by the given physical map. - * Called when a pmap initialized by pmap_pinit is being released. - * Should only be called if the map contains no valid mappings. - */ -void -pmap_release(pmap) - register struct pmap *pmap; -{ -#if defined(PMAP_KEEP_PDIRS) - pmap_freepdir( (void *)pmap->pm_pdir); -#else - kmem_free(kernel_map, (vm_offset_t) pmap->pm_pdir, PAGE_SIZE); -#endif -} - /* * Add a reference to the specified pmap. 
*/ @@ -713,16 +821,6 @@ pmap_reference(pmap) } } -#define PV_FREELIST_MIN ((PAGE_SIZE / sizeof (struct pv_entry)) / 2) - -/* - * Data for the pv entry allocation mechanism - */ -static int pv_freelistcnt; -static pv_entry_t pv_freelist; -static vm_offset_t pvva; -static int npvvapg; - /* * free the pv_entry back to the free list */ @@ -730,8 +828,6 @@ static __inline void free_pv_entry(pv) pv_entry_t pv; { - if (!pv) - return; ++pv_freelistcnt; pv->pv_next = pv_freelist; pv_freelist = pv; @@ -777,10 +873,6 @@ pmap_alloc_pv_entry() if (npvvapg) { vm_page_t m; - /* - * we do this to keep recursion away - */ - pv_freelistcnt += PV_FREELIST_MIN; /* * allocate a physical page out of the vm system */ @@ -818,14 +910,11 @@ pmap_alloc_pv_entry() entry++; } } - pv_freelistcnt -= PV_FREELIST_MIN; } if (!pv_freelist) panic("get_pv_entry: cannot get a pv_entry_t"); } - - /* * init the pv_entry allocation system */ @@ -839,30 +928,13 @@ init_pv_entries(npg) * kvm space is fairly cheap, be generous!!! (the system can panic if * this is too small.) */ - npvvapg = btoc((npg * PVSPERPAGE) * sizeof(struct pv_entry)); + npvvapg = ((npg * PVSPERPAGE) * sizeof(struct pv_entry) + + PAGE_SIZE - 1) / PAGE_SIZE; pvva = kmem_alloc_pageable(kernel_map, npvvapg * PAGE_SIZE); /* * get the first batch of entries */ - free_pv_entry(get_pv_entry()); -} - -__inline pt_entry_t * -get_ptbase(pmap) - pmap_t pmap; -{ - vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME; - - /* are we current address space or kernel? */ - if (pmap == kernel_pmap || frame == ((int) PTDpde & PG_FRAME)) { - return PTmap; - } - /* otherwise, we are alternate address space */ - if (frame != ((int) APTDpde & PG_FRAME)) { - APTDpde = pmap->pm_pdir[PTDPTDI]; - pmap_update(); - } - return APTmap; + pmap_alloc_pv_entry(); } /* @@ -871,77 +943,70 @@ get_ptbase(pmap) * to the header. Otherwise we must search the list for * the entry. In either case we free the now unused entry. 
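The pv-entry changes above (PV_FREELIST_MIN, free_pv_entry(), get_pv_entry(), pmap_alloc_pv_entry()) amount to a simple push/pop freelist that is refilled a page of entries at a time. A minimal user-space model of that allocator is sketched below; it refills from malloc() instead of the VM system, and all names are illustrative rather than the kernel's.

	#include <stdlib.h>

	struct pv_ent {
		struct pv_ent *pv_next;
		/* payload: pmap pointer, va, page-table page, ... */
	};

	static struct pv_ent *pv_freelist;
	static int pv_freelistcnt;

	static void
	free_pv_ent(struct pv_ent *pv)
	{
		pv->pv_next = pv_freelist;	/* push onto the freelist */
		pv_freelist = pv;
		pv_freelistcnt++;
	}

	static struct pv_ent *
	get_pv_ent(void)
	{
		struct pv_ent *pv = pv_freelist;

		if (pv == NULL) {
			/* the kernel grabs a whole page of entries here */
			pv = malloc(sizeof(*pv));
		} else {
			pv_freelist = pv->pv_next;
			pv_freelistcnt--;
		}
		return (pv);
	}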
*/ -static __inline void -pmap_remove_entry(pmap, pv, va) +static __inline int +pmap_remove_entry(pmap, ppv, va) struct pmap *pmap; - pv_entry_t pv; + pv_entry_t *ppv; vm_offset_t va; { pv_entry_t npv; int s; - s = splhigh(); - if (pmap == pv->pv_pmap && va == pv->pv_va) { - pmap_unuse_pt(pmap, va, pv->pv_ptem); - npv = pv->pv_next; - if (npv) { - *pv = *npv; + + s = splvm(); + for (npv = *ppv; npv; (ppv = &npv->pv_next, npv = *ppv)) { + if (pmap == npv->pv_pmap && va == npv->pv_va) { + int rtval = pmap_unuse_pt(pmap, va, npv->pv_ptem); + *ppv = npv->pv_next; free_pv_entry(npv); - } else { - pv->pv_pmap = NULL; - } - } else { - for (npv = pv->pv_next; npv; (pv = npv, npv = pv->pv_next)) { - if (pmap == npv->pv_pmap && va == npv->pv_va) { - pmap_unuse_pt(pmap, va, npv->pv_ptem); - pv->pv_next = npv->pv_next; - free_pv_entry(npv); - break; - } + splx(s); + return rtval; } } splx(s); + return 0; } /* * pmap_remove_pte: do the things to unmap a page in a process */ -static void -pmap_remove_pte(pmap, ptq, sva) +static +#if !defined(PMAP_DIAGNOSTIC) +__inline +#endif +int +pmap_remove_pte(pmap, ptq, va) struct pmap *pmap; - pt_entry_t *ptq; - vm_offset_t sva; + unsigned *ptq; + vm_offset_t va; { - pt_entry_t oldpte; - vm_offset_t pa; - pv_entry_t pv; + unsigned oldpte; + pv_entry_t *ppv; + int i; + int s; oldpte = *ptq; - if (((int)oldpte) & PG_W) - pmap->pm_stats.wired_count--; - pmap->pm_stats.resident_count--; - - pa = ((vm_offset_t)oldpte) & PG_FRAME; - if (pmap_is_managed(pa)) { - if ((int) oldpte & PG_M) { + *ptq = 0; + if (oldpte & PG_W) + pmap->pm_stats.wired_count -= 1; + pmap->pm_stats.resident_count -= 1; + if (oldpte & PG_MANAGED) { + if (oldpte & PG_M) { #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified(oldpte)) { - printf("pmap_remove: modified page not writable: va: 0x%lx, pte: 0x%lx\n", sva, (int) oldpte); + printf("pmap_remove: modified page not writable: va: 0x%lx, pte: 0x%lx\n", va, (int) oldpte); } #endif - - if (sva < USRSTACK + (UPAGES * PAGE_SIZE) || - (sva >= KERNBASE && (sva < clean_sva || sva >= clean_eva))) { - PHYS_TO_VM_PAGE(pa)->dirty = VM_PAGE_BITS_ALL; + if (va < clean_sva || va >= clean_eva) { + PHYS_TO_VM_PAGE(oldpte)->dirty = VM_PAGE_BITS_ALL; } } - pv = pa_to_pvh(pa); - pmap_remove_entry(pmap, pv, sva); + ppv = pa_to_pvh(oldpte); + return pmap_remove_entry(pmap, ppv, va); } else { - pmap_unuse_pt(pmap, sva, NULL); + return pmap_unuse_pt(pmap, va, NULL); } - *ptq = 0; - return; + return 0; } /* @@ -952,24 +1017,26 @@ pmap_remove_page(pmap, va) struct pmap *pmap; register vm_offset_t va; { - register pt_entry_t *ptbase, *ptq; + register unsigned *ptq; + /* * if there is no pte for this address, just skip it!!! */ - if (*pmap_pde(pmap, va) == 0) + if (*pmap_pde(pmap, va) == 0) { return; + } + /* * get a local va for mappings for this pmap. */ - ptbase = get_ptbase(pmap); - ptq = ptbase + i386_btop(va); + ptq = get_ptbase(pmap) + i386_btop(va); if (*ptq) { - pmap_remove_pte(pmap, ptq, va); + (void) pmap_remove_pte(pmap, ptq, va); pmap_update_1pg(va); } return; } - + /* * Remove the given range of addresses from the specified map. 
* @@ -982,11 +1049,18 @@ pmap_remove(pmap, sva, eva) register vm_offset_t sva; register vm_offset_t eva; { - register pt_entry_t *ptbase; + register unsigned *ptbase; + vm_offset_t va; vm_offset_t pdnxt; vm_offset_t ptpaddr; vm_offset_t sindex, eindex; vm_page_t mpte; + int s; +#if defined(OLDREMOVE) || defined(I386_CPU) + int anyvalid; +#else + int mustremove; +#endif if (pmap == NULL) return; @@ -1001,6 +1075,16 @@ pmap_remove(pmap, sva, eva) return; } +#if !defined(OLDREMOVE) && !defined(I386_CPU) + if ((pmap == kernel_pmap) || + (pmap->pm_pdir[PTDPTDI] == PTDpde)) + mustremove = 1; + else + mustremove = 0; +#else + anyvalid = 0; +#endif + /* * Get a local virtual address for the mappings that are being * worked with. @@ -1010,6 +1094,7 @@ pmap_remove(pmap, sva, eva) sindex = i386_btop(sva); eindex = i386_btop(eva); + for (; sindex < eindex; sindex = pdnxt) { /* @@ -1046,14 +1131,70 @@ pmap_remove(pmap, sva, eva) } for ( ;sindex != pdnxt; sindex++) { - if (ptbase[sindex] == 0) + vm_offset_t va; + if (ptbase[sindex] == 0) { continue; - pmap_remove_pte(pmap, ptbase + sindex, i386_ptob(sindex)); - if (mpte->hold_count == 0 && mpte->wire_count == 0) + } + va = i386_ptob(sindex); +#if defined(OLDREMOVE) || defined(I386_CPU) + anyvalid = 1; +#else + if (mustremove) + pmap_update_1pg(va); +#endif + if (pmap_remove_pte(pmap, + ptbase + sindex, va)) break; } } - pmap_update(); + +#if defined(OLDREMOVE) || defined(I386_CPU) + if (anyvalid) { + /* are we current address space or kernel? */ + if (pmap == kernel_pmap) { + pmap_update(); + } else if (pmap->pm_pdir[PTDPTDI] == PTDpde) { + pmap_update(); + } + } +#endif +} + + +void +pmap_remove_pte_mapping(pa) + vm_offset_t pa; +{ + register pv_entry_t pv, *ppv, npv; + register unsigned *pte, *ptbase; + vm_offset_t va; + int s; + int anyvalid = 0; + + ppv = pa_to_pvh(pa); + + for (pv = *ppv; pv; pv=pv->pv_next) { + unsigned tpte; + struct pmap *pmap; + + anyvalid = 1; + pmap = pv->pv_pmap; + pte = get_ptbase(pmap) + i386_btop(pv->pv_va); + if (tpte = *pte) { + pmap->pm_stats.resident_count--; + *pte = 0; + if (tpte & PG_W) + pmap->pm_stats.wired_count--; + } + } + + if (anyvalid) { + for (pv = *ppv; pv; pv = npv) { + npv = pv->pv_next; + free_pv_entry(pv); + } + *ppv = NULL; + } } /* @@ -1068,17 +1209,15 @@ pmap_remove(pmap, sva, eva) * inefficient because they iteratively called * pmap_remove (slow...) */ -static void +static __inline void pmap_remove_all(pa) vm_offset_t pa; { - register pv_entry_t pv, opv, npv; - register pt_entry_t *pte, *ptbase; + register pv_entry_t pv, *ppv, npv; + register unsigned *pte, *ptbase; vm_offset_t va; - struct pmap *pmap; vm_page_t m; int s; - int anyvalid = 0; #if defined(PMAP_DIAGNOSTIC) /* @@ -1090,62 +1229,49 @@ pmap_remove_all(pa) } #endif - pa = pa & PG_FRAME; - opv = pa_to_pvh(pa); - if (opv->pv_pmap == NULL) - return; - m = PHYS_TO_VM_PAGE(pa); - s = splhigh(); - pv = opv; - while (pv && ((pmap = pv->pv_pmap) != NULL)) { + ppv = pa_to_pvh(pa); + + s = splvm(); + for (pv = *ppv; pv; pv=pv->pv_next) { int tpte; + struct pmap *pmap; + + pmap = pv->pv_pmap; ptbase = get_ptbase(pmap); va = pv->pv_va; pte = ptbase + i386_btop(va); if (tpte = ((int) *pte)) { + pmap->pm_stats.resident_count--; *pte = 0; if (tpte & PG_W) pmap->pm_stats.wired_count--; - pmap->pm_stats.resident_count--; - anyvalid = 1; - /* * Update the vm_page_t clean and reference bits. 
*/ - if ((tpte & PG_M) != 0) { + if (tpte & PG_M) { #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) tpte)) { printf("pmap_remove_all: modified page not writable: va: 0x%lx, pte: 0x%lx\n", va, tpte); } #endif - if (va < USRSTACK + (UPAGES * PAGE_SIZE) || - (va >= KERNBASE && (va < clean_sva || va >= clean_eva))) { + if (va < clean_sva || va >= clean_eva) { m->dirty = VM_PAGE_BITS_ALL; } } } - pv = pv->pv_next; } - if (opv->pv_pmap != NULL) { - pmap_unuse_pt(opv->pv_pmap, opv->pv_va, opv->pv_ptem); - for (pv = opv->pv_next; pv; pv = npv) { - npv = pv->pv_next; - pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); - free_pv_entry(pv); - } + for (pv = *ppv; pv; pv = npv) { + npv = pv->pv_next; + pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); + free_pv_entry(pv); } - - opv->pv_pmap = NULL; - opv->pv_next = NULL; + *ppv = NULL; splx(s); - if (anyvalid) - pmap_update(); } - /* * Set the physical protection on the * specified range of this map as requested. @@ -1156,13 +1282,14 @@ pmap_protect(pmap, sva, eva, prot) vm_offset_t sva, eva; vm_prot_t prot; { - register pt_entry_t *pte; - register pt_entry_t *ptbase; + register unsigned *pte; + register vm_offset_t va; + register unsigned *ptbase; vm_offset_t pdnxt; vm_offset_t ptpaddr; vm_offset_t sindex, eindex; vm_page_t mpte; - int anychanged; + int anyvalid; if (pmap == NULL) @@ -1175,7 +1302,7 @@ pmap_protect(pmap, sva, eva, prot) if (prot & VM_PROT_WRITE) return; - anychanged = 0; + anyvalid = 0; ptbase = get_ptbase(pmap); @@ -1205,26 +1332,177 @@ pmap_protect(pmap, sva, eva, prot) } for (; sindex != pdnxt; sindex++) { - if (ptbase[sindex] == 0) - continue; - pte = ptbase + sindex; - pbits = *(int *)pte; + + unsigned pbits = ptbase[sindex]; + if (pbits & PG_RW) { if (pbits & PG_M) { - vm_page_t m; - vm_offset_t pa = pbits & PG_FRAME; - m = PHYS_TO_VM_PAGE(pa); + vm_page_t m = PHYS_TO_VM_PAGE(pbits); m->dirty = VM_PAGE_BITS_ALL; } - *(int *)pte &= ~(PG_M|PG_RW); - anychanged=1; + ptbase[sindex] = pbits & ~(PG_M|PG_RW); + anyvalid = 1; } } } - if (anychanged) + if (anyvalid) pmap_update(); } +/* + * Create a pv entry for page at pa for + * (pmap, va). + */ +static __inline void +pmap_insert_entry(pmap, va, mpte, pa) + pmap_t pmap; + vm_offset_t va; + vm_page_t mpte; + vm_offset_t pa; +{ + + int s; + pv_entry_t *ppv, pv; + + s = splvm(); + pv = get_pv_entry(); + pv->pv_va = va; + pv->pv_pmap = pmap; + pv->pv_ptem = mpte; + + ppv = pa_to_pvh(pa); + if (*ppv) + pv->pv_next = *ppv; + else + pv->pv_next = NULL; + *ppv = pv; + splx(s); +} + +/* + * this routine is called if the page table page is not + * mapped correctly. + */ +static vm_page_t +_pmap_allocpte(pmap, va, ptepindex) + pmap_t pmap; + vm_offset_t va; + int ptepindex; +{ + vm_offset_t pteva, ptepa; + vm_page_t m; + + /* + * Find or fabricate a new pagetable page + */ +retry: + m = vm_page_lookup(pmap->pm_pteobj, ptepindex); + if (m == NULL) { + m = vm_page_alloc(pmap->pm_pteobj, ptepindex, VM_ALLOC_ZERO); + if (m == NULL) { + VM_WAIT; + goto retry; + } + if ((m->flags & PG_ZERO) == 0) + pmap_zero_page(VM_PAGE_TO_PHYS(m)); + m->flags &= ~(PG_ZERO|PG_BUSY); + m->valid = VM_PAGE_BITS_ALL; + } + + /* + * mark the object writeable + */ + pmap->pm_pteobj->flags |= OBJ_WRITEABLE; + + /* + * Increment the hold count for the page table page + * (denoting a new mapping.) 
+ */ + ++m->hold_count; + + /* + * Activate the pagetable page, if it isn't already + */ + if (m->queue != PQ_ACTIVE) + vm_page_activate(m); + + /* + * Map the pagetable page into the process address space, if + * it isn't already there. + */ + pteva = ((vm_offset_t) vtopte(va)) & PG_FRAME; + ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; + if (ptepa == 0) { + int s; + pv_entry_t pv, *ppv; + + pmap->pm_stats.resident_count++; + + s = splvm(); + pv = get_pv_entry(); + + pv->pv_va = pteva; + pv->pv_pmap = pmap; + pv->pv_next = NULL; + pv->pv_ptem = NULL; + + ptepa = VM_PAGE_TO_PHYS(m); + pmap->pm_pdir[ptepindex] = + (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_MANAGED); + ppv = pa_to_pvh(ptepa); +#if defined(PMAP_DIAGNOSTIC) + if (*ppv) + panic("pmap_allocpte: page is already mapped"); +#endif + *ppv = pv; + splx(s); + m->flags |= PG_MAPPED; + } else { +#if defined(PMAP_DIAGNOSTIC) + if (VM_PAGE_TO_PHYS(m) != (ptepa & PG_FRAME)) + panic("pmap_allocpte: mismatch"); +#endif + pmap->pm_pdir[ptepindex] = + (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_MANAGED); + pmap_update_1pg(pteva); + m->flags |= PG_MAPPED; + } + return m; +} + +static __inline vm_page_t +pmap_allocpte(pmap, va) + pmap_t pmap; + vm_offset_t va; +{ + int ptepindex; + vm_offset_t ptepa; + vm_page_t m; + + /* + * Calculate pagetable page index + */ + ptepindex = va >> PDRSHIFT; + + /* + * Get the page directory entry + */ + ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; + + /* + * If the page table page is mapped, we just increment the + * hold count, and activate it. + */ + if ((ptepa & (PG_RW|PG_U|PG_V)) == (PG_RW|PG_U|PG_V)) { + m = PHYS_TO_VM_PAGE(ptepa); + ++m->hold_count; + if (m->queue != PQ_ACTIVE) + vm_page_activate(m); + return m; + } + return _pmap_allocpte(pmap, va, ptepindex); +} + /* * Insert the given physical page (p) at * the specified virtual address (v) in the @@ -1245,40 +1523,32 @@ pmap_enter(pmap, va, pa, prot, wired) vm_prot_t prot; boolean_t wired; { - register pt_entry_t *pte; + register unsigned *pte; vm_offset_t opa; - register pv_entry_t pv, npv; vm_offset_t origpte, newpte; + vm_offset_t ptepa; + vm_page_t mpte; + int s; if (pmap == NULL) return; - pv = NULL; - - va = va & PG_FRAME; + va &= PG_FRAME; if (va > VM_MAX_KERNEL_ADDRESS) panic("pmap_enter: toobig"); + mpte = NULL; /* * In the case that a page table page is not * resident, we are creating it here. */ - if ((va < VM_MIN_KERNEL_ADDRESS) && - (curproc != NULL) && - (pmap->pm_map->pmap == pmap)) { - vm_offset_t v; - - v = (vm_offset_t) vtopte(va); - /* Fault the pte only if needed: */ - if (*((int *)vtopte(v)) == 0) - (void) vm_fault(pmap->pm_map, - trunc_page(v), VM_PROT_WRITE, FALSE); - } + if (va < UPT_MIN_ADDRESS) + mpte = pmap_allocpte(pmap, va); + pte = pmap_pte(pmap, va); /* * Page Directory table entry not valid, we need a new PT page */ - pte = pmap_pte(pmap, va); if (pte == NULL) { printf("kernel page directory invalid pdir=%p, va=0x%lx\n", pmap->pm_pdir[PTDPTDI], va); @@ -1286,10 +1556,9 @@ pmap_enter(pmap, va, pa, prot, wired) } origpte = *(vm_offset_t *)pte; + pa &= PG_FRAME; opa = origpte & PG_FRAME; - pa = pa & PG_FRAME; - /* * Mapping has not changed, must be protection or wiring change. */ @@ -1315,55 +1584,35 @@ pmap_enter(pmap, va, pa, prot, wired) * We might be turning off write access to the page, * so we go ahead and sense modify status. 
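In the pmap_enter() path above, when an existing managed mapping is only having its protection or wiring changed, the hardware modified bit is folded into the machine-independent page's dirty field before the PTE is rewritten, so a pending modification is not lost. A rough stand-alone sketch of that step, with stand-in constants and page structure rather than the real pmap definitions:

	#include <stdint.h>

	#define PG_RW		0x002	/* i386 writable bit */
	#define PG_M		0x040	/* i386 modified (dirty) bit */
	#define DIRTY_ALL	0xffff

	struct page { uint16_t dirty; };

	static void
	replace_pte(uint32_t *pte, uint32_t newpte, struct page *pg)
	{
		uint32_t old = *pte;

		/* record the hardware M bit before the old PTE disappears */
		if ((old & (PG_RW | PG_M)) == (PG_RW | PG_M))
			pg->dirty = DIRTY_ALL;
		*pte = newpte;
	}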
*/ - if (origpte & PG_M) { + if (origpte & PG_MANAGED) { vm_page_t m; - m = PHYS_TO_VM_PAGE(pa); - m->dirty = VM_PAGE_BITS_ALL; + if (origpte & PG_M) { + m = PHYS_TO_VM_PAGE(pa); + m->dirty = VM_PAGE_BITS_ALL; + } + pa |= PG_MANAGED; } + + if (mpte) + --mpte->hold_count; + goto validate; } /* * Mapping has changed, invalidate old range and fall through to * handle validating new mapping. */ - if (opa) { - pmap_remove_page(pmap, va); - opa = 0; - origpte = 0; - } + if (opa) + (void) pmap_remove_pte(pmap, pte, va); + /* * Enter on the PV list if part of our managed memory Note that we * raise IPL while manipulating pv_table since pmap_enter can be * called at interrupt time. */ if (pmap_is_managed(pa)) { - int s; - - pv = pa_to_pvh(pa); - s = splhigh(); - /* - * No entries yet, use header as the first entry - */ - if (pv->pv_pmap == NULL) { - pv->pv_va = va; - pv->pv_pmap = pmap; - pv->pv_next = NULL; - pv->pv_ptem = NULL; - } - /* - * There is at least one other VA mapping this page. Place - * this entry after the header. - */ - else { - npv = get_pv_entry(); - npv->pv_va = va; - npv->pv_pmap = pmap; - npv->pv_next = pv->pv_next; - pv->pv_next = npv; - pv = npv; - pv->pv_ptem = NULL; - } - splx(s); + pmap_insert_entry(pmap, va, mpte, pa); + pa |= PG_MANAGED; } /* @@ -1383,106 +1632,16 @@ validate: newpte |= PG_W; if (va < UPT_MIN_ADDRESS) newpte |= PG_U; - else if (va < UPT_MAX_ADDRESS) - newpte |= PG_U | PG_RW; /* * if the mapping or permission bits are different, we need * to update the pte. */ if ((origpte & ~(PG_M|PG_A)) != newpte) { - *pte = (pt_entry_t) newpte; + *pte = newpte; if (origpte) pmap_update_1pg(va); } - - if (origpte == 0) { - vm_page_t mpte; - mpte = pmap_use_pt(pmap, va); - if (pv) - pv->pv_ptem = mpte; - } -} - -/* - * Add a list of wired pages to the kva - * this routine is only used for temporary - * kernel mappings that do not need to have - * page modification or references recorded. - * Note that old mappings are simply written - * over. The page *must* be wired. - */ -void -pmap_qenter(va, m, count) - vm_offset_t va; - vm_page_t *m; - int count; -{ - int i; - register pt_entry_t *pte; - - for (i = 0; i < count; i++) { - vm_offset_t tva = va + i * PAGE_SIZE; - pt_entry_t npte = (pt_entry_t) ((int) (VM_PAGE_TO_PHYS(m[i]) | PG_RW | PG_V)); - pt_entry_t opte; - pte = vtopte(tva); - opte = *pte; - *pte = npte; - if (opte) pmap_update_1pg(tva); - } -} -/* - * this routine jerks page mappings from the - * kernel -- it is meant only for temporary mappings. - */ -void -pmap_qremove(va, count) - vm_offset_t va; - int count; -{ - int i; - register pt_entry_t *pte; - - for (i = 0; i < count; i++) { - vm_offset_t tva = va + i * PAGE_SIZE; - pte = vtopte(tva); - *pte = 0; - pmap_update_1pg(tva); - } -} - -/* - * add a wired page to the kva - * note that in order for the mapping to take effect -- you - * should do a pmap_update after doing the pmap_kenter... 
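The rewritten pmap_kenter()/pmap_qenter() (and the deleted pt_entry_t versions above) share one detail worth calling out: a per-page TLB flush is issued only when the previous PTE was non-zero, because an entry that was never valid cannot have been cached by the TLB. A small model of the pattern, with invlpg() standing in for pmap_update_1pg():

	#include <stdint.h>

	#define PG_V	0x001
	#define PG_RW	0x002

	static void
	invlpg(uintptr_t va)
	{
		(void)va;	/* would be a single-page TLB invalidation */
	}

	static void
	kenter(uint32_t *pte, uintptr_t va, uint32_t pa)
	{
		uint32_t opte = *pte;

		*pte = pa | PG_RW | PG_V;
		/* only flush if an existing mapping was overwritten */
		if (opte)
			invlpg(va);
	}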
- */ -void -pmap_kenter(va, pa) - vm_offset_t va; - register vm_offset_t pa; -{ - register pt_entry_t *pte; - pt_entry_t npte, opte; - - npte = (pt_entry_t) ((int) (pa | PG_RW | PG_V)); - pte = vtopte(va); - opte = *pte; - *pte = npte; - if (opte) pmap_update_1pg(va); -} - -/* - * remove a page from the kernel pagetables - */ -void -pmap_kremove(va) - vm_offset_t va; -{ - register pt_entry_t *pte; - - pte = vtopte(va); - *pte = (pt_entry_t) 0; - pmap_update_1pg(va); } /* @@ -1502,46 +1661,27 @@ pmap_enter_quick(pmap, va, pa) vm_offset_t va; register vm_offset_t pa; { - register pt_entry_t *pte; - register pv_entry_t pv, npv; - int s; + register unsigned *pte; + vm_page_t mpte; + + mpte = NULL; + /* + * In the case that a page table page is not + * resident, we are creating it here. + */ + if (va < UPT_MIN_ADDRESS) + mpte = pmap_allocpte(pmap, va); + + pte = (unsigned *)vtopte(va); + if (*pte) + (void) pmap_remove_pte(pmap, pte, va); /* * Enter on the PV list if part of our managed memory Note that we * raise IPL while manipulating pv_table since pmap_enter can be * called at interrupt time. */ - - pte = vtopte(va); - /* a fault on the page table might occur here */ - if (*pte) { - pmap_remove_page(pmap, va); - } - - pv = pa_to_pvh(pa); - s = splhigh(); - /* - * No entries yet, use header as the first entry - */ - if (pv->pv_pmap == NULL) { - pv->pv_pmap = pmap; - pv->pv_va = va; - pv->pv_next = NULL; - } - /* - * There is at least one other VA mapping this page. Place this entry - * after the header. - */ - else { - npv = get_pv_entry(); - npv->pv_va = va; - npv->pv_pmap = pmap; - npv->pv_next = pv->pv_next; - pv->pv_next = npv; - pv = npv; - } - splx(s); - pv->pv_ptem = pmap_use_pt(pmap, va); + pmap_insert_entry(pmap, va, mpte, pa); /* * Increment counters @@ -1551,7 +1691,7 @@ pmap_enter_quick(pmap, va, pa) /* * Now validate mapping with RO protection */ - *pte = (pt_entry_t) ((int) (pa | PG_V | PG_U)); + *pte = pa | PG_V | PG_U | PG_MANAGED; return; } @@ -1583,11 +1723,6 @@ pmap_object_init_pt(pmap, addr, object, pindex, size) return; } - /* - * remove any already used mappings - */ - pmap_remove( pmap, trunc_page(addr), round_page(addr + size)); - /* * if we are processing a major portion of the object, then scan the * entire thing. 
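pmap_object_init_pt() chooses between two scan strategies: when the request covers more than roughly a quarter of the object it walks the whole resident-page list once, otherwise it does one lookup per page index. The sketch below models that choice with plain lists and arrays; the structures and map_page() are stand-ins, not VM code.

	#include <stddef.h>

	struct mpage { size_t pindex; struct mpage *next; };

	struct mobject {
		size_t size;		/* object size in pages */
		struct mpage *memq;	/* list of resident pages */
		struct mpage **index;	/* index -> page, NULL if absent */
	};

	static void map_page(struct mpage *p) { (void)p; /* enter mapping */ }

	static void
	init_pt(struct mobject *obj, size_t pindex, size_t psize)
	{
		if (psize > (obj->size >> 2)) {
			/* large request: one pass over every resident page */
			for (struct mpage *p = obj->memq; p != NULL; p = p->next)
				if (p->pindex >= pindex &&
				    p->pindex < pindex + psize)
					map_page(p);
		} else {
			/* small request: one lookup per index in the range */
			for (size_t i = 0; i < psize; i++)
				if (obj->index[pindex + i] != NULL)
					map_page(obj->index[pindex + i]);
		}
	}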
@@ -1595,9 +1730,9 @@ pmap_object_init_pt(pmap, addr, object, pindex, size) if (psize > (object->size >> 2)) { objpgs = psize; - for (p = object->memq.tqh_first; + for (p = TAILQ_FIRST(&object->memq); ((objpgs > 0) && (p != NULL)); - p = p->listq.tqe_next) { + p = TAILQ_NEXT(p, listq)) { tmpidx = p->pindex; if (tmpidx < pindex) { @@ -1614,7 +1749,7 @@ pmap_object_init_pt(pmap, addr, object, pindex, size) vm_page_deactivate(p); vm_page_hold(p); p->flags |= PG_MAPPED; - pmap_enter_quick(pmap, + pmap_enter_quick(pmap, addr + (tmpidx << PAGE_SHIFT), VM_PAGE_TO_PHYS(p)); vm_page_unhold(p); @@ -1627,14 +1762,13 @@ pmap_object_init_pt(pmap, addr, object, pindex, size) */ for (tmpidx = 0; tmpidx < psize; tmpidx += 1) { p = vm_page_lookup(object, tmpidx + pindex); - if (p && (p->busy == 0) && + if (p && ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && + (p->busy == 0) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { - if (p->queue == PQ_CACHE) - vm_page_deactivate(p); vm_page_hold(p); p->flags |= PG_MAPPED; - pmap_enter_quick(pmap, + pmap_enter_quick(pmap, addr + (tmpidx << PAGE_SHIFT), VM_PAGE_TO_PHYS(p)); vm_page_unhold(p); @@ -1670,6 +1804,7 @@ pmap_prefault(pmap, addra, entry, object) vm_offset_t addr; vm_pindex_t pindex; vm_page_t m; + int pageorder_index; if (entry->object.vm_object != object) return; @@ -1686,13 +1821,16 @@ pmap_prefault(pmap, addra, entry, object) for (i = 0; i < PAGEORDER_SIZE; i++) { vm_object_t lobject; - pt_entry_t *pte; + unsigned *pte; addr = addra + pmap_prefault_pageorder[i]; if (addr < starta || addr >= entry->end) continue; - pte = vtopte(addr); + if ((*pmap_pde(pmap, addr)) == NULL) + continue; + + pte = (unsigned *) vtopte(addr); if (*pte) continue; @@ -1718,16 +1856,12 @@ pmap_prefault(pmap, addra, entry, object) (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if (m->queue == PQ_CACHE) { - if ((cnt.v_free_count + cnt.v_cache_count) < - cnt.v_free_min) - break; vm_page_deactivate(m); } vm_page_hold(m); m->flags |= PG_MAPPED; pmap_enter_quick(pmap, addr, VM_PAGE_TO_PHYS(m)); vm_page_unhold(m); - } } } @@ -1745,7 +1879,7 @@ pmap_change_wiring(pmap, va, wired) vm_offset_t va; boolean_t wired; { - register pt_entry_t *pte; + register unsigned *pte; if (pmap == NULL) return; @@ -1780,7 +1914,73 @@ pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) vm_size_t len; vm_offset_t src_addr; { -} + vm_offset_t addr; + vm_offset_t end_addr = src_addr + len; + vm_offset_t pdnxt; + unsigned src_frame, dst_frame; + pd_entry_t pde; + + if (dst_addr != src_addr) + return; + + src_frame = ((unsigned) src_pmap->pm_pdir[PTDPTDI]) & PG_FRAME; + dst_frame = ((unsigned) dst_pmap->pm_pdir[PTDPTDI]) & PG_FRAME; + + if (src_frame != (((unsigned) PTDpde) & PG_FRAME)) + return; + + if (dst_frame != (((unsigned) APTDpde) & PG_FRAME)) { + APTDpde = (pd_entry_t) (dst_frame | PG_RW | PG_V); + pmap_update(); + } + + for(addr = src_addr; addr < end_addr; addr = pdnxt) { + unsigned *src_pte, *dst_pte; + vm_page_t dstmpte, srcmpte; + vm_offset_t srcptepaddr; + + pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1)); + srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[addr >> PDRSHIFT]; + if (srcptepaddr) { + continue; + } + + srcmpte = PHYS_TO_VM_PAGE(srcptepaddr); + if (srcmpte->hold_count == 0) + continue; + + if (pdnxt > end_addr) + pdnxt = end_addr; + + src_pte = (unsigned *) vtopte(addr); + dst_pte = (unsigned *) avtopte(addr); + while (addr < pdnxt) { + unsigned ptetemp; + ptetemp = *src_pte; + if (ptetemp) { + /* + * We have to check after allocpte for the + * pte still being 
around... allocpte can + * block. + */ + dstmpte = pmap_allocpte(dst_pmap, addr); + if (ptetemp = *src_pte) { + *dst_pte = ptetemp; + dst_pmap->pm_stats.resident_count++; + pmap_insert_entry(dst_pmap, addr, dstmpte, + (ptetemp & PG_FRAME)); + } else { + --dstmpte->hold_count; + } + if (dstmpte->hold_count >= srcmpte->hold_count) + break; + } + addr += PAGE_SIZE; + ++src_pte; + ++dst_pte; + } + } +} /* * Routine: pmap_kernel @@ -1808,7 +2008,6 @@ pmap_zero_page(phys) *(int *) CMAP2 = PG_V | PG_RW | (phys & PG_FRAME); bzero(CADDR2, PAGE_SIZE); - *(int *) CMAP2 = 0; pmap_update_1pg((vm_offset_t) CADDR2); } @@ -1872,24 +2071,22 @@ pmap_page_exists(pmap, pa) pmap_t pmap; vm_offset_t pa; { - register pv_entry_t pv; + register pv_entry_t *ppv, pv; int s; if (!pmap_is_managed(pa)) return FALSE; - pv = pa_to_pvh(pa); - s = splhigh(); + s = splvm(); + ppv = pa_to_pvh(pa); /* * Not found, check current mappings returning immediately if found. */ - if (pv->pv_pmap != NULL) { - for (; pv; pv = pv->pv_next) { - if (pv->pv_pmap == pmap) { - splx(s); - return TRUE; - } + for (pv = *ppv; pv; pv = pv->pv_next) { + if (pv->pv_pmap == pmap) { + splx(s); + return TRUE; } } splx(s); @@ -1906,42 +2103,40 @@ pmap_testbit(pa, bit) register vm_offset_t pa; int bit; { - register pv_entry_t pv; - pt_entry_t *pte; + register pv_entry_t *ppv, pv; + unsigned *pte; int s; if (!pmap_is_managed(pa)) return FALSE; - pv = pa_to_pvh(pa); - s = splhigh(); + s = splvm(); + ppv = pa_to_pvh(pa); /* * Not found, check current mappings returning immediately if found. */ - if (pv->pv_pmap != NULL) { - for (; pv; pv = pv->pv_next) { - /* - * if the bit being tested is the modified bit, then - * mark UPAGES as always modified, and ptes as never - * modified. - */ - if (bit & (PG_A|PG_M)) { - if ((pv->pv_va >= clean_sva) && (pv->pv_va < clean_eva)) { - continue; - } - } - if (!pv->pv_pmap) { -#if defined(PMAP_DIAGNOSTIC) - printf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va); -#endif + for (pv = *ppv ;pv; pv = pv->pv_next) { + /* + * if the bit being tested is the modified bit, then + * mark UPAGES as always modified, and ptes as never + * modified. + */ + if (bit & (PG_A|PG_M)) { + if ((pv->pv_va >= clean_sva) && (pv->pv_va < clean_eva)) { continue; } - pte = pmap_pte(pv->pv_pmap, pv->pv_va); - if ((int) *pte & bit) { - splx(s); - return TRUE; - } + } + if (!pv->pv_pmap) { +#if defined(PMAP_DIAGNOSTIC) + printf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va); +#endif + continue; + } + pte = pmap_pte(pv->pv_pmap, pv->pv_va); + if ((int) *pte & bit) { + splx(s); + return TRUE; } } splx(s); @@ -1957,60 +2152,64 @@ pmap_changebit(pa, bit, setem) int bit; boolean_t setem; { - register pv_entry_t pv; - register pt_entry_t *pte; + register pv_entry_t pv, *ppv; + register unsigned *pte, npte; vm_offset_t va; + int changed; int s; if (!pmap_is_managed(pa)) return; - pv = pa_to_pvh(pa); - s = splhigh(); + s = splvm(); + changed = 0; + ppv = pa_to_pvh(pa); /* * Loop over all current mappings setting/clearing as appropos If * setting RO do we need to clear the VAC? 
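The new pmap_copy() above is what makes fork cheaper: it walks the parent's page directory through the alternate page-table window, skips slots that have no page table, and copies valid PTEs into the child, allocating child page-table pages on demand. A much-simplified two-level model of that walk follows; hold counts, wired pages, and the managed-page bookkeeping are omitted, and the sizes are arbitrary.

	#include <stdlib.h>

	#define NPDE	16
	#define NPTE	16

	struct space {
		unsigned *pd[NPDE];	/* page "directory": NPDE page tables */
		int resident;
	};

	static void
	copy_space(struct space *dst, struct space *src)
	{
		for (int i = 0; i < NPDE; i++) {
			if (src->pd[i] == NULL)		/* no source page table */
				continue;
			for (int j = 0; j < NPTE; j++) {
				unsigned pte = src->pd[i][j];

				if (pte == 0)
					continue;
				if (dst->pd[i] == NULL &&
				    (dst->pd[i] = calloc(NPTE,
				     sizeof(unsigned))) == NULL)
					return;	/* give up on the pre-copy */
				dst->pd[i][j] = pte;
				dst->resident++;
			}
		}
	}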
*/ - if (pv->pv_pmap != NULL) { - for (; pv; pv = pv->pv_next) { - va = pv->pv_va; + for ( pv = *ppv; pv; pv = pv->pv_next) { + va = pv->pv_va; - /* - * don't write protect pager mappings - */ - if (!setem && (bit == PG_RW)) { - if (va >= clean_sva && va < clean_eva) - continue; - } - if (!pv->pv_pmap) { -#if defined(PMAP_DIAGNOSTIC) - printf("Null pmap (cb) at va: 0x%lx\n", va); -#endif + /* + * don't write protect pager mappings + */ + if (!setem && (bit == PG_RW)) { + if (va >= clean_sva && va < clean_eva) continue; - } + } + if (!pv->pv_pmap) { +#if defined(PMAP_DIAGNOSTIC) + printf("Null pmap (cb) at va: 0x%lx\n", va); +#endif + continue; + } - pte = pmap_pte(pv->pv_pmap, va); - if (setem) { - *(int *)pte |= bit; - } else { - if (bit == PG_RW) { - vm_offset_t pbits = *(vm_offset_t *)pte; - if (pbits & PG_M) { - vm_page_t m; - vm_offset_t pa = pbits & PG_FRAME; - m = PHYS_TO_VM_PAGE(pa); - m->dirty = VM_PAGE_BITS_ALL; - } - *(int *)pte &= ~(PG_M|PG_RW); - } else { - *(int *)pte &= ~bit; + pte = pmap_pte(pv->pv_pmap, va); + if (setem) { + *(int *)pte |= bit; + changed = 1; + } else { + vm_offset_t pbits = *(vm_offset_t *)pte; + if (pbits & bit) + changed = 1; + if (bit == PG_RW) { + if (pbits & PG_M) { + vm_page_t m; + vm_offset_t pa = pbits & PG_FRAME; + m = PHYS_TO_VM_PAGE(pa); + m->dirty = VM_PAGE_BITS_ALL; } + *(int *)pte = pbits & ~(PG_M|PG_RW); + } else { + *(int *)pte = pbits & ~bit; } } } splx(s); - pmap_update(); + if (changed) + pmap_update(); } /* @@ -2026,8 +2225,10 @@ pmap_page_protect(phys, prot) if ((prot & VM_PROT_WRITE) == 0) { if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) pmap_changebit(phys, PG_RW, FALSE); - else + else { pmap_remove_all(phys); + pmap_update(); + } } } @@ -2127,7 +2328,7 @@ pmap_mapdev(pa, size) vm_size_t size; { vm_offset_t va, tmpva; - pt_entry_t *pte; + unsigned *pte; size = roundup(size, PAGE_SIZE); @@ -2137,8 +2338,8 @@ pmap_mapdev(pa, size) pa = pa & PG_FRAME; for (tmpva = va; size > 0;) { - pte = vtopte(tmpva); - *pte = (pt_entry_t) ((int) (pa | PG_RW | PG_V | PG_N)); + pte = (unsigned *)vtopte(tmpva); + *pte = pa | PG_RW | PG_V | PG_N; size -= PAGE_SIZE; tmpva += PAGE_SIZE; pa += PAGE_SIZE; @@ -2164,8 +2365,8 @@ pmap_pid_dump(int pid) { pmap = &p->p_vmspace->vm_pmap; for(i=0;i<1024;i++) { pd_entry_t *pde; - pt_entry_t *pte; - unsigned base = i << PD_SHIFT; + unsigned *pte; + unsigned base = i << PDRSHIFT; pde = &pmap->pm_pdir[i]; if (pde && pmap_pde_v(pde)) { @@ -2215,7 +2416,7 @@ pads(pm) pmap_t pm; { unsigned va, i, j; - pt_entry_t *ptep; + unsigned *ptep; if (pm == kernel_pmap) return; @@ -2253,3 +2454,5 @@ pmap_pvdump(pa) printf(" "); } #endif + + diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c index c97e50aca6c1..b81cfc10601b 100644 --- a/sys/i386/i386/trap.c +++ b/sys/i386/i386/trap.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. 
* * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 - * $Id: trap.c,v 1.74 1996/03/27 17:33:39 bde Exp $ + * $Id: trap.c,v 1.75 1996/03/28 05:40:57 dyson Exp $ */ /* @@ -805,25 +805,11 @@ int trapwrite(addr) v = trunc_page(vtopte(va)); - /* - * wire the pte page - */ - if (va < USRSTACK) { - vm_map_pageable(&vm->vm_map, v, round_page(v+1), FALSE); - } - /* * fault the data page */ rv = vm_fault(&vm->vm_map, va, VM_PROT_READ|VM_PROT_WRITE, FALSE); - /* - * unwire the pte page - */ - if (va < USRSTACK) { - vm_map_pageable(&vm->vm_map, v, round_page(v+1), TRUE); - } - --p->p_lock; if (rv != KERN_SUCCESS) diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index 4c1823e40360..e764b2f7def5 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -38,7 +38,7 @@ * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ - * $Id: vm_machdep.c,v 1.61 1996/05/02 10:43:06 phk Exp $ + * $Id: vm_machdep.c,v 1.62 1996/05/02 14:19:55 phk Exp $ */ #include "npx.h" @@ -862,7 +862,7 @@ int vm_page_zero_idle() { vm_page_t m; if ((cnt.v_free_count > cnt.v_interrupt_free_min) && - (m = vm_page_queue_free.tqh_first)) { + (m = TAILQ_FIRST(&vm_page_queue_free))) { TAILQ_REMOVE(&vm_page_queue_free, m, pageq); enable_intr(); pmap_zero_page(VM_PAGE_TO_PHYS(m)); diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h index a843fbfcbddc..f0d7fe695214 100644 --- a/sys/i386/include/pmap.h +++ b/sys/i386/include/pmap.h @@ -42,7 +42,7 @@ * * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91 - * $Id: pmap.h,v 1.37 1996/05/02 14:20:04 phk Exp $ + * $Id: pmap.h,v 1.38 1996/05/02 22:25:18 phk Exp $ */ #ifndef _MACHINE_PMAP_H_ @@ -69,6 +69,7 @@ /* Our various interpretations of the above */ #define PG_W PG_AVAIL1 /* "Wired" pseudoflag */ +#define PG_MANAGED PG_AVAIL2 #define PG_FRAME (~PAGE_MASK) #define PG_PROT (PG_RW|PG_U) /* all protection bits . 
*/ #define PG_N (PG_NC_PWT|PG_NC_PCD) /* Non-cacheable */ @@ -87,12 +88,8 @@ #define VADDR(pdi, pti) ((vm_offset_t)(((pdi)< @@ -171,7 +171,7 @@ interpret: * Map the image header (first page) of the file into * kernel address space */ - error = vm_mmap(kernel_map, /* map */ + error = vm_mmap(exech_map, /* map */ (vm_offset_t *)&imgp->image_header, /* address */ PAGE_SIZE, /* size */ VM_PROT_READ, /* protection */ @@ -206,7 +206,7 @@ interpret: /* free old vnode and name buffer */ vrele(ndp->ni_vp); FREE(ndp->ni_cnd.cn_pnbuf, M_NAMEI); - if (vm_map_remove(kernel_map, (vm_offset_t)imgp->image_header, + if (vm_map_remove(exech_map, (vm_offset_t)imgp->image_header, (vm_offset_t)imgp->image_header + PAGE_SIZE)) panic("execve: header dealloc failed (1)"); @@ -319,7 +319,7 @@ interpret: * free various allocated resources */ kmem_free(exec_map, (vm_offset_t)imgp->stringbase, ARG_MAX); - if (vm_map_remove(kernel_map, (vm_offset_t)imgp->image_header, + if (vm_map_remove(exech_map, (vm_offset_t)imgp->image_header, (vm_offset_t)imgp->image_header + PAGE_SIZE)) panic("execve: header dealloc failed (2)"); vrele(ndp->ni_vp); @@ -331,7 +331,7 @@ exec_fail_dealloc: if (imgp->stringbase != NULL) kmem_free(exec_map, (vm_offset_t)imgp->stringbase, ARG_MAX); if (imgp->image_header && imgp->image_header != (char *)-1) - if (vm_map_remove(kernel_map, (vm_offset_t)imgp->image_header, + if (vm_map_remove(exech_map, (vm_offset_t)imgp->image_header, (vm_offset_t)imgp->image_header + PAGE_SIZE)) panic("execve: header dealloc failed (3)"); if (ndp->ni_vp) diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index c97e50aca6c1..b81cfc10601b 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 - * $Id: trap.c,v 1.74 1996/03/27 17:33:39 bde Exp $ + * $Id: trap.c,v 1.75 1996/03/28 05:40:57 dyson Exp $ */ /* @@ -805,25 +805,11 @@ int trapwrite(addr) v = trunc_page(vtopte(va)); - /* - * wire the pte page - */ - if (va < USRSTACK) { - vm_map_pageable(&vm->vm_map, v, round_page(v+1), FALSE); - } - /* * fault the data page */ rv = vm_fault(&vm->vm_map, va, VM_PROT_READ|VM_PROT_WRITE, FALSE); - /* - * unwire the pte page - */ - if (va < USRSTACK) { - vm_map_pageable(&vm->vm_map, v, round_page(v+1), TRUE); - } - --p->p_lock; if (rv != KERN_SUCCESS) diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index d449b94b8469..cb76f05531c9 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -18,7 +18,7 @@ * 5. Modifications may be freely made to this file if the above conditions * are met. * - * $Id: vfs_bio.c,v 1.88 1996/03/09 06:46:51 dyson Exp $ + * $Id: vfs_bio.c,v 1.89 1996/05/03 21:01:26 phk Exp $ */ /* @@ -509,7 +509,7 @@ brelse(struct buf * bp) /* buffers with no memory */ if (bp->b_bufsize == 0) { bp->b_qindex = QUEUE_EMPTY; - TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist); + TAILQ_INSERT_HEAD(&bufqueues[QUEUE_EMPTY], bp, b_freelist); LIST_REMOVE(bp, b_hash); LIST_INSERT_HEAD(&invalhash, bp, b_hash); bp->b_dev = NODEV; @@ -742,7 +742,7 @@ start: goto trytofreespace; /* can we constitute a new buffer? */ - if ((bp = bufqueues[QUEUE_EMPTY].tqh_first)) { + if ((bp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTY]))) { if (bp->b_qindex != QUEUE_EMPTY) panic("getnewbuf: inconsistent EMPTY queue, qindex=%d", bp->b_qindex); @@ -756,11 +756,11 @@ trytofreespace: * This is desirable because file data is cached in the * VM/Buffer cache even if a buffer is freed. 
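Alongside the buffer-queue conversions above, the queue.h hunk fixes TAILQ_NEXT (it previously read a non-existent teq_next field) and adds TAILQ_PREV, and the rest of the patch replaces open-coded tqh_first/tqe_next accesses with the macros. A stand-alone example of the macro-based traversal style, written against a modern <sys/queue.h>, which keeps the same two-argument TAILQ_NEXT used here:

	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/queue.h>

	struct item {
		int value;
		TAILQ_ENTRY(item) link;
	};

	TAILQ_HEAD(itemhead, item);

	int
	main(void)
	{
		struct itemhead head = TAILQ_HEAD_INITIALIZER(head);
		struct item *it;

		for (int i = 0; i < 3; i++) {
			it = malloc(sizeof(*it));
			it->value = i;
			TAILQ_INSERT_TAIL(&head, it, link);
		}
		/* TAILQ_FIRST/TAILQ_NEXT instead of poking tqh_first/tqe_next */
		for (it = TAILQ_FIRST(&head); it != NULL;
		    it = TAILQ_NEXT(it, link))
			printf("%d\n", it->value);
		return (0);
	}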
*/ - if ((bp = bufqueues[QUEUE_AGE].tqh_first)) { + if ((bp = TAILQ_FIRST(&bufqueues[QUEUE_AGE]))) { if (bp->b_qindex != QUEUE_AGE) panic("getnewbuf: inconsistent AGE queue, qindex=%d", bp->b_qindex); - } else if ((bp = bufqueues[QUEUE_LRU].tqh_first)) { + } else if ((bp = TAILQ_FIRST(&bufqueues[QUEUE_LRU]))) { if (bp->b_qindex != QUEUE_LRU) panic("getnewbuf: inconsistent LRU queue, qindex=%d", bp->b_qindex); @@ -783,7 +783,7 @@ trytofreespace: (vmiospace < maxvmiobufspace)) { --bp->b_usecount; TAILQ_REMOVE(&bufqueues[QUEUE_LRU], bp, b_freelist); - if (bufqueues[QUEUE_LRU].tqh_first != NULL) { + if (TAILQ_FIRST(&bufqueues[QUEUE_LRU]) != NULL) { TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist); goto start; } @@ -1498,9 +1498,9 @@ count_lock_queue() struct buf *bp; count = 0; - for (bp = bufqueues[QUEUE_LOCKED].tqh_first; + for (bp = TAILQ_FIRST(&bufqueues[QUEUE_LOCKED]); bp != NULL; - bp = bp->b_freelist.tqe_next) + bp = TAILQ_NEXT(bp, b_freelist)) count++; return (count); } @@ -1663,7 +1663,6 @@ vfs_clean_pages(struct buf * bp) void vfs_bio_clrbuf(struct buf *bp) { int i; - int remapbuffer = 0; if( bp->b_flags & B_VMIO) { if( (bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE)) { int mask; @@ -1691,14 +1690,12 @@ vfs_bio_clrbuf(struct buf *bp) { bzero(bp->b_data + (i << PAGE_SHIFT) + j * DEV_BSIZE, DEV_BSIZE); } } - bp->b_pages[i]->valid = VM_PAGE_BITS_ALL; + /* bp->b_pages[i]->valid = VM_PAGE_BITS_ALL; */ } bp->b_resid = 0; } else { clrbuf(bp); } - if (remapbuffer) - pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages); } /* diff --git a/sys/sys/queue.h b/sys/sys/queue.h index 8df0499dbd5e..abe8e98ebd91 100644 --- a/sys/sys/queue.h +++ b/sys/sys/queue.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)queue.h 8.5 (Berkeley) 8/20/94 - * $Id: queue.h,v 1.8 1996/03/31 03:21:45 gibbs Exp $ + * $Id: queue.h,v 1.9 1996/04/08 07:51:57 phk Exp $ */ #ifndef _SYS_QUEUE_H_ @@ -268,7 +268,9 @@ struct { \ #define TAILQ_LAST(head) ((head)->tqh_last) -#define TAILQ_NEXT(elm, field) ((elm)->field.teq_next) +#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next) + +#define TAILQ_PREV(elm, field) ((elm)->field.tqe_prev) #define TAILQ_INIT(head) { \ (head)->tqh_first = NULL; \ diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c index 8b1ddf2ab642..514b4716a652 100644 --- a/sys/vm/device_pager.c +++ b/sys/vm/device_pager.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)device_pager.c 8.1 (Berkeley) 6/11/93 - * $Id: device_pager.c,v 1.21 1996/03/09 06:54:41 dyson Exp $ + * $Id: device_pager.c,v 1.22 1996/05/03 21:01:45 phk Exp $ */ #include @@ -182,7 +182,7 @@ dev_pager_dealloc(object) /* * Free up our fake pages. 
*/ - while ((m = object->un_pager.devp.devp_pglist.tqh_first) != 0) { + while ((m = TAILQ_FIRST(&object->un_pager.devp.devp_pglist)) != 0) { TAILQ_REMOVE(&object->un_pager.devp.devp_pglist, m, pageq); dev_pager_putfake(m); } @@ -265,14 +265,14 @@ dev_pager_getfake(paddr) vm_page_t m; int i; - if (dev_pager_fakelist.tqh_first == NULL) { + if (TAILQ_FIRST(&dev_pager_fakelist) == NULL) { m = (vm_page_t) malloc(PAGE_SIZE * 2, M_VMPGDATA, M_WAITOK); for (i = (PAGE_SIZE * 2) / sizeof(*m); i > 0; i--) { TAILQ_INSERT_TAIL(&dev_pager_fakelist, m, pageq); m++; } } - m = dev_pager_fakelist.tqh_first; + m = TAILQ_FIRST(&dev_pager_fakelist); TAILQ_REMOVE(&dev_pager_fakelist, m, pageq); m->flags = PG_BUSY | PG_FICTITIOUS; diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index b6c7db613f56..4feebd56415e 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -39,7 +39,7 @@ * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$ * * @(#)swap_pager.c 8.9 (Berkeley) 3/21/94 - * $Id: swap_pager.c,v 1.64 1996/05/02 14:21:14 phk Exp $ + * $Id: swap_pager.c,v 1.65 1996/05/03 21:01:47 phk Exp $ */ /* @@ -530,7 +530,7 @@ swap_pager_reclaim() /* for each pager queue */ for (k = 0; swp_qs[k]; k++) { - object = swp_qs[k]->tqh_first; + object = TAILQ_FIRST(swp_qs[k]); while (object && (reclaimcount < MAXRECLAIM)) { /* @@ -555,7 +555,7 @@ swap_pager_reclaim() } } } - object = object->pager_object_list.tqe_next; + object = TAILQ_NEXT(object, pager_object_list); } } @@ -956,8 +956,8 @@ swap_pager_getpages(object, m, count, reqpage) spc = NULL; /* we might not use an spc data structure */ - if ((count == 1) && (swap_pager_free.tqh_first != NULL)) { - spc = swap_pager_free.tqh_first; + if ((count == 1) && (TAILQ_FIRST(&swap_pager_free) != NULL)) { + spc = TAILQ_FIRST(&swap_pager_free); TAILQ_REMOVE(&swap_pager_free, spc, spc_list); kva = spc->spc_kva; bp = spc->spc_bp; @@ -1263,9 +1263,9 @@ swap_pager_putpages(object, m, count, sync, rtvals) /* * get a swap pager clean data structure, block until we get it */ - if (swap_pager_free.tqh_first == NULL || - swap_pager_free.tqh_first->spc_list.tqe_next == NULL || - swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) { + if (TAILQ_FIRST(&swap_pager_free) == NULL || + TAILQ_NEXT(TAILQ_FIRST(&swap_pager_free),spc_list) == NULL || + TAILQ_NEXT(TAILQ_NEXT(TAILQ_FIRST(&swap_pager_free),spc_list),spc_list) == NULL) { s = splbio(); if (curproc == pageproc) { retryfree: @@ -1285,9 +1285,9 @@ retryfree: */ if (tsleep(&swap_pager_free, PVM, "swpfre", hz/5)) { swap_pager_sync(); - if (swap_pager_free.tqh_first == NULL || - swap_pager_free.tqh_first->spc_list.tqe_next == NULL || - swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) { + if (TAILQ_FIRST(&swap_pager_free) == NULL || + TAILQ_NEXT(TAILQ_FIRST(&swap_pager_free),spc_list) == NULL || + TAILQ_NEXT(TAILQ_NEXT(TAILQ_FIRST(&swap_pager_free),spc_list),spc_list) == NULL) { splx(s); return VM_PAGER_AGAIN; } @@ -1297,17 +1297,17 @@ retryfree: * the free swap control blocks. 
*/ swap_pager_sync(); - if (swap_pager_free.tqh_first == NULL || - swap_pager_free.tqh_first->spc_list.tqe_next == NULL || - swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) { + if (TAILQ_FIRST(&swap_pager_free) == NULL || + TAILQ_NEXT(TAILQ_FIRST(&swap_pager_free),spc_list) == NULL || + TAILQ_NEXT(TAILQ_NEXT(TAILQ_FIRST(&swap_pager_free),spc_list),spc_list) == NULL) { goto retryfree; } } } else { pagedaemon_wakeup(); - while (swap_pager_free.tqh_first == NULL || - swap_pager_free.tqh_first->spc_list.tqe_next == NULL || - swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) { + while (TAILQ_FIRST(&swap_pager_free) == NULL || + TAILQ_NEXT(TAILQ_FIRST(&swap_pager_free),spc_list) == NULL || + TAILQ_NEXT(TAILQ_NEXT(TAILQ_FIRST(&swap_pager_free),spc_list),spc_list) == NULL) { swap_pager_needflags |= SWAP_FREE_NEEDED; tsleep(&swap_pager_free, PVM, "swpfre", 0); pagedaemon_wakeup(); @@ -1315,7 +1315,7 @@ retryfree: } splx(s); } - spc = swap_pager_free.tqh_first; + spc = TAILQ_FIRST(&swap_pager_free); TAILQ_REMOVE(&swap_pager_free, spc, spc_list); kva = spc->spc_kva; @@ -1482,7 +1482,7 @@ swap_pager_sync() register int s; tspc = NULL; - if (swap_pager_done.tqh_first == NULL) + if (TAILQ_FIRST(&swap_pager_done) == NULL) return; for (;;) { s = splbio(); @@ -1490,7 +1490,7 @@ swap_pager_sync() * Look up and removal from done list must be done at splbio() * to avoid conflicts with swap_pager_iodone. */ - while ((spc = swap_pager_done.tqh_first) != 0) { + while ((spc = TAILQ_FIRST(&swap_pager_done)) != 0) { pmap_qremove(spc->spc_kva, spc->spc_count); swap_pager_finish(spc); TAILQ_REMOVE(&swap_pager_done, spc, spc_list); @@ -1609,7 +1609,7 @@ swap_pager_iodone(bp) wakeup(spc->spc_object); } if ((swap_pager_needflags & SWAP_FREE_NEEDED) || - swap_pager_inuse.tqh_first == 0) { + TAILQ_FIRST(&swap_pager_inuse) == 0) { swap_pager_needflags &= ~SWAP_FREE_NEEDED; wakeup(&swap_pager_free); } @@ -1623,7 +1623,7 @@ swap_pager_iodone(bp) wakeup(&vm_pageout_pages_needed); vm_pageout_pages_needed = 0; } - if ((swap_pager_inuse.tqh_first == NULL) || + if ((TAILQ_FIRST(&swap_pager_inuse) == NULL) || ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min && nswiodone + cnt.v_free_count + cnt.v_cache_count >= cnt.v_free_min)) { pagedaemon_wakeup(); diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index e9f8f16065e5..904270b63a35 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -66,7 +66,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_fault.c,v 1.42 1996/03/09 06:48:26 dyson Exp $ + * $Id: vm_fault.c,v 1.43 1996/03/28 04:53:23 dyson Exp $ */ /* @@ -269,8 +269,8 @@ RetryFault:; int s; UNLOCK_THINGS; - s = splhigh(); - if ((m->flags & PG_BUSY) || m->busy) { + s = splvm(); + if (((m->flags & PG_BUSY) || m->busy)) { m->flags |= PG_WANTED | PG_REFERENCED; cnt.v_intrans++; tsleep(m, PSWP, "vmpfw", 0); @@ -311,7 +311,7 @@ RetryFault:; * Allocate a new page for this object/offset pair. 
*/ m = vm_page_alloc(object, pindex, - vp?VM_ALLOC_NORMAL:VM_ALLOC_ZERO); + (vp || object->backing_object)?VM_ALLOC_NORMAL:VM_ALLOC_ZERO); if (m == NULL) { UNLOCK_AND_DEALLOCATE; @@ -551,9 +551,9 @@ readrest: vm_pindex_t other_pindex, other_pindex_offset; vm_page_t tm; - other_object = object->shadow_head.tqh_first; + other_object = TAILQ_FIRST(&object->shadow_head); if (other_object == first_object) - other_object = other_object->shadow_list.tqe_next; + other_object = TAILQ_NEXT(other_object, shadow_list); if (!other_object) panic("vm_fault: other object missing"); if (other_object && @@ -712,7 +712,7 @@ readrest: m->valid = VM_PAGE_BITS_ALL; pmap_enter(map->pmap, vaddr, VM_PAGE_TO_PHYS(m), prot, wired); - if (vp && (change_wiring == 0) && (wired == 0)) + if ((change_wiring == 0) && (wired == 0)) pmap_prefault(map->pmap, vaddr, entry, first_object); /* @@ -780,8 +780,9 @@ vm_fault_wire(map, start, end) for (va = start; va < end; va += PAGE_SIZE) { while( curproc != pageproc && - (cnt.v_free_count <= cnt.v_pageout_free_min)) + (cnt.v_free_count <= cnt.v_pageout_free_min)) { VM_WAIT; + } rv = vm_fault(map, va, VM_PROT_READ|VM_PROT_WRITE, TRUE); if (rv) { @@ -817,11 +818,10 @@ vm_fault_unwire(map, start, end) for (va = start; va < end; va += PAGE_SIZE) { pa = pmap_extract(pmap, va); - if (pa == (vm_offset_t) 0) { - panic("unwire: page not in pmap"); + if (pa != (vm_offset_t) 0) { + pmap_change_wiring(pmap, va, FALSE); + vm_page_unwire(PHYS_TO_VM_PAGE(pa)); } - pmap_change_wiring(pmap, va, FALSE); - vm_page_unwire(PHYS_TO_VM_PAGE(pa)); } /* diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index 4e6db8c20f18..8e09433587d3 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -59,7 +59,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. 
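The vm_fault() hunk above implements the commit-message note about pre-zeroed pages: when the page will be filled from a vnode pager or copied from a backing object anyway, asking for a pre-zeroed page just wastes one, so VM_ALLOC_ZERO is requested only for pages that will really stay zero-filled. Reduced to its decision, with stand-in flag names:

	#define ALLOC_NORMAL	0
	#define ALLOC_ZERO	1

	static int
	fault_alloc_flags(int has_vnode, int has_backing_object)
	{
		/* a vnode read or a COW copy will overwrite the page anyway */
		return ((has_vnode || has_backing_object) ?
		    ALLOC_NORMAL : ALLOC_ZERO);
	}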
* - * $Id: vm_glue.c,v 1.47 1996/04/09 04:36:58 dyson Exp $ + * $Id: vm_glue.c,v 1.48 1996/05/02 09:34:51 phk Exp $ */ #include "opt_ddb.h" @@ -196,16 +196,15 @@ vm_fork(p1, p2) register struct proc *p1, *p2; { register struct user *up; - vm_offset_t addr, ptaddr, ptpa; int error, i; - vm_map_t map; pmap_t pvp; - vm_page_t stkm; + vm_object_t upobj; while ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min) { VM_WAIT; } +#if 0 /* * avoid copying any of the parent's pagetables or other per-process * objects that reside in the map by marking all of them @@ -213,6 +212,7 @@ vm_fork(p1, p2) */ (void) vm_map_inherit(&p1->p_vmspace->vm_map, UPT_MIN_ADDRESS - UPAGES * PAGE_SIZE, VM_MAX_ADDRESS, VM_INHERIT_NONE); +#endif p2->p_vmspace = vmspace_fork(p1->p_vmspace); if (p1->p_vmspace->vm_shm) @@ -223,61 +223,26 @@ vm_fork(p1, p2) * process */ - addr = (vm_offset_t) kstack; - - map = &p2->p_vmspace->vm_map; pvp = &p2->p_vmspace->vm_pmap; /* * allocate object for the upages */ - p2->p_vmspace->vm_upages_obj = vm_object_allocate( OBJT_DEFAULT, + p2->p_vmspace->vm_upages_obj = upobj = vm_object_allocate( OBJT_DEFAULT, UPAGES); - /* - * put upages into the address space - */ - error = vm_map_find(map, p2->p_vmspace->vm_upages_obj, 0, - &addr, UPT_MIN_ADDRESS - addr, FALSE, VM_PROT_ALL, - VM_PROT_ALL, 0); - if (error != KERN_SUCCESS) - panic("vm_fork: vm_map_find (UPAGES) failed, addr=0x%x, error=%d", addr, error); - - addr += UPAGES * PAGE_SIZE; - /* allocate space for page tables */ - error = vm_map_find(map, NULL, 0, &addr, UPT_MAX_ADDRESS - addr, FALSE, - VM_PROT_ALL, VM_PROT_ALL, 0); - if (error != KERN_SUCCESS) - panic("vm_fork: vm_map_find (PTES) failed, addr=0x%x, error=%d", addr, error); - /* get a kernel virtual address for the UPAGES for this proc */ up = (struct user *) kmem_alloc_pageable(u_map, UPAGES * PAGE_SIZE); if (up == NULL) panic("vm_fork: u_map allocation failed"); - /* - * create a pagetable page for the UPAGES in the process address space - */ - ptaddr = trunc_page((u_int) vtopte(kstack)); - (void) vm_fault(map, ptaddr, VM_PROT_READ|VM_PROT_WRITE, FALSE); - ptpa = pmap_extract(pvp, ptaddr); - if (ptpa == 0) { - panic("vm_fork: no pte for UPAGES"); - } - - /* - * hold the page table page for the kernel stack, and fault them in - */ - stkm = PHYS_TO_VM_PAGE(ptpa); - vm_page_hold(stkm); - for(i=0;ip_vmspace->vm_upages_obj, + while ((m = vm_page_alloc(upobj, i, VM_ALLOC_NORMAL)) == NULL) { VM_WAIT; } @@ -286,24 +251,20 @@ vm_fork(p1, p2) * Wire the page */ vm_page_wire(m); - m->flags &= ~PG_BUSY; + PAGE_WAKEUP(m); /* * Enter the page into both the kernel and the process * address space. */ pmap_enter( pvp, (vm_offset_t) kstack + i * PAGE_SIZE, - VM_PAGE_TO_PHYS(m), VM_PROT_READ|VM_PROT_WRITE, 1); + VM_PAGE_TO_PHYS(m), VM_PROT_READ|VM_PROT_WRITE, TRUE); pmap_kenter(((vm_offset_t) up) + i * PAGE_SIZE, VM_PAGE_TO_PHYS(m)); m->flags &= ~PG_ZERO; + m->flags |= PG_MAPPED; m->valid = VM_PAGE_BITS_ALL; } - /* - * The page table page for the kernel stack should be held in memory - * now. 
- */ - vm_page_unhold(stkm); p2->p_addr = up; @@ -371,33 +332,22 @@ faultin(p) int s; if ((p->p_flag & P_INMEM) == 0) { - vm_map_t map = &p->p_vmspace->vm_map; pmap_t pmap = &p->p_vmspace->vm_pmap; vm_page_t stkm, m; - vm_offset_t ptpa; int error; + vm_object_t upobj = p->p_vmspace->vm_upages_obj; ++p->p_lock; #if defined(SWAP_DEBUG) printf("swapping in %d\n", p->p_pid); #endif - ptaddr = trunc_page((u_int) vtopte(kstack)); - (void) vm_fault(map, ptaddr, VM_PROT_READ|VM_PROT_WRITE, FALSE); - ptpa = pmap_extract(&p->p_vmspace->vm_pmap, ptaddr); - if (ptpa == 0) { - panic("vm_fork: no pte for UPAGES"); - } - stkm = PHYS_TO_VM_PAGE(ptpa); - vm_page_hold(stkm); - for(i=0;ip_vmspace->vm_upages_obj, i)) == NULL) { - if ((m = vm_page_alloc(p->p_vmspace->vm_upages_obj, i, VM_ALLOC_NORMAL)) == NULL) { + if ((m = vm_page_lookup(upobj, i)) == NULL) { + if ((m = vm_page_alloc(upobj, i, VM_ALLOC_NORMAL)) == NULL) { VM_WAIT; goto retry; } @@ -407,10 +357,9 @@ retry: tsleep(m, PVM, "swinuw",0); goto retry; } + m->flags |= PG_BUSY; } vm_page_wire(m); - if (m->valid == VM_PAGE_BITS_ALL) - m->flags &= ~PG_BUSY; splx(s); pmap_enter( pmap, (vm_offset_t) kstack + i * PAGE_SIZE, @@ -419,16 +368,15 @@ retry: VM_PAGE_TO_PHYS(m)); if (m->valid != VM_PAGE_BITS_ALL) { int rv; - rv = vm_pager_get_pages(p->p_vmspace->vm_upages_obj, + rv = vm_pager_get_pages(upobj, &m, 1, 0); if (rv != VM_PAGER_OK) panic("faultin: cannot get upages for proc: %d\n", p->p_pid); m->valid = VM_PAGE_BITS_ALL; - m->flags &= ~PG_BUSY; } + PAGE_WAKEUP(m); + m->flags |= PG_MAPPED; } - vm_page_unhold(stkm); - s = splhigh(); @@ -527,8 +475,12 @@ swapout_procs() outpri = outpri2 = INT_MIN; retry: for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { + struct vmspace *vm; if (!swappable(p)) continue; + + vm = p->p_vmspace; + switch (p->p_stat) { default: continue; @@ -549,22 +501,25 @@ retry: (p->p_slptime <= 4)) continue; - vm_map_reference(&p->p_vmspace->vm_map); + ++vm->vm_refcnt; + vm_map_reference(&vm->vm_map); /* * do not swapout a process that is waiting for VM * datastructures there is a possible deadlock. */ - if (!lock_try_write(&p->p_vmspace->vm_map.lock)) { - vm_map_deallocate(&p->p_vmspace->vm_map); + if (!lock_try_write(&vm->vm_map.lock)) { + vm_map_deallocate(&vm->vm_map); + vmspace_free(vm); continue; } - vm_map_unlock(&p->p_vmspace->vm_map); + vm_map_unlock(&vm->vm_map); /* * If the process has been asleep for awhile and had * most of its pages taken away already, swap it out. */ swapout(p); - vm_map_deallocate(&p->p_vmspace->vm_map); + vm_map_deallocate(&vm->vm_map); + vmspace_free(vm); didswap++; goto retry; } @@ -612,6 +567,7 @@ swapout(p) panic("swapout: upage already missing???"); m->dirty = VM_PAGE_BITS_ALL; vm_page_unwire(m); + vm_page_deactivate(m); pmap_kremove( (vm_offset_t) p->p_addr + PAGE_SIZE * i); } pmap_remove(pmap, (vm_offset_t) kstack, diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index a820f9dcf055..fbad3f1f1b26 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. 
* - * $Id: vm_kern.c,v 1.23 1996/04/24 04:16:44 dyson Exp $ + * $Id: vm_kern.c,v 1.24 1996/05/10 19:28:54 wollman Exp $ */ /* @@ -100,6 +100,7 @@ vm_map_t io_map; vm_map_t clean_map; vm_map_t phys_map; vm_map_t exec_map; +vm_map_t exech_map; vm_map_t u_map; /* @@ -327,22 +328,8 @@ kmem_malloc(map, size, waitflag) vm_map_insert(map, kmem_object, offset, addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0); - /* - * If we can wait, just mark the range as wired (will fault pages as - * necessary). - */ - if (waitflag == M_WAITOK) { - vm_map_unlock(map); - (void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, - FALSE); - vm_map_simplify(map, addr); - return (addr); - } - /* - * If we cannot wait then we must allocate all memory up front, - * pulling it off the active queue to prevent pageout. - */ for (i = 0; i < size; i += PAGE_SIZE) { +retry: m = vm_page_alloc(kmem_object, OFF_TO_IDX(offset + i), (waitflag == M_NOWAIT) ? VM_ALLOC_INTERRUPT : VM_ALLOC_SYSTEM); @@ -352,6 +339,10 @@ kmem_malloc(map, size, waitflag) * aren't on any queues. */ if (m == NULL) { + if (waitflag == M_WAITOK) { + VM_WAIT; + goto retry; + } while (i != 0) { i -= PAGE_SIZE; m = vm_page_lookup(kmem_object, @@ -362,7 +353,7 @@ kmem_malloc(map, size, waitflag) vm_map_unlock(map); return (0); } - m->flags &= ~(PG_BUSY|PG_ZERO); + m->flags &= ~PG_ZERO; m->valid = VM_PAGE_BITS_ALL; } @@ -386,7 +377,9 @@ kmem_malloc(map, size, waitflag) for (i = 0; i < size; i += PAGE_SIZE) { m = vm_page_lookup(kmem_object, OFF_TO_IDX(offset + i)); vm_page_wire(m); - pmap_kenter(addr + i, VM_PAGE_TO_PHYS(m)); + PAGE_WAKEUP(m); + pmap_enter(kernel_pmap, addr + i, VM_PAGE_TO_PHYS(m), + VM_PROT_ALL, 1); } vm_map_unlock(map); diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 619530963283..e0948e49f4d5 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. 
* - * $Id: vm_map.c,v 1.43 1996/04/29 22:04:57 dyson Exp $ + * $Id: vm_map.c,v 1.44 1996/05/03 21:01:49 phk Exp $ */ /* @@ -157,11 +157,15 @@ static int kentry_count; static vm_offset_t mapvm_start, mapvm, mapvmmax; static int mapvmpgcnt; +static struct vm_map_entry *mappool; +static int mappoolcnt; +#define KENTRY_LOW_WATER 128 + static void _vm_map_clip_end __P((vm_map_t, vm_map_entry_t, vm_offset_t)); static void _vm_map_clip_start __P((vm_map_t, vm_map_entry_t, vm_offset_t)); static vm_map_entry_t vm_map_entry_create __P((vm_map_t)); static void vm_map_entry_delete __P((vm_map_t, vm_map_entry_t)); -static void vm_map_entry_dispose __P((vm_map_t, vm_map_entry_t)); +static __inline void vm_map_entry_dispose __P((vm_map_t, vm_map_entry_t)); static void vm_map_entry_unwire __P((vm_map_t, vm_map_entry_t)); static void vm_map_copy_entry __P((vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t)); @@ -214,11 +218,10 @@ vmspace_alloc(min, max, pageable) if (mapvmpgcnt == 0 && mapvm == 0) { int s; - mapvmpgcnt = btoc(cnt.v_page_count * sizeof(struct vm_map_entry)); - s = splhigh(); - mapvm_start = mapvm = kmem_alloc_pageable(kernel_map, mapvmpgcnt * PAGE_SIZE); + mapvmpgcnt = (cnt.v_page_count * sizeof(struct vm_map_entry) + PAGE_SIZE - 1) / PAGE_SIZE; + mapvm_start = mapvm = kmem_alloc_pageable(kernel_map, + mapvmpgcnt * PAGE_SIZE); mapvmmax = mapvm_start + mapvmpgcnt * PAGE_SIZE; - splx(s); if (!mapvm) mapvmpgcnt = 0; } @@ -241,7 +244,6 @@ vmspace_free(vm) panic("vmspace_free: attempt to free already freed vmspace"); if (--vm->vm_refcnt == 0) { - int s, i; /* * Lock the map, to wait out all other references to it. @@ -252,11 +254,17 @@ vmspace_free(vm) (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset, vm->vm_map.max_offset); vm_map_unlock(&vm->vm_map); + while( vm->vm_map.ref_count != 1) tsleep(&vm->vm_map.ref_count, PVM, "vmsfre", 0); --vm->vm_map.ref_count; + vm_object_pmap_remove(vm->vm_upages_obj, + 0, vm->vm_upages_obj->size); + vm_object_deallocate(vm->vm_upages_obj); pmap_release(&vm->vm_pmap); FREE(vm, M_VMMAP); + } else { + wakeup(&vm->vm_map.ref_count); } } @@ -314,45 +322,66 @@ vm_map_init(map, min, max, pageable) lock_init(&map->lock, TRUE); } +/* + * vm_map_entry_dispose: [ internal use only ] + * + * Inverse of vm_map_entry_create. + */ +static __inline void +vm_map_entry_dispose(map, entry) + vm_map_t map; + vm_map_entry_t entry; +{ + int s; + + if (kentry_count < KENTRY_LOW_WATER) { + s = splvm(); + entry->next = kentry_free; + kentry_free = entry; + ++kentry_count; + splx(s); + } else { + entry->next = mappool; + mappool = entry; + ++mappoolcnt; + } +} + /* * vm_map_entry_create: [ internal use only ] * * Allocates a VM map entry for insertion. * No entry fields are filled in. This routine is */ -static struct vm_map_entry *mappool; -static int mappoolcnt; - static vm_map_entry_t vm_map_entry_create(map) vm_map_t map; { vm_map_entry_t entry; int i; - -#define KENTRY_LOW_WATER 64 -#define MAPENTRY_LOW_WATER 128 + int s; /* * This is a *very* nasty (and sort of incomplete) hack!!!! */ if (kentry_count < KENTRY_LOW_WATER) { + s = splvm(); if (mapvmpgcnt && mapvm) { vm_page_t m; m = vm_page_alloc(kernel_object, - OFF_TO_IDX(mapvm - vm_map_min(kernel_map)), + OFF_TO_IDX(mapvm - VM_MIN_KERNEL_ADDRESS), (map == kmem_map) ? 
VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL); + if (m) { int newentries; newentries = (PAGE_SIZE / sizeof(struct vm_map_entry)); vm_page_wire(m); - m->flags &= ~PG_BUSY; + PAGE_WAKEUP(m); m->valid = VM_PAGE_BITS_ALL; - pmap_enter(vm_map_pmap(kmem_map), mapvm, - VM_PAGE_TO_PHYS(m), VM_PROT_DEFAULT, 1); - m->flags |= PG_WRITEABLE|PG_MAPPED; + pmap_kenter(mapvm, VM_PAGE_TO_PHYS(m)); + m->flags |= PG_WRITEABLE; entry = (vm_map_entry_t) mapvm; mapvm += PAGE_SIZE; @@ -364,65 +393,33 @@ vm_map_entry_create(map) } } } + splx(s); } - if (map == kernel_map || map == kmem_map || map == pager_map) { + if (map == kernel_map || map == kmem_map || map == pager_map) { + s = splvm(); entry = kentry_free; if (entry) { kentry_free = entry->next; --kentry_count; - return entry; - } - entry = mappool; - if (entry) { - mappool = entry->next; - --mappoolcnt; - return entry; + } else { + panic("vm_map_entry_create: out of map entries for kernel"); } + splx(s); } else { entry = mappool; if (entry) { mappool = entry->next; --mappoolcnt; - return entry; + } else { + MALLOC(entry, vm_map_entry_t, sizeof(struct vm_map_entry), + M_VMMAPENT, M_WAITOK); } - MALLOC(entry, vm_map_entry_t, sizeof(struct vm_map_entry), - M_VMMAPENT, M_WAITOK); } - if (entry == NULL) - panic("vm_map_entry_create: out of map entries"); return (entry); } -/* - * vm_map_entry_dispose: [ internal use only ] - * - * Inverse of vm_map_entry_create. - */ -static void -vm_map_entry_dispose(map, entry) - vm_map_t map; - vm_map_entry_t entry; -{ - if ((kentry_count < KENTRY_LOW_WATER) || - ((vm_offset_t) entry >= kentry_data && (vm_offset_t) entry < (kentry_data + kentry_data_size)) || - ((vm_offset_t) entry >= mapvm_start && (vm_offset_t) entry < mapvmmax)) { - entry->next = kentry_free; - kentry_free = entry; - ++kentry_count; - return; - } else { - if (mappoolcnt < MAPENTRY_LOW_WATER) { - entry->next = mappool; - mappool = entry; - ++mappoolcnt; - return; - } - FREE(entry, M_VMMAPENT); - } -} - /* * vm_map_entry_{un,}link: * @@ -637,9 +634,9 @@ vm_map_insert(map, object, offset, start, end, prot, max, cow) if ((prev_entry != &map->header) && (prev_entry->end == start) && + ((object == NULL) || (prev_entry->object.vm_object == object)) && (prev_entry->is_a_map == FALSE) && (prev_entry->is_sub_map == FALSE) && - ((object == NULL) || (prev_entry->object.vm_object == object)) && (prev_entry->inheritance == VM_INHERIT_DEFAULT) && (prev_entry->protection == prot) && (prev_entry->max_protection == max) && @@ -664,13 +661,7 @@ vm_map_insert(map, object, offset, start, end, prot, max, cow) prev_entry->end = end; return (KERN_SUCCESS); } - } /* else if ((object == prev_entry->object.vm_object) && - (prev_entry->offset + (prev_entry->end - prev_entry->start) == offset)) { - map->size += (end - prev_entry->end); - prev_entry->end = end; - printf("map optim 1\n"); - return (KERN_SUCCESS); - } */ + } } /* * Create a new entry @@ -711,7 +702,6 @@ vm_map_insert(map, object, offset, start, end, prot, max, cow) /* * Update the free space hint */ - if ((map->first_free == prev_entry) && (prev_entry->end >= new_entry->start)) map->first_free = new_entry; @@ -803,7 +793,7 @@ vm_map_find(map, object, offset, addr, length, find_space, prot, max, cow) start = *addr; if (map == kmem_map) - s = splhigh(); + s = splvm(); vm_map_lock(map); if (find_space) { @@ -866,10 +856,13 @@ vm_map_simplify_entry(map, entry) (prev->wired_count == 0)) { if (map->first_free == prev) map->first_free = entry; + if (map->hint == prev) + map->hint = entry; vm_map_entry_unlink(map, prev); entry->start = 
prev->start; entry->offset = prev->offset; - vm_object_deallocate(prev->object.vm_object); + if (prev->object.vm_object) + vm_object_deallocate(prev->object.vm_object); vm_map_entry_dispose(map, prev); } } @@ -891,9 +884,12 @@ vm_map_simplify_entry(map, entry) (next->wired_count == 0)) { if (map->first_free == next) map->first_free = entry; + if (map->hint == next) + map->hint = entry; vm_map_entry_unlink(map, next); entry->end = next->end; - vm_object_deallocate(next->object.vm_object); + if (next->object.vm_object) + vm_object_deallocate(next->object.vm_object); vm_map_entry_dispose(map, next); } } @@ -1131,7 +1127,6 @@ vm_map_protect(map, start, end, new_prot, set_max) */ if (current->protection != old_prot) { - #define MASK(entry) ((entry)->copy_on_write ? ~VM_PROT_WRITE : \ VM_PROT_ALL) #define max(a,b) ((a) > (b) ? (a) : (b)) @@ -1585,7 +1580,7 @@ vm_map_clean(map, start, end, syncio, invalidate) * The map in question should be locked. * [This is the reason for this routine's existence.] */ -static void +static __inline void vm_map_entry_unwire(map, entry) vm_map_t map; register vm_map_entry_t entry; @@ -1599,7 +1594,7 @@ vm_map_entry_unwire(map, entry) * * Deallocate the given entry from the target map. */ -static void +static __inline void vm_map_entry_delete(map, entry) register vm_map_t map; register vm_map_entry_t entry; @@ -1658,7 +1653,9 @@ vm_map_delete(map, start, end) * Save the free space hint */ - if (map->first_free->start >= start) + if (entry == &map->header) { + map->first_free = &map->header; + } else if (map->first_free->start >= start) map->first_free = entry->prev; /* @@ -1667,14 +1664,16 @@ vm_map_delete(map, start, end) while ((entry != &map->header) && (entry->start < end)) { vm_map_entry_t next; - register vm_offset_t s, e; - register vm_object_t object; + vm_offset_t s, e; + vm_object_t object; + vm_ooffset_t offset; vm_map_clip_end(map, entry, end); next = entry->next; s = entry->start; e = entry->end; + offset = entry->offset; /* * Unwire before removing addresses from the pmap; otherwise, @@ -1691,15 +1690,16 @@ vm_map_delete(map, start, end) * which are sharing it. */ - if (object == kernel_object || object == kmem_object) - vm_object_page_remove(object, OFF_TO_IDX(entry->offset), - OFF_TO_IDX(entry->offset + (e - s)), FALSE); - else if (!map->is_main_map) + if (object == kernel_object || object == kmem_object) { + vm_object_page_remove(object, OFF_TO_IDX(offset), + OFF_TO_IDX(offset + (e - s)), FALSE); + } else if (!map->is_main_map) { vm_object_pmap_remove(object, - OFF_TO_IDX(entry->offset), - OFF_TO_IDX(entry->offset + (e - s))); - else + OFF_TO_IDX(offset), + OFF_TO_IDX(offset + (e - s))); + } else { pmap_remove(map->pmap, s, e); + } /* * Delete the entry (which may delete the object) only after @@ -1729,7 +1729,7 @@ vm_map_remove(map, start, end) register int result, s = 0; if (map == kmem_map) - s = splhigh(); + s = splvm(); vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); @@ -1806,16 +1806,6 @@ vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry) if (src_entry->is_sub_map || dst_entry->is_sub_map) return; - if (dst_entry->object.vm_object != NULL) - printf("vm_map_copy_entry: dst_entry object not NULL!\n"); - - /* - * If our destination map was wired down, unwire it now. 
- */ - - if (dst_entry->wired_count != 0) - vm_map_entry_unwire(dst_map, dst_entry); - if (src_entry->wired_count == 0) { boolean_t src_needs_copy; @@ -1847,35 +1837,28 @@ vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry) - src_entry->start))); } } + /* * Make a copy of the object. */ - temp_pindex = OFF_TO_IDX(dst_entry->offset); - vm_object_copy(src_entry->object.vm_object, - OFF_TO_IDX(src_entry->offset), - &dst_entry->object.vm_object, - &temp_pindex, - &src_needs_copy); - dst_entry->offset = IDX_TO_OFF(temp_pindex); - /* - * If we didn't get a copy-object now, mark the source map - * entry so that a shadow will be created to hold its changed - * pages. - */ - if (src_needs_copy) + if (src_entry->object.vm_object) { + if ((src_entry->object.vm_object->handle == NULL) && + (src_entry->object.vm_object->type == OBJT_DEFAULT || + src_entry->object.vm_object->type == OBJT_SWAP)) + vm_object_collapse(src_entry->object.vm_object); + ++src_entry->object.vm_object->ref_count; + src_entry->copy_on_write = TRUE; src_entry->needs_copy = TRUE; - /* - * The destination always needs to have a shadow created. - */ - dst_entry->needs_copy = TRUE; - - /* - * Mark the entries copy-on-write, so that write-enabling the - * entry won't make copy-on-write pages writable. - */ - src_entry->copy_on_write = TRUE; - dst_entry->copy_on_write = TRUE; + dst_entry->needs_copy = TRUE; + dst_entry->copy_on_write = TRUE; + dst_entry->object.vm_object = + src_entry->object.vm_object; + dst_entry->offset = src_entry->offset; + } else { + dst_entry->object.vm_object = NULL; + dst_entry->offset = 0; + } pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start, dst_entry->end - dst_entry->start, src_entry->start); @@ -1962,7 +1945,6 @@ vmspace_fork(vm1) /* * Clone the entry and link into the map. */ - new_entry = vm_map_entry_create(new_map); *new_entry = *old_entry; new_entry->wired_count = 0; @@ -2251,11 +2233,13 @@ vm_map_simplify(map, start) vm_map_entry_t prev_entry; vm_map_lock(map); - if ( - (vm_map_lookup_entry(map, start, &this_entry)) && + if ((vm_map_lookup_entry(map, start, &this_entry)) && ((prev_entry = this_entry->prev) != &map->header) && - (prev_entry->end == start) && + (prev_entry->object.vm_object == this_entry->object.vm_object) && + ((prev_entry->offset + (prev_entry->end - prev_entry->start)) + == this_entry->offset) && + (map->is_main_map) && (prev_entry->is_a_map == FALSE) && @@ -2270,18 +2254,15 @@ vm_map_simplify(map, start) (prev_entry->wired_count == this_entry->wired_count) && (prev_entry->copy_on_write == this_entry->copy_on_write) && - (prev_entry->needs_copy == this_entry->needs_copy) && - - (prev_entry->object.vm_object == this_entry->object.vm_object) && - ((prev_entry->offset + (prev_entry->end - prev_entry->start)) - == this_entry->offset) - ) { + (prev_entry->needs_copy == this_entry->needs_copy)) { if (map->first_free == this_entry) map->first_free = prev_entry; - SAVE_HINT(map, prev_entry); + if (map->hint == this_entry) + SAVE_HINT(map, prev_entry); vm_map_entry_unlink(map, this_entry); prev_entry->end = this_entry->end; - vm_object_deallocate(this_entry->object.vm_object); + if (this_entry->object.vm_object) + vm_object_deallocate(this_entry->object.vm_object); vm_map_entry_dispose(map, this_entry); } vm_map_unlock(map); diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c index ade41bc40d2e..648130904414 100644 --- a/sys/vm/vm_meter.c +++ b/sys/vm/vm_meter.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)vm_meter.c 8.4 (Berkeley) 1/4/94 - * $Id: vm_meter.c,v 1.13 1995/12/14 09:55:02 phk Exp $ + * $Id: vm_meter.c,v 1.14 1996/03/11 06:11:40 hsu Exp $ */ #include @@ -136,9 +136,9 @@ vmtotal SYSCTL_HANDLER_ARGS /* * Mark all objects as inactive. */ - for (object = vm_object_list.tqh_first; + for (object = TAILQ_FIRST(&vm_object_list); object != NULL; - object = object->object_list.tqe_next) + object = TAILQ_NEXT(object,object_list)) object->flags &= ~OBJ_ACTIVE; /* * Calculate process statistics. @@ -191,9 +191,9 @@ vmtotal SYSCTL_HANDLER_ARGS /* * Calculate object memory usage statistics. */ - for (object = vm_object_list.tqh_first; + for (object = TAILQ_FIRST(&vm_object_list); object != NULL; - object = object->object_list.tqe_next) { + object = TAILQ_NEXT(object, object_list)) { totalp->t_vm += num_pages(object->size); totalp->t_rm += object->resident_page_count; if (object->flags & OBJ_ACTIVE) { diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index ede01dc19b68..16f8ebec2b74 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -38,7 +38,7 @@ * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$ * * @(#)vm_mmap.c 8.4 (Berkeley) 1/12/94 - * $Id: vm_mmap.c,v 1.40 1996/03/16 15:00:05 davidg Exp $ + * $Id: vm_mmap.c,v 1.41 1996/05/03 21:01:51 phk Exp $ */ /* @@ -802,8 +802,7 @@ vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) /* * "Pre-fault" resident pages. */ - if ((map != kernel_map) && - (type == OBJT_VNODE) && (map->pmap != NULL)) { + if ((type == OBJT_VNODE) && (map->pmap != NULL)) { pmap_object_init_pt(map->pmap, *addr, object, (vm_pindex_t) OFF_TO_IDX(foff), size); } diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 6b180ae013ff..187e7773b2db 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_object.c,v 1.67 1996/03/29 06:28:48 davidg Exp $ + * $Id: vm_object.c,v 1.68 1996/04/24 04:16:45 dyson Exp $ */ /* @@ -278,7 +278,7 @@ vm_object_deallocate(object) (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) { vm_object_t robject; - robject = object->shadow_head.tqh_first; + robject = TAILQ_FIRST(&object->shadow_head); if ((robject != NULL) && (robject->handle == NULL) && (robject->type == OBJT_DEFAULT || @@ -288,7 +288,7 @@ vm_object_deallocate(object) object->ref_count += 2; do { - s = splhigh(); + s = splvm(); while (robject->paging_in_progress) { robject->flags |= OBJ_PIPWNT; tsleep(robject, PVM, "objde1", 0); @@ -375,7 +375,7 @@ vm_object_terminate(object) /* * wait for the pageout daemon to be done with the object */ - s = splhigh(); + s = splvm(); while (object->paging_in_progress) { object->flags |= OBJ_PIPWNT; tsleep(object, PVM, "objtrm", 0); @@ -402,9 +402,10 @@ vm_object_terminate(object) * Now free the pages. For internal objects, this also removes them * from paging queues. 
*/ - while ((p = object->memq.tqh_first) != NULL) { + while ((p = TAILQ_FIRST(&object->memq)) != NULL) { if (p->flags & PG_BUSY) printf("vm_object_terminate: freeing busy page\n"); + vm_page_protect(p, VM_PROT_NONE); PAGE_WAKEUP(p); vm_page_free(p); cnt.v_pfree++; @@ -478,12 +479,12 @@ vm_object_page_clean(object, start, end, syncio, lockflag) if ((tstart == 0) && (tend == object->size)) { object->flags &= ~(OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY); } - for(p = object->memq.tqh_first; p; p = p->listq.tqe_next) + for(p = TAILQ_FIRST(&object->memq); p; p = TAILQ_NEXT(p, listq)) p->flags |= PG_CLEANCHK; rescan: - for(p = object->memq.tqh_first; p; p = np) { - np = p->listq.tqe_next; + for(p = TAILQ_FIRST(&object->memq); p; p = np) { + np = TAILQ_NEXT(p, listq); pi = p->pindex; if (((p->flags & PG_CLEANCHK) == 0) || @@ -499,7 +500,7 @@ rescan: continue; } - s = splhigh(); + s = splvm(); if ((p->flags & PG_BUSY) || p->busy) { p->flags |= PG_WANTED|PG_REFERENCED; tsleep(p, PVM, "vpcwai", 0); @@ -597,8 +598,8 @@ vm_object_deactivate_pages(object) { register vm_page_t p, next; - for (p = object->memq.tqh_first; p != NULL; p = next) { - next = p->listq.tqe_next; + for (p = TAILQ_FIRST(&object->memq); p != NULL; p = next) { + next = TAILQ_NEXT(p, listq); vm_page_deactivate(p); } } @@ -613,7 +614,7 @@ vm_object_cache_trim() register vm_object_t object; while (vm_object_cached > vm_object_cache_max) { - object = vm_object_cached_list.tqh_first; + object = TAILQ_FIRST(&vm_object_cached_list); vm_object_reference(object); pager_cache(object, FALSE); @@ -641,7 +642,7 @@ vm_object_pmap_copy(object, start, end) if (object == NULL || (object->flags & OBJ_WRITEABLE) == 0) return; - for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) { + for (p = TAILQ_FIRST(&object->memq); p != NULL; p = TAILQ_NEXT(p, listq)) { vm_page_protect(p, VM_PROT_READ); } @@ -665,7 +666,7 @@ vm_object_pmap_remove(object, start, end) register vm_page_t p; if (object == NULL) return; - for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) { + for (p = TAILQ_FIRST(&object->memq); p != NULL; p = TAILQ_NEXT(p, listq)) { if (p->pindex >= start && p->pindex < end) vm_page_protect(p, VM_PROT_NONE); } @@ -808,17 +809,16 @@ vm_object_qcollapse(object) backing_object_paging_offset_index = OFF_TO_IDX(backing_object->paging_offset); paging_offset_index = OFF_TO_IDX(object->paging_offset); size = object->size; - p = backing_object->memq.tqh_first; + p = TAILQ_FIRST(&backing_object->memq); while (p) { vm_page_t next; - next = p->listq.tqe_next; + next = TAILQ_NEXT(p, listq); if ((p->flags & (PG_BUSY | PG_FICTITIOUS)) || (p->queue == PQ_CACHE) || !p->valid || p->hold_count || p->wire_count || p->busy) { p = next; continue; } - vm_page_protect(p, VM_PROT_NONE); new_pindex = p->pindex - backing_offset_index; if (p->pindex < backing_offset_index || new_pindex >= size) { @@ -826,6 +826,7 @@ vm_object_qcollapse(object) swap_pager_freespace(backing_object, backing_object_paging_offset_index+p->pindex, 1); + vm_page_protect(p, VM_PROT_NONE); vm_page_free(p); } else { pp = vm_page_lookup(object, new_pindex); @@ -834,6 +835,7 @@ vm_object_qcollapse(object) if (backing_object->type == OBJT_SWAP) swap_pager_freespace(backing_object, backing_object_paging_offset_index + p->pindex, 1); + vm_page_protect(p, VM_PROT_NONE); vm_page_free(p); } else { if (backing_object->type == OBJT_SWAP) @@ -930,7 +932,7 @@ vm_object_collapse(object) * shadow them. 
*/ - while ((p = backing_object->memq.tqh_first) != 0) { + while ((p = TAILQ_FIRST(&backing_object->memq)) != 0) { new_pindex = p->pindex - backing_offset_index; @@ -1071,7 +1073,7 @@ vm_object_collapse(object) * here. */ - for (p = backing_object->memq.tqh_first; p; p = p->listq.tqe_next) { + for (p = TAILQ_FIRST(&backing_object->memq); p; p = TAILQ_NEXT(p, listq)) { new_pindex = p->pindex - backing_offset_index; /* @@ -1160,24 +1162,29 @@ vm_object_page_remove(object, start, end, clean_only) again: size = end - start; if (size > 4 || size >= object->size / 4) { - for (p = object->memq.tqh_first; p != NULL; p = next) { - next = p->listq.tqe_next; + for (p = TAILQ_FIRST(&object->memq); p != NULL; p = next) { + next = TAILQ_NEXT(p, listq); if ((start <= p->pindex) && (p->pindex < end)) { - if (p->wire_count != 0) { vm_page_protect(p, VM_PROT_NONE); p->valid = 0; continue; } - s = splhigh(); + /* + * The busy flags are only cleared at + * interrupt -- minimize the spl transitions + */ if ((p->flags & PG_BUSY) || p->busy) { - p->flags |= PG_WANTED; - tsleep(p, PVM, "vmopar", 0); + s = splvm(); + if ((p->flags & PG_BUSY) || p->busy) { + p->flags |= PG_WANTED; + tsleep(p, PVM, "vmopar", 0); + splx(s); + goto again; + } splx(s); - goto again; } - splx(s); if (clean_only) { vm_page_test_dirty(p); @@ -1199,14 +1206,20 @@ again: size -= 1; continue; } - s = splhigh(); + /* + * The busy flags are only cleared at + * interrupt -- minimize the spl transitions + */ if ((p->flags & PG_BUSY) || p->busy) { - p->flags |= PG_WANTED; - tsleep(p, PVM, "vmopar", 0); + s = splvm(); + if ((p->flags & PG_BUSY) || p->busy) { + p->flags |= PG_WANTED; + tsleep(p, PVM, "vmopar", 0); + splx(s); + goto again; + } splx(s); - goto again; } - splx(s); if (clean_only) { vm_page_test_dirty(p); if (p->valid & p->dirty) { @@ -1391,9 +1404,9 @@ DDB_vm_object_check() * make sure that internal objs are in a map somewhere * and none have zero ref counts. */ - for (object = vm_object_list.tqh_first; + for (object = TAILQ_FIRST(&vm_object_list); object != NULL; - object = object->object_list.tqe_next) { + object = TAILQ_NEXT(object, object_list)) { if (object->handle == NULL && (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) { if (object->ref_count == 0) { @@ -1436,14 +1449,14 @@ vm_object_print(iobject, full, dummy3, dummy4) (int) object->paging_offset, (int) object->backing_object, (int) object->backing_object_offset); printf("cache: next=%p, prev=%p\n", - object->cached_list.tqe_next, object->cached_list.tqe_prev); + TAILQ_NEXT(object, cached_list), TAILQ_PREV(object, cached_list)); if (!full) return; indent += 2; count = 0; - for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) { + for (p = TAILQ_FIRST(&object->memq); p != NULL; p = TAILQ_NEXT(p, listq)) { if (count == 0) iprintf("memory:="); else if (count == 6) { diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 6f10cea516f6..7a95941a2b6b 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. 
* * from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91 - * $Id: vm_page.c,v 1.49 1996/03/09 06:56:39 dyson Exp $ + * $Id: vm_page.c,v 1.50 1996/03/28 04:53:27 dyson Exp $ */ /* @@ -140,7 +140,6 @@ static u_short vm_page_dev_bsize_chunks[] = { static inline __pure int vm_page_hash __P((vm_object_t object, vm_pindex_t pindex)) __pure2; -static void vm_page_unqueue __P((vm_page_t )); /* * vm_set_page_size: @@ -244,7 +243,7 @@ vm_page_startup(starta, enda, vaddr) vm_page_buckets = (struct pglist *) vaddr; bucket = vm_page_buckets; if (vm_page_bucket_count == 0) { - vm_page_bucket_count = 2; + vm_page_bucket_count = 1; while (vm_page_bucket_count < atop(total)) vm_page_bucket_count <<= 1; } @@ -383,7 +382,7 @@ vm_page_hash(object, pindex) * The object and page must be locked, and must be splhigh. */ -inline void +__inline void vm_page_insert(m, object, pindex) register vm_page_t m; register vm_object_t object; @@ -432,7 +431,7 @@ vm_page_insert(m, object, pindex) * The object and page must be locked, and at splhigh. */ -inline void +__inline void vm_page_remove(m) register vm_page_t m; { @@ -487,14 +486,13 @@ vm_page_lookup(object, pindex) bucket = &vm_page_buckets[vm_page_hash(object, pindex)]; - s = splhigh(); - for (m = bucket->tqh_first; m != NULL; m = m->hashq.tqe_next) { + s = splvm(); + for (m = TAILQ_FIRST(bucket); m != NULL; m = TAILQ_NEXT(m,hashq)) { if ((m->object == object) && (m->pindex == pindex)) { splx(s); return (m); } } - splx(s); return (NULL); } @@ -515,7 +513,7 @@ vm_page_rename(m, new_object, new_pindex) { int s; - s = splhigh(); + s = splvm(); vm_page_remove(m); vm_page_insert(m, new_object, new_pindex); splx(s); @@ -524,7 +522,7 @@ vm_page_rename(m, new_object, new_pindex) /* * vm_page_unqueue must be called at splhigh(); */ -static inline void +__inline void vm_page_unqueue(vm_page_t m) { int queue = m->queue; @@ -575,19 +573,19 @@ vm_page_alloc(object, pindex, page_req) page_req = VM_ALLOC_SYSTEM; }; - s = splhigh(); + s = splvm(); switch (page_req) { case VM_ALLOC_NORMAL: if (cnt.v_free_count >= cnt.v_free_reserved) { - m = vm_page_queue_free.tqh_first; + m = TAILQ_FIRST(&vm_page_queue_free); if (m == NULL) { --vm_page_zero_count; - m = vm_page_queue_zero.tqh_first; + m = TAILQ_FIRST(&vm_page_queue_zero); } } else { - m = vm_page_queue_cache.tqh_first; + m = TAILQ_FIRST(&vm_page_queue_cache); if (m == NULL) { splx(s); pagedaemon_wakeup(); @@ -598,14 +596,14 @@ vm_page_alloc(object, pindex, page_req) case VM_ALLOC_ZERO: if (cnt.v_free_count >= cnt.v_free_reserved) { - m = vm_page_queue_zero.tqh_first; + m = TAILQ_FIRST(&vm_page_queue_zero); if (m) { --vm_page_zero_count; } else { - m = vm_page_queue_free.tqh_first; + m = TAILQ_FIRST(&vm_page_queue_free); } } else { - m = vm_page_queue_cache.tqh_first; + m = TAILQ_FIRST(&vm_page_queue_cache); if (m == NULL) { splx(s); pagedaemon_wakeup(); @@ -618,13 +616,13 @@ vm_page_alloc(object, pindex, page_req) if ((cnt.v_free_count >= cnt.v_free_reserved) || ((cnt.v_cache_count == 0) && (cnt.v_free_count >= cnt.v_interrupt_free_min))) { - m = vm_page_queue_free.tqh_first; + m = TAILQ_FIRST(&vm_page_queue_free); if (m == NULL) { --vm_page_zero_count; - m = vm_page_queue_zero.tqh_first; + m = TAILQ_FIRST(&vm_page_queue_zero); } } else { - m = vm_page_queue_cache.tqh_first; + m = TAILQ_FIRST(&vm_page_queue_cache); if (m == NULL) { splx(s); pagedaemon_wakeup(); @@ -635,10 +633,10 @@ vm_page_alloc(object, pindex, page_req) case VM_ALLOC_INTERRUPT: if (cnt.v_free_count > 0) { - m = vm_page_queue_free.tqh_first; + m = 
TAILQ_FIRST(&vm_page_queue_free); if (m == NULL) { --vm_page_zero_count; - m = vm_page_queue_zero.tqh_first; + m = TAILQ_FIRST(&vm_page_queue_zero); } } else { splx(s); @@ -663,8 +661,8 @@ vm_page_alloc(object, pindex, page_req) m->flags = PG_BUSY; } m->wire_count = 0; - m->hold_count = 0; m->act_count = 0; + m->hold_count = 0; m->busy = 0; m->valid = 0; m->dirty = 0; @@ -688,114 +686,35 @@ vm_page_alloc(object, pindex, page_req) } /* - * This interface is for merging with malloc() someday. - * Even if we never implement compaction so that contiguous allocation - * works after initialization time, malloc()'s data structures are good - * for statistics and for allocations of less than a page. + * vm_page_activate: + * + * Put the specified page on the active list (if appropriate). + * + * The page queues must be locked. */ -void * -contigmalloc(size, type, flags, low, high, alignment, boundary) - unsigned long size; /* should be size_t here and for malloc() */ - int type; - int flags; - unsigned long low; - unsigned long high; - unsigned long alignment; - unsigned long boundary; +void +vm_page_activate(m) + register vm_page_t m; { - int i, s, start; - vm_offset_t addr, phys, tmp_addr; - vm_page_t pga = vm_page_array; + int s; - size = round_page(size); - if (size == 0) - panic("vm_page_alloc_contig: size must not be 0"); - if ((alignment & (alignment - 1)) != 0) - panic("vm_page_alloc_contig: alignment must be a power of 2"); - if ((boundary & (boundary - 1)) != 0) - panic("vm_page_alloc_contig: boundary must be a power of 2"); + s = splvm(); + if (m->queue == PQ_ACTIVE) + panic("vm_page_activate: already active"); - start = 0; - s = splhigh(); -again: - /* - * Find first page in array that is free, within range, aligned, and - * such that the boundary won't be crossed. - */ - for (i = start; i < cnt.v_page_count; i++) { - phys = VM_PAGE_TO_PHYS(&pga[i]); - if ((pga[i].queue == PQ_FREE) && - (phys >= low) && (phys < high) && - ((phys & (alignment - 1)) == 0) && - (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0)) - break; + if (m->queue == PQ_CACHE) + cnt.v_reactivated++; + + vm_page_unqueue(m); + + if (m->wire_count == 0) { + if (m->act_count < 5) + m->act_count = 5; + TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq); + m->queue = PQ_ACTIVE; + cnt.v_active_count++; } - - /* - * If the above failed or we will exceed the upper bound, fail. - */ - if ((i == cnt.v_page_count) || - ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) { - splx(s); - return (NULL); - } - start = i; - - /* - * Check successive pages for contiguous and free. - */ - for (i = start + 1; i < (start + size / PAGE_SIZE); i++) { - if ((VM_PAGE_TO_PHYS(&pga[i]) != - (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) || - (pga[i].queue != PQ_FREE)) { - start++; - goto again; - } - } - - /* - * We've found a contiguous chunk that meets are requirements. - * Allocate kernel VM, unfree and assign the physical pages to it and - * return kernel VM pointer. 
- */ - tmp_addr = addr = kmem_alloc_pageable(kernel_map, size); - if (addr == 0) { - splx(s); - return (NULL); - } - - for (i = start; i < (start + size / PAGE_SIZE); i++) { - vm_page_t m = &pga[i]; - - TAILQ_REMOVE(&vm_page_queue_free, m, pageq); - cnt.v_free_count--; - m->valid = VM_PAGE_BITS_ALL; - m->flags = 0; - m->dirty = 0; - m->wire_count = 0; - m->act_count = 0; - m->busy = 0; - m->queue = PQ_NONE; - vm_page_insert(m, kernel_object, - OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS)); - vm_page_wire(m); - pmap_kenter(tmp_addr, VM_PAGE_TO_PHYS(m)); - tmp_addr += PAGE_SIZE; - } - splx(s); - return ((void *)addr); -} - -vm_offset_t -vm_page_alloc_contig(size, low, high, alignment) - vm_offset_t size; - vm_offset_t low; - vm_offset_t high; - vm_offset_t alignment; -{ - return ((vm_offset_t)contigmalloc(size, M_DEVBUF, M_NOWAIT, low, high, - alignment, 0ul)); } /* @@ -813,7 +732,7 @@ vm_page_free(m) int s; int flags = m->flags; - s = splhigh(); + s = splvm(); if (m->busy || (flags & PG_BUSY) || (m->queue == PQ_FREE)) { printf("vm_page_free: pindex(%ld), busy(%d), PG_BUSY(%d)\n", m->pindex, m->busy, (flags & PG_BUSY) ? 1 : 0); @@ -824,7 +743,8 @@ vm_page_free(m) } if (m->hold_count) { - panic("freeing held page, count=%d", m->hold_count); + panic("freeing held page, count=%d, pindex=%d(0x%x)", + m->hold_count, m->pindex, m->pindex); } vm_page_remove(m); @@ -840,7 +760,19 @@ vm_page_free(m) m->wire_count = 0; } m->queue = PQ_FREE; - TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq); + + /* + * If the pageout process is grabbing the page, it is likely + * that the page is NOT in the cache. It is more likely that + * the page will be partially in the cache if it is being + * explicitly freed. + */ + if (curproc == pageproc) { + TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq); + } else { + TAILQ_INSERT_HEAD(&vm_page_queue_free, m, pageq); + } + splx(s); /* * if pageout daemon needs pages, then tell it that there are @@ -859,7 +791,6 @@ vm_page_free(m) */ if ((cnt.v_free_count + cnt.v_cache_count) == cnt.v_free_min) { wakeup(&cnt.v_free_count); - wakeup(&proc0); } } else { splx(s); @@ -884,7 +815,7 @@ vm_page_wire(m) int s; if (m->wire_count == 0) { - s = splhigh(); + s = splvm(); vm_page_unqueue(m); splx(s); cnt.v_wire_count++; @@ -907,56 +838,23 @@ vm_page_unwire(m) { int s; - s = splhigh(); + s = splvm(); if (m->wire_count > 0) m->wire_count--; if (m->wire_count == 0) { cnt.v_wire_count--; - TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq); - m->queue = PQ_ACTIVE; - if( m->act_count < ACT_MAX) - m->act_count += 1; - cnt.v_active_count++; - } - splx(s); -} - -/* - * vm_page_activate: - * - * Put the specified page on the active list (if appropriate). - * - * The page queues must be locked. 
- */ -void -vm_page_activate(m) - register vm_page_t m; -{ - int s; - - s = splhigh(); - if (m->queue == PQ_ACTIVE) - panic("vm_page_activate: already active"); - - if (m->queue == PQ_CACHE) - cnt.v_reactivated++; - - vm_page_unqueue(m); - - if (m->wire_count == 0) { TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq); m->queue = PQ_ACTIVE; if (m->act_count < 5) m->act_count = 5; - else if( m->act_count < ACT_MAX) - m->act_count += 1; cnt.v_active_count++; } splx(s); } + /* * vm_page_deactivate: * @@ -982,7 +880,7 @@ vm_page_deactivate(m) if (m->queue == PQ_INACTIVE) return; - spl = splhigh(); + spl = splvm(); if (m->wire_count == 0 && m->hold_count == 0) { if (m->queue == PQ_CACHE) cnt.v_reactivated++; @@ -990,7 +888,6 @@ vm_page_deactivate(m) TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq); m->queue = PQ_INACTIVE; cnt.v_inactive_count++; - m->act_count = 0; } splx(spl); } @@ -1014,7 +911,7 @@ vm_page_cache(m) return; vm_page_protect(m, VM_PROT_NONE); - s = splhigh(); + s = splvm(); vm_page_unqueue(m); TAILQ_INSERT_TAIL(&vm_page_queue_cache, m, pageq); m->queue = PQ_CACHE; @@ -1030,35 +927,6 @@ vm_page_cache(m) splx(s); } -/* - * vm_page_zero_fill: - * - * Zero-fill the specified page. - * Written as a standard pagein routine, to - * be used by the zero-fill object. - */ -boolean_t -vm_page_zero_fill(m) - vm_page_t m; -{ - pmap_zero_page(VM_PAGE_TO_PHYS(m)); - return (TRUE); -} - -/* - * vm_page_copy: - * - * Copy one page to another - */ -void -vm_page_copy(src_m, dest_m) - vm_page_t src_m; - vm_page_t dest_m; -{ - pmap_copy_page(VM_PAGE_TO_PHYS(src_m), VM_PAGE_TO_PHYS(dest_m)); - dest_m->valid = VM_PAGE_BITS_ALL; -} - /* * mapping function for valid bits or for dirty bits in @@ -1126,8 +994,6 @@ vm_page_is_valid(m, base, size) return 0; } - - void vm_page_test_dirty(m) vm_page_t m; @@ -1138,6 +1004,115 @@ vm_page_test_dirty(m) } } +/* + * This interface is for merging with malloc() someday. + * Even if we never implement compaction so that contiguous allocation + * works after initialization time, malloc()'s data structures are good + * for statistics and for allocations of less than a page. + */ +void * +contigmalloc(size, type, flags, low, high, alignment, boundary) + unsigned long size; /* should be size_t here and for malloc() */ + int type; + int flags; + unsigned long low; + unsigned long high; + unsigned long alignment; + unsigned long boundary; +{ + int i, s, start; + vm_offset_t addr, phys, tmp_addr; + vm_page_t pga = vm_page_array; + + size = round_page(size); + if (size == 0) + panic("vm_page_alloc_contig: size must not be 0"); + if ((alignment & (alignment - 1)) != 0) + panic("vm_page_alloc_contig: alignment must be a power of 2"); + if ((boundary & (boundary - 1)) != 0) + panic("vm_page_alloc_contig: boundary must be a power of 2"); + + start = 0; + s = splvm(); +again: + /* + * Find first page in array that is free, within range, aligned, and + * such that the boundary won't be crossed. + */ + for (i = start; i < cnt.v_page_count; i++) { + phys = VM_PAGE_TO_PHYS(&pga[i]); + if ((pga[i].queue == PQ_FREE) && + (phys >= low) && (phys < high) && + ((phys & (alignment - 1)) == 0) && + (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0)) + break; + } + + /* + * If the above failed or we will exceed the upper bound, fail. + */ + if ((i == cnt.v_page_count) || + ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) { + splx(s); + return (NULL); + } + start = i; + + /* + * Check successive pages for contiguous and free. 
+ */ + for (i = start + 1; i < (start + size / PAGE_SIZE); i++) { + if ((VM_PAGE_TO_PHYS(&pga[i]) != + (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) || + (pga[i].queue != PQ_FREE)) { + start++; + goto again; + } + } + + /* + * We've found a contiguous chunk that meets are requirements. + * Allocate kernel VM, unfree and assign the physical pages to it and + * return kernel VM pointer. + */ + tmp_addr = addr = kmem_alloc_pageable(kernel_map, size); + if (addr == 0) { + splx(s); + return (NULL); + } + + for (i = start; i < (start + size / PAGE_SIZE); i++) { + vm_page_t m = &pga[i]; + + TAILQ_REMOVE(&vm_page_queue_free, m, pageq); + cnt.v_free_count--; + m->valid = VM_PAGE_BITS_ALL; + m->flags = 0; + m->dirty = 0; + m->wire_count = 0; + m->busy = 0; + m->queue = PQ_NONE; + vm_page_insert(m, kernel_object, + OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS)); + vm_page_wire(m); + pmap_kenter(tmp_addr, VM_PAGE_TO_PHYS(m)); + tmp_addr += PAGE_SIZE; + } + + splx(s); + return ((void *)addr); +} + +vm_offset_t +vm_page_alloc_contig(size, low, high, alignment) + vm_offset_t size; + vm_offset_t low; + vm_offset_t high; + vm_offset_t alignment; +{ + return ((vm_offset_t)contigmalloc(size, M_DEVBUF, M_NOWAIT, low, high, + alignment, 0ul)); +} #ifdef DDB void DDB_print_page_info(void) diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index e10c1063199e..950ebe16afd6 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -65,7 +65,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_pageout.c,v 1.69 1996/03/28 04:53:28 dyson Exp $ + * $Id: vm_pageout.c,v 1.70 1996/04/11 21:05:25 bde Exp $ */ /* @@ -138,8 +138,6 @@ extern int nswiodone; extern int vm_swap_size; extern int vfs_update_wakeup; -#define MAXSCAN 1024 /* maximum number of pages to scan in queues */ - #define MAXLAUNDER (cnt.v_page_count > 1800 ? 32 : 16) #define VM_PAGEOUT_PAGE_COUNT 16 @@ -415,9 +413,9 @@ vm_pageout_object_deactivate_pages(map, object, count, map_remove_only) * scan the objects entire memory queue */ rcount = object->resident_page_count; - p = object->memq.tqh_first; + p = TAILQ_FIRST(&object->memq); while (p && (rcount-- > 0)) { - next = p->listq.tqe_next; + next = TAILQ_NEXT(p, listq); cnt.v_pdpages++; if (p->wire_count != 0 || p->hold_count != 0 || @@ -434,26 +432,9 @@ vm_pageout_object_deactivate_pages(map, object, count, map_remove_only) if (p->queue == PQ_ACTIVE) { if (!pmap_is_referenced(VM_PAGE_TO_PHYS(p)) && (p->flags & PG_REFERENCED) == 0) { - p->act_count -= min(p->act_count, ACT_DECLINE); - /* - * if the page act_count is zero -- then we - * deactivate - */ - if (!p->act_count) { - if (!map_remove_only) - vm_page_deactivate(p); - vm_page_protect(p, VM_PROT_NONE); - /* - * else if on the next go-around we - * will deactivate the page we need to - * place the page on the end of the - * queue to age the other pages in - * memory. 
- */ - } else { - TAILQ_REMOVE(&vm_page_queue_active, p, pageq); - TAILQ_INSERT_TAIL(&vm_page_queue_active, p, pageq); - } + vm_page_protect(p, VM_PROT_NONE); + if (!map_remove_only) + vm_page_deactivate(p); /* * see if we are done yet */ @@ -471,8 +452,6 @@ vm_pageout_object_deactivate_pages(map, object, count, map_remove_only) */ pmap_clear_reference(VM_PAGE_TO_PHYS(p)); p->flags &= ~PG_REFERENCED; - if (p->act_count < ACT_MAX) - p->act_count += ACT_ADVANCE; TAILQ_REMOVE(&vm_page_queue_active, p, pageq); TAILQ_INSERT_TAIL(&vm_page_queue_active, p, pageq); @@ -544,9 +523,12 @@ vm_pageout_scan() vm_object_t object; int force_wakeup = 0; int vnodes_skipped = 0; + int usagefloor; + int i; pages_freed = 0; + /* * Start scanning the inactive queue for pages we can free. We keep * scanning until we have enough free pages or we have scanned through @@ -559,13 +541,14 @@ vm_pageout_scan() rescan1: maxscan = cnt.v_inactive_count; - m = vm_page_queue_inactive.tqh_first; + m = TAILQ_FIRST(&vm_page_queue_inactive); while ((m != NULL) && (maxscan-- > 0) && - ((cnt.v_cache_count + cnt.v_free_count) < (cnt.v_cache_min + cnt.v_free_target))) { + ((cnt.v_cache_count + cnt.v_free_count) < + (cnt.v_cache_min + cnt.v_free_target))) { vm_page_t next; cnt.v_pdpages++; - next = m->pageq.tqe_next; + next = TAILQ_NEXT(m, pageq); #if defined(VM_DIAGNOSE) if (m->queue != PQ_INACTIVE) { @@ -575,7 +558,8 @@ rescan1: #endif /* - * dont mess with busy pages + * Dont mess with busy pages, keep in the front of the + * queue, most likely are being paged out. */ if (m->busy || (m->flags & PG_BUSY)) { m = next; @@ -600,8 +584,6 @@ rescan1: m->flags &= ~PG_REFERENCED; pmap_clear_reference(VM_PAGE_TO_PHYS(m)); vm_page_activate(m); - if (m->act_count < ACT_MAX) - m->act_count += ACT_ADVANCE; m = next; continue; } @@ -681,14 +663,11 @@ rescan1: page_shortage = 1; } } - maxscan = MAXSCAN; - pcount = cnt.v_active_count; - m = vm_page_queue_active.tqh_first; - while ((m != NULL) && (maxscan > 0) && - (pcount-- > 0) && (page_shortage > 0)) { - cnt.v_pdpages++; - next = m->pageq.tqe_next; + pcount = cnt.v_active_count; + m = TAILQ_FIRST(&vm_page_queue_active); + while ((m != NULL) && (pcount-- > 0) && (page_shortage > 0)) { + next = TAILQ_NEXT(m, pageq); /* * Don't deactivate pages that are busy. @@ -701,54 +680,47 @@ rescan1: m = next; continue; } - if (m->object->ref_count && - ((m->flags & PG_REFERENCED) || - pmap_is_referenced(VM_PAGE_TO_PHYS(m))) ) { - pmap_clear_reference(VM_PAGE_TO_PHYS(m)); - m->flags &= ~PG_REFERENCED; - if (m->act_count < ACT_MAX) { - m->act_count += ACT_ADVANCE; + + /* + * The count for pagedaemon pages is done after checking the + * page for eligbility... 
+ */ + cnt.v_pdpages++; + if ((m->flags & PG_REFERENCED) == 0) { + if (pmap_is_referenced(VM_PAGE_TO_PHYS(m))) { + pmap_clear_reference(VM_PAGE_TO_PHYS(m)); + m->flags |= PG_REFERENCED; } + } else { + pmap_clear_reference(VM_PAGE_TO_PHYS(m)); + } + if ( (m->object->ref_count != 0) && + (m->flags & PG_REFERENCED) ) { + m->flags &= ~PG_REFERENCED; TAILQ_REMOVE(&vm_page_queue_active, m, pageq); TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq); } else { m->flags &= ~PG_REFERENCED; - pmap_clear_reference(VM_PAGE_TO_PHYS(m)); - m->act_count -= min(m->act_count, ACT_DECLINE); - - /* - * if the page act_count is zero -- then we deactivate - */ - if (!m->act_count && (page_shortage > 0)) { - if (m->object->ref_count == 0) { - --page_shortage; - vm_page_test_dirty(m); - if (m->dirty == 0) { - m->act_count = 0; - vm_page_cache(m); - } else { - vm_page_deactivate(m); - } + if (page_shortage > 0) { + --page_shortage; + vm_page_test_dirty(m); + if (m->dirty == 0) { + vm_page_cache(m); } else { vm_page_protect(m, VM_PROT_NONE); vm_page_deactivate(m); - --page_shortage; } - } else if (m->act_count) { - TAILQ_REMOVE(&vm_page_queue_active, m, pageq); - TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq); } } - maxscan--; m = next; } - + /* * We try to maintain some *really* free pages, this allows interrupt * code to be guaranteed space. */ while (cnt.v_free_count < cnt.v_free_reserved) { - m = vm_page_queue_cache.tqh_first; + m = TAILQ_FIRST(&vm_page_queue_cache); if (!m) break; vm_page_free(m); @@ -770,23 +742,13 @@ rescan1: } } #ifndef NO_SWAPPING - /* - * now swap processes out if we are in low memory conditions - */ - if (!swap_pager_full && vm_swap_size && - vm_pageout_req_swapout == 0) { - vm_pageout_req_swapout = 1; + if (cnt.v_free_count + cnt.v_cache_count < cnt.v_free_target) { vm_req_vmdaemon(); + vm_pageout_req_swapout = 1; } #endif } -#ifndef NO_SWAPPING - if ((cnt.v_inactive_count + cnt.v_free_count + cnt.v_cache_count) < - (cnt.v_inactive_target + cnt.v_free_min)) { - vm_req_vmdaemon(); - } -#endif /* * make sure that we have swap space -- if we are low on memory and @@ -883,22 +845,23 @@ vm_pageout() * The pageout daemon is never done, so loop forever. */ while (TRUE) { - int s = splhigh(); - + int s = splvm(); if (!vm_pages_needed || ((cnt.v_free_count >= cnt.v_free_reserved) && (cnt.v_free_count + cnt.v_cache_count >= cnt.v_free_min))) { vm_pages_needed = 0; tsleep(&vm_pages_needed, PVM, "psleep", 0); + } else if (!vm_pages_needed) { + tsleep(&vm_pages_needed, PVM, "psleep", hz/3); } + if (vm_pages_needed) + cnt.v_pdwakeups++; vm_pages_needed = 0; splx(s); - cnt.v_pdwakeups++; vm_pager_sync(); vm_pageout_scan(); vm_pager_sync(); wakeup(&cnt.v_free_count); - wakeup(kmem_map); } } @@ -908,7 +871,7 @@ vm_req_vmdaemon() { static int lastrun = 0; - if ((ticks > (lastrun + hz / 10)) || (ticks < lastrun)) { + if ((ticks > (lastrun + hz)) || (ticks < lastrun)) { wakeup(&vm_daemon_needed); lastrun = ticks; } @@ -978,7 +941,7 @@ vm_daemon() * we remove cached objects that have no RSS... 
*/ restart: - object = vm_object_cached_list.tqh_first; + object = TAILQ_FIRST(&vm_object_cached_list); while (object) { /* * if there are no resident pages -- get rid of the object @@ -988,7 +951,7 @@ restart: pager_cache(object, FALSE); goto restart; } - object = object->cached_list.tqe_next; + object = TAILQ_NEXT(object, cached_list); } } } diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c index 63ebdd9a57e9..c7c9964bd0e4 100644 --- a/sys/vm/vm_pager.c +++ b/sys/vm/vm_pager.c @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_pager.c,v 1.21 1995/12/14 09:55:11 phk Exp $ + * $Id: vm_pager.c,v 1.22 1996/05/03 21:01:53 phk Exp $ */ /* @@ -249,7 +249,7 @@ vm_pager_object_lookup(pg_list, handle) { register vm_object_t object; - for (object = pg_list->tqh_first; object != NULL; object = object->pager_object_list.tqe_next) + for (object = TAILQ_FIRST(pg_list); object != NULL; object = TAILQ_NEXT(object,pager_object_list)) if (object->handle == handle) return (object); return (NULL); @@ -288,7 +288,7 @@ getpbuf() s = splbio(); /* get a bp from the swap buffer header pool */ - while ((bp = bswlist.tqh_first) == NULL) { + while ((bp = TAILQ_FIRST(&bswlist)) == NULL) { bswneeded = 1; tsleep(&bswneeded, PVM, "wswbuf", 0); } @@ -313,7 +313,7 @@ trypbuf() struct buf *bp; s = splbio(); - if ((bp = bswlist.tqh_first) == NULL) { + if ((bp = TAILQ_FIRST(&bswlist)) == NULL) { splx(s); return NULL; }
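
Several of the hunks above replace open-coded tqh_first/tqe_next accesses with the TAILQ_FIRST/TAILQ_NEXT macros. The following is a minimal user-level sketch of that traversal idiom, assuming a <sys/queue.h> that provides the TAILQ macros (as on FreeBSD); the "node" type and the values are made up for illustration, and malloc failures are not checked.

#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
        int val;
        TAILQ_ENTRY(node) link;         /* list linkage, like listq/pageq */
};

TAILQ_HEAD(nodehead, node);

int
main(void)
{
        struct nodehead head;
        struct node *n, *next;
        int i;

        TAILQ_INIT(&head);
        for (i = 0; i < 4; i++) {
                n = malloc(sizeof(*n));
                n->val = i;
                TAILQ_INSERT_TAIL(&head, n, link);
        }

        /* walk with the macros rather than touching tqh_first/tqe_next */
        for (n = TAILQ_FIRST(&head); n != NULL; n = next) {
                next = TAILQ_NEXT(n, link);
                printf("%d\n", n->val);
                free(n);
        }
        return (0);
}

Going through the macros keeps callers independent of the queue representation, which is what allows a conversion like this to be made mechanically across the VM code.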
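
contigmalloc() (moved, not changed, in the vm_page.c hunks) scans vm_page_array for a run of free pages that is suitably aligned and does not cross the caller's boundary. The two bit tests it relies on can be exercised in isolation; the helper below and the sample addresses are only illustrative, but the expressions are the ones the function uses (alignment and boundary are powers of two, and a boundary of 0 means no restriction).

#include <stdio.h>

static int
range_ok(unsigned long phys, unsigned long size,
    unsigned long alignment, unsigned long boundary)
{
        /* the start must sit on an alignment boundary */
        if ((phys & (alignment - 1)) != 0)
                return (0);
        /*
         * the first and last byte must fall in the same boundary-sized
         * window; with boundary == 0 the mask is 0 and the test passes
         */
        if (((phys ^ (phys + size - 1)) & ~(boundary - 1)) != 0)
                return (0);
        return (1);
}

int
main(void)
{
        /* 8K at 0x3000: 4K aligned, stays below the 64K line -> 1 */
        printf("%d\n", range_ok(0x3000, 0x2000, 0x1000, 0x10000));
        /* 8K at 0xf000: 4K aligned, but crosses the 64K line -> 0 */
        printf("%d\n", range_ok(0xf000, 0x2000, 0x1000, 0x10000));
        return (0);
}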
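
Two small sizing computations appear in the vm_map.c and vm_page.c hunks: mapvmpgcnt is now computed with an explicit round-up division instead of btoc(), and the page hash bucket count is seeded at 1 and doubled until it reaches atop(total), i.e. rounded up to a power of two. A stand-alone sketch of both, with made-up input values standing in for the real counters:

#include <stdio.h>

#define PAGE_SIZE       4096

int
main(void)
{
        unsigned long nbytes = 123456;  /* stand-in for v_page_count * sizeof(struct vm_map_entry) */
        unsigned long npages = 12345;   /* stand-in for atop(total) */
        unsigned long mapvmpgcnt, buckets;

        /* round up to whole pages, as the new mapvmpgcnt computation does */
        mapvmpgcnt = (nbytes + PAGE_SIZE - 1) / PAGE_SIZE;

        /* smallest power of two >= npages, as in vm_page_startup() */
        buckets = 1;
        while (buckets < npages)
                buckets <<= 1;

        printf("%lu bytes -> %lu pages, %lu pages -> %lu buckets\n",
            nbytes, mapvmpgcnt, npages, buckets);
        return (0);
}

Seeding the bucket count at 1 rather than 2 only matters for very small page counts, but it makes the loop yield the smallest sufficient power of two in every case.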
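
The reworked vm_map_entry_create()/vm_map_entry_dispose() pair keeps two lists: a reserve (kentry_free, topped up to KENTRY_LOW_WATER) that kernel maps allocate from, and a general pool (mappool) with a malloc fallback for everything else. Below is a user-level sketch of that split, assuming a trimmed-down entry type; the splvm() protection, the code that grows the reserve by allocating a fresh kernel page, and malloc error handling are all omitted.

#include <stdio.h>
#include <stdlib.h>

#define KENTRY_LOW_WATER        128

struct entry {
        struct entry *next;
        /* real vm_map_entry fields omitted */
};

static struct entry *kentry_free;       /* reserve for kernel maps */
static int kentry_count;
static struct entry *mappool;           /* general pool, malloc-backed */
static int mappoolcnt;

/* hand an entry back: refill the reserve first, then the pool */
static void
entry_dispose(struct entry *e)
{
        if (kentry_count < KENTRY_LOW_WATER) {
                e->next = kentry_free;
                kentry_free = e;
                ++kentry_count;
        } else {
                e->next = mappool;
                mappool = e;
                ++mappoolcnt;
        }
}

/* allocate an entry: kernel maps take the reserve, others the pool */
static struct entry *
entry_create(int is_kernel_map)
{
        struct entry *e;

        if (is_kernel_map) {
                e = kentry_free;
                if (e == NULL)
                        abort();        /* analogue of the "out of map entries" panic */
                kentry_free = e->next;
                --kentry_count;
        } else {
                e = mappool;
                if (e != NULL) {
                        mappool = e->next;
                        --mappoolcnt;
                } else {
                        e = malloc(sizeof(*e)); /* analogue of MALLOC(..., M_WAITOK) */
                }
        }
        return (e);
}

int
main(void)
{
        struct entry *e;
        int i;

        /* seed the reserve, standing in for the preallocated kernel page */
        for (i = 0; i < KENTRY_LOW_WATER; i++)
                entry_dispose(malloc(sizeof(struct entry)));

        e = entry_create(1);            /* kernel map: from the reserve */
        entry_dispose(e);
        e = entry_create(0);            /* user map: pool or malloc */
        entry_dispose(e);
        printf("reserve %d, pool %d\n", kentry_count, mappoolcnt);
        return (0);
}

The point of the reserve is that allocations for kernel_map, kmem_map and pager_map never recurse into malloc(); running the reserve dry is treated as a panic rather than a condition to recover from.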