Implement an alternative solution to the amd64 and i386 pmap problem that we
previously addressed in r348246.

This pmap problem also exists on arm64 and riscv.  However, the original
solution developed for amd64 and i386 cannot be used on arm64 and riscv.  In
particular, arm64 and riscv do not define a PG_PROMOTED flag in their level
2 PTEs.  (A PG_PROMOTED flag makes no sense on arm64, where, unlike x86 or
riscv, we are required to break the old 4KB mappings before making the 2MB
mapping; and on riscv there are no unused bits in the PTE to define a
PG_PROMOTED flag.)

This commit implements an alternative solution that can be used on all four
architectures.  Moreover, this solution has two other advantages.  First, on
older AMD processors that required the Erratum 383 workaround, it is less
costly.  Specifically, it avoids unnecessary calls to pmap_fill_ptp() on a
superpage demotion.  Second, it enables the elimination of some calls to
pagezero() in pmap_remove_kernel_{l2,pde}().

In addition, remove a related stale comment from pmap_enter_{l2,pde}().

Reviewed by:	kib, markj (an earlier version)
MFC after:	1 week
Differential Revision:	https://reviews.freebsd.org/D20538
This commit is contained in:
Alan Cox 2019-06-09 03:36:10 +00:00
parent c3308a9469
commit fd2dae0a30
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=348828
4 changed files with 100 additions and 58 deletions

View File

@ -1053,7 +1053,7 @@ static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde,
static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted);
static void pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva,
vm_offset_t eva);
static void pmap_invalidate_cache_range_all(vm_offset_t sva,
@ -1757,8 +1757,13 @@ pmap_init(void)
mpte->pindex = pmap_pde_pindex(KERNBASE) + i;
mpte->phys_addr = KPTphys + (i << PAGE_SHIFT);
mpte->wire_count = 1;
/*
* Collect the page table pages that were replaced by a 2MB
* page in create_pagetables(). They are zero filled.
*/
if (i << PDRSHIFT < KERNend &&
pmap_insert_pt_page(kernel_pmap, mpte))
pmap_insert_pt_page(kernel_pmap, mpte, false))
panic("pmap_init: pmap_insert_pt_page failed");
}
PMAP_UNLOCK(kernel_pmap);
@ -3129,12 +3134,15 @@ pmap_add_delayed_free_list(vm_page_t m, struct spglist *free,
* of idle page table pages. Each of a pmap's page table pages is responsible
* for mapping a distinct range of virtual addresses. The pmap's collection is
* ordered by this virtual address range.
*
* If "promoted" is false, then the page table page "mpte" must be zero filled.
*/
static __inline int
pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte)
pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted)
{
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
mpte->valid = promoted ? VM_PAGE_BITS_ALL : 0;
return (vm_radix_insert(&pmap->pm_root, mpte));
}
@ -4626,7 +4634,7 @@ pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
* If the page table page is not leftover from an earlier promotion,
* initialize it.
*/
if ((oldpde & PG_PROMOTED) == 0)
if (mpte->valid == 0)
pmap_fill_ptp(firstpte, newpte);
pmap_demote_pde_check(firstpte, newpte);
@ -4699,9 +4707,11 @@ pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
newpde = mptepa | X86_PG_M | X86_PG_A | X86_PG_RW | X86_PG_V;
/*
* Initialize the page table page.
* If this page table page was unmapped by a promotion, then it
* contains valid mappings. Zero it to invalidate those mappings.
*/
pagezero((void *)PHYS_TO_DMAP(mptepa));
if (mpte->valid != 0)
pagezero((void *)PHYS_TO_DMAP(mptepa));
/*
* Demote the mapping.
@ -4766,6 +4776,8 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
} else {
mpte = pmap_remove_pt_page(pmap, sva);
if (mpte != NULL) {
KASSERT(mpte->valid == VM_PAGE_BITS_ALL,
("pmap_remove_pde: pte page not promoted"));
pmap_resident_count_dec(pmap, 1);
KASSERT(mpte->wire_count == NPTEPG,
("pmap_remove_pde: pte page wire count error"));
@ -5399,7 +5411,7 @@ setpte:
("pmap_promote_pde: page table page is out of range"));
KASSERT(mpte->pindex == pmap_pde_pindex(va),
("pmap_promote_pde: page table page's pindex is wrong"));
if (pmap_insert_pt_page(pmap, mpte)) {
if (pmap_insert_pt_page(pmap, mpte, true)) {
atomic_add_long(&pmap_pde_p_failures, 1);
CTR2(KTR_PMAP,
"pmap_promote_pde: failure for va %#lx in pmap %p", va,
@ -5826,15 +5838,13 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags,
}
vm_page_free_pages_toq(&free, true);
if (va >= VM_MAXUSER_ADDRESS) {
/*
* Both pmap_remove_pde() and pmap_remove_ptes() will
* leave the kernel page table page zero filled.
*/
mt = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
if (pmap_insert_pt_page(pmap, mt)) {
/*
* XXX Currently, this can't happen because
* we do not perform pmap_enter(psind == 1)
* on the kernel pmap.
*/
if (pmap_insert_pt_page(pmap, mt, false))
panic("pmap_enter_pde: trie insert failed");
}
} else
KASSERT(*pde == 0, ("pmap_enter_pde: non-zero pde %p",
pde));
@ -6824,6 +6834,8 @@ pmap_remove_pages(pmap_t pmap)
}
mpte = pmap_remove_pt_page(pmap, pv->pv_va);
if (mpte != NULL) {
KASSERT(mpte->valid == VM_PAGE_BITS_ALL,
("pmap_remove_pages: pte page not promoted"));
pmap_resident_count_dec(pmap, 1);
KASSERT(mpte->wire_count == NPTEPG,
("pmap_remove_pages: pte page wire count error"));

View File

@ -2401,9 +2401,11 @@ pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va)
newl2 = ml3pa | L2_TABLE;
/*
* Initialize the page table page.
* If this page table page was unmapped by a promotion, then it
* contains valid mappings. Zero it to invalidate those mappings.
*/
pagezero((void *)PHYS_TO_DMAP(ml3pa));
if (ml3->valid != 0)
pagezero((void *)PHYS_TO_DMAP(ml3pa));
/*
* Demote the mapping. The caller must have already invalidated the
@ -2456,6 +2458,8 @@ pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva,
} else {
ml3 = pmap_remove_pt_page(pmap, sva);
if (ml3 != NULL) {
KASSERT(ml3->valid == VM_PAGE_BITS_ALL,
("pmap_remove_l2: l3 page not promoted"));
pmap_resident_count_dec(pmap, 1);
KASSERT(ml3->wire_count == NL3PG,
("pmap_remove_l2: l3 page wire count error"));
@ -2812,12 +2816,15 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
* of idle page table pages. Each of a pmap's page table pages is responsible
* for mapping a distinct range of virtual addresses. The pmap's collection is
* ordered by this virtual address range.
*
* If "promoted" is false, then the page table page "mpte" must be zero filled.
*/
static __inline int
pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte)
pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted)
{
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
mpte->valid = promoted ? VM_PAGE_BITS_ALL : 0;
return (vm_radix_insert(&pmap->pm_root, mpte));
}
@ -2962,7 +2969,7 @@ pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va,
("pmap_promote_l2: page table page is out of range"));
KASSERT(mpte->pindex == pmap_l2_pindex(va),
("pmap_promote_l2: page table page's pindex is wrong"));
if (pmap_insert_pt_page(pmap, mpte)) {
if (pmap_insert_pt_page(pmap, mpte, true)) {
atomic_add_long(&pmap_l2_p_failures, 1);
CTR2(KTR_PMAP,
"pmap_promote_l2: failure for va %#lx in pmap %p", va,
@ -3386,15 +3393,13 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags,
}
vm_page_free_pages_toq(&free, true);
if (va >= VM_MAXUSER_ADDRESS) {
/*
* Both pmap_remove_l2() and pmap_remove_l3() will
* leave the kernel page table page zero filled.
*/
mt = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK);
if (pmap_insert_pt_page(pmap, mt)) {
/*
* XXX Currently, this can't happen bacuse
* we do not perform pmap_enter(psind == 1)
* on the kernel pmap.
*/
if (pmap_insert_pt_page(pmap, mt, false))
panic("pmap_enter_l2: trie insert failed");
}
} else
KASSERT(pmap_load(l2) == 0,
("pmap_enter_l2: non-zero L2 entry %p", l2));
@ -4081,6 +4086,8 @@ pmap_remove_pages(pmap_t pmap)
ml3 = pmap_remove_pt_page(pmap,
pv->pv_va);
if (ml3 != NULL) {
KASSERT(ml3->valid == VM_PAGE_BITS_ALL,
("pmap_remove_pages: l3 page not promoted"));
pmap_resident_count_dec(pmap,1);
KASSERT(ml3->wire_count == NL3PG,
("pmap_remove_pages: l3 page wire count error"));
@ -5035,8 +5042,10 @@ pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va,
" in pmap %p", va, pmap);
goto fail;
}
if (va < VM_MAXUSER_ADDRESS)
if (va < VM_MAXUSER_ADDRESS) {
ml3->wire_count = NL3PG;
pmap_resident_count_inc(pmap, 1);
}
}
l3phys = VM_PAGE_TO_PHYS(ml3);
@ -5048,10 +5057,10 @@ pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va,
newl3 = (oldl2 & (ATTR_MASK & ~ATTR_DESCR_MASK)) | L3_PAGE;
/*
* If the page table page is new, initialize it.
* If the page table page is not leftover from an earlier promotion,
* initialize it.
*/
if (ml3->wire_count == 1) {
ml3->wire_count = NL3PG;
if (ml3->valid == 0) {
for (i = 0; i < Ln_ENTRIES; i++) {
l3[i] = newl3 | phys;
phys += L3_SIZE;

View File

@ -318,7 +318,7 @@ static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde,
u_int flags, vm_page_t m);
static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
vm_page_t m, vm_prot_t prot, vm_page_t mpte);
static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted);
static void pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va,
pd_entry_t pde);
static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
@ -990,9 +990,14 @@ __CONCAT(PMTYPE, init)(void)
mpte->pindex = i + KPTDI;
mpte->phys_addr = KPTphys + ptoa(i);
mpte->wire_count = 1;
/*
* Collect the page table pages that were replaced by a 2/4MB
* page. They are filled with equivalent 4KB page mappings.
*/
if (pseflag != 0 &&
KERNBASE <= i << PDRSHIFT && i << PDRSHIFT < KERNend &&
pmap_insert_pt_page(kernel_pmap, mpte))
pmap_insert_pt_page(kernel_pmap, mpte, true))
panic("pmap_init: pmap_insert_pt_page failed");
}
PMAP_UNLOCK(kernel_pmap);
@ -1900,12 +1905,15 @@ pmap_add_delayed_free_list(vm_page_t m, struct spglist *free,
* of idle page table pages. Each of a pmap's page table pages is responsible
* for mapping a distinct range of virtual addresses. The pmap's collection is
* ordered by this virtual address range.
*
* If "promoted" is false, then the page table page "mpte" must be zero filled.
*/
static __inline int
pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte)
pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted)
{
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
mpte->valid = promoted ? VM_PAGE_BITS_ALL : 0;
return (vm_radix_insert(&pmap->pm_root, mpte));
}
@ -2823,7 +2831,7 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
* If the page table page is not leftover from an earlier promotion,
* initialize it.
*/
if ((oldpde & PG_PROMOTED) == 0)
if (mpte->valid == 0)
pmap_fill_ptp(firstpte, newpte);
KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME),
@ -2895,9 +2903,11 @@ pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
newpde = mptepa | PG_M | PG_A | PG_RW | PG_V;
/*
* Initialize the page table page.
* If this page table page was unmapped by a promotion, then it
* contains valid mappings. Zero it to invalidate those mappings.
*/
pagezero((void *)&KPTmap[i386_btop(trunc_4mpage(va))]);
if (mpte->valid != 0)
pagezero((void *)&KPTmap[i386_btop(trunc_4mpage(va))]);
/*
* Remove the mapping.
@ -2960,6 +2970,8 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
} else {
mpte = pmap_remove_pt_page(pmap, sva);
if (mpte != NULL) {
KASSERT(mpte->valid == VM_PAGE_BITS_ALL,
("pmap_remove_pde: pte page not promoted"));
pmap->pm_stats.resident_count--;
KASSERT(mpte->wire_count == NPTEPG,
("pmap_remove_pde: pte page wire count error"));
@ -3533,7 +3545,7 @@ setpte:
("pmap_promote_pde: page table page is out of range"));
KASSERT(mpte->pindex == va >> PDRSHIFT,
("pmap_promote_pde: page table page's pindex is wrong"));
if (pmap_insert_pt_page(pmap, mpte)) {
if (pmap_insert_pt_page(pmap, mpte, true)) {
pmap_pde_p_failures++;
CTR2(KTR_PMAP,
"pmap_promote_pde: failure for va %#x in pmap %p", va,
@ -3911,15 +3923,13 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags,
}
vm_page_free_pages_toq(&free, true);
if (pmap == kernel_pmap) {
/*
* Both pmap_remove_pde() and pmap_remove_ptes() will
* leave the kernel page table page zero filled.
*/
mt = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
if (pmap_insert_pt_page(pmap, mt)) {
/*
* XXX Currently, this can't happen because
* we do not perform pmap_enter(psind == 1)
* on the kernel pmap.
*/
if (pmap_insert_pt_page(pmap, mt, false))
panic("pmap_enter_pde: trie insert failed");
}
} else
KASSERT(*pde == 0, ("pmap_enter_pde: non-zero pde %p",
pde));
@ -4797,6 +4807,8 @@ __CONCAT(PMTYPE, remove_pages)(pmap_t pmap)
}
mpte = pmap_remove_pt_page(pmap, pv->pv_va);
if (mpte != NULL) {
KASSERT(mpte->valid == VM_PAGE_BITS_ALL,
("pmap_remove_pages: pte page not promoted"));
pmap->pm_stats.resident_count--;
KASSERT(mpte->wire_count == NPTEPG,
("pmap_remove_pages: pte page wire count error"));

View File

@ -1104,12 +1104,15 @@ pmap_add_delayed_free_list(vm_page_t m, struct spglist *free,
* of idle page table pages. Each of a pmap's page table pages is responsible
* for mapping a distinct range of virtual addresses. The pmap's collection is
* ordered by this virtual address range.
*
* If "promoted" is false, then the page table page "ml3" must be zero filled.
*/
static __inline int
pmap_insert_pt_page(pmap_t pmap, vm_page_t ml3)
pmap_insert_pt_page(pmap_t pmap, vm_page_t ml3, bool promoted)
{
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
ml3->valid = promoted ? VM_PAGE_BITS_ALL : 0;
return (vm_radix_insert(&pmap->pm_root, ml3));
}
@ -2002,9 +2005,11 @@ pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va)
newl2 = ml3pa | PTE_V;
/*
* Initialize the page table page.
* If this page table page was unmapped by a promotion, then it
* contains valid mappings. Zero it to invalidate those mappings.
*/
pagezero((void *)PHYS_TO_DMAP(ml3pa));
if (ml3->valid != 0)
pagezero((void *)PHYS_TO_DMAP(ml3pa));
/*
* Demote the mapping.
@ -2064,6 +2069,8 @@ pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva,
} else {
ml3 = pmap_remove_pt_page(pmap, sva);
if (ml3 != NULL) {
KASSERT(ml3->valid == VM_PAGE_BITS_ALL,
("pmap_remove_l2: l3 page not promoted"));
pmap_resident_count_dec(pmap, 1);
KASSERT(ml3->wire_count == Ln_ENTRIES,
("pmap_remove_l2: l3 page wire count error"));
@ -2482,8 +2489,10 @@ pmap_demote_l2_locked(pmap_t pmap, pd_entry_t *l2, vm_offset_t va,
"failure for va %#lx in pmap %p", va, pmap);
return (false);
}
if (va < VM_MAXUSER_ADDRESS)
if (va < VM_MAXUSER_ADDRESS) {
mpte->wire_count = Ln_ENTRIES;
pmap_resident_count_inc(pmap, 1);
}
}
mptepa = VM_PAGE_TO_PHYS(mpte);
firstl3 = (pt_entry_t *)PHYS_TO_DMAP(mptepa);
@ -2495,10 +2504,10 @@ pmap_demote_l2_locked(pmap_t pmap, pd_entry_t *l2, vm_offset_t va,
newl3 = oldl2;
/*
* If the page table page is new, initialize it.
* If the page table page is not leftover from an earlier promotion,
* initialize it.
*/
if (mpte->wire_count == 1) {
mpte->wire_count = Ln_ENTRIES;
if (mpte->valid == 0) {
for (i = 0; i < Ln_ENTRIES; i++)
pmap_store(firstl3 + i, newl3 + (i << PTE_PPN0_S));
}
@ -2589,7 +2598,7 @@ pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va,
ml3 = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l2)));
KASSERT(ml3->pindex == pmap_l2_pindex(va),
("pmap_promote_l2: page table page's pindex is wrong"));
if (pmap_insert_pt_page(pmap, ml3)) {
if (pmap_insert_pt_page(pmap, ml3, true)) {
CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx pmap %p",
va, pmap);
atomic_add_long(&pmap_l2_p_failures, 1);
@ -2972,15 +2981,13 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags,
}
vm_page_free_pages_toq(&free, true);
if (va >= VM_MAXUSER_ADDRESS) {
/*
* Both pmap_remove_l2() and pmap_remove_l3() will
* leave the kernel page table page zero filled.
*/
mt = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l2)));
if (pmap_insert_pt_page(pmap, mt)) {
/*
* XXX Currently, this can't happen bacuse
* we do not perform pmap_enter(psind == 1)
* on the kernel pmap.
*/
if (pmap_insert_pt_page(pmap, mt, false))
panic("pmap_enter_l2: trie insert failed");
}
} else
KASSERT(pmap_load(l2) == 0,
("pmap_enter_l2: non-zero L2 entry %p", l2));
@ -3557,6 +3564,8 @@ pmap_remove_pages_pv(pmap_t pmap, vm_page_t m, pv_entry_t pv,
}
mpte = pmap_remove_pt_page(pmap, pv->pv_va);
if (mpte != NULL) {
/*
 * Check the page table page that pmap_remove_pt_page() just returned,
 * i.e., "mpte"; it must have been marked as promoted when it was inserted.
 */
KASSERT(mpte->valid == VM_PAGE_BITS_ALL,
    ("pmap_remove_pages: l3 page not promoted"));
pmap_resident_count_dec(pmap, 1);
KASSERT(mpte->wire_count == Ln_ENTRIES,
("pmap_remove_pages: pte page wire count error"));