Implements both SV39 and SV48; no 32-bit support yet. Paging is set up in start.S by mapping a large chunk of memory both as an identity map and at the bottom of the kernel address space, and the kernel runs out of this physical mapping. Adds basic arch mmu support for querying existing paging structures and mapping 4K pages; unmap is not implemented yet. The system boots with the mmu on when running the supervisor test on qemu, but is untested on real hardware so far.
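For illustration, the new entry points might be exercised roughly like this (a sketch only; the addresses and flag values below are placeholders, not values from this change):

    // hypothetical usage of the arch mmu API added here
    arch_aspace_t aspace;
    arch_mmu_init_aspace(&aspace, KERNEL_ASPACE_BASE, KERNEL_ASPACE_SIZE, ARCH_ASPACE_FLAG_KERNEL);

    vaddr_t va = KERNEL_ASPACE_BASE + 0x10000000;   // placeholder virtual address
    paddr_t pa = 0x80000000;                        // placeholder physical address
    arch_mmu_map(&aspace, va, pa, 1, 0);            // map one 4K page with default kernel rwx permissions

    paddr_t queried_pa;
    uint queried_flags;
    arch_mmu_query(&aspace, va, &queried_pa, &queried_flags);  // expects queried_pa == pa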
/*
 * Copyright (c) 2020 Travis Geiselbrecht
 *
 * Use of this source code is governed by a MIT-style
 * license that can be found in the LICENSE file or at
 * https://opensource.org/licenses/MIT
 */
#if RISCV_MMU

#include "arch/riscv/mmu.h"

#include <assert.h>
#include <string.h>
#include <lk/debug.h>
#include <lk/err.h>
#include <lk/trace.h>
#include <arch/ops.h>
#include <arch/mmu.h>
#include <arch/riscv.h>
#include <arch/riscv/csr.h>
#include <kernel/vm.h>

#define LOCAL_TRACE 0

#if __riscv_xlen == 32
#error "32 bit mmu not supported yet"
#endif

riscv_pte_t kernel_pgtable[512] __ALIGNED(PAGE_SIZE);
paddr_t kernel_pgtable_phys; // filled in by start.S

// initial memory mappings. the VM layer uses these to construct mappings after the fact
struct mmu_initial_mapping mmu_initial_mappings[] = {
    // all of memory, mapped in start.S
    {
        .phys = 0,
        .virt = KERNEL_ASPACE_BASE,
#if RISCV_MMU == 48
        .size = 512UL * GB,
#elif RISCV_MMU == 39
        .size = 64UL * GB,
#else
#error implement
#endif
        .flags = 0,
        .name = "memory"
    },

    // null entry to terminate the list
    { 0 }
};
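
// Construct and install a satp value for the given asid and root page table.
// On RV64, satp is laid out as MODE[63:60] | ASID[59:44] | PPN[43:0], where PPN
// holds the physical page number (physical address >> 12) of the root table.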
static inline void riscv_set_satp(uint asid, paddr_t pt) {
    ulong satp;

#if RISCV_MMU == 48
    satp = RISCV_SATP_MODE_SV48;
#elif RISCV_MMU == 39
    satp = RISCV_SATP_MODE_SV39;
#endif

    // make sure the asid is in range
    DEBUG_ASSERT((asid & RISCV_SATP_ASID_MASK) == 0);
    satp |= (ulong)asid << RISCV_SATP_ASID_SHIFT;

    // make sure the page table is aligned and store its physical page number
    DEBUG_ASSERT(IS_PAGE_ALIGNED(pt));
    satp |= pt >> PAGE_SIZE_SHIFT;

    riscv_csr_write(RISCV_CSR_SATP, satp);

    // TODO: TLB flush here or use asid properly
    // sfence.vma zero, zero
}

// given a va address and the level, compute the index in the current PT
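// e.g. with SV39, level 2 is indexed by va[38:30], level 1 by va[29:21], and
// level 0 by va[20:12]; SV48 adds level 3, indexed by va[47:39]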
static inline uint vaddr_to_index(vaddr_t va, uint level) {
    // levels count down from PT_LEVELS - 1
    DEBUG_ASSERT(level < RISCV_MMU_PT_LEVELS);

    // canonicalize the address
    va &= RISCV_MMU_CANONICAL_MASK;

    uint index = ((va >> PAGE_SIZE_SHIFT) >> (level * RISCV_MMU_PT_SHIFT)) & (RISCV_MMU_PT_ENTRIES - 1);

    LTRACEF_LEVEL(3, "canonical va %#lx, level %u = index %#x\n", va, level, index);

    return index;
}
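
// size of the region covered by a single entry at the given level:
// 4KB at level 0, 2MB at level 1, 1GB at level 2, and 512GB at level 3 (SV48 only)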
static uintptr_t page_size_per_level(uint level) {
    // levels count down from PT_LEVELS - 1
    DEBUG_ASSERT(level < RISCV_MMU_PT_LEVELS);

    return 1UL << (PAGE_SIZE_SHIFT + level * RISCV_MMU_PT_SHIFT);
}

static uintptr_t page_mask_per_level(uint level) {
    return page_size_per_level(level) - 1;
}

static volatile riscv_pte_t *alloc_ptable(paddr_t *pa) {
    // grab a page from the pmm
    vm_page_t *p = pmm_alloc_page();
    if (!p) {
        return NULL;
    }

    // get the physical and virtual mappings of the page
    *pa = vm_page_to_paddr(p);
    riscv_pte_t *pte = paddr_to_kvaddr(*pa);

    // zero it out
    memset(pte, 0, PAGE_SIZE);

    smp_wmb();

    LTRACEF_LEVEL(3, "returning pa %#lx, va %p\n", *pa, pte);
    return pte;
}
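
// convert generic arch mmu flags into RISC-V PTE permission bits. a terminal
// mapping always gets at least R set, since a valid entry with R/W/X all clear
// is interpreted by the hardware as a pointer to the next level table.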
static riscv_pte_t mmu_flags_to_pte(uint flags) {
    riscv_pte_t pte = 0;

    pte |= (flags & ARCH_MMU_FLAG_PERM_USER) ? RISCV_PTE_U : 0;
    pte |= (flags & ARCH_MMU_FLAG_PERM_RO) ? RISCV_PTE_R : (RISCV_PTE_R | RISCV_PTE_W);
    pte |= (flags & ARCH_MMU_FLAG_PERM_NO_EXECUTE) ? 0 : RISCV_PTE_X;

    return pte;
}

static uint pte_flags_to_mmu_flags(riscv_pte_t pte) {
    uint f = 0;
    if ((pte & (RISCV_PTE_R | RISCV_PTE_W)) == RISCV_PTE_R) {
        f |= ARCH_MMU_FLAG_PERM_RO;
    }
    f |= (pte & RISCV_PTE_X) ? 0 : ARCH_MMU_FLAG_PERM_NO_EXECUTE;
    f |= (pte & RISCV_PTE_U) ? ARCH_MMU_FLAG_PERM_USER : 0;
    return f;
}

// public api

// initialize per address space
status_t arch_mmu_init_aspace(arch_aspace_t *aspace, vaddr_t base, size_t size, uint flags) {
    LTRACEF("aspace %p, base %#lx, size %#zx, flags %#x\n", aspace, base, size, flags);

    DEBUG_ASSERT(aspace);

    // validate that the base + size is sane and doesn't wrap
    DEBUG_ASSERT(size > PAGE_SIZE);
    DEBUG_ASSERT(base + size - 1 > base);

    aspace->flags = flags;
    if (flags & ARCH_ASPACE_FLAG_KERNEL) {
        // at the moment we can only deal with address spaces as globally defined
        DEBUG_ASSERT(base == KERNEL_ASPACE_BASE);
        DEBUG_ASSERT(size == KERNEL_ASPACE_SIZE);

        aspace->base = base;
        aspace->size = size;
        aspace->pt_virt = kernel_pgtable;
        aspace->pt_phys = kernel_pgtable_phys;
    } else {
        PANIC_UNIMPLEMENTED;
    }

    LTRACEF("pt phys %#lx, pt virt %p\n", aspace->pt_phys, aspace->pt_virt);

    return NO_ERROR;
}

status_t arch_mmu_destroy_aspace(arch_aspace_t *aspace) {
    LTRACEF("aspace %p\n", aspace);

    PANIC_UNIMPLEMENTED;
}

// routines to map/unmap/query mappings per address space
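// arch_mmu_map: map 'count' consecutive 4K pages starting at vaddr to paddr.
// the walk restarts from the root table for every page, allocating intermediate
// tables on demand; large pages and replacing existing terminal entries are not
// handled yet.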
int arch_mmu_map(arch_aspace_t *aspace, vaddr_t vaddr, paddr_t paddr, uint count, const uint flags) {
    LTRACEF("vaddr %#lx paddr %#lx count %u flags %#x\n", vaddr, paddr, count, flags);

    DEBUG_ASSERT(aspace);

restart:
    if (count == 0)
        return NO_ERROR;

    // bootstrap the top level walk
    uint level = RISCV_MMU_PT_LEVELS - 1;
    uint index = vaddr_to_index(vaddr, level);
    volatile riscv_pte_t *ptep = aspace->pt_virt + index;

    for (;;) {
        LTRACEF_LEVEL(2, "level %u, index %u, pte %p (%#lx) va %#lx pa %#lx\n",
                      level, index, ptep, *ptep, vaddr, paddr);

        // look at our page table entry
        riscv_pte_t pte = *ptep;
        if (level > 0 && !(pte & RISCV_PTE_V)) {
            // invalid entry, will have to add a page table
            paddr_t ptp;
            volatile riscv_pte_t *ptv = alloc_ptable(&ptp);
            if (!ptv) {
                return ERR_NO_MEMORY;
            }

            LTRACEF_LEVEL(2, "new ptable table %p, pa %#lx\n", ptv, ptp);

            // link it in. a valid entry with R/W/X all zero is a page table link
            pte = RISCV_PTE_PPN_TO_PTE(ptp) | RISCV_PTE_V;
            *ptep = pte;

            // go one level deeper
            level--;
            index = vaddr_to_index(vaddr, level);
            ptep = ptv + index;
        } else if ((pte & RISCV_PTE_V) && !(pte & RISCV_PTE_PERM_MASK)) {
            // next level page table pointer (RWX = 0)
            paddr_t ptp = RISCV_PTE_PPN(pte);
            volatile riscv_pte_t *ptv = paddr_to_kvaddr(ptp);

            LTRACEF_LEVEL(2, "next level page table at %p, pa %#lx\n", ptv, ptp);

            // go one level deeper
            level--;
            index = vaddr_to_index(vaddr, level);
            ptep = ptv + index;
        } else if (pte & RISCV_PTE_V) {
            // terminal entry already exists
            if (level > 0) {
                PANIC_UNIMPLEMENTED_MSG("terminal large page entry");
            } else {
                PANIC_UNIMPLEMENTED_MSG("terminal page entry");
            }
        } else {
            DEBUG_ASSERT(level == 0 && !(pte & RISCV_PTE_V));

            // hit an open terminal page table entry, let's add ours
            pte = RISCV_PTE_PPN_TO_PTE(paddr);
            pte |= mmu_flags_to_pte(flags);
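            // preset the accessed and dirty bits so the hardware never needs to
            // update them; implementations are allowed to fault instead of
            // setting A/D automatically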
            pte |= RISCV_PTE_A | RISCV_PTE_D | RISCV_PTE_V;
            pte |= (aspace->flags & ARCH_ASPACE_FLAG_KERNEL) ? RISCV_PTE_G : 0;

            LTRACEF_LEVEL(2, "added new terminal entry: pte %#lx\n", pte);

            *ptep = pte;

            // simple algorithm: restart walk from top, one page at a time
            // TODO: more efficiently deal with runs and large pages
            count--;
            paddr += PAGE_SIZE;
            vaddr += PAGE_SIZE;
            goto restart;
        }

        // make sure we didn't decrement level one too many
        DEBUG_ASSERT(level < RISCV_MMU_PT_LEVELS);
    }
    // unreachable
}

int arch_mmu_unmap(arch_aspace_t *aspace, vaddr_t vaddr, uint count) {
    LTRACEF("vaddr %#lx count %u\n", vaddr, count);

    PANIC_UNIMPLEMENTED;
}

status_t arch_mmu_query(arch_aspace_t *aspace, const vaddr_t vaddr, paddr_t *paddr, uint *flags) {
    LTRACEF("aspace %p, vaddr %#lx\n", aspace, vaddr);

    DEBUG_ASSERT(aspace);

    // trim the vaddr to the aspace
    if (vaddr < aspace->base || vaddr > aspace->base + aspace->size - 1) {
        return ERR_OUT_OF_RANGE;
    }

    uint level = RISCV_MMU_PT_LEVELS - 1;
    uint index = vaddr_to_index(vaddr, level);
    volatile riscv_pte_t *ptep = aspace->pt_virt + index;

    // walk down through the levels, looking for a terminal entry that matches our address
    for (;;) {
        LTRACEF_LEVEL(2, "level %u, index %u, pte %p (%#lx)\n", level, index, ptep, *ptep);

        // look at our page table entry
        riscv_pte_t pte = *ptep;
        if ((pte & RISCV_PTE_V) == 0) {
            // invalid entry, terminate search
            return ERR_NOT_FOUND;
        } else if ((pte & RISCV_PTE_PERM_MASK) == 0) {
            // next level page table pointer (RWX = 0)
            paddr_t ptp = RISCV_PTE_PPN(pte);
            volatile riscv_pte_t *ptv = paddr_to_kvaddr(ptp);

            LTRACEF_LEVEL(2, "next level page table at %p, pa %#lx\n", ptv, ptp);

            // go one level deeper
            level--;
            index = vaddr_to_index(vaddr, level);
            ptep = ptv + index;
        } else {
            // terminal entry
            LTRACEF_LEVEL(3, "terminal entry\n");

            if (paddr) {
                // extract the ppn
                paddr_t pa = RISCV_PTE_PPN(pte);
                uintptr_t page_mask = page_mask_per_level(level);

                // add the va offset into the physical address
                *paddr = pa | (vaddr & page_mask);
                LTRACEF_LEVEL(3, "raw pa %#lx, page_mask %#lx, final pa %#lx\n", pa, page_mask, *paddr);
            }

            if (flags) {
                // compute the flags
                *flags = pte_flags_to_mmu_flags(pte);
                LTRACEF_LEVEL(3, "computed flags %#x\n", *flags);
            }

            return NO_ERROR;
        }

        // make sure we didn't decrement level one too many
        DEBUG_ASSERT(level < RISCV_MMU_PT_LEVELS);
    }
    // unreachable
}

// load a new user address space context.
// aspace argument NULL should load kernel-only context
void arch_mmu_context_switch(arch_aspace_t *aspace) {
    LTRACEF("aspace %p\n", aspace);

    PANIC_UNIMPLEMENTED;
}

#endif