adding paging support for x86

This commit is contained in:
Sergio Rodriguez
2015-06-11 12:05:12 -07:00
parent 69b20e84d3
commit 8b78376b6b
10 changed files with 1094 additions and 96 deletions

View File

@@ -30,14 +30,11 @@
#include <sys/types.h>
#include <string.h>
static tss_t system_tss;
tss_t system_tss;
static void* allocate_tss(void);
uint32_t default_tss = 0;
void arch_early_init(void)
{
x86_mmu_init();
platform_init_mmu_mappings();
/* enable caches here for now */
clear_in_cr0(X86_CR0_NW | X86_CR0_CD);
@@ -50,18 +47,23 @@ void arch_early_init(void)
system_tss.eflags = 0x00003002;
system_tss.bitmap = offsetof(tss_t, tss_bitmap);
system_tss.trace = 1; // trap on hardware task switch
set_global_desc(TSS_SELECTOR, &system_tss, sizeof(tss_t), 1, 0, 0, SEG_TYPE_TSS, 0, 0);
x86_ltr(TSS_SELECTOR);
}
/* Kernel does not need tss if running without user space */
static inline void set_kernel_tss(void)
{
default_tss = NULL;
system_tss.esp0 = default_tss;
}
void arch_init(void)
{
/* Kernel Space tss can be NULL */
set_kernel_tss();
}
void arch_chain_load(void *entry, ulong arg0, ulong arg1, ulong arg2, ulong arg3)
{
PANIC_UNIMPLEMENTED;
}

View File

@@ -104,7 +104,7 @@ real_start:
/* We jumped here in protected mode in a code segment that might no longer
be valid, do a long jump to our code segment; we use retf instead of
ljmp to be able to use relative labels */
movl $codesel, %ecx /*Pushing our code segment */
movl $codesel_32, %ecx /*Pushing our code segment */
push %ecx
movl $farjump, %ecx /*and jump address */
push %ecx
@@ -115,15 +115,86 @@ farjump:
movl $__bss_start, %edi /* starting address of the bss */
movl $__bss_end, %ecx /* find the length of the bss in bytes */
subl %edi, %ecx
shrl $2, %ecx /* convert to 32 bit words, since the bss is aligned anyway */
shrl $2, %ecx /* convert to 32 bit words, since the bss is aligned anyway */
2:
movl $0, (%edi)
addl $4, %edi
loop 2b
#ifdef PAE_MODE_ENABLED
/* Preparing PAE paging, we will use 2MB pages covering 1GB
for initial bootstrap, this page table will be 1 to 1 */
/* Setting the First PDPTE with a PD table reference*/
xorl %eax, %eax
movl $pdp, %eax
orl $0x01, %eax
movl %eax, (pdpt)
movl $pdp, %esi
movl $0x1ff, %ecx
fill_pdp:
movl $0x1ff, %eax
subl %ecx, %eax
shll $21,%eax
orl $0x83, %eax
movl %eax, (%esi)
addl $8,%esi
loop fill_pdp
/* Set PDPT in CR3 */
movl $pdpt, %eax
mov %eax, %cr3
/* Enabling PAE*/
mov %cr4, %eax
btsl $(5), %eax
mov %eax, %cr4
/* Enabling Paging and from this point we are in
32 bit compatibility mode */
mov %cr0, %eax
btsl $(31), %eax
mov %eax, %cr0
#else
/* Set PD in CR3 */
movl $pd, %eax
mov %eax, %cr3
movl $pd, %esi
movl $0x100, %ecx
fill_pd:
xor %eax, %eax
mov $0x100, %eax
sub %ecx, %eax
shll $22,%eax
orl $0x87, %eax
movl %eax, (%esi)
addl $4,%esi
loop fill_pd
/* Enabling PSE and paging; from this point we are in 32 bit paging mode */
xorl %eax, %eax
mov %cr4, %eax
orl $0x10, %eax
mov %eax, %cr4
xorl %eax, %eax
mov %cr0, %eax
btsl $(31), %eax
mov %eax, %cr0
#endif
/* Flushing TLB's */
mov %cr3,%eax
mov %eax,%cr3
main_lk:
/* call the main module */
call lk_main
0: /* just sit around waiting for interrupts */
hlt /* interrupts will unhalt the processor */
pause
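For reference, the non-PAE loop above builds a minimal identity map: each of the 256 page-directory entries maps one 4MB page at (index << 22) with flags 0x87 (present, writable, user, page-size), covering the first 1GB 1:1; the PAE fill_pdp loop is the same idea with 2MB pages (21-bit shift) and flags 0x83. A rough C equivalent, purely as an illustration and not part of the commit:

#include <stdint.h>

/* Illustrative sketch of the fill_pd loop: build a 1:1 map of the first
 * 1GB using 4MB pages. 0x87 = present | RW | user | PS. */
static void fill_identity_pd(uint32_t *pd)
{
    for (uint32_t i = 0; i < 0x100; i++)
        pd[i] = (i << 22) | 0x87;
}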
@@ -173,6 +244,7 @@ interrupt_common:
movl %esp, %eax /* store pointer to iframe, using same method */
pushl %eax
call platform_irq
cmpl $0,%eax
@@ -180,6 +252,7 @@ interrupt_common:
call thread_preempt
0:
popl %eax /* drop pointer to iframe */
popl %eax /* restore task_esp, stack switch can occur here if task_esp is modified */
movl %eax, %esp
@@ -198,7 +271,6 @@ interrupt_common:
_multiboot_info:
.int 0
.global _gdtr
_gdtr:
.short _gdt_end - _gdt - 1
.int _gdt
@@ -209,69 +281,99 @@ _gdt:
.int 0
/* ring 0 descriptors */
.set codesel, . - _gdt
_code_gde:
.set codesel_32, . - _gdt
_code_32_gde:
.short 0xffff /* limit 15:00 */
.short 0x0000 /* base 15:00 */
.byte 0x00 /* base 23:16 */
.byte 0x00 /* base 23:16 */
.byte 0b10011010 /* P(1) DPL(00) S(1) 1 C(0) R(1) A(0) */
.byte 0b11001111 /* G(1) D(1) 0 0 limit 19:16 */
.byte 0x0 /* base 31:24 */
.byte 0x0 /* base 31:24 */
.set datasel, . - _gdt
_data_gde:
.short 0xffff /* limit 15:00 */
.short 0x0000 /* base 15:00 */
.byte 0x00 /* base 23:16 */
.byte 0x00 /* base 23:16 */
.byte 0b10010010 /* P(1) DPL(00) S(1) 0 E(0) W(1) A(0) */
.byte 0b11001111 /* G(1) B(1) 0 0 limit 19:16 */
.byte 0x0 /* base 31:24 */
.set videosel, . - _gdt
_video_gde:
.short 0xffff /* limit 15:00 */
.short 0x8000 /* base 15:00 */
.byte 0x0b /* base 23:16 */
.byte 0b10010010 /* P(1) DPL(00) S(1) 0 E(0) W(1) A(0) */
.byte 0b11001111 /* G(1) B(1) 0 0 limit 19:16 */
.byte 0x0 /* base 31:24 */
.if 1
/* ring 3 descriptors */
.set user_codesel, . - _gdt
_user_code_gde:
.byte 0x0 /* base 31:24 */
.set user_codesel_32, . - _gdt
_user_code_32_gde:
.short 0xffff /* limit 15:00 */
.short 0x0000 /* base 15:00 */
.byte 0x00 /* base 23:16 */
.byte 0x00 /* base 23:16 */
.byte 0b11111010 /* P(1) DPL(11) S(1) 1 C(0) R(1) A(0) */
.byte 0b11001111 /* G(1) D(1) 0 0 limit 19:16 */
.byte 0x0 /* base 31:24 */
.byte 0x0 /* base 31:24 */
.set user_datasel, . - _gdt
_user_data_gde:
_user_data_32_gde:
.short 0xffff /* limit 15:00 */
.short 0x0000 /* base 15:00 */
.byte 0x00 /* base 23:16 */
.byte 0x00 /* base 23:16 */
.byte 0b11110010 /* P(1) DPL(11) S(1) 0 E(0) W(1) A(0) */
.byte 0b11001111 /* G(1) B(1) 0 0 limit 19:16 */
.byte 0x0 /* base 31:24 */
.endif
.byte 0x0 /* base 31:24 */
.set codesel_64, . - _gdt
_code_64_gde:
.short 0xffff /* limit 15:00 */
.short 0x0000 /* base 15:00 */
.byte 0x00 /* base 23:16 */
.byte 0b10011010 /* P(1) DPL(00) S(1) 1 C(0) R(1) A(0) */
.byte 0b10101111 /* G(1) D(0) L(1) AVL(0) limit 19:16 */
.byte 0x0 /* base 31:24 */
.set datasel_64, . - _gdt
_data_64_gde:
.short 0xffff /* limit 15:00 */
.short 0x0000 /* base 15:00 */
.byte 0x00 /* base 23:16 */
.byte 0b10010010 /* P(1) DPL(00) S(1) 1 C(0) R(1) A(0) */
.byte 0b11001111 /* G(1) B(1) 0 AVL(0) limit 19:16 */
.byte 0x0 /* base 31:24 */
.quad 0x0000000000000000
.quad 0x0000000000000000
.set user_codesel_64, . - _gdt
_user_code_64_gde:
.short 0xffff /* limit 15:00 */
.short 0x0000 /* base 15:00 */
.byte 0x00 /* base 23:16 */
.byte 0b11111010 /* P(1) DPL(11) S(1) 1 C(0) R(1) A(0) */
.byte 0b10101111 /* G(1) D(1) L(0) AVL(0) limit 19:16 */
.byte 0x0 /* base 31:24 */
.set user_datasel_64, . - _gdt
_user_data_64_gde:
.short 0xffff /* limit 15:00 */
.short 0x0000 /* base 15:00 */
.byte 0x00 /* base 23:16 */
.byte 0b11110010 /* P(1) DPL(11) S(1) 0 E(0) W(1) A(0) */
.byte 0b11001111 /* G(1) B(1) 0 0 limit 19:16 */
.byte 0x0 /* base 31:24 */
.set null_2, . - _gdt
_null_2:
.int 0
.int 0
/* TSS descriptor */
.if 1
.set tsssel, . - _gdt
_tss_gde:
.short 0 /* limit 15:00 */
.short 0 /* base 15:00 */
.byte 0 /* base 23:16 */
.byte 0xe9 /* P(1) DPL(11) 0 10 B(0) 1 */
.byte 0x00 /* G(0) 0 0 AVL(0) limit 19:16 */
.short 0 /* base 31:24 */
.endif
.byte 0x89 /* P(1) DPL(11) 0 10 B(0) 1 */
.byte 0x80 /* G(0) 0 0 AVL(0) limit 19:16 */
.short 0 /* base 31:24 */
.global _gdt_end
_gdt_end:
.align 8
.global _idtr
_idtr:
.short _idt_end - _idt - 1 /* IDT limit */
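The flat descriptors in the GDT above all encode base 0 and limit 0xfffff with 4KB granularity; only the access and flags bytes (DPL, code/data, D/L bits) differ between entries. A small helper, shown only as an illustration of how those byte values are derived (not part of the commit):

#include <stdint.h>

/* Pack a GDT descriptor from base/limit/access/flags. For _code_32_gde,
 * make_descriptor(0, 0xfffff, 0x9a, 0xc) yields the bytes listed above
 * (access 0b10011010, flags/limit byte 0xcf). */
static uint64_t make_descriptor(uint32_t base, uint32_t limit,
                                uint8_t access, uint8_t flags)
{
    uint64_t d;
    d  = (uint64_t)(limit & 0xffff);                  /* limit 15:00 */
    d |= (uint64_t)(base  & 0xffff) << 16;            /* base  15:00 */
    d |= (uint64_t)((base >> 16) & 0xff) << 32;       /* base  23:16 */
    d |= (uint64_t)access << 40;                      /* access byte  */
    d |= (uint64_t)(((flags & 0xf) << 4) | ((limit >> 16) & 0xf)) << 48;
    d |= (uint64_t)((base >> 24) & 0xff) << 56;       /* base  31:24 */
    return d;
}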
@@ -284,7 +386,7 @@ _idt:
.set i, 0
.rept NUM_INT-1
.short 0 /* low 16 bits of ISR offset (_isr#i & 0FFFFh) */
.short codesel /* selector */
.short codesel_32 /* selector */
.byte 0
.byte 0x8e /* present, ring 0, 32-bit interrupt gate */
.short 0 /* high 16 bits of ISR offset (_isr#i / 65536) */
@@ -295,7 +397,7 @@ _idt:
/* syscall int (ring 3) */
_idt30:
.short 0 /* low 16 bits of ISR offset (_isr#i & 0FFFFh) */
.short codesel /* selector */
.short codesel_32 /* selector */
.byte 0
.byte 0xee /* present, ring 3, 32-bit interrupt gate */
.short 0 /* high 16 bits of ISR offset (_isr#i / 65536) */
@@ -303,6 +405,26 @@ _idt30:
.global _idt_end
_idt_end:
/* Memory for the initial page table, we will use 2 pages for a
1 to 1 mapping that covers 1GB of physical memory */
.align 4096
.fill 4096
#ifdef PAE_MODE_ENABLED
.align 4096
pdpt:
.fill 4096
pdp:
.fill 4096
#else
.align 4096
pd:
.fill 4096
#endif
.align 4096
.fill 4096
.bss
.align 4096
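These .fill directives reserve the boot page tables directly in the image: in PAE mode one page for the PDPT and one for the PD, otherwise a single page directory, all 4KB-aligned so the address can be loaded into CR3. A C-side sketch of the same reservation (illustration only, assuming the same PAE_MODE_ENABLED switch):

#include <stdint.h>

/* Page-aligned boot page tables, mirroring the .fill 4096 reservations. */
#ifdef PAE_MODE_ENABLED
static uint64_t boot_pdpt[512] __attribute__((aligned(4096)));
static uint64_t boot_pdp[512]  __attribute__((aligned(4096)));
#else
static uint32_t boot_pd[1024]  __attribute__((aligned(4096)));
#endif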

View File

@@ -24,6 +24,7 @@
#include <arch/x86.h>
#include <kernel/thread.h>
static void dump_fault_frame(struct x86_iframe *frame)
{
dprintf(CRITICAL, " CS: %04x EIP: %08x EFL: %08x CR2: %08x\n",
@@ -74,3 +75,67 @@ void x86_unhandled_exception(struct x86_iframe *frame)
{
exception_die(frame, "unhandled exception, halting\n");
}
void x86_pfe_handler(struct x86_iframe *frame)
{
/* Handle a page fault exception */
addr_t v_addr, ssp, esp, ip, rip;
uint32_t error_code;
thread_t *current_thread;
v_addr = x86_get_cr2();
error_code = frame->err_code;
ssp = frame->user_ss & X86_8BYTE_MASK;
esp = frame->user_esp;
ip = frame->cs & X86_8BYTE_MASK;
rip = frame->eip;
#ifdef PAGE_FAULT_DEBUG_INFO
dprintf(CRITICAL, "<PAGE FAULT> Instruction Pointer = 0x%x:0x%x\n",
(unsigned int)ip,
(unsigned int)rip);
dprintf(CRITICAL, "<PAGE FAULT> Stack Pointer = 0x%x:0x%x\n",
(unsigned int)ssp,
(unsigned int)esp);
dprintf(CRITICAL, "<PAGE FAULT> Fault Linear Address = 0x%x\n",
(unsigned int)v_addr);
dprintf(CRITICAL, "<PAGE FAULT> Error Code Value = 0x%x\n",
error_code);
dprintf(CRITICAL, "<PAGE FAULT> Error Code Type = %s %s %s%s, %s\n",
error_code & PFEX_U ? "user" : "supervisor",
error_code & PFEX_W ? "write" : "read",
error_code & PFEX_I ? "instruction" : "data",
error_code & PFEX_RSV ? " rsv" : "",
error_code & PFEX_P ? "protection violation" : "page not present");
#endif
current_thread = get_current_thread();
dump_thread(current_thread);
if(error_code & PFEX_U) {
// User mode page fault
switch(error_code) {
case 4:
case 5:
case 6:
case 7:
#ifdef PAGE_FAULT_DEBUG_INFO
thread_detach(current_thread);
#else
thread_exit(current_thread->retcode);
#endif
break;
}
}
else {
// Supervisor mode page fault
switch(error_code) {
case 0:
case 1:
case 2:
case 3:
exception_die(frame, "Page Fault exception, halting\n");
break;
}
}
}
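The switch statements above test specific error-code values, which are just combinations of the PFEX_* bits this commit adds to arch/x86.h: cases 4-7 all have PFEX_U set (user-mode fault) and cases 0-3 have it clear (supervisor fault), with PFEX_W and PFEX_P distinguishing write/read and protection/not-present. A sketch of a helper that makes this explicit (assumed name, not part of the commit):

/* Summarize a page-fault error code using the PFEX_* bit definitions. */
static const char *pf_error_summary(uint32_t err)
{
    if (err & PFEX_U)
        return (err & PFEX_W) ? "user write fault" : "user read fault";
    return (err & PFEX_W) ? "supervisor write fault" : "supervisor read fault";
}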

View File

@@ -24,11 +24,14 @@
#define __ARCH_CPU_H
#define PAGE_SIZE 4096
#define PAGE_SIZE_SHIFT 12
// TODO: define to resolve to platform setup discovered value
#define CACHE_LINE 32
#define ARCH_DEFAULT_STACK_SIZE 8192
#define DEFAULT_TSS 4096
#endif

View File

@@ -25,10 +25,21 @@
#include <compiler.h>
#include <sys/types.h>
#include <stdlib.h>
#include <stdbool.h>
__BEGIN_CDECLS
void x86_mmu_init(void);
#define PFEX_P 0x01
#define PFEX_W 0x02
#define PFEX_U 0x04
#define PFEX_RSV 0x08
#define PFEX_I 0x10
#define X86_8BYTE_MASK 0xFFFFFFFF
#define X86_CPUID_ADDR_WIDTH 0x80000008
void arch_mmu_init(void);
addr_t *x86_create_new_cr3(void);
struct x86_iframe {
uint32_t pivot; // stack switch pivot
@@ -68,14 +79,20 @@ typedef struct {
uint8_t tss_bitmap[8192];
} __PACKED tss_t;
#define X86_CR0_PE 0x00000001 /* protected mode enable */
#define X86_CR0_MP 0x00000002 /* monitor coprocessor */
#define X86_CR0_EM 0x00000004 /* emulation */
#define X86_CR0_TS 0x00000008 /* task switched */
#define X86_CR0_WP 0x00010000 /* supervisor write protect */
#define X86_CR0_NW 0x20000000 /* not write-through */
#define X86_CR0_CD 0x40000000 /* cache disable */
#define X86_CR0_PG 0x80000000 /* enable paging */
#define X86_CR0_PE 0x00000001 /* protected mode enable */
#define X86_CR0_MP 0x00000002 /* monitor coprocessor */
#define X86_CR0_EM 0x00000004 /* emulation */
#define X86_CR0_TS 0x00000008 /* task switched */
#define X86_CR0_WP 0x00010000 /* supervisor write protect */
#define X86_CR0_NW 0x20000000 /* not write-through */
#define X86_CR0_CD 0x40000000 /* cache disable */
#define X86_CR0_PG 0x80000000 /* enable paging */
#define X86_CR4_SMEP 0x00100000 /* SMEP protection enabling */
#define X86_CR4_SMAP 0x00200000 /* SMAP protection enabling */
#define X86_CR4_PAE 0x00000020 /* PAE paging */
#define x86_EFER_NXE 0x00000800 /* to enable execute disable bit */
#define x86_MSR_EFER 0xc0000080 /* EFER Model Specific Register id */
#define X86_CR4_PSE 0xffffffef /* Disabling PSE bit in the CR4 */
static inline void set_in_cr0(uint32_t mask)
{
@@ -122,24 +139,25 @@ static inline uint32_t x86_save_eflags(void)
{
unsigned int state;
__asm__ volatile(
"pushfl;"
"popl %0"
: "=rm" (state)
:: "memory");
__asm__ volatile(
"pushfl;"
"popl %0"
: "=rm" (state)
:: "memory");
return state;
return state;
}
static inline void x86_restore_eflags(uint32_t eflags)
{
__asm__ volatile(
"pushl %0;"
"popfl"
:: "g" (eflags)
: "memory", "cc");
"pushl %0;"
"popfl"
:: "g" (eflags)
: "memory", "cc");
}
#define rdtsc(low,high) \
__asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
@@ -281,6 +299,154 @@ static inline void outpdrep(uint16_t _port, uint32_t *_buffer,
"c" (_writes));
}
__END_CDECLS
static inline uint64_t read_msr (uint32_t msr_id)
{
uint64_t msr_read_val = 0;
uint32_t low_val = 0;
uint32_t high_val = 0;
__asm__ __volatile__ (
"rdmsr \n\t"
: "=a" (low_val), "=d"(high_val)
: "c" (msr_id));
msr_read_val = high_val;
msr_read_val = (msr_read_val << 32) | low_val;
return msr_read_val;
}
static inline void write_msr (uint32_t msr_id, uint64_t msr_write_val)
{
uint32_t low_val = (uint32_t)msr_write_val;
uint32_t high_val = (uint32_t)(msr_write_val >> 32);
__asm__ __volatile__ (
"wrmsr \n\t"
: : "c" (msr_id), "a" (low_val), "d"(high_val));
}
static inline uint32_t x86_get_cr3(void)
{
uint32_t rv;
__asm__ __volatile__ (
"mov %%cr3, %0"
: "=r" (rv));
return rv;
}
static inline void x86_set_cr3(uint32_t in_val)
{
__asm__ __volatile__ (
"mov %0,%%cr3 \n\t"
:
:"r" (in_val));
}
static inline uint32_t x86_get_cr0(void)
{
uint32_t rv;
__asm__ __volatile__ (
"mov %%cr0, %0 \n\t"
: "=r" (rv));
return rv;
}
static inline uint32_t x86_get_cr4(void)
{
uint32_t rv;
__asm__ __volatile__ (
"mov %%cr4, %0 \n\t"
: "=r" (rv));
return rv;
}
static inline void x86_set_cr0(uint32_t in_val)
{
__asm__ __volatile__ (
"mov %0,%%cr0 \n\t"
:
:"r" (in_val));
}
static inline void x86_set_cr4(uint32_t in_val)
{
__asm__ __volatile__ (
"mov %0,%%cr4 \n\t"
:
:"r" (in_val));
}
static inline uint32_t x86_get_address_width(void)
{
uint32_t rv;
__asm__ __volatile__ (
"cpuid \n\t"
:"=a" (rv)
:"a" (X86_CPUID_ADDR_WIDTH));
/* Extracting bit 15:8 from eax register */
return ((rv >> 8) & 0x0ff);
}
static inline bool x86_is_paging_enabled(void)
{
if(x86_get_cr0() & X86_CR0_PG)
return true;
return false;
}
static inline uint32_t x86_is_PAE_enabled(void)
{
if(x86_is_paging_enabled() == false)
return false;
if(!(x86_get_cr4() & X86_CR4_PAE))
return false;
return true;
}
static inline uint32_t lapic_avail(void)
{
uint32_t smp = 0x01;
__asm__ __volatile__ (
"cpuid \n\t"
:"=d" (smp)
:"a" (smp));
return ((smp>>0x08) & 0x1);
//return smp;
}
static inline uint32_t check_smep_avail(void)
{
uint32_t reg_a = 0x07;
uint32_t reg_b = 0x0;
uint32_t reg_c = 0x0;
__asm__ __volatile__ (
"cpuid \n\t"
:"=b" (reg_b)
:"a" (reg_a),"c" (reg_c));
return ((reg_b>>0x06) & 0x1);
}
static inline uint32_t check_smap_avail(void)
{
uint32_t reg_a = 0x07;
uint32_t reg_b = 0x0;
uint32_t reg_c = 0x0;
__asm__ __volatile__ (
"cpuid \n\t"
:"=b" (reg_b)
:"a" (reg_a),"c" (reg_c));
return ((reg_b>>0x13) & 0x1);
}
__END_CDECLS
#endif
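The new inline helpers above each wrap a single instruction (rdmsr/wrmsr, CR0/CR3/CR4 moves, cpuid leaves), so they compose naturally. A minimal usage sketch, assuming a debug hook such as the following is wanted (the function name and dprintf level are assumptions, not part of the commit):

/* Report the current paging configuration using the helpers above. */
static void x86_dump_paging_state(void)
{
    dprintf(INFO, "paging=%d PAE=%u phys-width=%u SMEP=%u SMAP=%u\n",
            x86_is_paging_enabled(), x86_is_PAE_enabled(),
            x86_get_address_width(), check_smep_avail(), check_smap_avail());
}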

View File

@@ -1,5 +1,6 @@
/*
* Copyright (c) 2009 Corey Tabaka
* Copyright (c) 2014 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
@@ -28,14 +29,22 @@
/*
* System Selectors
*/
#define CODE_SELECTOR 0x08
#define DATA_SELECTOR 0x10
#define VIDEO_SELECTOR 0x18
#define TSS_SELECTOR 0x30
#define NULL_SELECTOR 0x00
#define USER_CODE_SELECTOR 0x23
#define USER_DATA_SELECTOR 0x2b
/********* x86 selectors *********/
#define CODE_SELECTOR 0x08
#define DATA_SELECTOR 0x10
#define USER_CODE_32_SELECTOR 0x18
#define USER_DATA_32_SELECTOR 0x20
#define NULL_2_SELECTOR 0x28
/******* x86-64 selectors ********/
#define CODE_64_SELECTOR 0x30
#define STACK_64_SELECTOR 0x38
#define USER_CODE_64_SELECTOR 0x50
#define USER_DATA_64_SELECTOR 0x58
#define TSS_SELECTOR 0x60
/*
* Descriptor Types
*/
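The selector values above are consecutive 8-byte GDT slots; an x86 selector encodes (index << 3) | (TI << 2) | RPL, so for example TSS_SELECTOR 0x60 is GDT index 12 with RPL 0. Two trivial helpers, included only to illustrate that layout (not part of the commit):

static inline uint16_t gdt_index(uint16_t sel) { return sel >> 3; }
static inline uint16_t sel_rpl(uint16_t sel)   { return sel & 0x3; }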

View File

@@ -20,21 +20,103 @@
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef __ARCH_ARM_MMU_H
#define __ARCH_ARM_MMU_H
//#ifndef __ARCH_ARM_MMU_H
//#define __ARCH_ARM_MMU_H
#include <sys/types.h>
#include <compiler.h>
__BEGIN_CDECLS
void arm_mmu_init(void);
void x86_mmu_init(void);
#define MMU_FLAG_CACHED 0x1
#define MMU_FLAG_BUFFERED 0x2
#define MMU_FLAG_READWRITE 0x4
void arm_mmu_map_section(addr_t paddr, addr_t vaddr, uint flags);
#define X86_MMU_PG_P 0x001 /* P Valid */
#define X86_MMU_PG_RW 0x002 /* R/W Read/Write */
#define X86_MMU_PG_U 0x004 /* U/S User/Supervisor */
#define X86_MMU_PG_PS 0x080 /* PS Page size (0=4k,1=4M) */
#define X86_MMU_PG_PTE_PAT 0x080 /* PAT PAT index */
#define X86_MMU_PG_G 0x100 /* G Global */
#define X86_MMU_CLEAR 0x0
#define X86_DIRTY_ACCESS_MASK 0xf9f
#define X86_MMU_CACHE_DISABLE 0x010 /* C Cache disable */
#define PAGE_SIZE 4096
#define PAGE_DIV_SHIFT 12
#ifdef PAE_MODE_ENABLED
/* PAE mode */
#define X86_PDPT_ADDR_MASK (0x00000000ffffffe0ul)
#define X86_PG_FRAME (0x000ffffffffff000ul)
#define X86_PHY_ADDR_MASK (0x000ffffffffffffful)
#define X86_FLAGS_MASK (0x0000000000000ffful) /* NX Bit is ignored in the PAE mode */
#define X86_PTE_NOT_PRESENT (0xFFFFFFFFFFFFFFFEul)
#define X86_2MB_PAGE_FRAME (0x000fffffffe00000ul)
#define PAGE_OFFSET_MASK_4KB (0x0000000000000ffful)
#define PAGE_OFFSET_MASK_2MB (0x00000000001ffffful)
#define X86_MMU_PG_NX (1ul << 63)
#define X86_PAE_PAGING_LEVELS 3
#define PDP_SHIFT 30
#define PD_SHIFT 21
#define PT_SHIFT 12
#define ADDR_OFFSET 9
#define PDPT_ADDR_OFFSET 2
#define NO_OF_PT_ENTRIES 512
#else
/* non PAE mode */
#define X86_PG_FRAME (0xfffff000)
#define X86_FLAGS_MASK (0x00000fff)
#define X86_PTE_NOT_PRESENT (0xfffffffe)
#define X86_4MB_PAGE_FRAME (0xffc00000)
#define PAGE_OFFSET_MASK_4KB (0x00000fff)
#define PAGE_OFFSET_MASK_4MB (0x003fffff)
#define NO_OF_PT_ENTRIES 1024
#define X86_PAGING_LEVELS 2
#define PD_SHIFT 22
#define PT_SHIFT 12
#define ADDR_OFFSET 10
#endif
#define X86_PHYS_TO_VIRT(x) (x)
#define X86_VIRT_TO_PHYS(x) (x)
/* Different page table levels in the page table mgmt hierarchy */
enum page_table_levels {
PF_L,
PT_L,
PD_L,
#ifdef PAE_MODE_ENABLED
PDP_L
#endif
} page_level;
struct map_range {
vaddr_t start_vaddr;
#ifdef PAE_MODE_ENABLED
uint64_t start_paddr; /* Physical address in the PAE mode is 64 bits wide */
#else
paddr_t start_paddr; /* Physical address in non-PAE mode is 32 bits wide */
#endif
uint32_t size;
};
#ifdef PAE_MODE_ENABLED
typedef uint64_t map_addr_t;
typedef uint64_t arch_flags_t;
#else
typedef uint32_t map_addr_t;
typedef uint32_t arch_flags_t;
#endif
status_t x86_mmu_map_range (map_addr_t pdpt, struct map_range *range, arch_flags_t flags);
status_t x86_mmu_check_mapping (map_addr_t pdpt, map_addr_t paddr,
vaddr_t vaddr, arch_flags_t in_flags,
uint32_t *ret_level, arch_flags_t *ret_flags,
map_addr_t *last_valid_entry);
__END_CDECLS
#endif
//#endif
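With the non-PAE constants above (PD_SHIFT 22, PT_SHIFT 12, ADDR_OFFSET 10), a 32-bit virtual address splits into a 10-bit page-directory index, a 10-bit page-table index, and a 12-bit page offset. A worked sketch of that decomposition (illustration only, helper names are assumptions):

#include <stdint.h>

static inline uint32_t vaddr_pd_index(uint32_t vaddr)
{
    return (vaddr >> 22) & ((1u << 10) - 1);   /* PD_SHIFT, ADDR_OFFSET */
}

static inline uint32_t vaddr_pt_index(uint32_t vaddr)
{
    return (vaddr >> 12) & ((1u << 10) - 1);   /* PT_SHIFT, ADDR_OFFSET */
}

static inline uint32_t vaddr_page_offset(uint32_t vaddr)
{
    return vaddr & 0xfff;                      /* PAGE_OFFSET_MASK_4KB */
}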

View File

@@ -26,12 +26,14 @@ ENTRY(_start)
SECTIONS
{
.text 0x0200000 : {
__code_start = .;
KEEP(*(.text.boot))
*(.text* .sram.text)
*(.gnu.linkonce.t.*)
__code_end = .;
} =0x9090
.rodata : ALIGN(4) {
.rodata : ALIGN(4096) {
__rodata_start = .;
*(.rodata*)
*(.gnu.linkonce.r.*)
@@ -57,7 +59,7 @@ INCLUDE "arch/shared_data_sections.ld"
__data_end = .;
.bss : ALIGN(4) {
.bss : ALIGN(4096) {
__bss_start = .;
*(.bss*)
*(.gnu.linkonce.b.*)
@@ -66,6 +68,7 @@ INCLUDE "arch/shared_data_sections.ld"
__bss_end = .;
}
_end = .;
/* put a symbol arbitrarily 4MB past the end of the kernel */

View File

@@ -1,5 +1,6 @@
/*
* Copyright (c) 2009 Corey Tabaka
* Copyright (c) 2014 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
@@ -26,19 +27,553 @@
#include <arch.h>
#include <arch/x86.h>
#include <arch/x86/mmu.h>
#include <stdlib.h>
#include <string.h>
#include <arch/mmu.h>
#include <assert.h>
#include <err.h>
#include <arch/arch_ops.h>
void x86_mmu_map_section(addr_t paddr, addr_t vaddr, uint flags)
#ifdef PAE_MODE_ENABLED
/* The PDP table address is 32 bits wide in PAE mode, but the PDP entries are 64 bits wide */
static inline map_addr_t get_pdp_entry_from_pdp_table(vaddr_t vaddr, map_addr_t pdpt)
{
// TODO: stuff
//x86_invalidate_tlb();
uint32_t pdp_index;
map_addr_t *pdp_table;
pdp_index = ((vaddr >> PDP_SHIFT) & ((1ul << PDPT_ADDR_OFFSET) - 1));
pdp_table = (map_addr_t *)(pdpt & X86_PDPT_ADDR_MASK);
return X86_PHYS_TO_VIRT(pdp_table[pdp_index]);
}
void x86_mmu_init(void)
static inline map_addr_t get_pfn_from_pt(map_addr_t pt)
{
// TODO: stuff
map_addr_t pfn;
pfn = (pt & X86_2MB_PAGE_FRAME);
return X86_PHYS_TO_VIRT(pfn);
}
void arch_disable_mmu(void)
#else
static inline map_addr_t get_pfn_from_pde(map_addr_t pde)
{
// TODO: stuff
map_addr_t pfn;
pfn = (pde & X86_4MB_PAGE_FRAME);
return X86_PHYS_TO_VIRT(pfn);
}
#endif
static inline map_addr_t get_pd_entry_from_pd_table(vaddr_t vaddr, map_addr_t pdt)
{
uint32_t pd_index;
map_addr_t *pd_table;
pd_index = ((vaddr >> PD_SHIFT) & ((1 << ADDR_OFFSET) - 1));
pd_table = (map_addr_t *)(pdt & X86_PG_FRAME);
return X86_PHYS_TO_VIRT(pd_table[pd_index]);
}
static inline map_addr_t get_pt_entry_from_page_table(vaddr_t vaddr, map_addr_t pt)
{
uint32_t pt_index;
map_addr_t *pt_table;
pt_index = ((vaddr >> PT_SHIFT) & ((1 << ADDR_OFFSET) - 1));
pt_table = (map_addr_t *)(pt & X86_PG_FRAME);
return X86_PHYS_TO_VIRT(pt_table[pt_index]);
}
static inline map_addr_t get_pfn_from_pte(map_addr_t pte)
{
map_addr_t pfn;
pfn = (pte & X86_PG_FRAME);
return X86_PHYS_TO_VIRT(pfn);
}
/**
* @brief Walk the page table structures - supported for both PAE & non-PAE modes
*
*/
static status_t x86_mmu_page_walking(map_addr_t init_table, vaddr_t vaddr, uint32_t *ret_level,
arch_flags_t *existing_flags, map_addr_t *last_valid_entry)
{
map_addr_t pt, pte, pdt;
#ifdef PAE_MODE_ENABLED
map_addr_t pdpt;
#endif
DEBUG_ASSERT(init_table);
if((!ret_level) || (!last_valid_entry) || (!existing_flags)) {
return ERR_INVALID_ARGS;
}
*existing_flags = 0;
#ifdef PAE_MODE_ENABLED
pdpt = init_table; /* First level table in PAE mode is pdpt */
*ret_level = PDP_L;
*last_valid_entry = pdpt;
pdt = get_pdp_entry_from_pdp_table(vaddr, pdpt);
if ((pdt & X86_MMU_PG_P) == 0) {
*ret_level = PDP_L;
*last_valid_entry = pdpt;
return ERR_NOT_FOUND;
}
pt = get_pd_entry_from_pd_table(vaddr, pdt);
if ((pt & X86_MMU_PG_P) == 0) {
*ret_level = PD_L;
*last_valid_entry = pdt;
return ERR_NOT_FOUND;
}
#else
pdt = init_table; /* First table in non PAE mode is pdt */
*ret_level = PD_L;
*last_valid_entry = pdt;
pt = get_pd_entry_from_pd_table(vaddr, pdt);
if ((pt & X86_MMU_PG_P) == 0)
return ERR_NOT_FOUND;
#endif
/* 4 MB pages (non PAE mode) and 2 MB pages (PAE mode) */
/* In this case, the page directory entry is NOT actually a PT (page table) */
if (pt & X86_MMU_PG_PS) {
#ifdef PAE_MODE_ENABLED
/* Getting the Page frame & adding the 4KB page offset from the vaddr */
*last_valid_entry = get_pfn_from_pt(pt) + (vaddr & PAGE_OFFSET_MASK_2MB);
#else
/* Getting the Page frame & adding the 4MB page offset from the vaddr */
*last_valid_entry = get_pfn_from_pde(pt) + (vaddr & PAGE_OFFSET_MASK_4MB);
#endif
*existing_flags = (X86_PHYS_TO_VIRT(pt)) & X86_FLAGS_MASK;
goto last;
}
/* 4 KB pages */
pte = get_pt_entry_from_page_table(vaddr, pt);
if ((pte & X86_MMU_PG_P) == 0) {
*ret_level = PT_L;
*last_valid_entry = pt;
return ERR_NOT_FOUND;
}
/* Getting the Page frame & adding the 4KB page offset from the vaddr */
*last_valid_entry = get_pfn_from_pte(pte) + (vaddr & PAGE_OFFSET_MASK_4KB);
*existing_flags = (X86_PHYS_TO_VIRT(pte)) & X86_FLAGS_MASK;
last:
*ret_level = PF_L;
return NO_ERROR;
}
/**
* Walk the page table structures to see if the mapping between a virtual address
* and a physical address exists. Also, check the flags.
*
*/
status_t x86_mmu_check_mapping(map_addr_t init_table, map_addr_t paddr,
vaddr_t vaddr, arch_flags_t in_flags,
uint32_t *ret_level, arch_flags_t *ret_flags,
map_addr_t *last_valid_entry)
{
status_t status;
arch_flags_t existing_flags = 0;
DEBUG_ASSERT(init_table);
if((!ret_level) || (!last_valid_entry) || (!ret_flags) ||
(!IS_ALIGNED(vaddr, PAGE_SIZE)) ||
(!IS_ALIGNED(paddr, PAGE_SIZE))) {
return ERR_INVALID_ARGS;
}
status = x86_mmu_page_walking(init_table, vaddr, ret_level, &existing_flags, last_valid_entry);
if(status || ((*last_valid_entry) != paddr)) {
/* We did not get far enough to check the access flags for the mapping */
*ret_flags = in_flags;
return ERR_NOT_FOUND;
}
/* Check the access flags for the mapped address. If the XOR of the existing
 * and requested flags is non-zero, the access flags differ, and the returned
 * flags contain the differing bits.
 */
*ret_flags = (in_flags ^ existing_flags) & X86_DIRTY_ACCESS_MASK;
if(!(*ret_flags))
return NO_ERROR;
return ERR_NOT_FOUND;
}
#ifdef PAE_MODE_ENABLED
static void update_pdp_entry(vaddr_t vaddr, map_addr_t pdpt, map_addr_t *m)
{
uint32_t pdp_index;
map_addr_t *pdp_table = (map_addr_t *)(pdpt & X86_PG_FRAME);
pdp_index = ((vaddr >> PDP_SHIFT) & ((1ul << PDPT_ADDR_OFFSET) - 1));
pdp_table[pdp_index] = (map_addr_t)m;
pdp_table[pdp_index] |= X86_MMU_PG_P | X86_MMU_PG_RW | X86_MMU_PG_U;
}
#endif
static void update_pt_entry(vaddr_t vaddr, map_addr_t paddr, arch_flags_t flags, map_addr_t pt)
{
uint32_t pt_index;
map_addr_t *pt_table = (map_addr_t *)(pt & X86_PG_FRAME);
pt_index = ((vaddr >> PT_SHIFT) & ((1 << ADDR_OFFSET) - 1));
pt_table[pt_index] = paddr;
pt_table[pt_index] |= flags;
}
static void update_pd_entry(vaddr_t vaddr, map_addr_t pdt, map_addr_t *m)
{
uint32_t pd_index;
map_addr_t *pd_table = (map_addr_t *)(pdt & X86_PG_FRAME);
pd_index = ((vaddr >> PD_SHIFT) & ((1 << ADDR_OFFSET) - 1));
pd_table[pd_index] = (map_addr_t)m;
pd_table[pd_index] |= X86_MMU_PG_P | X86_MMU_PG_RW | X86_MMU_PG_U;
}
/**
* @brief Allocating a new page table
*/
static map_addr_t *_map_alloc_page()
{
map_addr_t *page_ptr = memalign(PAGE_SIZE, PAGE_SIZE);
if(page_ptr)
memset(page_ptr, 0, PAGE_SIZE);
return page_ptr;
}
/**
* @brief Add a new mapping for the given virtual address & physical address
*
* This is an API which handles the mapping between a virtual address & a physical address
* either by checking if the mapping already exists and is valid OR by adding a
* new mapping with the required flags.
*
*/
static status_t x86_mmu_add_mapping(map_addr_t init_table, map_addr_t paddr,
vaddr_t vaddr, arch_flags_t flags)
{
#ifdef PAE_MODE_ENABLED
map_addr_t pdt;
uint32_t pd_new = 0;
#endif
map_addr_t pt, *m = NULL;
status_t ret = NO_ERROR;
DEBUG_ASSERT(init_table);
if((!IS_ALIGNED(vaddr, PAGE_SIZE)) || (!IS_ALIGNED(paddr, PAGE_SIZE)) )
return ERR_INVALID_ARGS;
#ifdef PAE_MODE_ENABLED
pdt = get_pdp_entry_from_pdp_table(vaddr, init_table);
if((pdt & X86_MMU_PG_P) == 0) {
/* Creating a new pd table */
m = _map_alloc_page();
if(m == NULL) {
ret = ERR_NO_MEMORY;
goto clean;
}
update_pdp_entry(vaddr, init_table, m);
pdt = (map_addr_t)m;
pd_new = 1;
}
if(!pd_new)
pt = get_pd_entry_from_pd_table(vaddr, pdt);
if(pd_new || (pt & X86_MMU_PG_P) == 0) {
/* Creating a new pt */
m = _map_alloc_page();
if(m == NULL) {
ret = ERR_NO_MEMORY;
if(pd_new)
goto clean_pd;
goto clean;
}
update_pd_entry(vaddr, pdt, m);
pt = (map_addr_t)m;
}
#else
pt = get_pd_entry_from_pd_table(vaddr, init_table);
if((pt & X86_MMU_PG_P) == 0) {
/* Creating a new pt */
m = _map_alloc_page();
if(m == NULL) {
ret = ERR_NO_MEMORY;
goto clean;
}
update_pd_entry(vaddr, init_table, m);
pt = (map_addr_t)m;
}
#endif
/* Updating the page table entry with the paddr and access flags required for the mapping */
update_pt_entry(vaddr, paddr, flags, pt);
ret = NO_ERROR;
#ifdef PAE_MODE_ENABLED
goto clean;
clean_pd:
if(pd_new)
free((map_addr_t *)pdt);
#endif
clean:
return ret;
}
/**
* @brief x86 MMU unmap an entry in the page tables recursively and clear out tables
*
*/
static void x86_mmu_unmap_entry(vaddr_t vaddr, int level, map_addr_t table_entry)
{
uint32_t offset = 0, next_level_offset = 0;
map_addr_t *table, *next_table_addr, value;
next_table_addr = NULL;
table = (map_addr_t *)(X86_VIRT_TO_PHYS(table_entry) & X86_PG_FRAME);
switch(level)
{
#ifdef PAE_MODE_ENABLED
case PDP_L:
offset = ((vaddr >> PDP_SHIFT) & ((1 << PDPT_ADDR_OFFSET) - 1));
next_table_addr = (map_addr_t *)X86_PHYS_TO_VIRT(table[offset]);
if((X86_PHYS_TO_VIRT(table[offset]) & X86_MMU_PG_P) == 0)
return;
break;
#endif
case PD_L:
offset = ((vaddr >> PD_SHIFT) & ((1 << ADDR_OFFSET) - 1));
next_table_addr = (map_addr_t *)X86_PHYS_TO_VIRT(table[offset]);
if((X86_PHYS_TO_VIRT(table[offset]) & X86_MMU_PG_P) == 0)
return;
break;
case PT_L:
offset = ((vaddr >> PT_SHIFT) & ((1 << ADDR_OFFSET) - 1));
next_table_addr = (map_addr_t *)X86_PHYS_TO_VIRT(table[offset]);
if((X86_PHYS_TO_VIRT(table[offset]) & X86_MMU_PG_P) == 0)
return;
break;
case PF_L:
/* Reached page frame, Let's go back */
default:
return;
}
level -= 1;
x86_mmu_unmap_entry(vaddr, level, (map_addr_t)next_table_addr);
level += 1;
next_table_addr = (map_addr_t *)((map_addr_t)(X86_VIRT_TO_PHYS(next_table_addr)) & X86_PG_FRAME);
if(level > PT_L) {
/* Check all entries of next level table for present bit */
for (next_level_offset = 0; next_level_offset < NO_OF_PT_ENTRIES; next_level_offset++) {
if((next_table_addr[next_level_offset] & X86_MMU_PG_P) != 0)
return; /* There is an entry in the next level table */
}
free(next_table_addr);
}
/* All present bits for all entries in next level table for this address are 0 */
if((X86_PHYS_TO_VIRT(table[offset]) & X86_MMU_PG_P) != 0) {
arch_disable_ints();
value = table[offset];
value = value & X86_PTE_NOT_PRESENT;
table[offset] = value;
arch_enable_ints();
}
}
static int x86_mmu_unmap(map_addr_t init_table, vaddr_t vaddr, uint count)
{
int unmapped = 0;
vaddr_t next_aligned_v_addr;
DEBUG_ASSERT(init_table);
if(!IS_ALIGNED(vaddr, PAGE_SIZE))
return ERR_INVALID_ARGS;
if (count == 0)
return NO_ERROR;
next_aligned_v_addr = vaddr;
while (count > 0) {
#ifdef PAE_MODE_ENABLED
x86_mmu_unmap_entry(next_aligned_v_addr, X86_PAE_PAGING_LEVELS, init_table);
#else
x86_mmu_unmap_entry(next_aligned_v_addr, X86_PAGING_LEVELS, init_table);
#endif
next_aligned_v_addr += PAGE_SIZE;
unmapped++;
count--;
}
return unmapped;
}
int arch_mmu_unmap(vaddr_t vaddr, uint count)
{
map_addr_t init_table_from_cr3;
if(!IS_ALIGNED(vaddr, PAGE_SIZE))
return ERR_INVALID_ARGS;
if (count == 0)
return NO_ERROR;
DEBUG_ASSERT(x86_get_cr3());
init_table_from_cr3 = x86_get_cr3();
return(x86_mmu_unmap(init_table_from_cr3, vaddr, count));
}
/**
* @brief Mapping a section/range with specific permissions
*
*/
status_t x86_mmu_map_range(map_addr_t init_table, struct map_range *range, arch_flags_t flags)
{
vaddr_t next_aligned_v_addr;
map_addr_t next_aligned_p_addr;
status_t map_status;
uint32_t no_of_pages, index;
DEBUG_ASSERT(init_table);
if(!range)
return ERR_INVALID_ARGS;
/* Calculating the number of 4k pages */
if(IS_ALIGNED(range->size, PAGE_SIZE))
no_of_pages = (range->size) >> PAGE_DIV_SHIFT;
else
no_of_pages = ((range->size) >> PAGE_DIV_SHIFT) + 1;
next_aligned_v_addr = range->start_vaddr;
next_aligned_p_addr = range->start_paddr;
for(index = 0; index < no_of_pages; index++) {
map_status = x86_mmu_add_mapping(init_table, next_aligned_p_addr, next_aligned_v_addr, flags);
if(map_status) {
dprintf(SPEW, "Add mapping failed with err=%d\n", map_status);
/* Unmap the partial mapping - if any */
x86_mmu_unmap(init_table, range->start_vaddr, index);
return map_status;
}
next_aligned_v_addr += PAGE_SIZE;
next_aligned_p_addr += PAGE_SIZE;
}
return NO_ERROR;
}
status_t arch_mmu_query(vaddr_t vaddr, paddr_t *paddr, uint *flags)
{
uint32_t ret_level, current_cr3_val;
map_addr_t last_valid_entry;
arch_flags_t ret_flags;
status_t stat;
if(!paddr || !flags)
return ERR_INVALID_ARGS;
DEBUG_ASSERT(x86_get_cr3());
current_cr3_val = (map_addr_t)x86_get_cr3();
stat = x86_mmu_page_walking(current_cr3_val, vaddr, &ret_level, &ret_flags, &last_valid_entry);
if(stat)
return stat;
*paddr = (paddr_t)last_valid_entry;
/* converting x86 arch specific flags to arch mmu flags */
*flags = 0;
if(!(ret_flags & X86_MMU_PG_RW))
*flags |= ARCH_MMU_FLAG_PERM_RO;
if(ret_flags & X86_MMU_PG_U)
*flags |= ARCH_MMU_FLAG_PERM_USER;
if(ret_flags & X86_MMU_CACHE_DISABLE)
*flags |= ARCH_MMU_FLAG_UNCACHED;
#ifdef PAE_MODE_ENABLED
if(ret_flags & X86_MMU_PG_NX)
*flags |= ARCH_MMU_FLAG_PERM_NO_EXECUTE;
#endif
return NO_ERROR;
}
int arch_mmu_map(vaddr_t vaddr, paddr_t paddr, uint count, uint flags)
{
uint32_t current_cr3_val;
struct map_range range;
arch_flags_t arch_flags = X86_MMU_PG_P;
if((!IS_ALIGNED(paddr, PAGE_SIZE)) || (!IS_ALIGNED(vaddr, PAGE_SIZE)))
return ERR_INVALID_ARGS;
if (count == 0)
return NO_ERROR;
DEBUG_ASSERT(x86_get_cr3());
current_cr3_val = (map_addr_t)x86_get_cr3();
range.start_vaddr = vaddr;
range.start_paddr = (map_addr_t)paddr;
range.size = count * PAGE_SIZE;
/* converting arch mmu flags to x86 arch specific flags */
if(!(flags & ARCH_MMU_FLAG_PERM_RO))
arch_flags |= X86_MMU_PG_RW;
if(flags & ARCH_MMU_FLAG_PERM_USER)
arch_flags |= X86_MMU_PG_U;
if(flags & ARCH_MMU_FLAG_UNCACHED)
arch_flags |= X86_MMU_CACHE_DISABLE;
#ifdef PAE_MODE_ENABLED
if(flags & ARCH_MMU_FLAG_PERM_NO_EXECUTE)
arch_flags |= X86_MMU_PG_NX;
#endif
return(x86_mmu_map_range(current_cr3_val, &range, arch_flags));
}
/**
* @brief x86 MMU basic initialization
*
*/
void arch_mmu_init(void)
{
volatile uint32_t efer_msr, cr0, cr4;
/* Set WP bit in CR0*/
cr0 = x86_get_cr0();
cr0 |= X86_CR0_WP;
x86_set_cr0(cr0);
#ifdef PAE_MODE_ENABLED
/* Setting the SMEP & SMAP bit in CR4 */
cr4 = x86_get_cr4();
if(check_smep_avail())
cr4 |= X86_CR4_SMEP;
if(check_smap_avail())
cr4 |=X86_CR4_SMAP;
x86_set_cr4(cr4);
/* Set NXE bit in MSR_EFER*/
efer_msr = read_msr(x86_MSR_EFER);
efer_msr |= x86_EFER_NXE;
write_msr(x86_MSR_EFER, efer_msr);
#endif
}
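A minimal usage sketch of the new arch interface, mapping one page and reading it back; the addresses and the self-test wrapper are assumptions for illustration, not part of the commit:

/* Map a page, verify it with arch_mmu_query(), then unmap it. */
static void mmu_map_selftest(void)
{
    vaddr_t va = 0x20000000;      /* assumed free virtual page */
    paddr_t pa = 0x01000000;      /* assumed available physical page */
    paddr_t out;
    uint flags;

    if (arch_mmu_map(va, pa, 1, ARCH_MMU_FLAG_PERM_USER) != NO_ERROR)
        return;

    if (arch_mmu_query(va, &out, &flags) == NO_ERROR && out == pa)
        arch_mmu_unmap(va, 1);
}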

View File

@@ -5,6 +5,15 @@ MODULE := $(LOCAL_DIR)
GLOBAL_INCLUDES += \
$(LOCAL_DIR)/include
WITH_KERNEL_VM=1
GLOBAL_DEFINES += \
KERNEL_ASPACE_BASE=0x00000000 \
KERNEL_ASPACE_SIZE=0x80000000
KERNEL_BASE ?= 0x00000000
KERNEL_LOAD_OFFSET ?= 0x200000
MODULE_SRCS += \
$(LOCAL_DIR)/crt0.S \
$(LOCAL_DIR)/arch.c \
@@ -15,7 +24,8 @@ MODULE_SRCS += \
$(LOCAL_DIR)/thread.c \
$(LOCAL_DIR)/mmu.c \
$(LOCAL_DIR)/faults.c \
$(LOCAL_DIR)/descriptor.c
$(LOCAL_DIR)/descriptor.c
# set the default toolchain to x86 elf and set a #define
ifndef TOOLCHAIN_PREFIX
@@ -24,7 +34,9 @@ endif
# for the moment, SMP is not supported on x86
GLOBAL_DEFINES += \
SMP_MAX_CPUS=1
- SMP_MAX_CPUS=1
LIBGCC := $(shell $(TOOLCHAIN_PREFIX)gcc $(CFLAGS) -print-libgcc-file-name)
#$(info LIBGCC = $(LIBGCC))
@@ -45,7 +57,6 @@ GENERATED += \
$(BUILDDIR)/kernel.ld
# rules for generating the linker scripts
$(BUILDDIR)/kernel.ld: $(LOCAL_DIR)/kernel.ld $(wildcard arch/*.ld)
@echo generating $@
@$(MKDIR)