KVM: VMX: Enable EPT feature for KVM
Signed-off-by: Sheng Yang <sheng.yang@intel.com> Signed-off-by: Avi Kivity <avi@qumranet.com>
This commit is contained in:
parent
b7ebfb0509
commit
1439442c7b
4 changed files with 234 additions and 10 deletions
|
@ -1177,8 +1177,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
|
|||
return -ENOMEM;
|
||||
}
|
||||
|
||||
table[index] = __pa(new_table->spt) | PT_PRESENT_MASK
|
||||
| PT_WRITABLE_MASK | shadow_user_mask;
|
||||
table[index] = __pa(new_table->spt)
|
||||
| PT_PRESENT_MASK | PT_WRITABLE_MASK
|
||||
| shadow_user_mask | shadow_x_mask;
|
||||
}
|
||||
table_addr = table[index] & PT64_BASE_ADDR_MASK;
|
||||
}
|
||||
|
|
|
@ -42,7 +42,7 @@ module_param(enable_vpid, bool, 0);
|
|||
static int flexpriority_enabled = 1;
|
||||
module_param(flexpriority_enabled, bool, 0);
|
||||
|
||||
static int enable_ept;
|
||||
static int enable_ept = 1;
|
||||
module_param(enable_ept, bool, 0);
|
||||
|
||||
struct vmcs {
|
||||
|
@ -284,6 +284,18 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva)
|
|||
: : "a"(&operand), "c"(ext) : "cc", "memory");
|
||||
}
|
||||
|
||||
static inline void __invept(int ext, u64 eptp, gpa_t gpa)
|
||||
{
|
||||
struct {
|
||||
u64 eptp, gpa;
|
||||
} operand = {eptp, gpa};
|
||||
|
||||
asm volatile (ASM_VMX_INVEPT
|
||||
/* CF==1 or ZF==1 --> rc = -1 */
|
||||
"; ja 1f ; ud2 ; 1:\n"
|
||||
: : "a" (&operand), "c" (ext) : "cc", "memory");
|
||||
}
|
||||
|
||||
static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
|
||||
{
|
||||
int i;
|
||||
|
@ -335,6 +347,33 @@ static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx)
|
|||
__invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0);
|
||||
}
|
||||
|
||||
static inline void ept_sync_global(void)
|
||||
{
|
||||
if (cpu_has_vmx_invept_global())
|
||||
__invept(VMX_EPT_EXTENT_GLOBAL, 0, 0);
|
||||
}
|
||||
|
||||
static inline void ept_sync_context(u64 eptp)
|
||||
{
|
||||
if (vm_need_ept()) {
|
||||
if (cpu_has_vmx_invept_context())
|
||||
__invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0);
|
||||
else
|
||||
ept_sync_global();
|
||||
}
|
||||
}
|
||||
|
||||
static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa)
|
||||
{
|
||||
if (vm_need_ept()) {
|
||||
if (cpu_has_vmx_invept_individual_addr())
|
||||
__invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR,
|
||||
eptp, gpa);
|
||||
else
|
||||
ept_sync_context(eptp);
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned long vmcs_readl(unsigned long field)
|
||||
{
|
||||
unsigned long value;
|
||||
|
@ -422,6 +461,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
|
|||
eb |= 1u << 1;
|
||||
if (vcpu->arch.rmode.active)
|
||||
eb = ~0;
|
||||
if (vm_need_ept())
|
||||
eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
|
||||
vmcs_write32(EXCEPTION_BITMAP, eb);
|
||||
}
|
||||
|
||||
|
@ -1352,8 +1393,64 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
|
|||
vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK;
|
||||
}
|
||||
|
||||
static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
|
||||
if (!load_pdptrs(vcpu, vcpu->arch.cr3)) {
|
||||
printk(KERN_ERR "EPT: Fail to load pdptrs!\n");
|
||||
return;
|
||||
}
|
||||
vmcs_write64(GUEST_PDPTR0, vcpu->arch.pdptrs[0]);
|
||||
vmcs_write64(GUEST_PDPTR1, vcpu->arch.pdptrs[1]);
|
||||
vmcs_write64(GUEST_PDPTR2, vcpu->arch.pdptrs[2]);
|
||||
vmcs_write64(GUEST_PDPTR3, vcpu->arch.pdptrs[3]);
|
||||
}
|
||||
}
|
||||
|
||||
static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
|
||||
|
||||
static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
|
||||
unsigned long cr0,
|
||||
struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!(cr0 & X86_CR0_PG)) {
|
||||
/* From paging/starting to nonpaging */
|
||||
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
|
||||
vmcs_config.cpu_based_exec_ctrl |
|
||||
(CPU_BASED_CR3_LOAD_EXITING |
|
||||
CPU_BASED_CR3_STORE_EXITING));
|
||||
vcpu->arch.cr0 = cr0;
|
||||
vmx_set_cr4(vcpu, vcpu->arch.cr4);
|
||||
*hw_cr0 |= X86_CR0_PE | X86_CR0_PG;
|
||||
*hw_cr0 &= ~X86_CR0_WP;
|
||||
} else if (!is_paging(vcpu)) {
|
||||
/* From nonpaging to paging */
|
||||
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
|
||||
vmcs_config.cpu_based_exec_ctrl &
|
||||
~(CPU_BASED_CR3_LOAD_EXITING |
|
||||
CPU_BASED_CR3_STORE_EXITING));
|
||||
vcpu->arch.cr0 = cr0;
|
||||
vmx_set_cr4(vcpu, vcpu->arch.cr4);
|
||||
if (!(vcpu->arch.cr0 & X86_CR0_WP))
|
||||
*hw_cr0 &= ~X86_CR0_WP;
|
||||
}
|
||||
}
|
||||
|
||||
static void ept_update_paging_mode_cr4(unsigned long *hw_cr4,
|
||||
struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!is_paging(vcpu)) {
|
||||
*hw_cr4 &= ~X86_CR4_PAE;
|
||||
*hw_cr4 |= X86_CR4_PSE;
|
||||
} else if (!(vcpu->arch.cr4 & X86_CR4_PAE))
|
||||
*hw_cr4 &= ~X86_CR4_PAE;
|
||||
}
|
||||
|
||||
static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
|
||||
{
|
||||
unsigned long hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) |
|
||||
KVM_VM_CR0_ALWAYS_ON;
|
||||
|
||||
vmx_fpu_deactivate(vcpu);
|
||||
|
||||
if (vcpu->arch.rmode.active && (cr0 & X86_CR0_PE))
|
||||
|
@ -1371,29 +1468,61 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
|
|||
}
|
||||
#endif
|
||||
|
||||
if (vm_need_ept())
|
||||
ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
|
||||
|
||||
vmcs_writel(CR0_READ_SHADOW, cr0);
|
||||
vmcs_writel(GUEST_CR0,
|
||||
(cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON);
|
||||
vmcs_writel(GUEST_CR0, hw_cr0);
|
||||
vcpu->arch.cr0 = cr0;
|
||||
|
||||
if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE))
|
||||
vmx_fpu_activate(vcpu);
|
||||
}
|
||||
|
||||
static u64 construct_eptp(unsigned long root_hpa)
|
||||
{
|
||||
u64 eptp;
|
||||
|
||||
/* TODO write the value reading from MSR */
|
||||
eptp = VMX_EPT_DEFAULT_MT |
|
||||
VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
|
||||
eptp |= (root_hpa & PAGE_MASK);
|
||||
|
||||
return eptp;
|
||||
}
|
||||
|
||||
static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
|
||||
{
|
||||
unsigned long guest_cr3;
|
||||
u64 eptp;
|
||||
|
||||
guest_cr3 = cr3;
|
||||
if (vm_need_ept()) {
|
||||
eptp = construct_eptp(cr3);
|
||||
vmcs_write64(EPT_POINTER, eptp);
|
||||
ept_sync_context(eptp);
|
||||
ept_load_pdptrs(vcpu);
|
||||
guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 :
|
||||
VMX_EPT_IDENTITY_PAGETABLE_ADDR;
|
||||
}
|
||||
|
||||
vmx_flush_tlb(vcpu);
|
||||
vmcs_writel(GUEST_CR3, cr3);
|
||||
vmcs_writel(GUEST_CR3, guest_cr3);
|
||||
if (vcpu->arch.cr0 & X86_CR0_PE)
|
||||
vmx_fpu_deactivate(vcpu);
|
||||
}
|
||||
|
||||
static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
|
||||
{
|
||||
vmcs_writel(CR4_READ_SHADOW, cr4);
|
||||
vmcs_writel(GUEST_CR4, cr4 | (vcpu->arch.rmode.active ?
|
||||
KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON));
|
||||
unsigned long hw_cr4 = cr4 | (vcpu->arch.rmode.active ?
|
||||
KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
|
||||
|
||||
vcpu->arch.cr4 = cr4;
|
||||
if (vm_need_ept())
|
||||
ept_update_paging_mode_cr4(&hw_cr4, vcpu);
|
||||
|
||||
vmcs_writel(CR4_READ_SHADOW, cr4);
|
||||
vmcs_writel(GUEST_CR4, hw_cr4);
|
||||
}
|
||||
|
||||
static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
|
||||
|
@ -2116,6 +2245,9 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|||
if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
|
||||
error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
|
||||
if (is_page_fault(intr_info)) {
|
||||
/* EPT won't cause page fault directly */
|
||||
if (vm_need_ept())
|
||||
BUG();
|
||||
cr2 = vmcs_readl(EXIT_QUALIFICATION);
|
||||
KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
|
||||
(u32)((u64)cr2 >> 32), handler);
|
||||
|
@ -2445,6 +2577,64 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|||
return kvm_task_switch(vcpu, tss_selector, reason);
|
||||
}
|
||||
|
||||
static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
{
|
||||
u64 exit_qualification;
|
||||
enum emulation_result er;
|
||||
gpa_t gpa;
|
||||
unsigned long hva;
|
||||
int gla_validity;
|
||||
int r;
|
||||
|
||||
exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
|
||||
|
||||
if (exit_qualification & (1 << 6)) {
|
||||
printk(KERN_ERR "EPT: GPA exceeds GAW!\n");
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
|
||||
gla_validity = (exit_qualification >> 7) & 0x3;
|
||||
if (gla_validity != 0x3 && gla_validity != 0x1 && gla_validity != 0) {
|
||||
printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
|
||||
printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
|
||||
(long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
|
||||
(long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS));
|
||||
printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
|
||||
(long unsigned int)exit_qualification);
|
||||
kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
|
||||
kvm_run->hw.hardware_exit_reason = 0;
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
|
||||
gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
|
||||
hva = gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT);
|
||||
if (!kvm_is_error_hva(hva)) {
|
||||
r = kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
|
||||
if (r < 0) {
|
||||
printk(KERN_ERR "EPT: Not enough memory!\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
return 1;
|
||||
} else {
|
||||
/* must be MMIO */
|
||||
er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
|
||||
|
||||
if (er == EMULATE_FAIL) {
|
||||
printk(KERN_ERR
|
||||
"EPT: Fail to handle EPT violation vmexit!er is %d\n",
|
||||
er);
|
||||
printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
|
||||
(long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
|
||||
(long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS));
|
||||
printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
|
||||
(long unsigned int)exit_qualification);
|
||||
return -ENOTSUPP;
|
||||
} else if (er == EMULATE_DO_MMIO)
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* The exit handlers return 1 if the exit was handled fully and guest execution
|
||||
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
|
||||
|
@ -2468,6 +2658,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
|
|||
[EXIT_REASON_APIC_ACCESS] = handle_apic_access,
|
||||
[EXIT_REASON_WBINVD] = handle_wbinvd,
|
||||
[EXIT_REASON_TASK_SWITCH] = handle_task_switch,
|
||||
[EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
|
||||
};
|
||||
|
||||
static const int kvm_vmx_max_exit_handlers =
|
||||
|
@ -2486,6 +2677,13 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
|
|||
KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)vmcs_readl(GUEST_RIP),
|
||||
(u32)((u64)vmcs_readl(GUEST_RIP) >> 32), entryexit);
|
||||
|
||||
/* Access CR3 don't cause VMExit in paging mode, so we need
|
||||
* to sync with guest real CR3. */
|
||||
if (vm_need_ept() && is_paging(vcpu)) {
|
||||
vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
|
||||
ept_load_pdptrs(vcpu);
|
||||
}
|
||||
|
||||
if (unlikely(vmx->fail)) {
|
||||
kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
|
||||
kvm_run->fail_entry.hardware_entry_failure_reason
|
||||
|
@ -2494,7 +2692,8 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
|
|||
}
|
||||
|
||||
if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
|
||||
exit_reason != EXIT_REASON_EXCEPTION_NMI)
|
||||
(exit_reason != EXIT_REASON_EXCEPTION_NMI &&
|
||||
exit_reason != EXIT_REASON_EPT_VIOLATION))
|
||||
printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
|
||||
"exit reason is 0x%x\n", __func__, exit_reason);
|
||||
if (exit_reason < kvm_vmx_max_exit_handlers
|
||||
|
@ -2796,6 +2995,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
|
|||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
allocate_vpid(vmx);
|
||||
if (id == 0 && vm_need_ept()) {
|
||||
kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
|
||||
VMX_EPT_WRITABLE_MASK |
|
||||
VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
|
||||
kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK,
|
||||
VMX_EPT_FAKE_DIRTY_MASK, 0ull,
|
||||
VMX_EPT_EXECUTABLE_MASK);
|
||||
kvm_enable_tdp();
|
||||
}
|
||||
|
||||
err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
|
||||
if (err)
|
||||
|
@ -2975,9 +3183,14 @@ static int __init vmx_init(void)
|
|||
vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP);
|
||||
vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP);
|
||||
|
||||
if (cpu_has_vmx_ept())
|
||||
bypass_guest_pf = 0;
|
||||
|
||||
if (bypass_guest_pf)
|
||||
kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull);
|
||||
|
||||
ept_sync_global();
|
||||
|
||||
return 0;
|
||||
|
||||
out2:
|
||||
|
|
|
@ -353,6 +353,15 @@ enum vmcs_field {
|
|||
#define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25)
|
||||
#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26)
|
||||
#define VMX_EPT_DEFAULT_GAW 3
|
||||
#define VMX_EPT_MAX_GAW 0x4
|
||||
#define VMX_EPT_MT_EPTE_SHIFT 3
|
||||
#define VMX_EPT_GAW_EPTP_SHIFT 3
|
||||
#define VMX_EPT_DEFAULT_MT 0x6ull
|
||||
#define VMX_EPT_READABLE_MASK 0x1ull
|
||||
#define VMX_EPT_WRITABLE_MASK 0x2ull
|
||||
#define VMX_EPT_EXECUTABLE_MASK 0x4ull
|
||||
#define VMX_EPT_FAKE_ACCESSED_MASK (1ull << 62)
|
||||
#define VMX_EPT_FAKE_DIRTY_MASK (1ull << 63)
|
||||
|
||||
#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
|
||||
|
||||
|
|
|
@ -651,6 +651,7 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
|
|||
#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4"
|
||||
#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4"
|
||||
#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30"
|
||||
#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
|
||||
#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
|
||||
|
||||
#define MSR_IA32_TIME_STAMP_COUNTER 0x010
|
||||
|
|
Loading…
Reference in a new issue