Merge branch 'kvm-updates/2.6.40' of git://git.kernel.org/pub/scm/virt/kvm/kvm

* 'kvm-updates/2.6.40' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (131 commits)
  KVM: MMU: Use ptep_user for cmpxchg_gpte()
  KVM: Fix kvm mmu_notifier initialization order
  KVM: Add documentation for KVM_CAP_NR_VCPUS
  KVM: make guest mode entry to be rcu quiescent state
  KVM: x86 emulator: Make jmp far emulation into a separate function
  KVM: x86 emulator: Rename emulate_grpX() to em_grpX()
  KVM: x86 emulator: Remove unused arg from emulate_pop()
  KVM: x86 emulator: Remove unused arg from writeback()
  KVM: x86 emulator: Remove unused arg from read_descriptor()
  KVM: x86 emulator: Remove unused arg from seg_override()
  KVM: Validate userspace_addr of memslot when registered
  KVM: MMU: Clean up gpte reading with copy_from_user()
  KVM: PPC: booke: add sregs support
  KVM: PPC: booke: save/restore VRSAVE (a.k.a. USPRG0)
  KVM: PPC: use ticks, not usecs, for exit timing
  KVM: PPC: fix exit accounting for SPRs, tlbwe, tlbsx
  KVM: PPC: e500: emulate SVR
  KVM: VMX: Cache vmcs segment fields
  KVM: x86 emulator: consolidate segment accessors
  KVM: VMX: Avoid reading %rip unnecessarily when handling exceptions
  ...
This commit is contained in:
Linus Torvalds 2011-05-23 08:42:08 -07:00
commit f4b10bc60a
34 changed files with 3052 additions and 1091 deletions

View file

@ -175,7 +175,10 @@ Parameters: vcpu id (apic id on x86)
Returns: vcpu fd on success, -1 on error
This API adds a vcpu to a virtual machine. The vcpu id is a small integer
in the range [0, max_vcpus).
in the range [0, max_vcpus). You can use KVM_CAP_NR_VCPUS of the
KVM_CHECK_EXTENSION ioctl() to determine the value for max_vcpus at run-time.
If the KVM_CAP_NR_VCPUS does not exist, you should assume that max_vcpus is 4
cpus max.
4.8 KVM_GET_DIRTY_LOG (vm ioctl)
@ -261,7 +264,7 @@ See KVM_GET_REGS for the data structure.
4.13 KVM_GET_SREGS
Capability: basic
Architectures: x86
Architectures: x86, ppc
Type: vcpu ioctl
Parameters: struct kvm_sregs (out)
Returns: 0 on success, -1 on error
@ -279,6 +282,8 @@ struct kvm_sregs {
__u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
};
/* ppc -- see arch/powerpc/include/asm/kvm.h */
interrupt_bitmap is a bitmap of pending external interrupts. At most
one bit may be set. This interrupt has been acknowledged by the APIC
but not yet injected into the cpu core.
@ -286,7 +291,7 @@ but not yet injected into the cpu core.
4.14 KVM_SET_SREGS
Capability: basic
Architectures: x86
Architectures: x86, ppc
Type: vcpu ioctl
Parameters: struct kvm_sregs (in)
Returns: 0 on success, -1 on error
@ -1263,6 +1268,29 @@ struct kvm_assigned_msix_entry {
__u16 padding[3];
};
4.54 KVM_SET_TSC_KHZ
Capability: KVM_CAP_TSC_CONTROL
Architectures: x86
Type: vcpu ioctl
Parameters: virtual tsc_khz
Returns: 0 on success, -1 on error
Specifies the tsc frequency for the virtual machine. The unit of the
frequency is KHz.
4.55 KVM_GET_TSC_KHZ
Capability: KVM_CAP_GET_TSC_KHZ
Architectures: x86
Type: vcpu ioctl
Parameters: none
Returns: virtual tsc-khz on success, negative value on error
Returns the tsc frequency of the guest. The unit of the return value is
KHz. If the host has unstable tsc this ioctl returns -EIO instead as an
error.
5. The kvm_run structure
Application code obtains a pointer to the kvm_run structure by

View file

@ -83,13 +83,13 @@
union vac {
unsigned long value;
struct {
int a_int:1;
int a_from_int_cr:1;
int a_to_int_cr:1;
int a_from_psr:1;
int a_from_cpuid:1;
int a_cover:1;
int a_bsw:1;
unsigned int a_int:1;
unsigned int a_from_int_cr:1;
unsigned int a_to_int_cr:1;
unsigned int a_from_psr:1;
unsigned int a_from_cpuid:1;
unsigned int a_cover:1;
unsigned int a_bsw:1;
long reserved:57;
};
};
@ -97,12 +97,12 @@ union vac {
union vdc {
unsigned long value;
struct {
int d_vmsw:1;
int d_extint:1;
int d_ibr_dbr:1;
int d_pmc:1;
int d_to_pmd:1;
int d_itm:1;
unsigned int d_vmsw:1;
unsigned int d_extint:1;
unsigned int d_ibr_dbr:1;
unsigned int d_pmc:1;
unsigned int d_to_pmd:1;
unsigned int d_itm:1;
long reserved:58;
};
};

View file

@ -45,6 +45,114 @@ struct kvm_regs {
__u64 gpr[32];
};
#define KVM_SREGS_E_IMPL_NONE 0
#define KVM_SREGS_E_IMPL_FSL 1
#define KVM_SREGS_E_FSL_PIDn (1 << 0) /* PID1/PID2 */
/*
* Feature bits indicate which sections of the sregs struct are valid,
* both in KVM_GET_SREGS and KVM_SET_SREGS. On KVM_SET_SREGS, registers
* corresponding to unset feature bits will not be modified. This allows
* restoring a checkpoint made without that feature, while keeping the
* default values of the new registers.
*
* KVM_SREGS_E_BASE contains:
* CSRR0/1 (refers to SRR2/3 on 40x)
* ESR
* DEAR
* MCSR
* TSR
* TCR
* DEC
* TB
* VRSAVE (USPRG0)
*/
#define KVM_SREGS_E_BASE (1 << 0)
/*
* KVM_SREGS_E_ARCH206 contains:
*
* PIR
* MCSRR0/1
* DECAR
* IVPR
*/
#define KVM_SREGS_E_ARCH206 (1 << 1)
/*
* Contains EPCR, plus the upper half of 64-bit registers
* that are 32-bit on 32-bit implementations.
*/
#define KVM_SREGS_E_64 (1 << 2)
#define KVM_SREGS_E_SPRG8 (1 << 3)
#define KVM_SREGS_E_MCIVPR (1 << 4)
/*
* IVORs are used -- contains IVOR0-15, plus additional IVORs
* in combination with an appropriate feature bit.
*/
#define KVM_SREGS_E_IVOR (1 << 5)
/*
* Contains MAS0-4, MAS6-7, TLBnCFG, MMUCFG.
* Also TLBnPS if MMUCFG[MAVN] = 1.
*/
#define KVM_SREGS_E_ARCH206_MMU (1 << 6)
/* DBSR, DBCR, IAC, DAC, DVC */
#define KVM_SREGS_E_DEBUG (1 << 7)
/* Enhanced debug -- DSRR0/1, SPRG9 */
#define KVM_SREGS_E_ED (1 << 8)
/* Embedded Floating Point (SPE) -- IVOR32-34 if KVM_SREGS_E_IVOR */
#define KVM_SREGS_E_SPE (1 << 9)
/* External Proxy (EXP) -- EPR */
#define KVM_SREGS_EXP (1 << 10)
/* External PID (E.PD) -- EPSC/EPLC */
#define KVM_SREGS_E_PD (1 << 11)
/* Processor Control (E.PC) -- IVOR36-37 if KVM_SREGS_E_IVOR */
#define KVM_SREGS_E_PC (1 << 12)
/* Page table (E.PT) -- EPTCFG */
#define KVM_SREGS_E_PT (1 << 13)
/* Embedded Performance Monitor (E.PM) -- IVOR35 if KVM_SREGS_E_IVOR */
#define KVM_SREGS_E_PM (1 << 14)
/*
* Special updates:
*
* Some registers may change even while a vcpu is not running.
* To avoid losing these changes, by default these registers are
* not updated by KVM_SET_SREGS. To force an update, set the bit
* in u.e.update_special corresponding to the register to be updated.
*
* The update_special field is zero on return from KVM_GET_SREGS.
*
* When restoring a checkpoint, the caller can set update_special
* to 0xffffffff to ensure that everything is restored, even new features
* that the caller doesn't know about.
*/
#define KVM_SREGS_E_UPDATE_MCSR (1 << 0)
#define KVM_SREGS_E_UPDATE_TSR (1 << 1)
#define KVM_SREGS_E_UPDATE_DEC (1 << 2)
#define KVM_SREGS_E_UPDATE_DBSR (1 << 3)
/*
* In KVM_SET_SREGS, reserved/pad fields must be left untouched from a
* previous KVM_GET_REGS.
*
* Unless otherwise indicated, setting any register with KVM_SET_SREGS
* directly sets its value. It does not trigger any special semantics such
* as write-one-to-clear. Calling KVM_SET_SREGS on an unmodified struct
* just received from KVM_GET_SREGS is always a no-op.
*/
struct kvm_sregs {
__u32 pvr;
union {
@ -62,6 +170,82 @@ struct kvm_sregs {
__u64 dbat[8];
} ppc32;
} s;
struct {
union {
struct { /* KVM_SREGS_E_IMPL_FSL */
__u32 features; /* KVM_SREGS_E_FSL_ */
__u32 svr;
__u64 mcar;
__u32 hid0;
/* KVM_SREGS_E_FSL_PIDn */
__u32 pid1, pid2;
} fsl;
__u8 pad[256];
} impl;
__u32 features; /* KVM_SREGS_E_ */
__u32 impl_id; /* KVM_SREGS_E_IMPL_ */
__u32 update_special; /* KVM_SREGS_E_UPDATE_ */
__u32 pir; /* read-only */
__u64 sprg8;
__u64 sprg9; /* E.ED */
__u64 csrr0;
__u64 dsrr0; /* E.ED */
__u64 mcsrr0;
__u32 csrr1;
__u32 dsrr1; /* E.ED */
__u32 mcsrr1;
__u32 esr;
__u64 dear;
__u64 ivpr;
__u64 mcivpr;
__u64 mcsr; /* KVM_SREGS_E_UPDATE_MCSR */
__u32 tsr; /* KVM_SREGS_E_UPDATE_TSR */
__u32 tcr;
__u32 decar;
__u32 dec; /* KVM_SREGS_E_UPDATE_DEC */
/*
* Userspace can read TB directly, but the
* value reported here is consistent with "dec".
*
* Read-only.
*/
__u64 tb;
__u32 dbsr; /* KVM_SREGS_E_UPDATE_DBSR */
__u32 dbcr[3];
__u32 iac[4];
__u32 dac[2];
__u32 dvc[2];
__u8 num_iac; /* read-only */
__u8 num_dac; /* read-only */
__u8 num_dvc; /* read-only */
__u8 pad;
__u32 epr; /* EXP */
__u32 vrsave; /* a.k.a. USPRG0 */
__u32 epcr; /* KVM_SREGS_E_64 */
__u32 mas0;
__u32 mas1;
__u64 mas2;
__u64 mas7_3;
__u32 mas4;
__u32 mas6;
__u32 ivor_low[16]; /* IVOR0-15 */
__u32 ivor_high[18]; /* IVOR32+, plus room to expand */
__u32 mmucfg; /* read-only */
__u32 eptcfg; /* E.PT, read-only */
__u32 tlbcfg[4];/* read-only */
__u32 tlbps[4]; /* read-only */
__u32 eplc, epsc; /* E.PD */
} e;
__u8 pad[1020];
} u;
};

View file

@ -61,7 +61,6 @@ static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu)
return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu);
}
void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid);
void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu);
void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu);

View file

@ -43,6 +43,7 @@ struct kvmppc_vcpu_e500 {
u32 host_pid[E500_PID_NUM];
u32 pid[E500_PID_NUM];
u32 svr;
u32 mas0;
u32 mas1;
@ -58,6 +59,7 @@ struct kvmppc_vcpu_e500 {
u32 hid1;
u32 tlb0cfg;
u32 tlb1cfg;
u64 mcar;
struct kvm_vcpu vcpu;
};

View file

@ -223,6 +223,7 @@ struct kvm_vcpu_arch {
ulong hflags;
ulong guest_owned_ext;
#endif
u32 vrsave; /* also USPRG0 */
u32 mmucr;
ulong sprg4;
ulong sprg5;
@ -232,6 +233,9 @@ struct kvm_vcpu_arch {
ulong csrr1;
ulong dsrr0;
ulong dsrr1;
ulong mcsrr0;
ulong mcsrr1;
ulong mcsr;
ulong esr;
u32 dec;
u32 decar;
@ -255,6 +259,7 @@ struct kvm_vcpu_arch {
u32 dbsr;
#ifdef CONFIG_KVM_EXIT_TIMING
struct mutex exit_timing_lock;
struct kvmppc_exit_timing timing_exit;
struct kvmppc_exit_timing timing_last_enter;
u32 last_exit_type;

View file

@ -61,6 +61,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
/* Core-specific hooks */
@ -142,4 +143,12 @@ static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value)
return r;
}
void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
#endif /* __POWERPC_KVM_PPC_H__ */

View file

@ -396,6 +396,7 @@ int main(void)
DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave));
DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4));
DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5));
DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6));

View file

@ -107,6 +107,16 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
return 0;
}
void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
kvmppc_get_sregs_ivor(vcpu, sregs);
}
int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
return kvmppc_set_sregs_ivor(vcpu, sregs);
}
struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
{
struct kvmppc_vcpu_44x *vcpu_44x;

View file

@ -158,7 +158,6 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
}
kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
return emulated;
}
@ -179,7 +178,6 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
}
kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
return emulated;
}

View file

@ -569,6 +569,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
kvmppc_set_msr(vcpu, regs->msr);
vcpu->arch.shared->srr0 = regs->srr0;
vcpu->arch.shared->srr1 = regs->srr1;
kvmppc_set_pid(vcpu, regs->pid);
vcpu->arch.shared->sprg0 = regs->sprg0;
vcpu->arch.shared->sprg1 = regs->sprg1;
vcpu->arch.shared->sprg2 = regs->sprg2;
@ -584,16 +585,165 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
return 0;
}
static void get_sregs_base(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
u64 tb = get_tb();
sregs->u.e.features |= KVM_SREGS_E_BASE;
sregs->u.e.csrr0 = vcpu->arch.csrr0;
sregs->u.e.csrr1 = vcpu->arch.csrr1;
sregs->u.e.mcsr = vcpu->arch.mcsr;
sregs->u.e.esr = vcpu->arch.esr;
sregs->u.e.dear = vcpu->arch.shared->dar;
sregs->u.e.tsr = vcpu->arch.tsr;
sregs->u.e.tcr = vcpu->arch.tcr;
sregs->u.e.dec = kvmppc_get_dec(vcpu, tb);
sregs->u.e.tb = tb;
sregs->u.e.vrsave = vcpu->arch.vrsave;
}
static int set_sregs_base(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
if (!(sregs->u.e.features & KVM_SREGS_E_BASE))
return 0;
vcpu->arch.csrr0 = sregs->u.e.csrr0;
vcpu->arch.csrr1 = sregs->u.e.csrr1;
vcpu->arch.mcsr = sregs->u.e.mcsr;
vcpu->arch.esr = sregs->u.e.esr;
vcpu->arch.shared->dar = sregs->u.e.dear;
vcpu->arch.vrsave = sregs->u.e.vrsave;
vcpu->arch.tcr = sregs->u.e.tcr;
if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC)
vcpu->arch.dec = sregs->u.e.dec;
kvmppc_emulate_dec(vcpu);
if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
/*
* FIXME: existing KVM timer handling is incomplete.
* TSR cannot be read by the guest, and its value in
* vcpu->arch is always zero. For now, just handle
* the case where the caller is trying to inject a
* decrementer interrupt.
*/
if ((sregs->u.e.tsr & TSR_DIS) &&
(vcpu->arch.tcr & TCR_DIE))
kvmppc_core_queue_dec(vcpu);
}
return 0;
}
static void get_sregs_arch206(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
sregs->u.e.features |= KVM_SREGS_E_ARCH206;
sregs->u.e.pir = 0;
sregs->u.e.mcsrr0 = vcpu->arch.mcsrr0;
sregs->u.e.mcsrr1 = vcpu->arch.mcsrr1;
sregs->u.e.decar = vcpu->arch.decar;
sregs->u.e.ivpr = vcpu->arch.ivpr;
}
static int set_sregs_arch206(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
if (!(sregs->u.e.features & KVM_SREGS_E_ARCH206))
return 0;
if (sregs->u.e.pir != 0)
return -EINVAL;
vcpu->arch.mcsrr0 = sregs->u.e.mcsrr0;
vcpu->arch.mcsrr1 = sregs->u.e.mcsrr1;
vcpu->arch.decar = sregs->u.e.decar;
vcpu->arch.ivpr = sregs->u.e.ivpr;
return 0;
}
void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
sregs->u.e.features |= KVM_SREGS_E_IVOR;
sregs->u.e.ivor_low[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
sregs->u.e.ivor_low[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
sregs->u.e.ivor_low[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
sregs->u.e.ivor_low[3] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
sregs->u.e.ivor_low[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
sregs->u.e.ivor_low[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
sregs->u.e.ivor_low[6] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
sregs->u.e.ivor_low[7] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
sregs->u.e.ivor_low[8] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
sregs->u.e.ivor_low[9] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
sregs->u.e.ivor_low[10] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
sregs->u.e.ivor_low[11] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
sregs->u.e.ivor_low[12] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
sregs->u.e.ivor_low[13] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
sregs->u.e.ivor_low[14] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
sregs->u.e.ivor_low[15] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
}
int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
if (!(sregs->u.e.features & KVM_SREGS_E_IVOR))
return 0;
vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = sregs->u.e.ivor_low[0];
vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = sregs->u.e.ivor_low[1];
vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = sregs->u.e.ivor_low[2];
vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = sregs->u.e.ivor_low[3];
vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = sregs->u.e.ivor_low[4];
vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = sregs->u.e.ivor_low[5];
vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = sregs->u.e.ivor_low[6];
vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = sregs->u.e.ivor_low[7];
vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = sregs->u.e.ivor_low[8];
vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = sregs->u.e.ivor_low[9];
vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = sregs->u.e.ivor_low[10];
vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = sregs->u.e.ivor_low[11];
vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = sregs->u.e.ivor_low[12];
vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = sregs->u.e.ivor_low[13];
vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = sregs->u.e.ivor_low[14];
vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = sregs->u.e.ivor_low[15];
return 0;
}
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
return -ENOTSUPP;
sregs->pvr = vcpu->arch.pvr;
get_sregs_base(vcpu, sregs);
get_sregs_arch206(vcpu, sregs);
kvmppc_core_get_sregs(vcpu, sregs);
return 0;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
return -ENOTSUPP;
int ret;
if (vcpu->arch.pvr != sregs->pvr)
return -EINVAL;
ret = set_sregs_base(vcpu, sregs);
if (ret < 0)
return ret;
ret = set_sregs_arch206(vcpu, sregs);
if (ret < 0)
return ret;
return kvmppc_core_set_sregs(vcpu, sregs);
}
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)

View file

@ -380,7 +380,6 @@ lightweight_exit:
* because host interrupt handlers would get confused. */
lwz r1, VCPU_GPR(r1)(r4)
/* XXX handle USPRG0 */
/* Host interrupt handlers may have clobbered these guest-readable
* SPRGs, so we need to reload them here with the guest's values. */
lwz r3, VCPU_SPRG4(r4)

View file

@ -63,6 +63,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
/* Registers init */
vcpu->arch.pvr = mfspr(SPRN_PVR);
vcpu_e500->svr = mfspr(SPRN_SVR);
/* Since booke kvm only support one core, update all vcpus' PIR to 0 */
vcpu->vcpu_id = 0;
@ -96,6 +97,81 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
return 0;
}
void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
sregs->u.e.features |= KVM_SREGS_E_ARCH206_MMU | KVM_SREGS_E_SPE |
KVM_SREGS_E_PM;
sregs->u.e.impl_id = KVM_SREGS_E_IMPL_FSL;
sregs->u.e.impl.fsl.features = 0;
sregs->u.e.impl.fsl.svr = vcpu_e500->svr;
sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0;
sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar;
sregs->u.e.mas0 = vcpu_e500->mas0;
sregs->u.e.mas1 = vcpu_e500->mas1;
sregs->u.e.mas2 = vcpu_e500->mas2;
sregs->u.e.mas7_3 = ((u64)vcpu_e500->mas7 << 32) | vcpu_e500->mas3;
sregs->u.e.mas4 = vcpu_e500->mas4;
sregs->u.e.mas6 = vcpu_e500->mas6;
sregs->u.e.mmucfg = mfspr(SPRN_MMUCFG);
sregs->u.e.tlbcfg[0] = vcpu_e500->tlb0cfg;
sregs->u.e.tlbcfg[1] = vcpu_e500->tlb1cfg;
sregs->u.e.tlbcfg[2] = 0;
sregs->u.e.tlbcfg[3] = 0;
sregs->u.e.ivor_high[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
sregs->u.e.ivor_high[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA];
sregs->u.e.ivor_high[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
sregs->u.e.ivor_high[3] =
vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
kvmppc_get_sregs_ivor(vcpu, sregs);
}
int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
vcpu_e500->svr = sregs->u.e.impl.fsl.svr;
vcpu_e500->hid0 = sregs->u.e.impl.fsl.hid0;
vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar;
}
if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) {
vcpu_e500->mas0 = sregs->u.e.mas0;
vcpu_e500->mas1 = sregs->u.e.mas1;
vcpu_e500->mas2 = sregs->u.e.mas2;
vcpu_e500->mas7 = sregs->u.e.mas7_3 >> 32;
vcpu_e500->mas3 = (u32)sregs->u.e.mas7_3;
vcpu_e500->mas4 = sregs->u.e.mas4;
vcpu_e500->mas6 = sregs->u.e.mas6;
}
if (!(sregs->u.e.features & KVM_SREGS_E_IVOR))
return 0;
if (sregs->u.e.features & KVM_SREGS_E_SPE) {
vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] =
sregs->u.e.ivor_high[0];
vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] =
sregs->u.e.ivor_high[1];
vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] =
sregs->u.e.ivor_high[2];
}
if (sregs->u.e.features & KVM_SREGS_E_PM) {
vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] =
sregs->u.e.ivor_high[3];
}
return kvmppc_set_sregs_ivor(vcpu, sregs);
}
struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
{
struct kvmppc_vcpu_e500 *vcpu_e500;

View file

@ -1,5 +1,5 @@
/*
* Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
* Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
*
* Author: Yu Liu, <yu.liu@freescale.com>
*
@ -78,8 +78,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
switch (sprn) {
case SPRN_PID:
vcpu_e500->pid[0] = vcpu->arch.shadow_pid =
vcpu->arch.pid = spr_val;
kvmppc_set_pid(vcpu, spr_val);
break;
case SPRN_PID1:
vcpu_e500->pid[1] = spr_val; break;
@ -175,6 +174,8 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid0); break;
case SPRN_HID1:
kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid1); break;
case SPRN_SVR:
kvmppc_set_gpr(vcpu, rt, vcpu_e500->svr); break;
case SPRN_MMUCSR0:
kvmppc_set_gpr(vcpu, rt, 0); break;

View file

@ -1,5 +1,5 @@
/*
* Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
* Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
*
* Author: Yu Liu, yu.liu@freescale.com
*
@ -24,6 +24,7 @@
#include "../mm/mmu_decl.h"
#include "e500_tlb.h"
#include "trace.h"
#include "timing.h"
#define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1)
@ -506,6 +507,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
vcpu_e500->mas7 = 0;
}
kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
return EMULATE_DONE;
}
@ -571,6 +573,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
write_host_tlbe(vcpu_e500, stlbsel, sesel);
}
kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
return EMULATE_DONE;
}
@ -672,6 +675,14 @@ int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
return -1;
}
void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
vcpu_e500->pid[0] = vcpu->arch.shadow_pid =
vcpu->arch.pid = pid;
}
void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
{
struct tlbe *tlbe;

View file

@ -114,6 +114,12 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
}
}
u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
{
u64 jd = tb - vcpu->arch.dec_jiffies;
return vcpu->arch.dec - jd;
}
/* XXX to do:
* lhax
* lhaux
@ -279,11 +285,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
case SPRN_DEC:
{
u64 jd = get_tb() - vcpu->arch.dec_jiffies;
kvmppc_set_gpr(vcpu, rt, vcpu->arch.dec - jd);
pr_debug("mfDEC: %x - %llx = %lx\n",
vcpu->arch.dec, jd,
kvmppc_get_gpr(vcpu, rt));
kvmppc_set_gpr(vcpu, rt,
kvmppc_get_dec(vcpu, get_tb()));
break;
}
default:
@ -294,6 +297,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
}
break;
}
kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
break;
case OP_31_XOP_STHX:
@ -363,6 +367,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
printk("mtspr: unknown spr %x\n", sprn);
break;
}
kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
break;
case OP_31_XOP_DCBI:

View file

@ -175,7 +175,11 @@ int kvm_dev_ioctl_check_extension(long ext)
int r;
switch (ext) {
#ifdef CONFIG_BOOKE
case KVM_CAP_PPC_BOOKE_SREGS:
#else
case KVM_CAP_PPC_SEGSTATE:
#endif
case KVM_CAP_PPC_PAIRED_SINGLES:
case KVM_CAP_PPC_UNSET_IRQ:
case KVM_CAP_PPC_IRQ_LEVEL:
@ -284,6 +288,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu);
vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
#ifdef CONFIG_KVM_EXIT_TIMING
mutex_init(&vcpu->arch.exit_timing_lock);
#endif
return 0;
}
@ -294,12 +302,25 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
#ifdef CONFIG_BOOKE
/*
* vrsave (formerly usprg0) isn't used by Linux, but may
* be used by the guest.
*
* On non-booke this is associated with Altivec and
* is handled by code in book3s.c.
*/
mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
#endif
kvmppc_core_vcpu_load(vcpu, cpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
kvmppc_core_vcpu_put(vcpu);
#ifdef CONFIG_BOOKE
vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
#endif
}
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,

View file

@ -34,8 +34,8 @@ void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu)
{
int i;
/* pause guest execution to avoid concurrent updates */
mutex_lock(&vcpu->mutex);
/* Take a lock to avoid concurrent updates */
mutex_lock(&vcpu->arch.exit_timing_lock);
vcpu->arch.last_exit_type = 0xDEAD;
for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
@ -49,7 +49,7 @@ void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu)
vcpu->arch.timing_exit.tv64 = 0;
vcpu->arch.timing_last_enter.tv64 = 0;
mutex_unlock(&vcpu->mutex);
mutex_unlock(&vcpu->arch.exit_timing_lock);
}
static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
@ -65,6 +65,8 @@ static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
return;
}
mutex_lock(&vcpu->arch.exit_timing_lock);
vcpu->arch.timing_count_type[type]++;
/* sum */
@ -93,6 +95,8 @@ static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
vcpu->arch.timing_min_duration[type] = duration;
if (unlikely(duration > vcpu->arch.timing_max_duration[type]))
vcpu->arch.timing_max_duration[type] = duration;
mutex_unlock(&vcpu->arch.exit_timing_lock);
}
void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu)
@ -147,17 +151,30 @@ static int kvmppc_exit_timing_show(struct seq_file *m, void *private)
{
struct kvm_vcpu *vcpu = m->private;
int i;
u64 min, max, sum, sum_quad;
seq_printf(m, "%s", "type count min max sum sum_squared\n");
for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
min = vcpu->arch.timing_min_duration[i];
do_div(min, tb_ticks_per_usec);
max = vcpu->arch.timing_max_duration[i];
do_div(max, tb_ticks_per_usec);
sum = vcpu->arch.timing_sum_duration[i];
do_div(sum, tb_ticks_per_usec);
sum_quad = vcpu->arch.timing_sum_quad_duration[i];
do_div(sum_quad, tb_ticks_per_usec);
seq_printf(m, "%12s %10d %10lld %10lld %20lld %20lld\n",
kvm_exit_names[i],
vcpu->arch.timing_count_type[i],
vcpu->arch.timing_min_duration[i],
vcpu->arch.timing_max_duration[i],
vcpu->arch.timing_sum_duration[i],
vcpu->arch.timing_sum_quad_duration[i]);
min,
max,
sum,
sum_quad);
}
return 0;
}

View file

@ -14,6 +14,8 @@
#include <asm/desc_defs.h>
struct x86_emulate_ctxt;
enum x86_intercept;
enum x86_intercept_stage;
struct x86_exception {
u8 vector;
@ -23,6 +25,24 @@ struct x86_exception {
u64 address; /* cr2 or nested page fault gpa */
};
/*
* This struct is used to carry enough information from the instruction
* decoder to main KVM so that a decision can be made whether the
* instruction needs to be intercepted or not.
*/
struct x86_instruction_info {
u8 intercept; /* which intercept */
u8 rep_prefix; /* rep prefix? */
u8 modrm_mod; /* mod part of modrm */
u8 modrm_reg; /* index of register used */
u8 modrm_rm; /* rm part of modrm */
u64 src_val; /* value of source operand */
u8 src_bytes; /* size of source operand */
u8 dst_bytes; /* size of destination operand */
u8 ad_bytes; /* size of src/dst address */
u64 next_rip; /* rip following the instruction */
};
/*
* x86_emulate_ops:
*
@ -62,6 +82,7 @@ struct x86_exception {
#define X86EMUL_RETRY_INSTR 3 /* retry the instruction for some reason */
#define X86EMUL_CMPXCHG_FAILED 4 /* cmpxchg did not see expected value */
#define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */
#define X86EMUL_INTERCEPTED 6 /* Intercepted by nested VMCB/VMCS */
struct x86_emulate_ops {
/*
@ -71,8 +92,9 @@ struct x86_emulate_ops {
* @val: [OUT] Value read from memory, zero-extended to 'u_long'.
* @bytes: [IN ] Number of bytes to read from memory.
*/
int (*read_std)(unsigned long addr, void *val,
unsigned int bytes, struct kvm_vcpu *vcpu,
int (*read_std)(struct x86_emulate_ctxt *ctxt,
unsigned long addr, void *val,
unsigned int bytes,
struct x86_exception *fault);
/*
@ -82,8 +104,8 @@ struct x86_emulate_ops {
* @val: [OUT] Value write to memory, zero-extended to 'u_long'.
* @bytes: [IN ] Number of bytes to write to memory.
*/
int (*write_std)(unsigned long addr, void *val,
unsigned int bytes, struct kvm_vcpu *vcpu,
int (*write_std)(struct x86_emulate_ctxt *ctxt,
unsigned long addr, void *val, unsigned int bytes,
struct x86_exception *fault);
/*
* fetch: Read bytes of standard (non-emulated/special) memory.
@ -92,8 +114,8 @@ struct x86_emulate_ops {
* @val: [OUT] Value read from memory, zero-extended to 'u_long'.
* @bytes: [IN ] Number of bytes to read from memory.
*/
int (*fetch)(unsigned long addr, void *val,
unsigned int bytes, struct kvm_vcpu *vcpu,
int (*fetch)(struct x86_emulate_ctxt *ctxt,
unsigned long addr, void *val, unsigned int bytes,
struct x86_exception *fault);
/*
@ -102,11 +124,9 @@ struct x86_emulate_ops {
* @val: [OUT] Value read from memory, zero-extended to 'u_long'.
* @bytes: [IN ] Number of bytes to read from memory.
*/
int (*read_emulated)(unsigned long addr,
void *val,
unsigned int bytes,
struct x86_exception *fault,
struct kvm_vcpu *vcpu);
int (*read_emulated)(struct x86_emulate_ctxt *ctxt,
unsigned long addr, void *val, unsigned int bytes,
struct x86_exception *fault);
/*
* write_emulated: Write bytes to emulated/special memory area.
@ -115,11 +135,10 @@ struct x86_emulate_ops {
* required).
* @bytes: [IN ] Number of bytes to write to memory.
*/
int (*write_emulated)(unsigned long addr,
const void *val,
int (*write_emulated)(struct x86_emulate_ctxt *ctxt,
unsigned long addr, const void *val,
unsigned int bytes,
struct x86_exception *fault,
struct kvm_vcpu *vcpu);
struct x86_exception *fault);
/*
* cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an
@ -129,40 +148,54 @@ struct x86_emulate_ops {
* @new: [IN ] Value to write to @addr.
* @bytes: [IN ] Number of bytes to access using CMPXCHG.
*/
int (*cmpxchg_emulated)(unsigned long addr,
int (*cmpxchg_emulated)(struct x86_emulate_ctxt *ctxt,
unsigned long addr,
const void *old,
const void *new,
unsigned int bytes,
struct x86_exception *fault,
struct kvm_vcpu *vcpu);
struct x86_exception *fault);
void (*invlpg)(struct x86_emulate_ctxt *ctxt, ulong addr);
int (*pio_in_emulated)(int size, unsigned short port, void *val,
unsigned int count, struct kvm_vcpu *vcpu);
int (*pio_in_emulated)(struct x86_emulate_ctxt *ctxt,
int size, unsigned short port, void *val,
unsigned int count);
int (*pio_out_emulated)(int size, unsigned short port, const void *val,
unsigned int count, struct kvm_vcpu *vcpu);
int (*pio_out_emulated)(struct x86_emulate_ctxt *ctxt,
int size, unsigned short port, const void *val,
unsigned int count);
bool (*get_cached_descriptor)(struct desc_struct *desc, u32 *base3,
int seg, struct kvm_vcpu *vcpu);
void (*set_cached_descriptor)(struct desc_struct *desc, u32 base3,
int seg, struct kvm_vcpu *vcpu);
u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu);
void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu);
unsigned long (*get_cached_segment_base)(int seg, struct kvm_vcpu *vcpu);
void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu);
void (*get_idt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu);
ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu);
int (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu);
int (*cpl)(struct kvm_vcpu *vcpu);
int (*get_dr)(int dr, unsigned long *dest, struct kvm_vcpu *vcpu);
int (*set_dr)(int dr, unsigned long value, struct kvm_vcpu *vcpu);
int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
bool (*get_segment)(struct x86_emulate_ctxt *ctxt, u16 *selector,
struct desc_struct *desc, u32 *base3, int seg);
void (*set_segment)(struct x86_emulate_ctxt *ctxt, u16 selector,
struct desc_struct *desc, u32 base3, int seg);
unsigned long (*get_cached_segment_base)(struct x86_emulate_ctxt *ctxt,
int seg);
void (*get_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt);
void (*get_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt);
void (*set_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt);
void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt);
ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr);
int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val);
int (*cpl)(struct x86_emulate_ctxt *ctxt);
int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest);
int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value);
int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data);
int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata);
void (*halt)(struct x86_emulate_ctxt *ctxt);
void (*wbinvd)(struct x86_emulate_ctxt *ctxt);
int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt);
void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */
void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */
int (*intercept)(struct x86_emulate_ctxt *ctxt,
struct x86_instruction_info *info,
enum x86_intercept_stage stage);
};
typedef u32 __attribute__((vector_size(16))) sse128_t;
/* Type, address-of, and value of an instruction's operand. */
struct operand {
enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type;
enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_NONE } type;
unsigned int bytes;
union {
unsigned long orig_val;
@ -174,11 +207,13 @@ struct operand {
ulong ea;
unsigned seg;
} mem;
unsigned xmm;
} addr;
union {
unsigned long val;
u64 val64;
char valptr[sizeof(unsigned long) + 2];
sse128_t vec_val;
};
};
@ -197,6 +232,7 @@ struct read_cache {
struct decode_cache {
u8 twobyte;
u8 b;
u8 intercept;
u8 lock_prefix;
u8 rep_prefix;
u8 op_bytes;
@ -209,6 +245,7 @@ struct decode_cache {
u8 seg_override;
unsigned int d;
int (*execute)(struct x86_emulate_ctxt *ctxt);
int (*check_perm)(struct x86_emulate_ctxt *ctxt);
unsigned long regs[NR_VCPU_REGS];
unsigned long eip;
/* modrm */
@ -227,17 +264,15 @@ struct x86_emulate_ctxt {
struct x86_emulate_ops *ops;
/* Register state before/after emulation. */
struct kvm_vcpu *vcpu;
unsigned long eflags;
unsigned long eip; /* eip before instruction emulation */
/* Emulated execution mode, represented by an X86EMUL_MODE value. */
int mode;
u32 cs_base;
/* interruptibility state, as a result of execution of STI or MOV SS */
int interruptibility;
bool guest_mode; /* guest running a nested guest */
bool perm_ok; /* do not check permissions if true */
bool only_vendor_specific_insn;
@ -249,8 +284,8 @@ struct x86_emulate_ctxt {
};
/* Repeat String Operation Prefix */
#define REPE_PREFIX 1
#define REPNE_PREFIX 2
#define REPE_PREFIX 0xf3
#define REPNE_PREFIX 0xf2
/* Execution mode, passed to the emulator. */
#define X86EMUL_MODE_REAL 0 /* Real mode. */
@ -259,6 +294,69 @@ struct x86_emulate_ctxt {
#define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */
#define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */
/* any protected mode */
#define X86EMUL_MODE_PROT (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \
X86EMUL_MODE_PROT64)
enum x86_intercept_stage {
X86_ICTP_NONE = 0, /* Allow zero-init to not match anything */
X86_ICPT_PRE_EXCEPT,
X86_ICPT_POST_EXCEPT,
X86_ICPT_POST_MEMACCESS,
};
enum x86_intercept {
x86_intercept_none,
x86_intercept_cr_read,
x86_intercept_cr_write,
x86_intercept_clts,
x86_intercept_lmsw,
x86_intercept_smsw,
x86_intercept_dr_read,
x86_intercept_dr_write,
x86_intercept_lidt,
x86_intercept_sidt,
x86_intercept_lgdt,
x86_intercept_sgdt,
x86_intercept_lldt,
x86_intercept_sldt,
x86_intercept_ltr,
x86_intercept_str,
x86_intercept_rdtsc,
x86_intercept_rdpmc,
x86_intercept_pushf,
x86_intercept_popf,
x86_intercept_cpuid,
x86_intercept_rsm,
x86_intercept_iret,
x86_intercept_intn,
x86_intercept_invd,
x86_intercept_pause,
x86_intercept_hlt,
x86_intercept_invlpg,
x86_intercept_invlpga,
x86_intercept_vmrun,
x86_intercept_vmload,
x86_intercept_vmsave,
x86_intercept_vmmcall,
x86_intercept_stgi,
x86_intercept_clgi,
x86_intercept_skinit,
x86_intercept_rdtscp,
x86_intercept_icebp,
x86_intercept_wbinvd,
x86_intercept_monitor,
x86_intercept_mwait,
x86_intercept_rdmsr,
x86_intercept_wrmsr,
x86_intercept_in,
x86_intercept_ins,
x86_intercept_out,
x86_intercept_outs,
nr_x86_intercepts
};
/* Host execution mode. */
#if defined(CONFIG_X86_32)
#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32
@ -270,6 +368,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len);
#define EMULATION_FAILED -1
#define EMULATION_OK 0
#define EMULATION_RESTART 1
#define EMULATION_INTERCEPTED 2
int x86_emulate_insn(struct x86_emulate_ctxt *ctxt);
int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
u16 tss_selector, int reason,

View file

@ -30,14 +30,30 @@
#define KVM_MEMORY_SLOTS 32
/* memory slots that does not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 4
#define KVM_MMIO_SIZE 16
#define KVM_PIO_PAGE_OFFSET 1
#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
#define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
| X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
| X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
#define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
#define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD))
#define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \
0xFFFFFF0000000000ULL)
#define CR4_RESERVED_BITS \
(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
| X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
| X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \
| X86_CR4_OSXSAVE \
| X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
#define INVALID_PAGE (~(hpa_t)0)
#define VALID_PAGE(x) ((x) != INVALID_PAGE)
@ -118,6 +134,9 @@ enum kvm_reg {
enum kvm_reg_ex {
VCPU_EXREG_PDPTR = NR_VCPU_REGS,
VCPU_EXREG_CR3,
VCPU_EXREG_RFLAGS,
VCPU_EXREG_CPL,
VCPU_EXREG_SEGMENTS,
};
enum {
@ -256,7 +275,7 @@ struct kvm_mmu {
struct kvm_mmu_page *sp);
void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva);
void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
u64 *spte, const void *pte, unsigned long mmu_seq);
u64 *spte, const void *pte);
hpa_t root_hpa;
int root_level;
int shadow_root_level;
@ -340,7 +359,6 @@ struct kvm_vcpu_arch {
struct fpu guest_fpu;
u64 xcr0;
gva_t mmio_fault_cr2;
struct kvm_pio_request pio;
void *pio_data;
@ -367,18 +385,22 @@ struct kvm_vcpu_arch {
/* emulate context */
struct x86_emulate_ctxt emulate_ctxt;
bool emulate_regs_need_sync_to_vcpu;
bool emulate_regs_need_sync_from_vcpu;
gpa_t time;
struct pvclock_vcpu_time_info hv_clock;
unsigned int hw_tsc_khz;
unsigned int time_offset;
struct page *time_page;
u64 last_host_tsc;
u64 last_guest_tsc;
u64 last_kernel_ns;
u64 last_tsc_nsec;
u64 last_tsc_write;
u32 virtual_tsc_khz;
bool tsc_catchup;
u32 tsc_catchup_mult;
s8 tsc_catchup_shift;
bool nmi_pending;
bool nmi_injected;
@ -448,9 +470,6 @@ struct kvm_arch {
u64 last_tsc_nsec;
u64 last_tsc_offset;
u64 last_tsc_write;
u32 virtual_tsc_khz;
u32 virtual_tsc_mult;
s8 virtual_tsc_shift;
struct kvm_xen_hvm_config xen_hvm_config;
@ -502,6 +521,8 @@ struct kvm_vcpu_stat {
u32 nmi_injections;
};
struct x86_instruction_info;
struct kvm_x86_ops {
int (*cpu_has_kvm_support)(void); /* __init */
int (*disabled_by_bios)(void); /* __init */
@ -586,9 +607,17 @@ struct kvm_x86_ops {
bool (*has_wbinvd_exit)(void);
void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz);
void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc);
void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
int (*check_intercept)(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
enum x86_intercept_stage stage);
const struct trace_print_flags *exit_reasons_str;
};
@ -627,6 +656,13 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
extern bool tdp_enabled;
/* control of guest tsc rate supported? */
extern bool kvm_has_tsc_control;
/* minimum supported tsc_khz for guests */
extern u32 kvm_min_guest_tsc_khz;
/* maximum supported tsc_khz for guests */
extern u32 kvm_max_guest_tsc_khz;
enum emulation_result {
EMULATE_DONE, /* no further processing */
EMULATE_DO_MMIO, /* kvm_run filled with mmio request */
@ -645,9 +681,6 @@ static inline int emulate_instruction(struct kvm_vcpu *vcpu,
return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
}
void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
void kvm_enable_efer_bits(u64);
int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data);
int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
@ -657,8 +690,6 @@ struct x86_emulate_ctxt;
int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port);
void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
int kvm_emulate_halt(struct kvm_vcpu *vcpu);
int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address);
int emulate_clts(struct kvm_vcpu *vcpu);
int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
@ -721,8 +752,6 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
int kvm_fix_hypercall(struct kvm_vcpu *vcpu);
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code,
void *insn, int insn_len);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);

View file

@ -118,6 +118,7 @@
complete list. */
#define MSR_AMD64_PATCH_LEVEL 0x0000008b
#define MSR_AMD64_TSC_RATIO 0xc0000104
#define MSR_AMD64_NB_CFG 0xc001001f
#define MSR_AMD64_PATCH_LOADER 0xc0010020
#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140

File diff suppressed because it is too large Load diff

View file

@ -33,7 +33,6 @@ struct kvm_kpit_state {
};
struct kvm_pit {
unsigned long base_addresss;
struct kvm_io_device dev;
struct kvm_io_device speaker_dev;
struct kvm *kvm;
@ -51,7 +50,6 @@ struct kvm_pit {
#define KVM_MAX_PIT_INTR_INTERVAL HZ / 100
#define KVM_PIT_CHANNEL_MASK 0x3
void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start);
struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags);
void kvm_free_pit(struct kvm *kvm);

View file

@ -75,7 +75,6 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm);
void kvm_destroy_pic(struct kvm *kvm);
int kvm_pic_read_irq(struct kvm *kvm);
void kvm_pic_update_irq(struct kvm_pic *s);
void kvm_pic_clear_isr_ack(struct kvm *kvm);
static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
{
@ -100,7 +99,6 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
int pit_has_pending_timer(struct kvm_vcpu *vcpu);
int apic_has_pending_timer(struct kvm_vcpu *vcpu);
#endif

View file

@ -1206,7 +1206,7 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
static void nonpaging_update_pte(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp, u64 *spte,
const void *pte, unsigned long mmu_seq)
const void *pte)
{
WARN_ON(1);
}
@ -3163,9 +3163,8 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
}
static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp,
u64 *spte,
const void *new, unsigned long mmu_seq)
struct kvm_mmu_page *sp, u64 *spte,
const void *new)
{
if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
++vcpu->kvm->stat.mmu_pde_zapped;
@ -3173,7 +3172,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
}
++vcpu->kvm->stat.mmu_pte_updated;
vcpu->arch.mmu.update_pte(vcpu, sp, spte, new, mmu_seq);
vcpu->arch.mmu.update_pte(vcpu, sp, spte, new);
}
static bool need_remote_flush(u64 old, u64 new)
@ -3229,7 +3228,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
struct kvm_mmu_page *sp;
struct hlist_node *node;
LIST_HEAD(invalid_list);
unsigned long mmu_seq;
u64 entry, gentry, *spte;
unsigned pte_size, page_offset, misaligned, quadrant, offset;
int level, npte, invlpg_counter, r, flooded = 0;
@ -3271,9 +3269,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
break;
}
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
spin_lock(&vcpu->kvm->mmu_lock);
if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
gentry = 0;
@ -3345,8 +3340,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
if (gentry &&
!((sp->role.word ^ vcpu->arch.mmu.base_role.word)
& mask.word))
mmu_pte_write_new_pte(vcpu, sp, spte, &gentry,
mmu_seq);
mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
if (!remote_flush && need_remote_flush(entry, *spte))
remote_flush = true;
++spte;

View file

@ -78,15 +78,19 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT;
}
static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
gfn_t table_gfn, unsigned index,
static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
pt_element_t __user *ptep_user, unsigned index,
pt_element_t orig_pte, pt_element_t new_pte)
{
int npages;
pt_element_t ret;
pt_element_t *table;
struct page *page;
page = gfn_to_page(kvm, table_gfn);
npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1, &page);
/* Check if the user is doing something meaningless. */
if (unlikely(npages != 1))
return -EFAULT;
table = kmap_atomic(page, KM_USER0);
ret = CMPXCHG(&table[index], orig_pte, new_pte);
@ -117,6 +121,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
gva_t addr, u32 access)
{
pt_element_t pte;
pt_element_t __user *ptep_user;
gfn_t table_gfn;
unsigned index, pt_access, uninitialized_var(pte_access);
gpa_t pte_gpa;
@ -152,6 +157,9 @@ walk:
pt_access = ACC_ALL;
for (;;) {
gfn_t real_gfn;
unsigned long host_addr;
index = PT_INDEX(addr, walker->level);
table_gfn = gpte_to_gfn(pte);
@ -160,43 +168,64 @@ walk:
walker->table_gfn[walker->level - 1] = table_gfn;
walker->pte_gpa[walker->level - 1] = pte_gpa;
if (kvm_read_guest_page_mmu(vcpu, mmu, table_gfn, &pte,
offset, sizeof(pte),
PFERR_USER_MASK|PFERR_WRITE_MASK)) {
real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn),
PFERR_USER_MASK|PFERR_WRITE_MASK);
if (unlikely(real_gfn == UNMAPPED_GVA)) {
present = false;
break;
}
real_gfn = gpa_to_gfn(real_gfn);
host_addr = gfn_to_hva(vcpu->kvm, real_gfn);
if (unlikely(kvm_is_error_hva(host_addr))) {
present = false;
break;
}
ptep_user = (pt_element_t __user *)((void *)host_addr + offset);
if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte)))) {
present = false;
break;
}
trace_kvm_mmu_paging_element(pte, walker->level);
if (!is_present_gpte(pte)) {
if (unlikely(!is_present_gpte(pte))) {
present = false;
break;
}
if (is_rsvd_bits_set(&vcpu->arch.mmu, pte, walker->level)) {
if (unlikely(is_rsvd_bits_set(&vcpu->arch.mmu, pte,
walker->level))) {
rsvd_fault = true;
break;
}
if (write_fault && !is_writable_pte(pte))
if (user_fault || is_write_protection(vcpu))
if (unlikely(write_fault && !is_writable_pte(pte)
&& (user_fault || is_write_protection(vcpu))))
eperm = true;
if (user_fault && !(pte & PT_USER_MASK))
if (unlikely(user_fault && !(pte & PT_USER_MASK)))
eperm = true;
#if PTTYPE == 64
if (fetch_fault && (pte & PT64_NX_MASK))
if (unlikely(fetch_fault && (pte & PT64_NX_MASK)))
eperm = true;
#endif
if (!eperm && !rsvd_fault && !(pte & PT_ACCESSED_MASK)) {
if (!eperm && !rsvd_fault
&& unlikely(!(pte & PT_ACCESSED_MASK))) {
int ret;
trace_kvm_mmu_set_accessed_bit(table_gfn, index,
sizeof(pte));
if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn,
index, pte, pte|PT_ACCESSED_MASK))
ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
pte, pte|PT_ACCESSED_MASK);
if (unlikely(ret < 0)) {
present = false;
break;
} else if (ret)
goto walk;
mark_page_dirty(vcpu->kvm, table_gfn);
pte |= PT_ACCESSED_MASK;
}
@ -241,17 +270,21 @@ walk:
--walker->level;
}
if (!present || eperm || rsvd_fault)
if (unlikely(!present || eperm || rsvd_fault))
goto error;
if (write_fault && !is_dirty_gpte(pte)) {
bool ret;
if (write_fault && unlikely(!is_dirty_gpte(pte))) {
int ret;
trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte,
pte|PT_DIRTY_MASK);
if (ret)
ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
pte, pte|PT_DIRTY_MASK);
if (unlikely(ret < 0)) {
present = false;
goto error;
} else if (ret)
goto walk;
mark_page_dirty(vcpu->kvm, table_gfn);
pte |= PT_DIRTY_MASK;
walker->ptes[walker->level - 1] = pte;
@ -325,7 +358,7 @@ no_present:
}
static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
u64 *spte, const void *pte, unsigned long mmu_seq)
u64 *spte, const void *pte)
{
pt_element_t gpte;
unsigned pte_access;
@ -342,8 +375,6 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
kvm_release_pfn_clean(pfn);
return;
}
if (mmu_notifier_retry(vcpu, mmu_seq))
return;
/*
* we call mmu_set_spte() with host_writable = true because that

View file

@ -63,6 +63,10 @@ MODULE_LICENSE("GPL");
#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
#define TSC_RATIO_RSVD 0xffffff0000000000ULL
#define TSC_RATIO_MIN 0x0000000000000001ULL
#define TSC_RATIO_MAX 0x000000ffffffffffULL
static bool erratum_383_found __read_mostly;
static const u32 host_save_user_msrs[] = {
@ -93,14 +97,6 @@ struct nested_state {
/* A VMEXIT is required but not yet emulated */
bool exit_required;
/*
* If we vmexit during an instruction emulation we need this to restore
* the l1 guest rip after the emulation
*/
unsigned long vmexit_rip;
unsigned long vmexit_rsp;
unsigned long vmexit_rax;
/* cache for intercepts of the guest */
u32 intercept_cr;
u32 intercept_dr;
@ -144,8 +140,13 @@ struct vcpu_svm {
unsigned int3_injected;
unsigned long int3_rip;
u32 apf_reason;
u64 tsc_ratio;
};
static DEFINE_PER_CPU(u64, current_tsc_ratio);
#define TSC_RATIO_DEFAULT 0x0100000000ULL
#define MSR_INVALID 0xffffffffU
static struct svm_direct_access_msrs {
@ -190,6 +191,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm);
static int nested_svm_vmexit(struct vcpu_svm *svm);
static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
bool has_error_code, u32 error_code);
static u64 __scale_tsc(u64 ratio, u64 tsc);
enum {
VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
@ -376,7 +378,6 @@ struct svm_cpu_data {
};
static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
static uint32_t svm_features;
struct svm_init_data {
int cpu;
@ -569,6 +570,10 @@ static int has_svm(void)
static void svm_hardware_disable(void *garbage)
{
/* Make sure we clean up behind us */
if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
cpu_svm_disable();
}
@ -610,6 +615,11 @@ static int svm_hardware_enable(void *garbage)
wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
__get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT;
}
svm_init_erratum_383();
return 0;
@ -791,6 +801,23 @@ static __init int svm_hardware_setup(void)
if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
kvm_enable_efer_bits(EFER_FFXSR);
if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
u64 max;
kvm_has_tsc_control = true;
/*
* Make sure the user can only configure tsc_khz values that
* fit into a signed integer.
* A min value is not calculated needed because it will always
* be 1 on all machines and a value of 0 is used to disable
* tsc-scaling for the vcpu.
*/
max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX));
kvm_max_guest_tsc_khz = max;
}
if (nested) {
printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
@ -802,8 +829,6 @@ static __init int svm_hardware_setup(void)
goto err;
}
svm_features = cpuid_edx(SVM_CPUID_FUNC);
if (!boot_cpu_has(X86_FEATURE_NPT))
npt_enabled = false;
@ -854,6 +879,64 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
seg->base = 0;
}
static u64 __scale_tsc(u64 ratio, u64 tsc)
{
u64 mult, frac, _tsc;
mult = ratio >> 32;
frac = ratio & ((1ULL << 32) - 1);
_tsc = tsc;
_tsc *= mult;
_tsc += (tsc >> 32) * frac;
_tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32;
return _tsc;
}
static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
{
struct vcpu_svm *svm = to_svm(vcpu);
u64 _tsc = tsc;
if (svm->tsc_ratio != TSC_RATIO_DEFAULT)
_tsc = __scale_tsc(svm->tsc_ratio, tsc);
return _tsc;
}
static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
{
struct vcpu_svm *svm = to_svm(vcpu);
u64 ratio;
u64 khz;
/* TSC scaling supported? */
if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR))
return;
/* TSC-Scaling disabled or guest TSC same frequency as host TSC? */
if (user_tsc_khz == 0) {
vcpu->arch.virtual_tsc_khz = 0;
svm->tsc_ratio = TSC_RATIO_DEFAULT;
return;
}
khz = user_tsc_khz;
/* TSC scaling required - calculate ratio */
ratio = khz << 32;
do_div(ratio, tsc_khz);
if (ratio == 0 || ratio & TSC_RATIO_RSVD) {
WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n",
user_tsc_khz);
return;
}
vcpu->arch.virtual_tsc_khz = user_tsc_khz;
svm->tsc_ratio = ratio;
}
static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
struct vcpu_svm *svm = to_svm(vcpu);
@ -880,6 +963,15 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
}
static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
{
u64 tsc;
tsc = svm_scale_tsc(vcpu, native_read_tsc());
return target_tsc - tsc;
}
static void init_vmcb(struct vcpu_svm *svm)
{
struct vmcb_control_area *control = &svm->vmcb->control;
@ -975,7 +1067,7 @@ static void init_vmcb(struct vcpu_svm *svm)
svm_set_efer(&svm->vcpu, 0);
save->dr6 = 0xffff0ff0;
save->dr7 = 0x400;
save->rflags = 2;
kvm_set_rflags(&svm->vcpu, 2);
save->rip = 0x0000fff0;
svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
@ -1048,6 +1140,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
goto out;
}
svm->tsc_ratio = TSC_RATIO_DEFAULT;
err = kvm_vcpu_init(&svm->vcpu, kvm, id);
if (err)
goto free_svm;
@ -1141,6 +1235,12 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
if (static_cpu_has(X86_FEATURE_TSCRATEMSR) &&
svm->tsc_ratio != __get_cpu_var(current_tsc_ratio)) {
__get_cpu_var(current_tsc_ratio) = svm->tsc_ratio;
wrmsrl(MSR_AMD64_TSC_RATIO, svm->tsc_ratio);
}
}
static void svm_vcpu_put(struct kvm_vcpu *vcpu)
@ -1365,31 +1465,6 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
struct vcpu_svm *svm = to_svm(vcpu);
if (is_guest_mode(vcpu)) {
/*
* We are here because we run in nested mode, the host kvm
* intercepts cr0 writes but the l1 hypervisor does not.
* But the L1 hypervisor may intercept selective cr0 writes.
* This needs to be checked here.
*/
unsigned long old, new;
/* Remove bits that would trigger a real cr0 write intercept */
old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK;
new = cr0 & SVM_CR0_SELECTIVE_MASK;
if (old == new) {
/* cr0 write with ts and mp unchanged */
svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) {
svm->nested.vmexit_rip = kvm_rip_read(vcpu);
svm->nested.vmexit_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
svm->nested.vmexit_rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
return;
}
}
}
#ifdef CONFIG_X86_64
if (vcpu->arch.efer & EFER_LME) {
if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
@ -2127,7 +2202,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu);
nested_vmcb->save.cr2 = vmcb->save.cr2;
nested_vmcb->save.cr4 = svm->vcpu.arch.cr4;
nested_vmcb->save.rflags = vmcb->save.rflags;
nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu);
nested_vmcb->save.rip = vmcb->save.rip;
nested_vmcb->save.rsp = vmcb->save.rsp;
nested_vmcb->save.rax = vmcb->save.rax;
@ -2184,7 +2259,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
svm->vmcb->save.ds = hsave->save.ds;
svm->vmcb->save.gdtr = hsave->save.gdtr;
svm->vmcb->save.idtr = hsave->save.idtr;
svm->vmcb->save.rflags = hsave->save.rflags;
kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
svm_set_efer(&svm->vcpu, hsave->save.efer);
svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
svm_set_cr4(&svm->vcpu, hsave->save.cr4);
@ -2312,7 +2387,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
hsave->save.efer = svm->vcpu.arch.efer;
hsave->save.cr0 = kvm_read_cr0(&svm->vcpu);
hsave->save.cr4 = svm->vcpu.arch.cr4;
hsave->save.rflags = vmcb->save.rflags;
hsave->save.rflags = kvm_get_rflags(&svm->vcpu);
hsave->save.rip = kvm_rip_read(&svm->vcpu);
hsave->save.rsp = vmcb->save.rsp;
hsave->save.rax = vmcb->save.rax;
@ -2323,7 +2398,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
copy_vmcb_control_area(hsave, vmcb);
if (svm->vmcb->save.rflags & X86_EFLAGS_IF)
if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
svm->vcpu.arch.hflags |= HF_HIF_MASK;
else
svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
@ -2341,7 +2416,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
svm->vmcb->save.ds = nested_vmcb->save.ds;
svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
svm->vmcb->save.idtr = nested_vmcb->save.idtr;
svm->vmcb->save.rflags = nested_vmcb->save.rflags;
kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags);
svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
@ -2443,13 +2518,13 @@ static int vmload_interception(struct vcpu_svm *svm)
if (nested_svm_check_permissions(svm))
return 1;
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
skip_emulated_instruction(&svm->vcpu);
nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
if (!nested_vmcb)
return 1;
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
skip_emulated_instruction(&svm->vcpu);
nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
nested_svm_unmap(page);
@ -2464,13 +2539,13 @@ static int vmsave_interception(struct vcpu_svm *svm)
if (nested_svm_check_permissions(svm))
return 1;
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
skip_emulated_instruction(&svm->vcpu);
nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
if (!nested_vmcb)
return 1;
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
skip_emulated_instruction(&svm->vcpu);
nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
nested_svm_unmap(page);
@ -2676,6 +2751,29 @@ static int emulate_on_interception(struct vcpu_svm *svm)
return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
}
bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val)
{
unsigned long cr0 = svm->vcpu.arch.cr0;
bool ret = false;
u64 intercept;
intercept = svm->nested.intercept;
if (!is_guest_mode(&svm->vcpu) ||
(!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))))
return false;
cr0 &= ~SVM_CR0_SELECTIVE_MASK;
val &= ~SVM_CR0_SELECTIVE_MASK;
if (cr0 ^ val) {
svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE);
}
return ret;
}
#define CR_VALID (1ULL << 63)
static int cr_interception(struct vcpu_svm *svm)
@ -2699,7 +2797,11 @@ static int cr_interception(struct vcpu_svm *svm)
val = kvm_register_read(&svm->vcpu, reg);
switch (cr) {
case 0:
if (!check_selective_cr0_intercepted(svm, val))
err = kvm_set_cr0(&svm->vcpu, val);
else
return 1;
break;
case 3:
err = kvm_set_cr3(&svm->vcpu, val);
@ -2744,23 +2846,6 @@ static int cr_interception(struct vcpu_svm *svm)
return 1;
}
static int cr0_write_interception(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
int r;
r = cr_interception(svm);
if (svm->nested.vmexit_rip) {
kvm_register_write(vcpu, VCPU_REGS_RIP, svm->nested.vmexit_rip);
kvm_register_write(vcpu, VCPU_REGS_RSP, svm->nested.vmexit_rsp);
kvm_register_write(vcpu, VCPU_REGS_RAX, svm->nested.vmexit_rax);
svm->nested.vmexit_rip = 0;
}
return r;
}
static int dr_interception(struct vcpu_svm *svm)
{
int reg, dr;
@ -2813,7 +2898,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
case MSR_IA32_TSC: {
struct vmcb *vmcb = get_host_vmcb(svm);
*data = vmcb->control.tsc_offset + native_read_tsc();
*data = vmcb->control.tsc_offset +
svm_scale_tsc(vcpu, native_read_tsc());
break;
}
case MSR_STAR:
@ -3048,7 +3135,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_READ_CR4] = cr_interception,
[SVM_EXIT_READ_CR8] = cr_interception,
[SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception,
[SVM_EXIT_WRITE_CR0] = cr0_write_interception,
[SVM_EXIT_WRITE_CR0] = cr_interception,
[SVM_EXIT_WRITE_CR3] = cr_interception,
[SVM_EXIT_WRITE_CR4] = cr_interception,
[SVM_EXIT_WRITE_CR8] = cr8_write_interception,
@ -3104,97 +3191,109 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_NPF] = pf_interception,
};
void dump_vmcb(struct kvm_vcpu *vcpu)
static void dump_vmcb(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb_control_area *control = &svm->vmcb->control;
struct vmcb_save_area *save = &svm->vmcb->save;
pr_err("VMCB Control Area:\n");
pr_err("cr_read: %04x\n", control->intercept_cr & 0xffff);
pr_err("cr_write: %04x\n", control->intercept_cr >> 16);
pr_err("dr_read: %04x\n", control->intercept_dr & 0xffff);
pr_err("dr_write: %04x\n", control->intercept_dr >> 16);
pr_err("exceptions: %08x\n", control->intercept_exceptions);
pr_err("intercepts: %016llx\n", control->intercept);
pr_err("pause filter count: %d\n", control->pause_filter_count);
pr_err("iopm_base_pa: %016llx\n", control->iopm_base_pa);
pr_err("msrpm_base_pa: %016llx\n", control->msrpm_base_pa);
pr_err("tsc_offset: %016llx\n", control->tsc_offset);
pr_err("asid: %d\n", control->asid);
pr_err("tlb_ctl: %d\n", control->tlb_ctl);
pr_err("int_ctl: %08x\n", control->int_ctl);
pr_err("int_vector: %08x\n", control->int_vector);
pr_err("int_state: %08x\n", control->int_state);
pr_err("exit_code: %08x\n", control->exit_code);
pr_err("exit_info1: %016llx\n", control->exit_info_1);
pr_err("exit_info2: %016llx\n", control->exit_info_2);
pr_err("exit_int_info: %08x\n", control->exit_int_info);
pr_err("exit_int_info_err: %08x\n", control->exit_int_info_err);
pr_err("nested_ctl: %lld\n", control->nested_ctl);
pr_err("nested_cr3: %016llx\n", control->nested_cr3);
pr_err("event_inj: %08x\n", control->event_inj);
pr_err("event_inj_err: %08x\n", control->event_inj_err);
pr_err("lbr_ctl: %lld\n", control->lbr_ctl);
pr_err("next_rip: %016llx\n", control->next_rip);
pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff);
pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16);
pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff);
pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16);
pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions);
pr_err("%-20s%016llx\n", "intercepts:", control->intercept);
pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count);
pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa);
pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa);
pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset);
pr_err("%-20s%d\n", "asid:", control->asid);
pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl);
pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl);
pr_err("%-20s%08x\n", "int_vector:", control->int_vector);
pr_err("%-20s%08x\n", "int_state:", control->int_state);
pr_err("%-20s%08x\n", "exit_code:", control->exit_code);
pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1);
pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2);
pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info);
pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err);
pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl);
pr_err("%-20s%016llx\n", "next_rip:", control->next_rip);
pr_err("VMCB State Save Area:\n");
pr_err("es: s: %04x a: %04x l: %08x b: %016llx\n",
pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
"es:",
save->es.selector, save->es.attrib,
save->es.limit, save->es.base);
pr_err("cs: s: %04x a: %04x l: %08x b: %016llx\n",
pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
"cs:",
save->cs.selector, save->cs.attrib,
save->cs.limit, save->cs.base);
pr_err("ss: s: %04x a: %04x l: %08x b: %016llx\n",
pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
"ss:",
save->ss.selector, save->ss.attrib,
save->ss.limit, save->ss.base);
pr_err("ds: s: %04x a: %04x l: %08x b: %016llx\n",
pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
"ds:",
save->ds.selector, save->ds.attrib,
save->ds.limit, save->ds.base);
pr_err("fs: s: %04x a: %04x l: %08x b: %016llx\n",
pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
"fs:",
save->fs.selector, save->fs.attrib,
save->fs.limit, save->fs.base);
pr_err("gs: s: %04x a: %04x l: %08x b: %016llx\n",
pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
"gs:",
save->gs.selector, save->gs.attrib,
save->gs.limit, save->gs.base);
pr_err("gdtr: s: %04x a: %04x l: %08x b: %016llx\n",
pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
"gdtr:",
save->gdtr.selector, save->gdtr.attrib,
save->gdtr.limit, save->gdtr.base);
pr_err("ldtr: s: %04x a: %04x l: %08x b: %016llx\n",
pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
"ldtr:",
save->ldtr.selector, save->ldtr.attrib,
save->ldtr.limit, save->ldtr.base);
pr_err("idtr: s: %04x a: %04x l: %08x b: %016llx\n",
pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
"idtr:",
save->idtr.selector, save->idtr.attrib,
save->idtr.limit, save->idtr.base);
pr_err("tr: s: %04x a: %04x l: %08x b: %016llx\n",
pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
"tr:",
save->tr.selector, save->tr.attrib,
save->tr.limit, save->tr.base);
pr_err("cpl: %d efer: %016llx\n",
save->cpl, save->efer);
pr_err("cr0: %016llx cr2: %016llx\n",
save->cr0, save->cr2);
pr_err("cr3: %016llx cr4: %016llx\n",
save->cr3, save->cr4);
pr_err("dr6: %016llx dr7: %016llx\n",
save->dr6, save->dr7);
pr_err("rip: %016llx rflags: %016llx\n",
save->rip, save->rflags);
pr_err("rsp: %016llx rax: %016llx\n",
save->rsp, save->rax);
pr_err("star: %016llx lstar: %016llx\n",
save->star, save->lstar);
pr_err("cstar: %016llx sfmask: %016llx\n",
save->cstar, save->sfmask);
pr_err("kernel_gs_base: %016llx sysenter_cs: %016llx\n",
save->kernel_gs_base, save->sysenter_cs);
pr_err("sysenter_esp: %016llx sysenter_eip: %016llx\n",
save->sysenter_esp, save->sysenter_eip);
pr_err("gpat: %016llx dbgctl: %016llx\n",
save->g_pat, save->dbgctl);
pr_err("br_from: %016llx br_to: %016llx\n",
save->br_from, save->br_to);
pr_err("excp_from: %016llx excp_to: %016llx\n",
save->last_excp_from, save->last_excp_to);
pr_err("%-15s %016llx %-13s %016llx\n",
"cr0:", save->cr0, "cr2:", save->cr2);
pr_err("%-15s %016llx %-13s %016llx\n",
"cr3:", save->cr3, "cr4:", save->cr4);
pr_err("%-15s %016llx %-13s %016llx\n",
"dr6:", save->dr6, "dr7:", save->dr7);
pr_err("%-15s %016llx %-13s %016llx\n",
"rip:", save->rip, "rflags:", save->rflags);
pr_err("%-15s %016llx %-13s %016llx\n",
"rsp:", save->rsp, "rax:", save->rax);
pr_err("%-15s %016llx %-13s %016llx\n",
"star:", save->star, "lstar:", save->lstar);
pr_err("%-15s %016llx %-13s %016llx\n",
"cstar:", save->cstar, "sfmask:", save->sfmask);
pr_err("%-15s %016llx %-13s %016llx\n",
"kernel_gs_base:", save->kernel_gs_base,
"sysenter_cs:", save->sysenter_cs);
pr_err("%-15s %016llx %-13s %016llx\n",
"sysenter_esp:", save->sysenter_esp,
"sysenter_eip:", save->sysenter_eip);
pr_err("%-15s %016llx %-13s %016llx\n",
"gpat:", save->g_pat, "dbgctl:", save->dbgctl);
pr_err("%-15s %016llx %-13s %016llx\n",
"br_from:", save->br_from, "br_to:", save->br_to);
pr_err("%-15s %016llx %-13s %016llx\n",
"excp_from:", save->last_excp_from,
"excp_to:", save->last_excp_to);
}
static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
@ -3384,7 +3483,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
return 0;
ret = !!(vmcb->save.rflags & X86_EFLAGS_IF);
ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF);
if (is_guest_mode(vcpu))
return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
@ -3871,6 +3970,186 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
update_cr0_intercept(svm);
}
#define PRE_EX(exit) { .exit_code = (exit), \
.stage = X86_ICPT_PRE_EXCEPT, }
#define POST_EX(exit) { .exit_code = (exit), \
.stage = X86_ICPT_POST_EXCEPT, }
#define POST_MEM(exit) { .exit_code = (exit), \
.stage = X86_ICPT_POST_MEMACCESS, }
static struct __x86_intercept {
u32 exit_code;
enum x86_intercept_stage stage;
} x86_intercept_map[] = {
[x86_intercept_cr_read] = POST_EX(SVM_EXIT_READ_CR0),
[x86_intercept_cr_write] = POST_EX(SVM_EXIT_WRITE_CR0),
[x86_intercept_clts] = POST_EX(SVM_EXIT_WRITE_CR0),
[x86_intercept_lmsw] = POST_EX(SVM_EXIT_WRITE_CR0),
[x86_intercept_smsw] = POST_EX(SVM_EXIT_READ_CR0),
[x86_intercept_dr_read] = POST_EX(SVM_EXIT_READ_DR0),
[x86_intercept_dr_write] = POST_EX(SVM_EXIT_WRITE_DR0),
[x86_intercept_sldt] = POST_EX(SVM_EXIT_LDTR_READ),
[x86_intercept_str] = POST_EX(SVM_EXIT_TR_READ),
[x86_intercept_lldt] = POST_EX(SVM_EXIT_LDTR_WRITE),
[x86_intercept_ltr] = POST_EX(SVM_EXIT_TR_WRITE),
[x86_intercept_sgdt] = POST_EX(SVM_EXIT_GDTR_READ),
[x86_intercept_sidt] = POST_EX(SVM_EXIT_IDTR_READ),
[x86_intercept_lgdt] = POST_EX(SVM_EXIT_GDTR_WRITE),
[x86_intercept_lidt] = POST_EX(SVM_EXIT_IDTR_WRITE),
[x86_intercept_vmrun] = POST_EX(SVM_EXIT_VMRUN),
[x86_intercept_vmmcall] = POST_EX(SVM_EXIT_VMMCALL),
[x86_intercept_vmload] = POST_EX(SVM_EXIT_VMLOAD),
[x86_intercept_vmsave] = POST_EX(SVM_EXIT_VMSAVE),
[x86_intercept_stgi] = POST_EX(SVM_EXIT_STGI),
[x86_intercept_clgi] = POST_EX(SVM_EXIT_CLGI),
[x86_intercept_skinit] = POST_EX(SVM_EXIT_SKINIT),
[x86_intercept_invlpga] = POST_EX(SVM_EXIT_INVLPGA),
[x86_intercept_rdtscp] = POST_EX(SVM_EXIT_RDTSCP),
[x86_intercept_monitor] = POST_MEM(SVM_EXIT_MONITOR),
[x86_intercept_mwait] = POST_EX(SVM_EXIT_MWAIT),
[x86_intercept_invlpg] = POST_EX(SVM_EXIT_INVLPG),
[x86_intercept_invd] = POST_EX(SVM_EXIT_INVD),
[x86_intercept_wbinvd] = POST_EX(SVM_EXIT_WBINVD),
[x86_intercept_wrmsr] = POST_EX(SVM_EXIT_MSR),
[x86_intercept_rdtsc] = POST_EX(SVM_EXIT_RDTSC),
[x86_intercept_rdmsr] = POST_EX(SVM_EXIT_MSR),
[x86_intercept_rdpmc] = POST_EX(SVM_EXIT_RDPMC),
[x86_intercept_cpuid] = PRE_EX(SVM_EXIT_CPUID),
[x86_intercept_rsm] = PRE_EX(SVM_EXIT_RSM),
[x86_intercept_pause] = PRE_EX(SVM_EXIT_PAUSE),
[x86_intercept_pushf] = PRE_EX(SVM_EXIT_PUSHF),
[x86_intercept_popf] = PRE_EX(SVM_EXIT_POPF),
[x86_intercept_intn] = PRE_EX(SVM_EXIT_SWINT),
[x86_intercept_iret] = PRE_EX(SVM_EXIT_IRET),
[x86_intercept_icebp] = PRE_EX(SVM_EXIT_ICEBP),
[x86_intercept_hlt] = POST_EX(SVM_EXIT_HLT),
[x86_intercept_in] = POST_EX(SVM_EXIT_IOIO),
[x86_intercept_ins] = POST_EX(SVM_EXIT_IOIO),
[x86_intercept_out] = POST_EX(SVM_EXIT_IOIO),
[x86_intercept_outs] = POST_EX(SVM_EXIT_IOIO),
};
#undef PRE_EX
#undef POST_EX
#undef POST_MEM
static int svm_check_intercept(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
enum x86_intercept_stage stage)
{
struct vcpu_svm *svm = to_svm(vcpu);
int vmexit, ret = X86EMUL_CONTINUE;
struct __x86_intercept icpt_info;
struct vmcb *vmcb = svm->vmcb;
if (info->intercept >= ARRAY_SIZE(x86_intercept_map))
goto out;
icpt_info = x86_intercept_map[info->intercept];
if (stage != icpt_info.stage)
goto out;
switch (icpt_info.exit_code) {
case SVM_EXIT_READ_CR0:
if (info->intercept == x86_intercept_cr_read)
icpt_info.exit_code += info->modrm_reg;
break;
case SVM_EXIT_WRITE_CR0: {
unsigned long cr0, val;
u64 intercept;
if (info->intercept == x86_intercept_cr_write)
icpt_info.exit_code += info->modrm_reg;
if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0)
break;
intercept = svm->nested.intercept;
if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))
break;
cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
val = info->src_val & ~SVM_CR0_SELECTIVE_MASK;
if (info->intercept == x86_intercept_lmsw) {
cr0 &= 0xfUL;
val &= 0xfUL;
/* lmsw can't clear PE - catch this here */
if (cr0 & X86_CR0_PE)
val |= X86_CR0_PE;
}
if (cr0 ^ val)
icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE;
break;
}
case SVM_EXIT_READ_DR0:
case SVM_EXIT_WRITE_DR0:
icpt_info.exit_code += info->modrm_reg;
break;
case SVM_EXIT_MSR:
if (info->intercept == x86_intercept_wrmsr)
vmcb->control.exit_info_1 = 1;
else
vmcb->control.exit_info_1 = 0;
break;
case SVM_EXIT_PAUSE:
/*
* We get this for NOP only, but pause
* is rep not, check this here
*/
if (info->rep_prefix != REPE_PREFIX)
goto out;
case SVM_EXIT_IOIO: {
u64 exit_info;
u32 bytes;
exit_info = (vcpu->arch.regs[VCPU_REGS_RDX] & 0xffff) << 16;
if (info->intercept == x86_intercept_in ||
info->intercept == x86_intercept_ins) {
exit_info |= SVM_IOIO_TYPE_MASK;
bytes = info->src_bytes;
} else {
bytes = info->dst_bytes;
}
if (info->intercept == x86_intercept_outs ||
info->intercept == x86_intercept_ins)
exit_info |= SVM_IOIO_STR_MASK;
if (info->rep_prefix)
exit_info |= SVM_IOIO_REP_MASK;
bytes = min(bytes, 4u);
exit_info |= bytes << SVM_IOIO_SIZE_SHIFT;
exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1);
vmcb->control.exit_info_1 = exit_info;
vmcb->control.exit_info_2 = info->next_rip;
break;
}
default:
break;
}
vmcb->control.next_rip = info->next_rip;
vmcb->control.exit_code = icpt_info.exit_code;
vmexit = nested_svm_exit_handled(svm);
ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
: X86EMUL_CONTINUE;
out:
return ret;
}
static struct kvm_x86_ops svm_x86_ops = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@ -3952,10 +4231,14 @@ static struct kvm_x86_ops svm_x86_ops = {
.has_wbinvd_exit = svm_has_wbinvd_exit,
.set_tsc_khz = svm_set_tsc_khz,
.write_tsc_offset = svm_write_tsc_offset,
.adjust_tsc_offset = svm_adjust_tsc_offset,
.compute_tsc_offset = svm_compute_tsc_offset,
.set_tdp_cr3 = set_tdp_cr3,
.check_intercept = svm_check_intercept,
};
static int __init svm_init(void)

View file

@ -128,8 +128,11 @@ struct vcpu_vmx {
unsigned long host_rsp;
int launched;
u8 fail;
u8 cpl;
bool nmi_known_unmasked;
u32 exit_intr_info;
u32 idt_vectoring_info;
ulong rflags;
struct shared_msr_entry *guest_msrs;
int nmsrs;
int save_nmsrs;
@ -159,6 +162,10 @@ struct vcpu_vmx {
u32 ar;
} tr, es, ds, fs, gs;
} rmode;
struct {
u32 bitmask; /* 4 bits per segment (1 bit per field) */
struct kvm_save_segment seg[8];
} segment_cache;
int vpid;
bool emulation_required;
@ -171,6 +178,15 @@ struct vcpu_vmx {
bool rdtscp_enabled;
};
enum segment_cache_field {
SEG_FIELD_SEL = 0,
SEG_FIELD_BASE = 1,
SEG_FIELD_LIMIT = 2,
SEG_FIELD_AR = 3,
SEG_FIELD_NR = 4
};
static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
{
return container_of(vcpu, struct vcpu_vmx, vcpu);
@ -643,6 +659,62 @@ static void vmcs_set_bits(unsigned long field, u32 mask)
vmcs_writel(field, vmcs_readl(field) | mask);
}
static void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
{
vmx->segment_cache.bitmask = 0;
}
static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg,
unsigned field)
{
bool ret;
u32 mask = 1 << (seg * SEG_FIELD_NR + field);
if (!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) {
vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS);
vmx->segment_cache.bitmask = 0;
}
ret = vmx->segment_cache.bitmask & mask;
vmx->segment_cache.bitmask |= mask;
return ret;
}
static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg)
{
u16 *p = &vmx->segment_cache.seg[seg].selector;
if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL))
*p = vmcs_read16(kvm_vmx_segment_fields[seg].selector);
return *p;
}
static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg)
{
ulong *p = &vmx->segment_cache.seg[seg].base;
if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE))
*p = vmcs_readl(kvm_vmx_segment_fields[seg].base);
return *p;
}
static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg)
{
u32 *p = &vmx->segment_cache.seg[seg].limit;
if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT))
*p = vmcs_read32(kvm_vmx_segment_fields[seg].limit);
return *p;
}
static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg)
{
u32 *p = &vmx->segment_cache.seg[seg].ar;
if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR))
*p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes);
return *p;
}
static void update_exception_bitmap(struct kvm_vcpu *vcpu)
{
u32 eb;
@ -970,17 +1042,24 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
{
unsigned long rflags, save_rflags;
if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) {
__set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
rflags = vmcs_readl(GUEST_RFLAGS);
if (to_vmx(vcpu)->rmode.vm86_active) {
rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
save_rflags = to_vmx(vcpu)->rmode.save_rflags;
rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
}
return rflags;
to_vmx(vcpu)->rflags = rflags;
}
return to_vmx(vcpu)->rflags;
}
static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
__set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
__clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
to_vmx(vcpu)->rflags = rflags;
if (to_vmx(vcpu)->rmode.vm86_active) {
to_vmx(vcpu)->rmode.save_rflags = rflags;
rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
@ -1053,7 +1132,10 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
}
if (vmx->rmode.vm86_active) {
if (kvm_inject_realmode_interrupt(vcpu, nr) != EMULATE_DONE)
int inc_eip = 0;
if (kvm_exception_is_soft(nr))
inc_eip = vcpu->arch.event_exit_inst_len;
if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE)
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
return;
}
@ -1150,6 +1232,16 @@ static u64 guest_read_tsc(void)
return host_tsc + tsc_offset;
}
/*
* Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ
* ioctl. In this case the call-back should update internal vmx state to make
* the changes effective.
*/
static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
{
/* Nothing to do here */
}
/*
* writes 'offset' into guest's timestamp counter offset register
*/
@ -1164,6 +1256,11 @@ static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
vmcs_write64(TSC_OFFSET, offset + adjustment);
}
static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
{
return target_tsc - native_read_tsc();
}
/*
* Reads an msr value (of 'msr_index') into 'pdata'.
* Returns 0 on success, non-0 otherwise.
@ -1243,9 +1340,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
break;
#ifdef CONFIG_X86_64
case MSR_FS_BASE:
vmx_segment_cache_clear(vmx);
vmcs_writel(GUEST_FS_BASE, data);
break;
case MSR_GS_BASE:
vmx_segment_cache_clear(vmx);
vmcs_writel(GUEST_GS_BASE, data);
break;
case MSR_KERNEL_GS_BASE:
@ -1689,6 +1788,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
vmx->emulation_required = 1;
vmx->rmode.vm86_active = 0;
vmx_segment_cache_clear(vmx);
vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector);
vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base);
vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit);
@ -1712,6 +1813,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
fix_pmode_dataseg(VCPU_SREG_GS, &vmx->rmode.gs);
fix_pmode_dataseg(VCPU_SREG_FS, &vmx->rmode.fs);
vmx_segment_cache_clear(vmx);
vmcs_write16(GUEST_SS_SELECTOR, 0);
vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
@ -1775,6 +1878,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
}
vmx_segment_cache_clear(vmx);
vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR);
vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
@ -1851,6 +1956,8 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
{
u32 guest_tr_ar;
vmx_segment_cache_clear(to_vmx(vcpu));
guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) {
printk(KERN_DEBUG "%s: tss fixup for long mode. \n",
@ -1998,6 +2105,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
vmcs_writel(CR0_READ_SHADOW, cr0);
vmcs_writel(GUEST_CR0, hw_cr0);
vcpu->arch.cr0 = cr0;
__clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
}
static u64 construct_eptp(unsigned long root_hpa)
@ -2053,7 +2161,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
struct kvm_save_segment *save;
u32 ar;
@ -2075,13 +2182,13 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
var->limit = save->limit;
ar = save->ar;
if (seg == VCPU_SREG_TR
|| var->selector == vmcs_read16(sf->selector))
|| var->selector == vmx_read_guest_seg_selector(vmx, seg))
goto use_saved_rmode_seg;
}
var->base = vmcs_readl(sf->base);
var->limit = vmcs_read32(sf->limit);
var->selector = vmcs_read16(sf->selector);
ar = vmcs_read32(sf->ar_bytes);
var->base = vmx_read_guest_seg_base(vmx, seg);
var->limit = vmx_read_guest_seg_limit(vmx, seg);
var->selector = vmx_read_guest_seg_selector(vmx, seg);
ar = vmx_read_guest_seg_ar(vmx, seg);
use_saved_rmode_seg:
if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state)
ar = 0;
@ -2098,27 +2205,37 @@ use_saved_rmode_seg:
static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
{
struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
struct kvm_segment s;
if (to_vmx(vcpu)->rmode.vm86_active) {
vmx_get_segment(vcpu, &s, seg);
return s.base;
}
return vmcs_readl(sf->base);
return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
}
static int vmx_get_cpl(struct kvm_vcpu *vcpu)
static int __vmx_get_cpl(struct kvm_vcpu *vcpu)
{
if (!is_protmode(vcpu))
return 0;
if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */
if (!is_long_mode(vcpu)
&& (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */
return 3;
return vmcs_read16(GUEST_CS_SELECTOR) & 3;
return vmx_read_guest_seg_selector(to_vmx(vcpu), VCPU_SREG_CS) & 3;
}
static int vmx_get_cpl(struct kvm_vcpu *vcpu)
{
if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) {
__set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
to_vmx(vcpu)->cpl = __vmx_get_cpl(vcpu);
}
return to_vmx(vcpu)->cpl;
}
static u32 vmx_segment_access_rights(struct kvm_segment *var)
{
u32 ar;
@ -2148,6 +2265,8 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
u32 ar;
vmx_segment_cache_clear(vmx);
if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) {
vmcs_write16(sf->selector, var->selector);
vmx->rmode.tr.selector = var->selector;
@ -2184,11 +2303,12 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
ar |= 0x1; /* Accessed */
vmcs_write32(sf->ar_bytes, ar);
__clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
}
static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
{
u32 ar = vmcs_read32(GUEST_CS_AR_BYTES);
u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS);
*db = (ar >> 14) & 1;
*l = (ar >> 13) & 1;
@ -2775,6 +2895,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
if (ret != 0)
goto out;
vmx_segment_cache_clear(vmx);
seg_setup(VCPU_SREG_CS);
/*
* GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
@ -2904,7 +3026,10 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
++vcpu->stat.irq_injections;
if (vmx->rmode.vm86_active) {
if (kvm_inject_realmode_interrupt(vcpu, irq) != EMULATE_DONE)
int inc_eip = 0;
if (vcpu->arch.interrupt.soft)
inc_eip = vcpu->arch.event_exit_inst_len;
if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE)
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
return;
}
@ -2937,8 +3062,9 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
}
++vcpu->stat.nmi_injections;
vmx->nmi_known_unmasked = false;
if (vmx->rmode.vm86_active) {
if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR) != EMULATE_DONE)
if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE)
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
return;
}
@ -2961,6 +3087,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
{
if (!cpu_has_virtual_nmis())
return to_vmx(vcpu)->soft_vnmi_blocked;
if (to_vmx(vcpu)->nmi_known_unmasked)
return false;
return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
}
@ -2974,6 +3102,7 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
vmx->vnmi_blocked_time = 0;
}
} else {
vmx->nmi_known_unmasked = !masked;
if (masked)
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
@ -3091,7 +3220,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
enum emulation_result er;
vect_info = vmx->idt_vectoring_info;
intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
intr_info = vmx->exit_intr_info;
if (is_machine_check(intr_info))
return handle_machine_check(vcpu);
@ -3122,7 +3251,6 @@ static int handle_exception(struct kvm_vcpu *vcpu)
}
error_code = 0;
rip = kvm_rip_read(vcpu);
if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
if (is_page_fault(intr_info)) {
@ -3169,6 +3297,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
vmx->vcpu.arch.event_exit_inst_len =
vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
kvm_run->exit_reason = KVM_EXIT_DEBUG;
rip = kvm_rip_read(vcpu);
kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
kvm_run->debug.arch.exception = ex_no;
break;
@ -3505,9 +3634,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
switch (type) {
case INTR_TYPE_NMI_INTR:
vcpu->arch.nmi_injected = false;
if (cpu_has_virtual_nmis())
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
vmx_set_nmi_mask(vcpu, true);
break;
case INTR_TYPE_EXT_INTR:
case INTR_TYPE_SOFT_INTR:
@ -3867,12 +3994,17 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
{
u32 exit_intr_info = vmx->exit_intr_info;
u32 exit_intr_info;
if (!(vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
|| vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI))
return;
vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
exit_intr_info = vmx->exit_intr_info;
/* Handle machine checks before interrupts are enabled */
if ((vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
|| (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI
&& is_machine_check(exit_intr_info)))
if (is_machine_check(exit_intr_info))
kvm_machine_check();
/* We need to handle NMIs before interrupts are enabled */
@ -3886,7 +4018,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
{
u32 exit_intr_info = vmx->exit_intr_info;
u32 exit_intr_info;
bool unblock_nmi;
u8 vector;
bool idtv_info_valid;
@ -3894,6 +4026,13 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
if (cpu_has_virtual_nmis()) {
if (vmx->nmi_known_unmasked)
return;
/*
* Can't use vmx->exit_intr_info since we're not sure what
* the exit reason is.
*/
exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
/*
@ -3910,6 +4049,10 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
vector != DF_VECTOR && !idtv_info_valid)
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
else
vmx->nmi_known_unmasked =
!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
& GUEST_INTR_STATE_NMI);
} else if (unlikely(vmx->soft_vnmi_blocked))
vmx->vnmi_blocked_time +=
ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
@ -3946,8 +4089,7 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx,
* Clear bit "block by NMI" before VM entry if a NMI
* delivery faulted.
*/
vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
vmx_set_nmi_mask(&vmx->vcpu, false);
break;
case INTR_TYPE_SOFT_EXCEPTION:
vmx->vcpu.arch.event_exit_inst_len =
@ -4124,7 +4266,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
);
vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
| (1 << VCPU_EXREG_RFLAGS)
| (1 << VCPU_EXREG_CPL)
| (1 << VCPU_EXREG_PDPTR)
| (1 << VCPU_EXREG_SEGMENTS)
| (1 << VCPU_EXREG_CR3));
vcpu->arch.regs_dirty = 0;
@ -4134,7 +4279,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx->launched = 1;
vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
vmx_complete_atomic_exit(vmx);
vmx_recover_nmi_blocking(vmx);
@ -4195,8 +4339,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
goto free_vcpu;
vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!vmx->guest_msrs) {
err = -ENOMEM;
if (!vmx->guest_msrs) {
goto uninit_vcpu;
}
@ -4215,7 +4359,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
if (err)
goto free_vmcs;
if (vm_need_virtualize_apic_accesses(kvm))
if (alloc_apic_access_page(kvm) != 0)
err = alloc_apic_access_page(kvm);
if (err)
goto free_vmcs;
if (enable_ept) {
@ -4368,6 +4513,13 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
{
}
static int vmx_check_intercept(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
enum x86_intercept_stage stage)
{
return X86EMUL_CONTINUE;
}
static struct kvm_x86_ops vmx_x86_ops = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,
@ -4449,10 +4601,14 @@ static struct kvm_x86_ops vmx_x86_ops = {
.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
.set_tsc_khz = vmx_set_tsc_khz,
.write_tsc_offset = vmx_write_tsc_offset,
.adjust_tsc_offset = vmx_adjust_tsc_offset,
.compute_tsc_offset = vmx_compute_tsc_offset,
.set_tdp_cr3 = vmx_set_cr3,
.check_intercept = vmx_check_intercept,
};
static int __init vmx_init(void)

File diff suppressed because it is too large Load diff

View file

@ -77,7 +77,7 @@ static inline u32 bit(int bitno)
void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq);
int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data);

View file

@ -541,6 +541,9 @@ struct kvm_ppc_pvinfo {
#define KVM_CAP_PPC_GET_PVINFO 57
#define KVM_CAP_PPC_IRQ_LEVEL 58
#define KVM_CAP_ASYNC_PF 59
#define KVM_CAP_TSC_CONTROL 60
#define KVM_CAP_GET_TSC_KHZ 61
#define KVM_CAP_PPC_BOOKE_SREGS 62
#ifdef KVM_CAP_IRQ_ROUTING
@ -677,6 +680,9 @@ struct kvm_clock_data {
#define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2)
/* Available with KVM_CAP_PPC_GET_PVINFO */
#define KVM_PPC_GET_PVINFO _IOW(KVMIO, 0xa1, struct kvm_ppc_pvinfo)
/* Available with KVM_CAP_TSC_CONTROL */
#define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2)
#define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3)
/*
* ioctls for vcpu fds

View file

@ -27,6 +27,10 @@
#include <asm/kvm_host.h>
#ifndef KVM_MMIO_SIZE
#define KVM_MMIO_SIZE 8
#endif
/*
* vcpu->requests bit members
*/
@ -43,7 +47,6 @@
#define KVM_REQ_DEACTIVATE_FPU 10
#define KVM_REQ_EVENT 11
#define KVM_REQ_APF_HALT 12
#define KVM_REQ_NMI 13
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
@ -133,7 +136,8 @@ struct kvm_vcpu {
int mmio_read_completed;
int mmio_is_write;
int mmio_size;
unsigned char mmio_data[8];
int mmio_index;
unsigned char mmio_data[KVM_MMIO_SIZE];
gpa_t mmio_phys_addr;
#endif
@ -292,9 +296,10 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
}
#define kvm_for_each_vcpu(idx, vcpup, kvm) \
for (idx = 0, vcpup = kvm_get_vcpu(kvm, idx); \
idx < atomic_read(&kvm->online_vcpus) && vcpup; \
vcpup = kvm_get_vcpu(kvm, ++idx))
for (idx = 0; \
idx < atomic_read(&kvm->online_vcpus) && \
(vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \
idx++)
int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
@ -365,7 +370,6 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
bool *writable);
pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn);
int memslot_id(struct kvm *kvm, gfn_t gfn);
void kvm_release_pfn_dirty(pfn_t);
void kvm_release_pfn_clean(pfn_t pfn);
void kvm_set_pfn_dirty(pfn_t pfn);
@ -587,8 +591,17 @@ static inline int kvm_deassign_device(struct kvm *kvm,
static inline void kvm_guest_enter(void)
{
BUG_ON(preemptible());
account_system_vtime(current);
current->flags |= PF_VCPU;
/* KVM does not hold any references to rcu protected data when it
* switches CPU into a guest mode. In fact switching to a guest mode
* is very similar to exiting to userspase from rcu point of view. In
* addition CPU may stay in a guest mode for quite a long time (up to
* one time slice). Lets treat guest mode as quiescent state, just like
* we do with user-mode execution.
*/
rcu_virt_note_context_switch(smp_processor_id());
}
static inline void kvm_guest_exit(void)
@ -597,6 +610,11 @@ static inline void kvm_guest_exit(void)
current->flags &= ~PF_VCPU;
}
static inline int memslot_id(struct kvm *kvm, gfn_t gfn)
{
return gfn_to_memslot(kvm, gfn)->id;
}
static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
gfn_t gfn)
{

View file

@ -167,7 +167,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
"vector=%x trig_mode=%x\n",
entry->fields.dest, entry->fields.dest_mode,
entry->fields.dest_id, entry->fields.dest_mode,
entry->fields.delivery_mode, entry->fields.vector,
entry->fields.trig_mode);

View file

@ -467,6 +467,7 @@ static struct kvm *kvm_create_vm(void)
if (!kvm->buses[i])
goto out_err;
}
spin_lock_init(&kvm->mmu_lock);
r = kvm_init_mmu_notifier(kvm);
if (r)
@ -474,7 +475,6 @@ static struct kvm *kvm_create_vm(void)
kvm->mm = current->mm;
atomic_inc(&kvm->mm->mm_count);
spin_lock_init(&kvm->mmu_lock);
kvm_eventfd_init(kvm);
mutex_init(&kvm->lock);
mutex_init(&kvm->irq_lock);
@ -648,7 +648,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
goto out;
if (mem->guest_phys_addr & (PAGE_SIZE - 1))
goto out;
if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1)))
/* We can read the guest memory with __xxx_user() later on. */
if (user_alloc &&
((mem->userspace_addr & (PAGE_SIZE - 1)) ||
!access_ok(VERIFY_WRITE, mem->userspace_addr, mem->memory_size)))
goto out;
if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
goto out;
@ -996,23 +999,6 @@ out:
return size;
}
int memslot_id(struct kvm *kvm, gfn_t gfn)
{
int i;
struct kvm_memslots *slots = kvm_memslots(kvm);
struct kvm_memory_slot *memslot = NULL;
for (i = 0; i < slots->nmemslots; ++i) {
memslot = &slots->memslots[i];
if (gfn >= memslot->base_gfn
&& gfn < memslot->base_gfn + memslot->npages)
break;
}
return memslot - slots->memslots;
}
static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
gfn_t *nr_pages)
{
@ -1300,7 +1286,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
addr = gfn_to_hva(kvm, gfn);
if (kvm_is_error_hva(addr))
return -EFAULT;
r = copy_from_user(data, (void __user *)addr + offset, len);
r = __copy_from_user(data, (void __user *)addr + offset, len);
if (r)
return -EFAULT;
return 0;