Merge branch 'kvm-updates/2.6.32' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.32' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (202 commits)
  MAINTAINERS: update KVM entry
  KVM: correct error-handling code
  KVM: fix compile warnings on s390
  KVM: VMX: Check cpl before emulating debug register access
  KVM: fix misreporting of coalesced interrupts by kvm tracer
  KVM: x86: drop duplicate kvm_flush_remote_tlb calls
  KVM: VMX: call vmx_load_host_state() only if msr is cached
  KVM: VMX: Conditionally reload debug register 6
  KVM: Use thread debug register storage instead of kvm specific data
  KVM guest: do not batch pte updates from interrupt context
  KVM: Fix coalesced interrupt reporting in IOAPIC
  KVM guest: fix bogus wallclock physical address calculation
  KVM: VMX: Fix cr8 exiting control clobbering by EPT
  KVM: Optimize kvm_mmu_unprotect_page_virt() for tdp
  KVM: Document KVM_CAP_IRQCHIP
  KVM: Protect update_cr8_intercept() when running without an apic
  KVM: VMX: Fix EPT with WP bit change during paging
  KVM: Use kvm_{read,write}_guest_virt() to read and write segment descriptors
  KVM: x86 emulator: Add adc and sbb missing decoder flags
  KVM: Add missing #include
  ...
commit 69def9f05d
80 changed files with 5714 additions and 2182 deletions
@@ -193,7 +193,7 @@ Code	Seq#	Include File		Comments

 0xAD	00	Netfilter device	in development:
 					<mailto:rusty@rustcorp.com.au>
 0xAE	all	linux/kvm.h		Kernel-based Virtual Machine
-					<mailto:kvm-devel@lists.sourceforge.net>
+					<mailto:kvm@vger.kernel.org>
 0xB0	all	RATIO devices		in development:
 					<mailto:vgo@ratio.de>
 0xB1	00-1F	PPPoX			<mailto:mostrows@styx.uwaterloo.ca>
@@ -57,6 +57,7 @@ parameter is applicable:

 	ISAPNP	ISA PnP code is enabled.
 	ISDN	Appropriate ISDN support is enabled.
 	JOY	Appropriate joystick support is enabled.
+	KVM	Kernel Virtual Machine support is enabled.
 	LIBATA	Libata driver is enabled
 	LP	Printer support is enabled.
 	LOOP	Loopback device support is enabled.
@@ -1098,6 +1099,44 @@ and is between 256 and 4096 characters. It is defined in the file

 	kstack=N	[X86] Print N words from the kernel stack
 			in oops dumps.

+	kvm.ignore_msrs=
+			[KVM] Ignore guest accesses to unhandled MSRs.
+			Default is 0 (don't ignore, but inject #GP)
+
+	kvm.oos_shadow=	[KVM] Disable out-of-sync shadow paging.
+			Default is 1 (enabled)
+
+	kvm-amd.nested=	[KVM,AMD] Allow nested virtualization in KVM/SVM.
+			Default is 0 (off)
+
+	kvm-amd.npt=	[KVM,AMD] Disable nested paging (virtualized MMU)
+			for all guests.
+			Default is 1 (enabled) if in 64bit or 32bit-PAE mode
+
+	kvm-intel.bypass_guest_pf=
+			[KVM,Intel] Disables bypassing of guest page faults
+			on Intel chips. Default is 1 (enabled)
+
+	kvm-intel.ept=	[KVM,Intel] Disable extended page tables
+			(virtualized MMU) support on capable Intel chips.
+			Default is 1 (enabled)
+
+	kvm-intel.emulate_invalid_guest_state=
+			[KVM,Intel] Enable emulation of invalid guest states
+			Default is 0 (disabled)
+
+	kvm-intel.flexpriority=
+			[KVM,Intel] Disable FlexPriority feature (TPR shadow).
+			Default is 1 (enabled)
+
+	kvm-intel.unrestricted_guest=
+			[KVM,Intel] Disable unrestricted guest feature
+			(virtualized real and unpaged mode) on capable
+			Intel chips. Default is 1 (enabled)
+
+	kvm-intel.vpid=	[KVM,Intel] Disable Virtual Processor Identification
+			feature (tagged TLBs) on capable Intel chips.
+			Default is 1 (enabled)

 	l2cr=		[PPC]

 	l3cr=		[PPC]
Documentation/kvm/api.txt (new file, 759 lines)

@@ -0,0 +1,759 @@
The Definitive KVM (Kernel-based Virtual Machine) API Documentation
===================================================================

1. General description

The kvm API is a set of ioctls that are issued to control various aspects
of a virtual machine.  The ioctls belong to three classes:

 - System ioctls: These query and set global attributes which affect the
   whole kvm subsystem.  In addition a system ioctl is used to create
   virtual machines.

 - VM ioctls: These query and set attributes that affect an entire virtual
   machine, for example memory layout.  In addition a VM ioctl is used to
   create virtual cpus (vcpus).

   Only run VM ioctls from the same process (address space) that was used
   to create the VM.

 - vcpu ioctls: These query and set attributes that control the operation
   of a single virtual cpu.

   Only run vcpu ioctls from the same thread that was used to create the
   vcpu.

2. File descriptors

The kvm API is centered around file descriptors.  An initial
open("/dev/kvm") obtains a handle to the kvm subsystem; this handle
can be used to issue system ioctls.  A KVM_CREATE_VM ioctl on this
handle will create a VM file descriptor which can be used to issue VM
ioctls.  A KVM_CREATE_VCPU ioctl on a VM fd will create a virtual cpu
and return a file descriptor pointing to it.  Finally, ioctls on a vcpu
fd can be used to control the vcpu, including the important task of
actually running guest code.

In general file descriptors can be migrated among processes by means
of fork() and the SCM_RIGHTS facility of unix domain sockets.  These
kinds of tricks are explicitly not supported by kvm.  While they will
not cause harm to the host, their actual behavior is not guaranteed by
the API.  The only supported use is one virtual machine per process,
and one vcpu per thread.
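The resulting descriptor hierarchy can be illustrated with a few lines
of C (a sketch: error handling is abbreviated, and a Linux build
environment providing <linux/kvm.h> is assumed):

#include <fcntl.h>      /* open() */
#include <sys/ioctl.h>  /* ioctl() */
#include <linux/kvm.h>  /* KVM_CREATE_VM, KVM_CREATE_VCPU, ... */

int main(void)
{
	/* system fd: a handle to the kvm subsystem */
	int sys_fd = open("/dev/kvm", O_RDWR);

	/* VM fd: created with a system ioctl on the kvm handle */
	int vm_fd = ioctl(sys_fd, KVM_CREATE_VM, 0);

	/* vcpu fd: created with a VM ioctl; the argument is the vcpu id */
	int vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);

	return (sys_fd < 0 || vm_fd < 0 || vcpu_fd < 0);
}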
3. Extensions

As of Linux 2.6.22, the KVM ABI has been stabilized: no
backward-incompatible changes are allowed.  However, there is an
extension facility that allows backward-compatible extensions to the
API to be queried and used.

The extension mechanism is not based on the Linux version number.
Instead, kvm defines extension identifiers and a facility to query
whether a particular extension identifier is available.  If it is, a
set of ioctls is available for application use.
4. API description

This section describes ioctls that can be used to control kvm guests.
For each ioctl, the following information is provided along with a
description:

  Capability: which KVM extension provides this ioctl.  Can be 'basic',
      which means that it will be provided by any kernel that supports
      API version 12 (see section 4.1), or a KVM_CAP_xyz constant, which
      means availability needs to be checked with KVM_CHECK_EXTENSION
      (see section 4.4).

  Architectures: which instruction set architectures provide this ioctl.
      x86 includes both i386 and x86_64.

  Type: system, vm, or vcpu.

  Parameters: what parameters are accepted by the ioctl.

  Returns: the return value.  General error numbers (EBADF, ENOMEM, EINVAL)
      are not detailed, but errors with specific meanings are.

4.1 KVM_GET_API_VERSION

Capability: basic
Architectures: all
Type: system ioctl
Parameters: none
Returns: the constant KVM_API_VERSION (=12)

This identifies the API version as the stable kvm API.  It is not
expected that this number will change.  However, Linux 2.6.20 and
2.6.21 report earlier versions; these are not documented and not
supported.  Applications should refuse to run if KVM_GET_API_VERSION
returns a value other than 12.  If this check passes, all ioctls
described as 'basic' will be available.
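Concretely, the recommended check looks like this (a sketch: the
stderr/exit handling is an illustrative choice, not part of the API):

int api_ver = ioctl(sys_fd, KVM_GET_API_VERSION, 0);

if (api_ver != 12) {
	fprintf(stderr, "kvm: unsupported API version %d\n", api_ver);
	exit(1);
}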
4.2 KVM_CREATE_VM

Capability: basic
Architectures: all
Type: system ioctl
Parameters: none
Returns: a VM fd that can be used to control the new virtual machine.

The new VM has no virtual cpus and no memory.  An mmap() of a VM fd
will access the virtual machine's physical address space; offset zero
corresponds to guest physical address zero.  Use of mmap() on a VM fd
is discouraged if userspace memory allocation (KVM_CAP_USER_MEMORY) is
available.
4.3 KVM_GET_MSR_INDEX_LIST

Capability: basic
Architectures: x86
Type: system ioctl
Parameters: struct kvm_msr_list (in/out)
Returns: 0 on success, -1 on error
Errors:
  E2BIG:  the msr index list is too big to fit in the array specified by
          the user.

struct kvm_msr_list {
	__u32 nmsrs; /* number of msrs in entries */
	__u32 indices[0];
};

This ioctl returns the guest msrs that are supported.  The list varies
by kvm version and host processor, but does not change otherwise.  The
user fills in the size of the indices array in nmsrs, and in return
kvm adjusts nmsrs to reflect the actual number of msrs and fills in
the indices array with their numbers.
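A common calling pattern is to guess a size, then retry once with the
count kvm reports back (a sketch: the initial guess of 64 entries and
the malloc/realloc idiom with <stdlib.h> and <errno.h> are illustrative
assumptions, not mandated by the API):

struct kvm_msr_list *list;
int n = 64;                        /* initial guess */

list = malloc(sizeof(*list) + n * sizeof(__u32));
list->nmsrs = n;
if (ioctl(sys_fd, KVM_GET_MSR_INDEX_LIST, list) < 0 && errno == E2BIG) {
	/* kvm adjusted nmsrs to the required count; grow and retry */
	n = list->nmsrs;
	list = realloc(list, sizeof(*list) + n * sizeof(__u32));
	list->nmsrs = n;
	ioctl(sys_fd, KVM_GET_MSR_INDEX_LIST, list);
}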
4.4 KVM_CHECK_EXTENSION

Capability: basic
Architectures: all
Type: system ioctl
Parameters: extension identifier (KVM_CAP_*)
Returns: 0 if unsupported; 1 (or some other positive integer) if supported

The API allows the application to query about extensions to the core
kvm API.  Userspace passes an extension identifier (an integer) and
receives an integer that describes the extension availability.
Generally 0 means no and 1 means yes, but some extensions may report
additional information in the integer return value.
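For example, an application can probe for userspace memory slots before
relying on them (a sketch: the helper name kvm_has_cap is hypothetical):

static int kvm_has_cap(int sys_fd, int cap)
{
	/* 0 if unsupported, positive if supported */
	return ioctl(sys_fd, KVM_CHECK_EXTENSION, cap);
}

...
if (!kvm_has_cap(sys_fd, KVM_CAP_USER_MEMORY))
	/* fall back to the older KVM_SET_MEMORY_REGION interface */;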
4.5 KVM_GET_VCPU_MMAP_SIZE

Capability: basic
Architectures: all
Type: system ioctl
Parameters: none
Returns: size of vcpu mmap area, in bytes

The KVM_RUN ioctl (cf. section 4.10) communicates with userspace via a
shared memory region.  This ioctl returns the size of that region.  See
the KVM_RUN documentation for details.
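Note the region is mapped from the vcpu fd even though its size is
queried on the system fd (a sketch: error handling omitted):

#include <sys/mman.h>   /* mmap() */

long mmap_size = ioctl(sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
struct kvm_run *run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
			   MAP_SHARED, vcpu_fd, 0);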
4.6 KVM_SET_MEMORY_REGION

Capability: basic
Architectures: all
Type: vm ioctl
Parameters: struct kvm_memory_region (in)
Returns: 0 on success, -1 on error

struct kvm_memory_region {
	__u32 slot;
	__u32 flags;
	__u64 guest_phys_addr;
	__u64 memory_size; /* bytes */
};

/* for kvm_memory_region::flags */
#define KVM_MEM_LOG_DIRTY_PAGES 1UL

This ioctl allows the user to create or modify a guest physical memory
slot.  When changing an existing slot, it may be moved in the guest
physical memory space, or its flags may be modified.  It may not be
resized.  Slots may not overlap.

The flags field supports just one flag, KVM_MEM_LOG_DIRTY_PAGES, which
instructs kvm to keep track of writes to memory within the slot.  See
the KVM_GET_DIRTY_LOG ioctl.

It is recommended to use the KVM_SET_USER_MEMORY_REGION ioctl instead
of this API, if available.  This newer API allows placing guest memory
at specified locations in the host address space, yielding better
control and easy access.
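A minimal slot definition covering the first 16MB of guest physical
memory might look as follows (a sketch: the 16MB size is an arbitrary
example value):

struct kvm_memory_region region = {
	.slot            = 0,
	.flags           = 0,
	.guest_phys_addr = 0,        /* starts at guest physical address 0 */
	.memory_size     = 16 << 20, /* bytes */
};

ioctl(vm_fd, KVM_SET_MEMORY_REGION, &region);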
4.7 KVM_CREATE_VCPU

Capability: basic
Architectures: all
Type: vm ioctl
Parameters: vcpu id (apic id on x86)
Returns: vcpu fd on success, -1 on error

This API adds a vcpu to a virtual machine.  The vcpu id is a small integer
in the range [0, max_vcpus).
4.8 KVM_GET_DIRTY_LOG

Capability: basic
Architectures: x86
Type: vm ioctl
Parameters: struct kvm_dirty_log (in/out)
Returns: 0 on success, -1 on error

/* for KVM_GET_DIRTY_LOG */
struct kvm_dirty_log {
	__u32 slot;
	__u32 padding;
	union {
		void __user *dirty_bitmap; /* one bit per page */
		__u64 padding;
	};
};

Given a memory slot, return a bitmap containing any pages dirtied
since the last call to this ioctl.  Bit 0 is the first page in the
memory slot.  Ensure the entire structure is cleared to avoid padding
issues.
4.9 KVM_SET_MEMORY_ALIAS

Capability: basic
Architectures: x86
Type: vm ioctl
Parameters: struct kvm_memory_alias (in)
Returns: 0 (success), -1 (error)

struct kvm_memory_alias {
	__u32 slot;  /* this has a different namespace than memory slots */
	__u32 flags;
	__u64 guest_phys_addr;
	__u64 memory_size;
	__u64 target_phys_addr;
};

Defines a guest physical address space region as an alias to another
region.  Useful for aliased addresses, for example the VGA low memory
window.  Should not be used with userspace memory.
4.10 KVM_RUN

Capability: basic
Architectures: all
Type: vcpu ioctl
Parameters: none
Returns: 0 on success, -1 on error
Errors:
  EINTR:  an unmasked signal is pending

This ioctl is used to run a guest virtual cpu.  While there are no
explicit parameters, there is an implicit parameter block that can be
obtained by mmap()ing the vcpu fd at offset 0, with the size given by
KVM_GET_VCPU_MMAP_SIZE.  The parameter block is formatted as a 'struct
kvm_run' (see below).
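In practice KVM_RUN sits in a loop that dispatches on the exit reason
(a sketch: 'run' is the mmap()ed struct kvm_run from section 4.5, and
only one exit reason is handled):

for (;;) {
	ioctl(vcpu_fd, KVM_RUN, 0);

	switch (run->exit_reason) {
	case KVM_EXIT_IO:
		/* satisfy the port I/O request; see section 5 */
		break;
	default:
		fprintf(stderr, "unhandled exit %d\n", run->exit_reason);
		return -1;
	}
}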
4.11 KVM_GET_REGS

Capability: basic
Architectures: all
Type: vcpu ioctl
Parameters: struct kvm_regs (out)
Returns: 0 on success, -1 on error

Reads the general purpose registers from the vcpu.

/* x86 */
struct kvm_regs {
	/* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
	__u64 rax, rbx, rcx, rdx;
	__u64 rsi, rdi, rsp, rbp;
	__u64 r8,  r9,  r10, r11;
	__u64 r12, r13, r14, r15;
	__u64 rip, rflags;
};

4.12 KVM_SET_REGS

Capability: basic
Architectures: all
Type: vcpu ioctl
Parameters: struct kvm_regs (in)
Returns: 0 on success, -1 on error

Writes the general purpose registers into the vcpu.

See KVM_GET_REGS for the data structure.
4.13 KVM_GET_SREGS

Capability: basic
Architectures: x86
Type: vcpu ioctl
Parameters: struct kvm_sregs (out)
Returns: 0 on success, -1 on error

Reads special registers from the vcpu.

/* x86 */
struct kvm_sregs {
	struct kvm_segment cs, ds, es, fs, gs, ss;
	struct kvm_segment tr, ldt;
	struct kvm_dtable gdt, idt;
	__u64 cr0, cr2, cr3, cr4, cr8;
	__u64 efer;
	__u64 apic_base;
	__u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
};

interrupt_bitmap is a bitmap of pending external interrupts.  At most
one bit may be set.  This interrupt has been acknowledged by the APIC
but not yet injected into the cpu core.

4.14 KVM_SET_SREGS

Capability: basic
Architectures: x86
Type: vcpu ioctl
Parameters: struct kvm_sregs (in)
Returns: 0 on success, -1 on error

Writes special registers into the vcpu.  See KVM_GET_SREGS for the
data structures.
4.15 KVM_TRANSLATE

Capability: basic
Architectures: x86
Type: vcpu ioctl
Parameters: struct kvm_translation (in/out)
Returns: 0 on success, -1 on error

Translates a virtual address according to the vcpu's current address
translation mode.

struct kvm_translation {
	/* in */
	__u64 linear_address;

	/* out */
	__u64 physical_address;
	__u8  valid;
	__u8  writeable;
	__u8  usermode;
	__u8  pad[5];
};
4.16 KVM_INTERRUPT

Capability: basic
Architectures: x86
Type: vcpu ioctl
Parameters: struct kvm_interrupt (in)
Returns: 0 on success, -1 on error

Queues a hardware interrupt vector to be injected.  This is only
useful if the in-kernel local APIC is not used.

/* for KVM_INTERRUPT */
struct kvm_interrupt {
	/* in */
	__u32 irq;
};

Note 'irq' is an interrupt vector, not an interrupt pin or line.
4.17 KVM_DEBUG_GUEST

Capability: basic
Architectures: none
Type: vcpu ioctl
Parameters: none
Returns: -1 on error

Support for this has been removed.  Use KVM_SET_GUEST_DEBUG instead.
4.18 KVM_GET_MSRS

Capability: basic
Architectures: x86
Type: vcpu ioctl
Parameters: struct kvm_msrs (in/out)
Returns: 0 on success, -1 on error

Reads model-specific registers from the vcpu.  Supported msr indices can
be obtained using KVM_GET_MSR_INDEX_LIST.

struct kvm_msrs {
	__u32 nmsrs; /* number of msrs in entries */
	__u32 pad;

	struct kvm_msr_entry entries[0];
};

struct kvm_msr_entry {
	__u32 index;
	__u32 reserved;
	__u64 data;
};

Application code should set the 'nmsrs' member (which indicates the
size of the entries array) and the 'index' member of each array entry.
kvm will fill in the 'data' member.

4.19 KVM_SET_MSRS

Capability: basic
Architectures: x86
Type: vcpu ioctl
Parameters: struct kvm_msrs (in)
Returns: 0 on success, -1 on error

Writes model-specific registers to the vcpu.  See KVM_GET_MSRS for the
data structures.

Application code should set the 'nmsrs' member (which indicates the
size of the entries array), and the 'index' and 'data' members of each
array entry.
4.20 KVM_SET_CPUID

Capability: basic
Architectures: x86
Type: vcpu ioctl
Parameters: struct kvm_cpuid (in)
Returns: 0 on success, -1 on error

Defines the vcpu responses to the cpuid instruction.  Applications
should use the KVM_SET_CPUID2 ioctl if available.

struct kvm_cpuid_entry {
	__u32 function;
	__u32 eax;
	__u32 ebx;
	__u32 ecx;
	__u32 edx;
	__u32 padding;
};

/* for KVM_SET_CPUID */
struct kvm_cpuid {
	__u32 nent;
	__u32 padding;
	struct kvm_cpuid_entry entries[0];
};
4.21 KVM_SET_SIGNAL_MASK

Capability: basic
Architectures: x86
Type: vcpu ioctl
Parameters: struct kvm_signal_mask (in)
Returns: 0 on success, -1 on error

Defines which signals are blocked during execution of KVM_RUN.  This
signal mask temporarily overrides the thread's signal mask.  Any
unblocked signal received (except SIGKILL and SIGSTOP, which retain
their traditional behaviour) will cause KVM_RUN to return with -EINTR.

Note the signal will only be delivered if not blocked by the original
signal mask.

/* for KVM_SET_SIGNAL_MASK */
struct kvm_signal_mask {
	__u32 len;
	__u8  sigset[0];
};
4.22 KVM_GET_FPU

Capability: basic
Architectures: x86
Type: vcpu ioctl
Parameters: struct kvm_fpu (out)
Returns: 0 on success, -1 on error

Reads the floating point state from the vcpu.

/* for KVM_GET_FPU and KVM_SET_FPU */
struct kvm_fpu {
	__u8  fpr[8][16];
	__u16 fcw;
	__u16 fsw;
	__u8  ftwx;  /* in fxsave format */
	__u8  pad1;
	__u16 last_opcode;
	__u64 last_ip;
	__u64 last_dp;
	__u8  xmm[16][16];
	__u32 mxcsr;
	__u32 pad2;
};

4.23 KVM_SET_FPU

Capability: basic
Architectures: x86
Type: vcpu ioctl
Parameters: struct kvm_fpu (in)
Returns: 0 on success, -1 on error

Writes the floating point state to the vcpu.  See KVM_GET_FPU for the
data structure.
4.24 KVM_CREATE_IRQCHIP

Capability: KVM_CAP_IRQCHIP
Architectures: x86, ia64
Type: vm ioctl
Parameters: none
Returns: 0 on success, -1 on error

Creates an interrupt controller model in the kernel.  On x86, creates a
virtual ioapic, a virtual PIC (two PICs, nested), and sets up future
vcpus to have a local APIC.  IRQ routing for GSIs 0-15 is set to both
PIC and IOAPIC; GSIs 16-23 only go to the IOAPIC.  On ia64, an IOSAPIC
is created.
4.25 KVM_IRQ_LINE

Capability: KVM_CAP_IRQCHIP
Architectures: x86, ia64
Type: vm ioctl
Parameters: struct kvm_irq_level
Returns: 0 on success, -1 on error

Sets the level of a GSI input to the interrupt controller model in the
kernel.  Requires that an interrupt controller model has been previously
created with KVM_CREATE_IRQCHIP.  Note that edge-triggered interrupts
require the level to be set to 1 and then back to 0.

struct kvm_irq_level {
	union {
		__u32 irq;     /* GSI */
		__s32 status;  /* not used for KVM_IRQ_LEVEL */
	};
	__u32 level;           /* 0 or 1 */
};
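For an edge-triggered interrupt this means pulsing the line with two
calls (a sketch: GSI 4 is an arbitrary example):

struct kvm_irq_level irq = { .irq = 4, .level = 1 };

ioctl(vm_fd, KVM_IRQ_LINE, &irq);   /* raise the line... */
irq.level = 0;
ioctl(vm_fd, KVM_IRQ_LINE, &irq);   /* ...and lower it again */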
4.26 KVM_GET_IRQCHIP

Capability: KVM_CAP_IRQCHIP
Architectures: x86, ia64
Type: vm ioctl
Parameters: struct kvm_irqchip (in/out)
Returns: 0 on success, -1 on error

Reads the state of a kernel interrupt controller created with
KVM_CREATE_IRQCHIP into a buffer provided by the caller.

struct kvm_irqchip {
	__u32 chip_id;  /* 0 = PIC1, 1 = PIC2, 2 = IOAPIC */
	__u32 pad;
	union {
		char dummy[512];  /* reserving space */
		struct kvm_pic_state pic;
		struct kvm_ioapic_state ioapic;
	} chip;
};

4.27 KVM_SET_IRQCHIP

Capability: KVM_CAP_IRQCHIP
Architectures: x86, ia64
Type: vm ioctl
Parameters: struct kvm_irqchip (in)
Returns: 0 on success, -1 on error

Sets the state of a kernel interrupt controller created with
KVM_CREATE_IRQCHIP from a buffer provided by the caller.  See
KVM_GET_IRQCHIP for the data structure.
5. The kvm_run structure

Application code obtains a pointer to the kvm_run structure by
mmap()ing a vcpu fd.  From that point, application code can control
execution by changing fields in kvm_run prior to calling the KVM_RUN
ioctl, and obtain information about the reason KVM_RUN returned by
looking up structure members.

struct kvm_run {
	/* in */
	__u8 request_interrupt_window;

Request that KVM_RUN return when it becomes possible to inject external
interrupts into the guest.  Useful in conjunction with KVM_INTERRUPT.

	__u8 padding1[7];

	/* out */
	__u32 exit_reason;

When KVM_RUN has returned successfully (return value 0), this informs
application code why KVM_RUN has returned.  Allowable values for this
field are detailed below.

	__u8 ready_for_interrupt_injection;

If request_interrupt_window has been specified, this field indicates
an interrupt can be injected now with KVM_INTERRUPT.

	__u8 if_flag;

The value of the current interrupt flag.  Only valid if the in-kernel
local APIC is not used.

	__u8 padding2[2];

	/* in (pre_kvm_run), out (post_kvm_run) */
	__u64 cr8;

The value of the cr8 register.  Only valid if the in-kernel local APIC
is not used.  Both input and output.

	__u64 apic_base;

The value of the APIC BASE msr.  Only valid if the in-kernel local
APIC is not used.  Both input and output.

	union {
		/* KVM_EXIT_UNKNOWN */
		struct {
			__u64 hardware_exit_reason;
		} hw;

If exit_reason is KVM_EXIT_UNKNOWN, the vcpu has exited due to unknown
reasons.  Further architecture-specific information is available in
hardware_exit_reason.

		/* KVM_EXIT_FAIL_ENTRY */
		struct {
			__u64 hardware_entry_failure_reason;
		} fail_entry;

If exit_reason is KVM_EXIT_FAIL_ENTRY, the vcpu could not be run due
to unknown reasons.  Further architecture-specific information is
available in hardware_entry_failure_reason.

		/* KVM_EXIT_EXCEPTION */
		struct {
			__u32 exception;
			__u32 error_code;
		} ex;

Unused.

		/* KVM_EXIT_IO */
		struct {
#define KVM_EXIT_IO_IN  0
#define KVM_EXIT_IO_OUT 1
			__u8  direction;
			__u8  size; /* bytes */
			__u16 port;
			__u32 count;
			__u64 data_offset; /* relative to kvm_run start */
		} io;

If exit_reason is KVM_EXIT_IO, then the vcpu has executed a port I/O
instruction which could not be satisfied by kvm.  data_offset describes
where the data is located (KVM_EXIT_IO_OUT) or where kvm expects
application code to place the data for the next KVM_RUN invocation
(KVM_EXIT_IO_IN).  Data format is a packed array.

		struct {
			struct kvm_debug_exit_arch arch;
		} debug;

Unused.

		/* KVM_EXIT_MMIO */
		struct {
			__u64 phys_addr;
			__u8  data[8];
			__u32 len;
			__u8  is_write;
		} mmio;

If exit_reason is KVM_EXIT_MMIO, then the vcpu has executed a
memory-mapped I/O instruction which could not be satisfied by kvm.
The 'data' member contains the written data if 'is_write' is true,
and should be filled by application code otherwise.

		/* KVM_EXIT_HYPERCALL */
		struct {
			__u64 nr;
			__u64 args[6];
			__u64 ret;
			__u32 longmode;
			__u32 pad;
		} hypercall;

Unused.

		/* KVM_EXIT_TPR_ACCESS */
		struct {
			__u64 rip;
			__u32 is_write;
			__u32 pad;
		} tpr_access;

To be documented (KVM_TPR_ACCESS_REPORTING).

		/* KVM_EXIT_S390_SIEIC */
		struct {
			__u8  icptcode;
			__u64 mask; /* psw upper half */
			__u64 addr; /* psw lower half */
			__u16 ipa;
			__u32 ipb;
		} s390_sieic;

s390 specific.

		/* KVM_EXIT_S390_RESET */
#define KVM_S390_RESET_POR       1
#define KVM_S390_RESET_CLEAR     2
#define KVM_S390_RESET_SUBSYSTEM 4
#define KVM_S390_RESET_CPU_INIT  8
#define KVM_S390_RESET_IPL       16
		__u64 s390_reset_flags;

s390 specific.

		/* KVM_EXIT_DCR */
		struct {
			__u32 dcrn;
			__u32 data;
			__u8  is_write;
		} dcr;

powerpc specific.

		/* Fix the size of the union. */
		char padding[256];
	};
};
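As an illustration of how data_offset is consumed, byte-wide guest OUT
data can be echoed like this (a sketch: only the single-byte
KVM_EXIT_IO_OUT case is handled, 'run' is the mmap()ed structure, and
putchar() stands in for a real device model):

if (run->exit_reason == KVM_EXIT_IO &&
    run->io.direction == KVM_EXIT_IO_OUT && run->io.size == 1) {
	/* the guest's data lives inside the mmap()ed region itself */
	__u8 *data = (__u8 *)run + run->io.data_offset;
	__u32 i;

	for (i = 0; i < run->io.count; i++)
		putchar(data[i]);
}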
@@ -2926,6 +2926,7 @@ F:	include/linux/sunrpc/

 KERNEL VIRTUAL MACHINE (KVM)
 M:	Avi Kivity <avi@redhat.com>
+M:	Marcelo Tosatti <mtosatti@redhat.com>
 L:	kvm@vger.kernel.org
 W:	http://kvm.qumranet.com
 S:	Supported
@@ -235,7 +235,8 @@ struct kvm_vm_data {
 #define KVM_REQ_PTC_G		32
 #define KVM_REQ_RESUME		33

-#define KVM_PAGES_PER_HPAGE	1
+#define KVM_NR_PAGE_SIZES	1
+#define KVM_PAGES_PER_HPAGE(x)	1

 struct kvm;
 struct kvm_vcpu;
@@ -465,7 +466,6 @@ struct kvm_arch {
 	unsigned long	metaphysical_rr4;
 	unsigned long	vmm_init_rr;

-	int		online_vcpus;
 	int		is_sn2;

 	struct kvm_ioapic *vioapic;
@@ -19,9 +19,13 @@
  *
  */

+#ifdef __KERNEL__
+
 static inline unsigned int kvm_arch_para_features(void)
 {
 	return 0;
 }

+#endif
+
 #endif
@@ -1,12 +1,8 @@
 #
 # KVM configuration
 #
-config HAVE_KVM
-	bool
-
-config HAVE_KVM_IRQCHIP
-	bool
-	default y
+source "virt/kvm/Kconfig"

 menuconfig VIRTUALIZATION
 	bool "Virtualization"
@@ -28,6 +24,8 @@ config KVM
 	depends on PCI
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
+	select HAVE_KVM_IRQCHIP
+	select KVM_APIC_ARCHITECTURE
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
 	  virtualization extensions.  You will need a fairly recent
@@ -49,9 +47,6 @@ config KVM_INTEL
 	  Provides support for KVM on Itanium 2 processors equipped with the VT
 	  extensions.

-config KVM_TRACE
-	bool
-
 source drivers/virtio/Kconfig

 endif # VIRTUALIZATION
@@ -210,16 +210,6 @@ int kvm_dev_ioctl_check_extension(long ext)

 }

-static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
-					gpa_t addr, int len, int is_write)
-{
-	struct kvm_io_device *dev;
-
-	dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len, is_write);
-
-	return dev;
-}
-
 static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
@@ -231,6 +221,7 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	struct kvm_mmio_req *p;
 	struct kvm_io_device *mmio_dev;
+	int r;

 	p = kvm_get_vcpu_ioreq(vcpu);
@@ -247,16 +238,13 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	kvm_run->exit_reason = KVM_EXIT_MMIO;
 	return 0;
 mmio:
-	mmio_dev = vcpu_find_mmio_dev(vcpu, p->addr, p->size, !p->dir);
-	if (mmio_dev) {
-		if (!p->dir)
-			kvm_iodevice_write(mmio_dev, p->addr, p->size,
-					   &p->data);
-		else
-			kvm_iodevice_read(mmio_dev, p->addr, p->size,
-					  &p->data);
-
-	} else
+	if (p->dir)
+		r = kvm_io_bus_read(&vcpu->kvm->mmio_bus, p->addr,
+				    p->size, &p->data);
+	else
+		r = kvm_io_bus_write(&vcpu->kvm->mmio_bus, p->addr,
+				     p->size, &p->data);
+	if (r)
 		printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);
 	p->state = STATE_IORESP_READY;
@@ -337,13 +325,12 @@ static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
 {
 	union ia64_lid lid;
 	int i;
+	struct kvm_vcpu *vcpu;

-	for (i = 0; i < kvm->arch.online_vcpus; i++) {
-		if (kvm->vcpus[i]) {
-			lid.val = VCPU_LID(kvm->vcpus[i]);
-			if (lid.id == id && lid.eid == eid)
-				return kvm->vcpus[i];
-		}
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		lid.val = VCPU_LID(vcpu);
+		if (lid.id == id && lid.eid == eid)
+			return vcpu;
 	}

 	return NULL;
@@ -409,21 +396,21 @@ static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	struct kvm *kvm = vcpu->kvm;
 	struct call_data call_data;
 	int i;
+	struct kvm_vcpu *vcpui;

 	call_data.ptc_g_data = p->u.ptc_g_data;

-	for (i = 0; i < kvm->arch.online_vcpus; i++) {
-		if (!kvm->vcpus[i] || kvm->vcpus[i]->arch.mp_state ==
-						KVM_MP_STATE_UNINITIALIZED ||
-					vcpu == kvm->vcpus[i])
+	kvm_for_each_vcpu(i, vcpui, kvm) {
+		if (vcpui->arch.mp_state == KVM_MP_STATE_UNINITIALIZED ||
+				vcpu == vcpui)
 			continue;

-		if (waitqueue_active(&kvm->vcpus[i]->wq))
-			wake_up_interruptible(&kvm->vcpus[i]->wq);
+		if (waitqueue_active(&vcpui->wq))
+			wake_up_interruptible(&vcpui->wq);

-		if (kvm->vcpus[i]->cpu != -1) {
-			call_data.vcpu = kvm->vcpus[i];
-			smp_call_function_single(kvm->vcpus[i]->cpu,
+		if (vcpui->cpu != -1) {
+			call_data.vcpu = vcpui;
+			smp_call_function_single(vcpui->cpu,
 				vcpu_global_purge, &call_data, 1);
 		} else
 			printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n");
@@ -852,8 +839,6 @@ struct kvm *kvm_arch_create_vm(void)

 	kvm_init_vm(kvm);

-	kvm->arch.online_vcpus = 0;
-
 	return kvm;

 }
@@ -1000,10 +985,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			goto out;
 		if (irqchip_in_kernel(kvm)) {
 			__s32 status;
-			mutex_lock(&kvm->lock);
+			mutex_lock(&kvm->irq_lock);
 			status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
 				    irq_event.irq, irq_event.level);
-			mutex_unlock(&kvm->lock);
+			mutex_unlock(&kvm->irq_lock);
 			if (ioctl == KVM_IRQ_LINE_STATUS) {
 				irq_event.status = status;
 				if (copy_to_user(argp, &irq_event,
@@ -1216,7 +1201,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	if (IS_ERR(vmm_vcpu))
 		return PTR_ERR(vmm_vcpu);

-	if (vcpu->vcpu_id == 0) {
+	if (kvm_vcpu_is_bsp(vcpu)) {
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;

 		/*Set entry address for first run.*/
@@ -1224,7 +1209,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)

 		/*Initialize itc offset for vcpus*/
 		itc_offset = 0UL - kvm_get_itc(vcpu);
-		for (i = 0; i < kvm->arch.online_vcpus; i++) {
+		for (i = 0; i < KVM_MAX_VCPUS; i++) {
 			v = (struct kvm_vcpu *)((char *)vcpu +
 					sizeof(struct kvm_vcpu_data) * i);
 			v->arch.itc_offset = itc_offset;
@@ -1356,8 +1341,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 		goto fail;
 	}

-	kvm->arch.online_vcpus++;
-
 	return vcpu;
 fail:
 	return ERR_PTR(r);
@@ -1952,19 +1935,6 @@ int kvm_highest_pending_irq(struct kvm_vcpu *vcpu)
 	return find_highest_bits((int *)&vpd->irr[0]);
 }

-int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
-{
-	if (kvm_highest_pending_irq(vcpu) != -1)
-		return 1;
-	return 0;
-}
-
-int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
-{
-	/* do real check here */
-	return 1;
-}
-
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.timer_fired;
@@ -1977,7 +1947,8 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)

 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE;
+	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) ||
+		(kvm_highest_pending_irq(vcpu) != -1);
 }

 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
@@ -830,8 +830,8 @@ static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)

 	kvm = (struct kvm *)KVM_VM_BASE;

-	if (vcpu->vcpu_id == 0) {
-		for (i = 0; i < kvm->arch.online_vcpus; i++) {
+	if (kvm_vcpu_is_bsp(vcpu)) {
+		for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) {
 			v = (struct kvm_vcpu *)((char *)vcpu +
 					sizeof(struct kvm_vcpu_data) * i);
 			VMX(v, itc_offset) = itc_offset;
@@ -34,7 +34,8 @@
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1

 /* We don't currently support large pages. */
-#define KVM_PAGES_PER_HPAGE	(1UL << 31)
+#define KVM_NR_PAGE_SIZES	1
+#define KVM_PAGES_PER_HPAGE(x)	(1UL<<31)

 struct kvm;
 struct kvm_run;
@@ -153,7 +154,6 @@ struct kvm_vcpu_arch {
 	u32 pid;
 	u32 swap_pid;

-	u32 pvr;
 	u32 ccr0;
 	u32 ccr1;
 	u32 dbcr0;
@@ -138,7 +138,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 	kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
 }

-static int kvmppc_44x_init(void)
+static int __init kvmppc_44x_init(void)
 {
 	int r;
@@ -149,7 +149,7 @@ static int kvmppc_44x_init(void)
 	return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), THIS_MODULE);
 }

-static void kvmppc_44x_exit(void)
+static void __exit kvmppc_44x_exit(void)
 {
 	kvmppc_booke_exit();
 }
@@ -30,6 +30,7 @@
 #include "timing.h"

 #include "44x_tlb.h"
+#include "trace.h"

 #ifndef PPC44x_TLBE_SIZE
 #define PPC44x_TLBE_SIZE	PPC44x_TLB_4K
@@ -263,7 +264,7 @@ static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,

 	/* XXX set tlb_44x_index to stlb_index? */

-	KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler);
+	trace_kvm_stlb_inval(stlb_index);
 }

 void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
@@ -365,8 +366,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
 	/* Insert shadow mapping into hardware TLB. */
 	kvmppc_44x_tlbe_set_modified(vcpu_44x, victim);
 	kvmppc_44x_tlbwe(victim, &stlbe);
-	KVMTRACE_5D(STLB_WRITE, vcpu, victim, stlbe.tid, stlbe.word0, stlbe.word1,
-		    stlbe.word2, handler);
+	trace_kvm_stlb_write(victim, stlbe.tid, stlbe.word0, stlbe.word1,
+			     stlbe.word2);
 }

 /* For a particular guest TLB entry, invalidate the corresponding host TLB
@@ -485,8 +486,8 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
 		kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
 	}

-	KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0,
-		    tlbe->word1, tlbe->word2, handler);
+	trace_kvm_gtlb_write(gtlb_index, tlbe->tid, tlbe->word0, tlbe->word1,
+			     tlbe->word2);

 	kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
 	return EMULATE_DONE;
@@ -2,8 +2,7 @@
 # KVM configuration
 #

-config HAVE_KVM_IRQCHIP
-	bool
+source "virt/kvm/Kconfig"

 menuconfig VIRTUALIZATION
 	bool "Virtualization"
@@ -59,17 +58,6 @@ config KVM_E500

 	  If unsure, say N.

-config KVM_TRACE
-	bool "KVM trace support"
-	depends on KVM && MARKERS && SYSFS
-	select RELAY
-	select DEBUG_FS
-	default n
-	---help---
-	  This option allows reading a trace of kvm-related events through
-	  relayfs.  Note the ABI is not considered stable and will be
-	  modified in future updates.
-
 source drivers/virtio/Kconfig

 endif # VIRTUALIZATION
@@ -8,7 +8,9 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/powerpc/kvm

 common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)

-common-objs-$(CONFIG_KVM_TRACE) += $(addprefix ../../../virt/kvm/, kvm_trace.o)
+CFLAGS_44x_tlb.o  := -I.
+CFLAGS_e500_tlb.o := -I.
+CFLAGS_emulate.o  := -I.

 kvm-objs := $(common-objs-y) powerpc.o emulate.o
 obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
@@ -520,7 +520,7 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 	return kvmppc_core_vcpu_translate(vcpu, tr);
 }

-int kvmppc_booke_init(void)
+int __init kvmppc_booke_init(void)
 {
 	unsigned long ivor[16];
 	unsigned long max_ivor = 0;
@@ -60,9 +60,6 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)

 	kvmppc_e500_tlb_setup(vcpu_e500);

-	/* Use the same core vertion as host's */
-	vcpu->arch.pvr = mfspr(SPRN_PVR);
-
 	return 0;
 }
|
|||
kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
|
||||
}
|
||||
|
||||
static int kvmppc_e500_init(void)
|
||||
static int __init kvmppc_e500_init(void)
|
||||
{
|
||||
int r, i;
|
||||
unsigned long ivor[3];
|
||||
|
@@ -160,7 +157,7 @@ static int kvmppc_e500_init(void)
 	return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), THIS_MODULE);
 }

-static void kvmppc_e500_exit(void)
+static void __init kvmppc_e500_exit(void)
 {
 	kvmppc_booke_exit();
 }
@@ -180,6 +180,9 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 	case SPRN_MMUCSR0:
 		vcpu->arch.gpr[rt] = 0; break;

+	case SPRN_MMUCFG:
+		vcpu->arch.gpr[rt] = mfspr(SPRN_MMUCFG); break;
+
 	/* extra exceptions */
 	case SPRN_IVOR32:
 		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
@@ -22,6 +22,7 @@

 #include "../mm/mmu_decl.h"
 #include "e500_tlb.h"
+#include "trace.h"

 #define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1)
@@ -224,9 +225,8 @@ static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,

 	kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel);
 	stlbe->mas1 = 0;
-	KVMTRACE_5D(STLB_INVAL, &vcpu_e500->vcpu, index_of(tlbsel, esel),
-		    stlbe->mas1, stlbe->mas2, stlbe->mas3, stlbe->mas7,
-		    handler);
+	trace_kvm_stlb_inval(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
+			     stlbe->mas3, stlbe->mas7);
 }

 static void kvmppc_e500_tlb1_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
@@ -269,7 +269,7 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
 	tlbsel = (vcpu_e500->mas4 >> 28) & 0x1;
 	victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0;
 	pidsel = (vcpu_e500->mas4 >> 16) & 0xf;
-	tsized = (vcpu_e500->mas4 >> 8) & 0xf;
+	tsized = (vcpu_e500->mas4 >> 7) & 0x1f;

 	vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
 		| MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
@@ -309,7 +309,7 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 	vcpu_e500->shadow_pages[tlbsel][esel] = new_page;

 	/* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */
-	stlbe->mas1 = MAS1_TSIZE(BOOKE_PAGESZ_4K)
+	stlbe->mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K)
 		| MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID;
 	stlbe->mas2 = (gvaddr & MAS2_EPN)
 		| e500_shadow_mas2_attrib(gtlbe->mas2,
@@ -319,9 +319,8 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 			vcpu_e500->vcpu.arch.msr & MSR_PR);
 	stlbe->mas7 = (hpaddr >> 32) & MAS7_RPN;

-	KVMTRACE_5D(STLB_WRITE, &vcpu_e500->vcpu, index_of(tlbsel, esel),
-		    stlbe->mas1, stlbe->mas2, stlbe->mas3, stlbe->mas7,
-		    handler);
+	trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
+			     stlbe->mas3, stlbe->mas7);
 }

 /* XXX only map the one-one case, for now use TLB0 */
@@ -535,9 +534,8 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
 	gtlbe->mas3 = vcpu_e500->mas3;
 	gtlbe->mas7 = vcpu_e500->mas7;

-	KVMTRACE_5D(GTLB_WRITE, vcpu, vcpu_e500->mas0,
-		    gtlbe->mas1, gtlbe->mas2, gtlbe->mas3, gtlbe->mas7,
-		    handler);
+	trace_kvm_gtlb_write(vcpu_e500->mas0, gtlbe->mas1, gtlbe->mas2,
+			     gtlbe->mas3, gtlbe->mas7);

 	/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
 	if (tlbe_is_host_safe(vcpu, gtlbe)) {
@@ -545,7 +543,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
 		case 0:
 			/* TLB0 */
 			gtlbe->mas1 &= ~MAS1_TSIZE(~0);
-			gtlbe->mas1 |= MAS1_TSIZE(BOOKE_PAGESZ_4K);
+			gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K);

 			stlbsel = 0;
 			sesel = kvmppc_e500_stlbe_map(vcpu_e500, 0, esel);
@@ -679,14 +677,14 @@ void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)

 	/* Insert large initial mapping for guest. */
 	tlbe = &vcpu_e500->guest_tlb[1][0];
-	tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_256M);
+	tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
 	tlbe->mas2 = 0;
 	tlbe->mas3 = E500_TLB_SUPER_PERM_MASK;
 	tlbe->mas7 = 0;

 	/* 4K map for serial output. Used by kernel wrapper. */
 	tlbe = &vcpu_e500->guest_tlb[1][1];
-	tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_4K);
+	tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K);
 	tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
 	tlbe->mas3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
 	tlbe->mas7 = 0;
@@ -16,7 +16,7 @@
 #define __KVM_E500_TLB_H__

 #include <linux/kvm_host.h>
-#include <asm/mmu-fsl-booke.h>
+#include <asm/mmu-book3e.h>
 #include <asm/tlb.h>
 #include <asm/kvm_e500.h>
@@ -59,7 +59,7 @@ extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *);
 /* TLB helper functions */
 static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
 {
-	return (tlbe->mas1 >> 8) & 0xf;
+	return (tlbe->mas1 >> 7) & 0x1f;
 }

 static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
@@ -70,7 +70,7 @@ static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
 static inline u64 get_tlb_bytes(const struct tlbe *tlbe)
 {
 	unsigned int pgsize = get_tlb_size(tlbe);
-	return 1ULL << 10 << (pgsize << 1);
+	return 1ULL << 10 << pgsize;
 }

 static inline gva_t get_tlb_end(const struct tlbe *tlbe)
@@ -29,6 +29,7 @@
 #include <asm/kvm_ppc.h>
 #include <asm/disassemble.h>
 #include "timing.h"
+#include "trace.h"

 #define OP_TRAP 3
@@ -187,7 +188,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		case SPRN_SRR1:
 			vcpu->arch.gpr[rt] = vcpu->arch.srr1; break;
 		case SPRN_PVR:
-			vcpu->arch.gpr[rt] = vcpu->arch.pvr; break;
+			vcpu->arch.gpr[rt] = mfspr(SPRN_PVR); break;
+		case SPRN_PIR:
+			vcpu->arch.gpr[rt] = mfspr(SPRN_PIR); break;

 		/* Note: mftb and TBRL/TBWL are user-accessible, so
 		 * the guest can always access the real TB anyways.
@@ -417,7 +420,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		}
 	}

-	KVMTRACE_3D(PPC_INSTR, vcpu, inst, (int)vcpu->arch.pc, emulated, entryexit);
+	trace_kvm_ppc_instr(inst, vcpu->arch.pc, emulated);

 	if (advance)
 		vcpu->arch.pc += 4; /* Advance past emulated instruction. */
@@ -31,25 +31,17 @@
 #include "timing.h"
 #include "../mm/mmu_decl.h"

+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
 {
 	return gfn;
 }

-int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
-{
-	return !!(v->arch.pending_exceptions);
-}
-
-int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
-{
-	/* do real check here */
-	return 1;
-}
-
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
-	return !(v->arch.msr & MSR_WE);
+	return !(v->arch.msr & MSR_WE) || !!(v->arch.pending_exceptions);
 }
@@ -122,13 +114,17 @@ struct kvm *kvm_arch_create_vm(void)
 static void kvmppc_free_vcpus(struct kvm *kvm)
 {
 	unsigned int i;
+	struct kvm_vcpu *vcpu;

-	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-		if (kvm->vcpus[i]) {
-			kvm_arch_vcpu_free(kvm->vcpus[i]);
-			kvm->vcpus[i] = NULL;
-		}
-	}
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		kvm_arch_vcpu_free(vcpu);
+
+	mutex_lock(&kvm->lock);
+	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
+		kvm->vcpus[i] = NULL;
+
+	atomic_set(&kvm->online_vcpus, 0);
+	mutex_unlock(&kvm->lock);
 }

 void kvm_arch_sync_events(struct kvm *kvm)
arch/powerpc/kvm/trace.h (new file, 104 lines)

@@ -0,0 +1,104 @@
#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_KVM_H

#include <linux/tracepoint.h>

#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE trace

/*
 * Tracepoint for guest mode entry.
 */
TRACE_EVENT(kvm_ppc_instr,
	TP_PROTO(unsigned int inst, unsigned long pc, unsigned int emulate),
	TP_ARGS(inst, pc, emulate),

	TP_STRUCT__entry(
		__field(	unsigned int,	inst		)
		__field(	unsigned long,	pc		)
		__field(	unsigned int,	emulate		)
	),

	TP_fast_assign(
		__entry->inst		= inst;
		__entry->pc		= pc;
		__entry->emulate	= emulate;
	),

	TP_printk("inst %u pc 0x%lx emulate %u\n",
		  __entry->inst, __entry->pc, __entry->emulate)
);

TRACE_EVENT(kvm_stlb_inval,
	TP_PROTO(unsigned int stlb_index),
	TP_ARGS(stlb_index),

	TP_STRUCT__entry(
		__field(	unsigned int,	stlb_index	)
	),

	TP_fast_assign(
		__entry->stlb_index = stlb_index;
	),

	TP_printk("stlb_index %u", __entry->stlb_index)
);

TRACE_EVENT(kvm_stlb_write,
	TP_PROTO(unsigned int victim, unsigned int tid, unsigned int word0,
		 unsigned int word1, unsigned int word2),
	TP_ARGS(victim, tid, word0, word1, word2),

	TP_STRUCT__entry(
		__field(	unsigned int,	victim	)
		__field(	unsigned int,	tid	)
		__field(	unsigned int,	word0	)
		__field(	unsigned int,	word1	)
		__field(	unsigned int,	word2	)
	),

	TP_fast_assign(
		__entry->victim	= victim;
		__entry->tid	= tid;
		__entry->word0	= word0;
		__entry->word1	= word1;
		__entry->word2	= word2;
	),

	TP_printk("victim %u tid %u w0 %u w1 %u w2 %u",
		  __entry->victim, __entry->tid, __entry->word0,
		  __entry->word1, __entry->word2)
);

TRACE_EVENT(kvm_gtlb_write,
	TP_PROTO(unsigned int gtlb_index, unsigned int tid, unsigned int word0,
		 unsigned int word1, unsigned int word2),
	TP_ARGS(gtlb_index, tid, word0, word1, word2),

	TP_STRUCT__entry(
		__field(	unsigned int,	gtlb_index	)
		__field(	unsigned int,	tid		)
		__field(	unsigned int,	word0		)
		__field(	unsigned int,	word1		)
		__field(	unsigned int,	word2		)
	),

	TP_fast_assign(
		__entry->gtlb_index	= gtlb_index;
		__entry->tid		= tid;
		__entry->word0		= word0;
		__entry->word1		= word1;
		__entry->word2		= word2;
	),

	TP_printk("gtlb_index %u tid %u w0 %u w1 %u w2 %u",
		  __entry->gtlb_index, __entry->tid, __entry->word0,
		  __entry->word1, __entry->word2)
);

#endif /* _TRACE_KVM_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
@@ -15,15 +15,6 @@
  */
 #include <linux/types.h>

-/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */
-struct kvm_pic_state {
-	/* no PIC for s390 */
-};
-
-struct kvm_ioapic_state {
-	/* no IOAPIC for s390 */
-};
-
 /* for KVM_GET_REGS and KVM_SET_REGS */
 struct kvm_regs {
 	/* general purpose regs for s390 */
@@ -1,7 +1,7 @@
 /*
  * asm-s390/kvm_host.h - definition for kernel virtual machines on s390
  *
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008,2009
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
@@ -40,7 +40,11 @@ struct sca_block {
 	struct sca_entry cpu[64];
 } __attribute__((packed));

-#define KVM_PAGES_PER_HPAGE	256
+#define KVM_NR_PAGE_SIZES	2
+#define KVM_HPAGE_SHIFT(x)	(PAGE_SHIFT + ((x) - 1) * 8)
+#define KVM_HPAGE_SIZE(x)	(1UL << KVM_HPAGE_SHIFT(x))
+#define KVM_HPAGE_MASK(x)	(~(KVM_HPAGE_SIZE(x) - 1))
+#define KVM_PAGES_PER_HPAGE(x)	(KVM_HPAGE_SIZE(x) / PAGE_SIZE)

 #define CPUSTAT_HOST	0x80000000
 #define CPUSTAT_WAIT	0x10000000
@@ -182,8 +186,9 @@ struct kvm_s390_interrupt_info {
 };

 /* for local_interrupt.action_flags */
-#define ACTION_STORE_ON_STOP	1
-#define ACTION_STOP_ON_STOP	2
+#define ACTION_STORE_ON_STOP		(1<<0)
+#define ACTION_STOP_ON_STOP		(1<<1)
+#define ACTION_RELOADVCPU_ON_STOP	(1<<2)

 struct kvm_s390_local_interrupt {
 	spinlock_t lock;
@@ -227,8 +232,6 @@ struct kvm_vm_stat {
 };

 struct kvm_arch{
-	unsigned long guest_origin;
-	unsigned long guest_memsize;
 	struct sca_block *sca;
 	debug_info_t *dbf;
 	struct kvm_s390_float_interrupt float_int;
@@ -13,6 +13,8 @@
 #ifndef __S390_KVM_PARA_H
 #define __S390_KVM_PARA_H

+#ifdef __KERNEL__
+
 /*
  * Hypercalls for KVM on s390. The calling convention is similar to the
  * s390 ABI, so we use R2-R6 for parameters 1-5. In addition we use R1
@@ -147,4 +149,6 @@ static inline unsigned int kvm_arch_para_features(void)
 	return 0;
 }

+#endif
+
 #endif /* __S390_KVM_PARA_H */
@@ -1,11 +1,7 @@
 #
 # KVM configuration
 #
-config HAVE_KVM
-	bool
-
-config HAVE_KVM_IRQCHIP
-	bool
+source "virt/kvm/Kconfig"

 menuconfig VIRTUALIZATION
 	bool "Virtualization"
@@ -38,9 +34,6 @@ config KVM

 	  If unsure, say N.

-config KVM_TRACE
-	bool
-
 # OK, it's a little counter-intuitive to do this, but it puts it neatly under
 # the virtualization menu.
 source drivers/virtio/Kconfig
@@ -1,7 +1,7 @@
 /*
  * gaccess.h -  access guest memory
  *
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008,2009
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
@@ -16,13 +16,14 @@
 #include <linux/compiler.h>
 #include <linux/kvm_host.h>
 #include <asm/uaccess.h>
+#include "kvm-s390.h"

 static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu,
 					       unsigned long guestaddr)
 {
 	unsigned long prefix  = vcpu->arch.sie_block->prefix;
-	unsigned long origin  = vcpu->kvm->arch.guest_origin;
-	unsigned long memsize = vcpu->kvm->arch.guest_memsize;
+	unsigned long origin  = vcpu->arch.sie_block->gmsor;
+	unsigned long memsize = kvm_s390_vcpu_get_memsize(vcpu);

 	if (guestaddr < 2 * PAGE_SIZE)
 		guestaddr += prefix;
@@ -158,8 +159,8 @@ static inline int copy_to_guest(struct kvm_vcpu *vcpu, unsigned long guestdest,
 				const void *from, unsigned long n)
 {
 	unsigned long prefix  = vcpu->arch.sie_block->prefix;
-	unsigned long origin  = vcpu->kvm->arch.guest_origin;
-	unsigned long memsize = vcpu->kvm->arch.guest_memsize;
+	unsigned long origin  = vcpu->arch.sie_block->gmsor;
+	unsigned long memsize = kvm_s390_vcpu_get_memsize(vcpu);

 	if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE))
 		goto slowpath;
@@ -209,8 +210,8 @@ static inline int copy_from_guest(struct kvm_vcpu *vcpu, void *to,
 				  unsigned long guestsrc, unsigned long n)
 {
 	unsigned long prefix  = vcpu->arch.sie_block->prefix;
-	unsigned long origin  = vcpu->kvm->arch.guest_origin;
-	unsigned long memsize = vcpu->kvm->arch.guest_memsize;
+	unsigned long origin  = vcpu->arch.sie_block->gmsor;
+	unsigned long memsize = kvm_s390_vcpu_get_memsize(vcpu);

 	if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE))
 		goto slowpath;
@ -244,8 +245,8 @@ static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu,
|
|||
unsigned long guestdest,
|
||||
const void *from, unsigned long n)
|
||||
{
|
||||
unsigned long origin = vcpu->kvm->arch.guest_origin;
|
||||
unsigned long memsize = vcpu->kvm->arch.guest_memsize;
|
||||
unsigned long origin = vcpu->arch.sie_block->gmsor;
|
||||
unsigned long memsize = kvm_s390_vcpu_get_memsize(vcpu);
|
||||
|
||||
if (guestdest + n > memsize)
|
||||
return -EFAULT;
|
||||
|
@ -262,8 +263,8 @@ static inline int copy_from_guest_absolute(struct kvm_vcpu *vcpu, void *to,
|
|||
unsigned long guestsrc,
|
||||
unsigned long n)
|
||||
{
|
||||
unsigned long origin = vcpu->kvm->arch.guest_origin;
|
||||
unsigned long memsize = vcpu->kvm->arch.guest_memsize;
|
||||
unsigned long origin = vcpu->arch.sie_block->gmsor;
|
||||
unsigned long memsize = kvm_s390_vcpu_get_memsize(vcpu);
|
||||
|
||||
if (guestsrc + n > memsize)
|
||||
return -EFAULT;
|
||||
|
|
|
@@ -1,7 +1,7 @@
/*
 * intercept.c - in-kernel handling for sie intercepts
 *
 * Copyright IBM Corp. 2008
 * Copyright IBM Corp. 2008,2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
@@ -128,7 +128,7 @@ static int handle_noop(struct kvm_vcpu *vcpu)

static int handle_stop(struct kvm_vcpu *vcpu)
{
	int rc;
	int rc = 0;

	vcpu->stat.exit_stop_request++;
	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
@@ -141,12 +141,18 @@ static int handle_stop(struct kvm_vcpu *vcpu)
		rc = -ENOTSUPP;
	}

	if (vcpu->arch.local_int.action_bits & ACTION_RELOADVCPU_ON_STOP) {
		vcpu->arch.local_int.action_bits &= ~ACTION_RELOADVCPU_ON_STOP;
		rc = SIE_INTERCEPT_RERUNVCPU;
		vcpu->run->exit_reason = KVM_EXIT_INTR;
	}

	if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) {
		vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP;
		VCPU_EVENT(vcpu, 3, "%s", "cpu stopped");
		rc = -ENOTSUPP;
	} else
		rc = 0;
	}

	spin_unlock_bh(&vcpu->arch.local_int.lock);
	return rc;
}
@@ -158,9 +164,9 @@ static int handle_validity(struct kvm_vcpu *vcpu)

	vcpu->stat.exit_validity++;
	if ((viwhy == 0x37) && (vcpu->arch.sie_block->prefix
		<= vcpu->kvm->arch.guest_memsize - 2*PAGE_SIZE)){
		<= kvm_s390_vcpu_get_memsize(vcpu) - 2*PAGE_SIZE)) {
		rc = fault_in_pages_writeable((char __user *)
			vcpu->kvm->arch.guest_origin +
			vcpu->arch.sie_block->gmsor +
			vcpu->arch.sie_block->prefix,
			2*PAGE_SIZE);
		if (rc)

@@ -283,7 +283,7 @@ static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
	return 1;
}

int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
static int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
	struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
@@ -320,12 +320,6 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
	return rc;
}

int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
{
	/* do real check here */
	return 1;
}

int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
	return 0;

@@ -1,7 +1,7 @@
/*
 * s390host.c --  hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008
 * Copyright IBM Corp. 2008,2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
@@ -10,6 +10,7 @@
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 */

#include <linux/compiler.h>
@@ -210,13 +211,17 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
		if (kvm->vcpus[i]) {
			kvm_arch_vcpu_destroy(kvm->vcpus[i]);
			kvm->vcpus[i] = NULL;
		}
	}
	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_sync_events(struct kvm *kvm)
@@ -278,16 +283,10 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
	vcpu->arch.sie_block->gbea = 1;
}

/* The current code can have up to 256 pages for virtio */
#define VIRTIODESCSPACE (256ul * 4096ul)

int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
	vcpu->arch.sie_block->gmslm = vcpu->kvm->arch.guest_memsize +
				      vcpu->kvm->arch.guest_origin +
				      VIRTIODESCSPACE - 1ul;
	vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin;
	set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests);
	vcpu->arch.sie_block->ecb = 2;
	vcpu->arch.sie_block->eca = 0xC1002001U;
	vcpu->arch.sie_block->fac = (int) (long) facilities;
@@ -319,8 +318,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
	BUG_ON(!kvm->arch.sca);
	if (!kvm->arch.sca->cpu[id].sda)
		kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
	else
		BUG_ON(!kvm->vcpus[id]); /* vcpu does already exist */
	vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
	vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;

@@ -490,9 +487,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)

	vcpu_load(vcpu);

rerun_vcpu:
	if (vcpu->requests)
		if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
			kvm_s390_vcpu_set_mem(vcpu);

	/* verify, that memory has been registered */
	if (!vcpu->kvm->arch.guest_memsize) {
	if (!vcpu->arch.sie_block->gmslm) {
		vcpu_put(vcpu);
		VCPU_EVENT(vcpu, 3, "%s", "no memory registered to run vcpu");
		return -EINVAL;
	}

@@ -509,6 +512,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
		vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
		break;
	case KVM_EXIT_UNKNOWN:
	case KVM_EXIT_INTR:
	case KVM_EXIT_S390_RESET:
		break;
	default:
@@ -522,8 +526,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
		rc = kvm_handle_sie_intercept(vcpu);
	} while (!signal_pending(current) && !rc);

	if (signal_pending(current) && !rc)
	if (rc == SIE_INTERCEPT_RERUNVCPU)
		goto rerun_vcpu;

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (rc == -ENOTSUPP) {
		/* intercept cannot be handled in-kernel, prepare kvm-run */
@@ -676,6 +685,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
				int user_alloc)
{
	int i;
	struct kvm_vcpu *vcpu;

	/* A few sanity checks. We can have exactly one memory slot which has
	   to start at guest virtual zero and which has to be located at a
@@ -684,7 +694,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
	   vmas. It is okay to mmap() and munmap() stuff in this slot after
	   doing this call at any time */

	if (mem->slot || kvm->arch.guest_memsize)
	if (mem->slot)
		return -EINVAL;

	if (mem->guest_phys_addr)
@@ -699,36 +709,14 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
	if (!user_alloc)
		return -EINVAL;

	/* lock all vcpus */
	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
		if (!kvm->vcpus[i])
	/* request update of sie control block for all available vcpus */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
			continue;
		if (!mutex_trylock(&kvm->vcpus[i]->mutex))
			goto fail_out;
	}

	kvm->arch.guest_origin = mem->userspace_addr;
	kvm->arch.guest_memsize = mem->memory_size;

	/* update sie control blocks, and unlock all vcpus */
	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
		if (kvm->vcpus[i]) {
			kvm->vcpus[i]->arch.sie_block->gmsor =
				kvm->arch.guest_origin;
			kvm->vcpus[i]->arch.sie_block->gmslm =
				kvm->arch.guest_memsize +
				kvm->arch.guest_origin +
				VIRTIODESCSPACE - 1ul;
			mutex_unlock(&kvm->vcpus[i]->mutex);
		}
		kvm_s390_inject_sigp_stop(vcpu, ACTION_RELOADVCPU_ON_STOP);
	}

	return 0;

fail_out:
	for (; i >= 0; i--)
		mutex_unlock(&kvm->vcpus[i]->mutex);
	return -EINVAL;
}

void kvm_arch_flush_shadow(struct kvm *kvm)

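The kvm_arch_set_memory_region() hunk above replaces per-vcpu mutex juggling with a request-bit handshake. Consolidating the two sides of that protocol, as they appear across the hunks above, into one hedged sketch (names straight from the diff, control flow abridged):

	/* Updater side: flag every vcpu and stop it so it leaves SIE. */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
			continue;		/* an update is already pending */
		kvm_s390_inject_sigp_stop(vcpu, ACTION_RELOADVCPU_ON_STOP);
	}

	/* Vcpu side, at the top of kvm_arch_vcpu_ioctl_run(): apply it. */
	if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
		kvm_s390_vcpu_set_mem(vcpu);	/* refresh gmsor/gmslm */
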
@@ -1,7 +1,7 @@
/*
 * kvm_s390.h -  definition for kvm on s390
 *
 * Copyright IBM Corp. 2008
 * Copyright IBM Corp. 2008,2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
@@ -9,6 +9,7 @@
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 */

#ifndef ARCH_S390_KVM_S390_H
@@ -18,8 +19,13 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>

/* The current code can have up to 256 pages for virtio */
#define VIRTIODESCSPACE (256ul * 4096ul)

typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);

/* negativ values are error codes, positive values for internal conditions */
#define SIE_INTERCEPT_RERUNVCPU (1<<0)
int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);

#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
@@ -50,6 +56,30 @@ int kvm_s390_inject_vm(struct kvm *kvm,
int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
		struct kvm_s390_interrupt *s390int);
int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action);

static inline int kvm_s390_vcpu_get_memsize(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.sie_block->gmslm
		- vcpu->arch.sie_block->gmsor
		- VIRTIODESCSPACE + 1ul;
}

static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu)
{
	struct kvm_memory_slot *mem;

	down_read(&vcpu->kvm->slots_lock);
	mem = &vcpu->kvm->memslots[0];

	vcpu->arch.sie_block->gmsor = mem->userspace_addr;
	vcpu->arch.sie_block->gmslm =
		mem->userspace_addr +
		(mem->npages << PAGE_SHIFT) +
		VIRTIODESCSPACE - 1ul;

	up_read(&vcpu->kvm->slots_lock);
}

/* implemented in priv.c */
int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);

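A quick check that the two new inlines above are consistent with each other (pure arithmetic, no new code):

	/* After kvm_s390_vcpu_set_mem():
	 *	gmsor = mem->userspace_addr
	 *	gmslm = mem->userspace_addr + (mem->npages << PAGE_SHIFT)
	 *		+ VIRTIODESCSPACE - 1ul
	 * so kvm_s390_vcpu_get_memsize() returns
	 *	gmslm - gmsor - VIRTIODESCSPACE + 1ul
	 *	  = mem->npages << PAGE_SHIFT,
	 * i.e. exactly the registered slot size, with the 256 virtio
	 * descriptor pages carved out above the guest memory limit. */
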
@@ -1,7 +1,7 @@
/*
 * sigp.c - handlinge interprocessor communication
 *
 * Copyright IBM Corp. 2008
 * Copyright IBM Corp. 2008,2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
@@ -9,6 +9,7 @@
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 */

#include <linux/kvm.h>
@@ -107,46 +108,57 @@ unlock:
	return rc;
}

static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store)
static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
{
	struct kvm_s390_interrupt_info *inti;

	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
	if (!inti)
		return -ENOMEM;
	inti->type = KVM_S390_SIGP_STOP;

	spin_lock_bh(&li->lock);
	list_add_tail(&inti->list, &li->list);
	atomic_set(&li->active, 1);
	atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
	li->action_bits |= action;
	if (waitqueue_active(&li->wq))
		wake_up_interruptible(&li->wq);
	spin_unlock_bh(&li->lock);

	return 0; /* order accepted */
}

static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
{
	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
	struct kvm_s390_local_interrupt *li;
	struct kvm_s390_interrupt_info *inti;
	int rc;

	if (cpu_addr >= KVM_MAX_VCPUS)
		return 3; /* not operational */

	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
	if (!inti)
		return -ENOMEM;

	inti->type = KVM_S390_SIGP_STOP;

	spin_lock(&fi->lock);
	li = fi->local_int[cpu_addr];
	if (li == NULL) {
		rc = 3; /* not operational */
		kfree(inti);
		goto unlock;
	}
	spin_lock_bh(&li->lock);
	list_add_tail(&inti->list, &li->list);
	atomic_set(&li->active, 1);
	atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
	if (store)
		li->action_bits |= ACTION_STORE_ON_STOP;
	li->action_bits |= ACTION_STOP_ON_STOP;
	if (waitqueue_active(&li->wq))
		wake_up_interruptible(&li->wq);
	spin_unlock_bh(&li->lock);
	rc = 0; /* order accepted */

	rc = __inject_sigp_stop(li, action);

unlock:
	spin_unlock(&fi->lock);
	VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
	return rc;
}

int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action)
{
	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
	return __inject_sigp_stop(li, action);
}

static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
{
	int rc;
@@ -177,9 +189,9 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
	/* make sure that the new value is valid memory */
	address = address & 0x7fffe000u;
	if ((copy_from_guest(vcpu, &tmp,
		(u64) (address + vcpu->kvm->arch.guest_origin) , 1)) ||
		(u64) (address + vcpu->arch.sie_block->gmsor) , 1)) ||
	   (copy_from_guest(vcpu, &tmp, (u64) (address +
		vcpu->kvm->arch.guest_origin + PAGE_SIZE), 1))) {
		vcpu->arch.sie_block->gmsor + PAGE_SIZE), 1))) {
		*reg |= SIGP_STAT_INVALID_PARAMETER;
		return 1; /* invalid parameter */
	}
@@ -262,11 +274,11 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
		break;
	case SIGP_STOP:
		vcpu->stat.instruction_sigp_stop++;
		rc = __sigp_stop(vcpu, cpu_addr, 0);
		rc = __sigp_stop(vcpu, cpu_addr, ACTION_STOP_ON_STOP);
		break;
	case SIGP_STOP_STORE_STATUS:
		vcpu->stat.instruction_sigp_stop++;
		rc = __sigp_stop(vcpu, cpu_addr, 1);
		rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP);
		break;
	case SIGP_SET_ARCH:
		vcpu->stat.instruction_sigp_arch++;

@@ -15,6 +15,7 @@

#define APIC_LVR 0x30
#define APIC_LVR_MASK 0xFF00FF
#define APIC_LVR_DIRECTED_EOI (1 << 24)
#define GET_APIC_VERSION(x) ((x) & 0xFFu)
#define GET_APIC_MAXLVT(x) (((x) >> 16) & 0xFFu)
#ifdef CONFIG_X86_32
@@ -41,6 +42,7 @@
#define APIC_DFR_CLUSTER 0x0FFFFFFFul
#define APIC_DFR_FLAT 0xFFFFFFFFul
#define APIC_SPIV 0xF0
#define APIC_SPIV_DIRECTED_EOI (1 << 12)
#define APIC_SPIV_FOCUS_DISABLED (1 << 9)
#define APIC_SPIV_APIC_ENABLED (1 << 8)
#define APIC_ISR 0x100

@@ -17,6 +17,8 @@
#define __KVM_HAVE_USER_NMI
#define __KVM_HAVE_GUEST_DEBUG
#define __KVM_HAVE_MSIX
#define __KVM_HAVE_MCE
#define __KVM_HAVE_PIT_STATE2

/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256
@@ -236,6 +238,14 @@ struct kvm_pit_state {
	struct kvm_pit_channel_state channels[3];
};

#define KVM_PIT_FLAGS_HPET_LEGACY 0x00000001

struct kvm_pit_state2 {
	struct kvm_pit_channel_state channels[3];
	__u32 flags;
	__u32 reserved[9];
};

struct kvm_reinject_control {
	__u8 pit_reinject;
	__u8 reserved[31];

@@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/mmu_notifier.h>
#include <linux/tracepoint.h>

#include <linux/kvm.h>
#include <linux/kvm_para.h>
@@ -37,12 +38,14 @@
#define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS |	\
				  0xFFFFFF0000000000ULL)

#define KVM_GUEST_CR0_MASK \
	(X86_CR0_PG | X86_CR0_PE | X86_CR0_WP | X86_CR0_NE \
	 | X86_CR0_NW | X86_CR0_CD)
#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST				\
	(X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
#define KVM_GUEST_CR0_MASK						\
	(KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST				\
	(X86_CR0_WP | X86_CR0_NE | X86_CR0_TS | X86_CR0_MP)
#define KVM_VM_CR0_ALWAYS_ON \
	(X86_CR0_PG | X86_CR0_PE | X86_CR0_WP | X86_CR0_NE | X86_CR0_TS \
	 | X86_CR0_MP)
	(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
#define KVM_GUEST_CR4_MASK \
	(X86_CR4_VME | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_PGE | X86_CR4_VMXE)
#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
@@ -51,12 +54,12 @@
#define INVALID_PAGE (~(hpa_t)0)
#define UNMAPPED_GVA (~(gpa_t)0)

/* shadow tables are PAE even on non-PAE hosts */
#define KVM_HPAGE_SHIFT 21
#define KVM_HPAGE_SIZE (1UL << KVM_HPAGE_SHIFT)
#define KVM_HPAGE_MASK (~(KVM_HPAGE_SIZE - 1))

#define KVM_PAGES_PER_HPAGE (KVM_HPAGE_SIZE / PAGE_SIZE)
/* KVM Hugepage definitions for x86 */
#define KVM_NR_PAGE_SIZES 3
#define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + (((x) - 1) * 9))
#define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x))
#define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1))
#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)

#define DE_VECTOR 0
#define DB_VECTOR 1

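The parameterized hugepage macros above add one 9-bit paging level per step. Spelling out the values they produce on x86 (PAGE_SHIFT = 12), as a reading aid:

	/* KVM_HPAGE_SHIFT(x) = PAGE_SHIFT + (x - 1) * 9:
	 *	level 1: shift 12 -> 4 KiB pages
	 *	level 2: shift 21 -> 2 MiB pages (the old fixed KVM_HPAGE_SHIFT)
	 *	level 3: shift 30 -> 1 GiB pages (hence KVM_NR_PAGE_SIZES == 3)
	 * and, for example, KVM_PAGES_PER_HPAGE(2) = 2 MiB / 4 KiB = 512. */
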
@@ -120,6 +123,10 @@ enum kvm_reg {
	NR_VCPU_REGS
};

enum kvm_reg_ex {
	VCPU_EXREG_PDPTR = NR_VCPU_REGS,
};

enum {
	VCPU_SREG_ES,
	VCPU_SREG_CS,
@@ -131,7 +138,7 @@ enum {
	VCPU_SREG_LDTR,
};

#include <asm/kvm_x86_emulate.h>
#include <asm/kvm_emulate.h>

#define KVM_NR_MEM_OBJS 40

@@ -308,7 +315,6 @@ struct kvm_vcpu_arch {
	struct {
		gfn_t gfn;	/* presumed gfn during guest pte update */
		pfn_t pfn;	/* pfn corresponding to that gfn */
		int largepage;
		unsigned long mmu_seq;
	} update_pte;

@@ -334,16 +340,6 @@ struct kvm_vcpu_arch {
		u8 nr;
	} interrupt;

	struct {
		int vm86_active;
		u8 save_iopl;
		struct kvm_save_segment {
			u16 selector;
			unsigned long base;
			u32 limit;
			u32 ar;
		} tr, es, ds, fs, gs;
	} rmode;
	int halt_request; /* real mode on Intel only */

	int cpuid_nent;
@@ -366,13 +362,15 @@ struct kvm_vcpu_arch {
	u32 pat;

	int switch_db_regs;
	unsigned long host_db[KVM_NR_DB_REGS];
	unsigned long host_dr6;
	unsigned long host_dr7;
	unsigned long db[KVM_NR_DB_REGS];
	unsigned long dr6;
	unsigned long dr7;
	unsigned long eff_db[KVM_NR_DB_REGS];

	u64 mcg_cap;
	u64 mcg_status;
	u64 mcg_ctl;
	u64 *mce_banks;
};

struct kvm_mem_alias {
@@ -409,6 +407,7 @@ struct kvm_arch{

	struct page *ept_identity_pagetable;
	bool ept_identity_pagetable_done;
	gpa_t ept_identity_map_addr;

	unsigned long irq_sources_bitmap;
	unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
@@ -526,6 +525,9 @@ struct kvm_x86_ops {
	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
	int (*get_tdp_level)(void);
	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
	bool (*gb_page_enable)(void);

	const struct trace_print_flags *exit_reasons_str;
};

extern struct kvm_x86_ops *kvm_x86_ops;
@@ -618,6 +620,7 @@ void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
			   u32 error_code);
bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);

int kvm_pic_set_irq(void *opaque, int irq, int level);

@@ -752,8 +755,6 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
}

#define MSR_IA32_TIME_STAMP_COUNTER 0x010

#define TSS_IOPB_BASE_OFFSET 0x66
#define TSS_BASE_SIZE 0x68
#define TSS_IOPB_SIZE (65536 / 8)
@@ -796,5 +797,8 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
int kvm_age_hva(struct kvm *kvm, unsigned long hva);
int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);

#endif /* _ASM_X86_KVM_HOST_H */

@@ -1,6 +1,8 @@
#ifndef _ASM_X86_KVM_PARA_H
#define _ASM_X86_KVM_PARA_H

#include <linux/types.h>

/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It
 * should be used to determine that a VM is running under KVM.
 */

|
|||
/* AMD-V MSRs */
|
||||
|
||||
#define MSR_VM_CR 0xc0010114
|
||||
#define MSR_VM_IGNNE 0xc0010115
|
||||
#define MSR_VM_HSAVE_PA 0xc0010117
|
||||
|
||||
#endif /* _ASM_X86_MSR_INDEX_H */
|
||||
|
|
|
@ -55,6 +55,7 @@
|
|||
#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
|
||||
#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
|
||||
#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
|
||||
#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
|
||||
|
||||
|
||||
#define PIN_BASED_EXT_INTR_MASK 0x00000001
|
||||
|
@ -351,9 +352,16 @@ enum vmcs_field {
|
|||
#define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0
|
||||
#define VMX_EPT_EXTENT_CONTEXT 1
|
||||
#define VMX_EPT_EXTENT_GLOBAL 2
|
||||
|
||||
#define VMX_EPT_EXECUTE_ONLY_BIT (1ull)
|
||||
#define VMX_EPT_PAGE_WALK_4_BIT (1ull << 6)
|
||||
#define VMX_EPTP_UC_BIT (1ull << 8)
|
||||
#define VMX_EPTP_WB_BIT (1ull << 14)
|
||||
#define VMX_EPT_2MB_PAGE_BIT (1ull << 16)
|
||||
#define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24)
|
||||
#define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25)
|
||||
#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26)
|
||||
|
||||
#define VMX_EPT_DEFAULT_GAW 3
|
||||
#define VMX_EPT_MAX_GAW 0x4
|
||||
#define VMX_EPT_MT_EPTE_SHIFT 3
|
||||
|
|
|
@ -34,7 +34,6 @@
|
|||
struct kvm_para_state {
|
||||
u8 mmu_queue[MMU_QUEUE_SIZE];
|
||||
int mmu_queue_len;
|
||||
enum paravirt_lazy_mode mode;
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct kvm_para_state, para_state);
|
||||
|
@ -77,7 +76,7 @@ static void kvm_deferred_mmu_op(void *buffer, int len)
|
|||
{
|
||||
struct kvm_para_state *state = kvm_para_state();
|
||||
|
||||
if (state->mode != PARAVIRT_LAZY_MMU) {
|
||||
if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU) {
|
||||
kvm_mmu_op(buffer, len);
|
||||
return;
|
||||
}
|
||||
|
@ -185,10 +184,7 @@ static void kvm_release_pt(unsigned long pfn)
|
|||
|
||||
static void kvm_enter_lazy_mmu(void)
|
||||
{
|
||||
struct kvm_para_state *state = kvm_para_state();
|
||||
|
||||
paravirt_enter_lazy_mmu();
|
||||
state->mode = paravirt_get_lazy_mode();
|
||||
}
|
||||
|
||||
static void kvm_leave_lazy_mmu(void)
|
||||
|
@ -197,7 +193,6 @@ static void kvm_leave_lazy_mmu(void)
|
|||
|
||||
mmu_queue_flush(state);
|
||||
paravirt_leave_lazy_mmu();
|
||||
state->mode = paravirt_get_lazy_mode();
|
||||
}
|
||||
|
||||
static void __init paravirt_ops_setup(void)
|
||||
|
|
|
@@ -50,8 +50,8 @@ static unsigned long kvm_get_wallclock(void)
	struct timespec ts;
	int low, high;

	low = (int)__pa(&wall_clock);
	high = ((u64)__pa(&wall_clock) >> 32);
	low = (int)__pa_symbol(&wall_clock);
	high = ((u64)__pa_symbol(&wall_clock) >> 32);
	native_write_msr(MSR_KVM_WALL_CLOCK, low, high);

	vcpu_time = &get_cpu_var(hv_clock);

@@ -1,12 +1,8 @@
#
# KVM configuration
#
config HAVE_KVM
	bool

config HAVE_KVM_IRQCHIP
	bool
	default y
source "virt/kvm/Kconfig"

menuconfig VIRTUALIZATION
	bool "Virtualization"
@@ -29,6 +25,9 @@ config KVM
	select PREEMPT_NOTIFIERS
	select MMU_NOTIFIER
	select ANON_INODES
	select HAVE_KVM_IRQCHIP
	select HAVE_KVM_EVENTFD
	select KVM_APIC_ARCHITECTURE
	---help---
	  Support hosting fully virtualized guest machines using hardware
	  virtualization extensions.  You will need a fairly recent
@@ -63,18 +62,6 @@ config KVM_AMD
	  To compile this as a module, choose M here: the module
	  will be called kvm-amd.

config KVM_TRACE
	bool "KVM trace support"
	depends on KVM && SYSFS
	select MARKERS
	select RELAY
	select DEBUG_FS
	default n
	---help---
	  This option allows reading a trace of kvm-related events through
	  relayfs.  Note the ABI is not considered stable and will be
	  modified in future updates.

# OK, it's a little counter-intuitive to do this, but it puts it neatly under
# the virtualization menu.
source drivers/lguest/Kconfig

@@ -1,22 +1,19 @@
#
# Makefile for Kernel-based Virtual Machine module
#

common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
	coalesced_mmio.o irq_comm.o)
ifeq ($(CONFIG_KVM_TRACE),y)
common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
endif
ifeq ($(CONFIG_IOMMU_API),y)
common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
endif

EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm

kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
	i8254.o timer.o
obj-$(CONFIG_KVM) += kvm.o
kvm-intel-objs = vmx.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
kvm-amd-objs = svm.o
obj-$(CONFIG_KVM_AMD) += kvm-amd.o
CFLAGS_x86.o := -I.
CFLAGS_svm.o := -I.
CFLAGS_vmx.o := -I.

kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
	coalesced_mmio.o irq_comm.o eventfd.o)
kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o)

kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
	i8254.o timer.o
kvm-intel-y += vmx.o
kvm-amd-y += svm.o

obj-$(CONFIG_KVM) += kvm.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
obj-$(CONFIG_KVM_AMD) += kvm-amd.o

@@ -1,5 +1,5 @@
/******************************************************************************
 * x86_emulate.c
 * emulate.c
 *
 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
 *
@@ -30,7 +30,9 @@
#define DPRINTF(x...) do {} while (0)
#endif
#include <linux/module.h>
#include <asm/kvm_x86_emulate.h>
#include <asm/kvm_emulate.h>

#include "mmu.h"		/* for is_long_mode() */

/*
 * Opcode effective-address decode tables.
@@ -60,6 +62,7 @@
#define SrcImmByte  (6<<4)	/* 8-bit sign-extended immediate operand. */
#define SrcOne      (7<<4)	/* Implied '1' */
#define SrcImmUByte (8<<4)	/* 8-bit unsigned immediate operand. */
#define SrcImmU     (9<<4)	/* Immediate operand, unsigned */
#define SrcMask     (0xf<<4)
/* Generic ModRM decode. */
#define ModRM       (1<<8)
@@ -97,11 +100,11 @@ static u32 opcode_table[256] = {
	/* 0x10 - 0x17 */
	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
	0, 0, 0, 0,
	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
	/* 0x18 - 0x1F */
	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
	0, 0, 0, 0,
	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
	/* 0x20 - 0x27 */
	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
@@ -195,7 +198,7 @@ static u32 opcode_table[256] = {
	ByteOp | SrcImmUByte, SrcImmUByte,
	/* 0xE8 - 0xEF */
	SrcImm | Stack, SrcImm | ImplicitOps,
	SrcImm | Src2Imm16, SrcImmByte | ImplicitOps,
	SrcImmU | Src2Imm16, SrcImmByte | ImplicitOps,
	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
	/* 0xF0 - 0xF7 */
@@ -208,7 +211,7 @@ static u32 opcode_table[256] = {

static u32 twobyte_table[256] = {
	/* 0x00 - 0x0F */
	0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0,
	0, Group | GroupDual | Group7, 0, 0, 0, ImplicitOps, ImplicitOps, 0,
	ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
	/* 0x10 - 0x1F */
	0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
@@ -216,7 +219,9 @@ static u32 twobyte_table[256] = {
	ModRM | ImplicitOps, ModRM, ModRM | ImplicitOps, ModRM, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x30 - 0x3F */
	ImplicitOps, 0, ImplicitOps, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	ImplicitOps, 0, ImplicitOps, 0,
	ImplicitOps, ImplicitOps, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x40 - 0x47 */
	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
@@ -319,8 +324,11 @@ static u32 group2_table[] = {
};

/* EFLAGS bit definitions. */
#define EFLG_VM (1<<17)
#define EFLG_RF (1<<16)
#define EFLG_OF (1<<11)
#define EFLG_DF (1<<10)
#define EFLG_IF (1<<9)
#define EFLG_SF (1<<7)
#define EFLG_ZF (1<<6)
#define EFLG_AF (1<<4)
@@ -1027,6 +1035,7 @@ done_prefixes:
		c->src.type = OP_MEM;
		break;
	case SrcImm:
	case SrcImmU:
		c->src.type = OP_IMM;
		c->src.ptr = (unsigned long *)c->eip;
		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
@@ -1044,6 +1053,19 @@ done_prefixes:
			c->src.val = insn_fetch(s32, 4, c->eip);
			break;
		}
		if ((c->d & SrcMask) == SrcImmU) {
			switch (c->src.bytes) {
			case 1:
				c->src.val &= 0xff;
				break;
			case 2:
				c->src.val &= 0xffff;
				break;
			case 4:
				c->src.val &= 0xffffffff;
				break;
			}
		}
		break;
	case SrcImmByte:
	case SrcImmUByte:
@@ -1375,6 +1397,217 @@ static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask)
	ctxt->interruptibility = mask;
}

static inline void
setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
	struct kvm_segment *cs, struct kvm_segment *ss)
{
	memset(cs, 0, sizeof(struct kvm_segment));
	kvm_x86_ops->get_segment(ctxt->vcpu, cs, VCPU_SREG_CS);
	memset(ss, 0, sizeof(struct kvm_segment));

	cs->l = 0;		/* will be adjusted later */
	cs->base = 0;		/* flat segment */
	cs->g = 1;		/* 4kb granularity */
	cs->limit = 0xffffffff;	/* 4GB limit */
	cs->type = 0x0b;	/* Read, Execute, Accessed */
	cs->s = 1;
	cs->dpl = 0;		/* will be adjusted later */
	cs->present = 1;
	cs->db = 1;

	ss->unusable = 0;
	ss->base = 0;		/* flat segment */
	ss->limit = 0xffffffff;	/* 4GB limit */
	ss->g = 1;		/* 4kb granularity */
	ss->s = 1;
	ss->type = 0x03;	/* Read/Write, Accessed */
	ss->db = 1;		/* 32bit stack segment */
	ss->dpl = 0;
	ss->present = 1;
}

static int
emulate_syscall(struct x86_emulate_ctxt *ctxt)
{
	struct decode_cache *c = &ctxt->decode;
	struct kvm_segment cs, ss;
	u64 msr_data;

	/* syscall is not available in real mode */
	if (c->lock_prefix || ctxt->mode == X86EMUL_MODE_REAL
		|| !(ctxt->vcpu->arch.cr0 & X86_CR0_PE))
		return -1;

	setup_syscalls_segments(ctxt, &cs, &ss);

	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
	msr_data >>= 32;
	cs.selector = (u16)(msr_data & 0xfffc);
	ss.selector = (u16)(msr_data + 8);

	if (is_long_mode(ctxt->vcpu)) {
		cs.db = 0;
		cs.l = 1;
	}
	kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
	kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);

	c->regs[VCPU_REGS_RCX] = c->eip;
	if (is_long_mode(ctxt->vcpu)) {
#ifdef CONFIG_X86_64
		c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF;

		kvm_x86_ops->get_msr(ctxt->vcpu,
			ctxt->mode == X86EMUL_MODE_PROT64 ?
			MSR_LSTAR : MSR_CSTAR, &msr_data);
		c->eip = msr_data;

		kvm_x86_ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data);
		ctxt->eflags &= ~(msr_data | EFLG_RF);
#endif
	} else {
		/* legacy mode */
		kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
		c->eip = (u32)msr_data;

		ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
	}

	return 0;
}

static int
emulate_sysenter(struct x86_emulate_ctxt *ctxt)
{
	struct decode_cache *c = &ctxt->decode;
	struct kvm_segment cs, ss;
	u64 msr_data;

	/* inject #UD if LOCK prefix is used */
	if (c->lock_prefix)
		return -1;

	/* inject #GP if in real mode or paging is disabled */
	if (ctxt->mode == X86EMUL_MODE_REAL ||
		!(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) {
		kvm_inject_gp(ctxt->vcpu, 0);
		return -1;
	}

	/* XXX sysenter/sysexit have not been tested in 64bit mode.
	 * Therefore, we inject an #UD.
	 */
	if (ctxt->mode == X86EMUL_MODE_PROT64)
		return -1;

	setup_syscalls_segments(ctxt, &cs, &ss);

	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
	switch (ctxt->mode) {
	case X86EMUL_MODE_PROT32:
		if ((msr_data & 0xfffc) == 0x0) {
			kvm_inject_gp(ctxt->vcpu, 0);
			return -1;
		}
		break;
	case X86EMUL_MODE_PROT64:
		if (msr_data == 0x0) {
			kvm_inject_gp(ctxt->vcpu, 0);
			return -1;
		}
		break;
	}

	ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
	cs.selector = (u16)msr_data;
	cs.selector &= ~SELECTOR_RPL_MASK;
	ss.selector = cs.selector + 8;
	ss.selector &= ~SELECTOR_RPL_MASK;
	if (ctxt->mode == X86EMUL_MODE_PROT64
		|| is_long_mode(ctxt->vcpu)) {
		cs.db = 0;
		cs.l = 1;
	}

	kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
	kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);

	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data);
	c->eip = msr_data;

	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);
	c->regs[VCPU_REGS_RSP] = msr_data;

	return 0;
}

static int
emulate_sysexit(struct x86_emulate_ctxt *ctxt)
{
	struct decode_cache *c = &ctxt->decode;
	struct kvm_segment cs, ss;
	u64 msr_data;
	int usermode;

	/* inject #UD if LOCK prefix is used */
	if (c->lock_prefix)
		return -1;

	/* inject #GP if in real mode or paging is disabled */
	if (ctxt->mode == X86EMUL_MODE_REAL
		|| !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) {
		kvm_inject_gp(ctxt->vcpu, 0);
		return -1;
	}

	/* sysexit must be called from CPL 0 */
	if (kvm_x86_ops->get_cpl(ctxt->vcpu) != 0) {
		kvm_inject_gp(ctxt->vcpu, 0);
		return -1;
	}

	setup_syscalls_segments(ctxt, &cs, &ss);

	if ((c->rex_prefix & 0x8) != 0x0)
		usermode = X86EMUL_MODE_PROT64;
	else
		usermode = X86EMUL_MODE_PROT32;

	cs.dpl = 3;
	ss.dpl = 3;
	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
	switch (usermode) {
	case X86EMUL_MODE_PROT32:
		cs.selector = (u16)(msr_data + 16);
		if ((msr_data & 0xfffc) == 0x0) {
			kvm_inject_gp(ctxt->vcpu, 0);
			return -1;
		}
		ss.selector = (u16)(msr_data + 24);
		break;
	case X86EMUL_MODE_PROT64:
		cs.selector = (u16)(msr_data + 32);
		if (msr_data == 0x0) {
			kvm_inject_gp(ctxt->vcpu, 0);
			return -1;
		}
		ss.selector = cs.selector + 8;
		cs.db = 0;
		cs.l = 1;
		break;
	}
	cs.selector |= SELECTOR_RPL_MASK;
	ss.selector |= SELECTOR_RPL_MASK;

	kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
	kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);

	c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX];
	c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX];

	return 0;
}

int
x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
{
@@ -1970,6 +2203,12 @@ twobyte_insn:
			goto cannot_emulate;
		}
		break;
	case 0x05:		/* syscall */
		if (emulate_syscall(ctxt) == -1)
			goto cannot_emulate;
		else
			goto writeback;
		break;
	case 0x06:
		emulate_clts(ctxt->vcpu);
		c->dst.type = OP_NONE;
@@ -2036,6 +2275,18 @@ twobyte_insn:
		rc = X86EMUL_CONTINUE;
		c->dst.type = OP_NONE;
		break;
	case 0x34:		/* sysenter */
		if (emulate_sysenter(ctxt) == -1)
			goto cannot_emulate;
		else
			goto writeback;
		break;
	case 0x35:		/* sysexit */
		if (emulate_sysexit(ctxt) == -1)
			goto cannot_emulate;
		else
			goto writeback;
		break;
	case 0x40 ... 0x4f:	/* cmov */
		c->dst.val = c->dst.orig_val = c->src.val;
		if (!test_cc(c->b, ctxt->eflags))

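As a reading aid for emulate_syscall() above: the segment selectors come out of MSR_STAR, whose architectural layout (an assumption of this note, not stated in the hunk) keeps the SYSCALL CS selector in bits 47:32:

	/* msr_data >>= 32;                        -> bits 63:32 of MSR_STAR
	 * cs.selector = (u16)(msr_data & 0xfffc); -> SYSCALL CS, RPL/TI cleared
	 * ss.selector = (u16)(msr_data + 8);      -> SS is the next GDT entry
	 * The 64-bit path then takes the entry point from MSR_LSTAR (or
	 * MSR_CSTAR for compat mode) and masks RFLAGS with MSR_SYSCALL_MASK,
	 * mirroring what hardware SYSCALL does. */
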
@@ -231,7 +231,7 @@ int pit_has_pending_timer(struct kvm_vcpu *vcpu)
{
	struct kvm_pit *pit = vcpu->kvm->arch.vpit;

	if (pit && vcpu->vcpu_id == 0 && pit->pit_state.irq_ack)
	if (pit && kvm_vcpu_is_bsp(vcpu) && pit->pit_state.irq_ack)
		return atomic_read(&pit->pit_state.pit_timer.pending);
	return 0;
}
@@ -252,7 +252,7 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
	struct hrtimer *timer;

	if (vcpu->vcpu_id != 0 || !pit)
	if (!kvm_vcpu_is_bsp(vcpu) || !pit)
		return;

	timer = &pit->pit_state.pit_timer.timer;
@@ -294,7 +294,7 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
	pt->timer.function = kvm_timer_fn;
	pt->t_ops = &kpit_ops;
	pt->kvm = ps->pit->kvm;
	pt->vcpu_id = 0;
	pt->vcpu = pt->kvm->bsp_vcpu;

	atomic_set(&pt->pending, 0);
	ps->irq_ack = 1;
@@ -332,33 +332,62 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
	case 1:
		/* FIXME: enhance mode 4 precision */
	case 4:
		create_pit_timer(ps, val, 0);
		if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)) {
			create_pit_timer(ps, val, 0);
		}
		break;
	case 2:
	case 3:
		create_pit_timer(ps, val, 1);
		if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)){
			create_pit_timer(ps, val, 1);
		}
		break;
	default:
		destroy_pit_timer(&ps->pit_timer);
	}
}

void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val)
void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start)
{
	mutex_lock(&kvm->arch.vpit->pit_state.lock);
	pit_load_count(kvm, channel, val);
	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
	u8 saved_mode;
	if (hpet_legacy_start) {
		/* save existing mode for later reenablement */
		saved_mode = kvm->arch.vpit->pit_state.channels[0].mode;
		kvm->arch.vpit->pit_state.channels[0].mode = 0xff; /* disable timer */
		pit_load_count(kvm, channel, val);
		kvm->arch.vpit->pit_state.channels[0].mode = saved_mode;
	} else {
		pit_load_count(kvm, channel, val);
	}
}

static void pit_ioport_write(struct kvm_io_device *this,
			     gpa_t addr, int len, const void *data)
static inline struct kvm_pit *dev_to_pit(struct kvm_io_device *dev)
{
	struct kvm_pit *pit = (struct kvm_pit *)this->private;
	return container_of(dev, struct kvm_pit, dev);
}

static inline struct kvm_pit *speaker_to_pit(struct kvm_io_device *dev)
{
	return container_of(dev, struct kvm_pit, speaker_dev);
}

static inline int pit_in_range(gpa_t addr)
{
	return ((addr >= KVM_PIT_BASE_ADDRESS) &&
		(addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
}

static int pit_ioport_write(struct kvm_io_device *this,
			    gpa_t addr, int len, const void *data)
{
	struct kvm_pit *pit = dev_to_pit(this);
	struct kvm_kpit_state *pit_state = &pit->pit_state;
	struct kvm *kvm = pit->kvm;
	int channel, access;
	struct kvm_kpit_channel_state *s;
	u32 val = *(u32 *) data;
	if (!pit_in_range(addr))
		return -EOPNOTSUPP;

	val &= 0xff;
	addr &= KVM_PIT_CHANNEL_MASK;
@@ -421,16 +450,19 @@ static void pit_ioport_write(struct kvm_io_device *this,
	}

	mutex_unlock(&pit_state->lock);
	return 0;
}

static void pit_ioport_read(struct kvm_io_device *this,
			    gpa_t addr, int len, void *data)
static int pit_ioport_read(struct kvm_io_device *this,
			   gpa_t addr, int len, void *data)
{
	struct kvm_pit *pit = (struct kvm_pit *)this->private;
	struct kvm_pit *pit = dev_to_pit(this);
	struct kvm_kpit_state *pit_state = &pit->pit_state;
	struct kvm *kvm = pit->kvm;
	int ret, count;
	struct kvm_kpit_channel_state *s;
	if (!pit_in_range(addr))
		return -EOPNOTSUPP;

	addr &= KVM_PIT_CHANNEL_MASK;
	s = &pit_state->channels[addr];
@@ -485,37 +517,36 @@ static void pit_ioport_read(struct kvm_io_device *this,
	memcpy(data, (char *)&ret, len);

	mutex_unlock(&pit_state->lock);
	return 0;
}

static int pit_in_range(struct kvm_io_device *this, gpa_t addr,
			int len, int is_write)
static int speaker_ioport_write(struct kvm_io_device *this,
				gpa_t addr, int len, const void *data)
{
	return ((addr >= KVM_PIT_BASE_ADDRESS) &&
		(addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
}

static void speaker_ioport_write(struct kvm_io_device *this,
				 gpa_t addr, int len, const void *data)
{
	struct kvm_pit *pit = (struct kvm_pit *)this->private;
	struct kvm_pit *pit = speaker_to_pit(this);
	struct kvm_kpit_state *pit_state = &pit->pit_state;
	struct kvm *kvm = pit->kvm;
	u32 val = *(u32 *) data;
	if (addr != KVM_SPEAKER_BASE_ADDRESS)
		return -EOPNOTSUPP;

	mutex_lock(&pit_state->lock);
	pit_state->speaker_data_on = (val >> 1) & 1;
	pit_set_gate(kvm, 2, val & 1);
	mutex_unlock(&pit_state->lock);
	return 0;
}

static void speaker_ioport_read(struct kvm_io_device *this,
				gpa_t addr, int len, void *data)
static int speaker_ioport_read(struct kvm_io_device *this,
			       gpa_t addr, int len, void *data)
{
	struct kvm_pit *pit = (struct kvm_pit *)this->private;
	struct kvm_pit *pit = speaker_to_pit(this);
	struct kvm_kpit_state *pit_state = &pit->pit_state;
	struct kvm *kvm = pit->kvm;
	unsigned int refresh_clock;
	int ret;
	if (addr != KVM_SPEAKER_BASE_ADDRESS)
		return -EOPNOTSUPP;

	/* Refresh clock toggles at about 15us. We approximate as 2^14ns. */
	refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1;

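The refresh-clock approximation above is worth a quick sanity check: 2^14 ns = 16384 ns, roughly 16.4 us, close enough to the nominal ~15 us refresh period that bit 14 of a raw nanosecond timestamp toggles at about the right rate:

	/* refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1;
	 * bit 14 has a full cycle of 2 * 2^14 ns ~= 32.8 us, i.e. it flips
	 * about every 16.4 us, versus ~15 us on real hardware. */
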
@@ -527,12 +558,7 @@ static void speaker_ioport_read(struct kvm_io_device *this,
	len = sizeof(ret);
	memcpy(data, (char *)&ret, len);
	mutex_unlock(&pit_state->lock);
}

static int speaker_in_range(struct kvm_io_device *this, gpa_t addr,
			    int len, int is_write)
{
	return (addr == KVM_SPEAKER_BASE_ADDRESS);
	return 0;
}

void kvm_pit_reset(struct kvm_pit *pit)
@@ -541,6 +567,7 @@ void kvm_pit_reset(struct kvm_pit *pit)
	struct kvm_kpit_channel_state *c;

	mutex_lock(&pit->pit_state.lock);
	pit->pit_state.flags = 0;
	for (i = 0; i < 3; i++) {
		c = &pit->pit_state.channels[i];
		c->mode = 0xff;
@@ -563,10 +590,22 @@ static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
	}
}

struct kvm_pit *kvm_create_pit(struct kvm *kvm)
static const struct kvm_io_device_ops pit_dev_ops = {
	.read = pit_ioport_read,
	.write = pit_ioport_write,
};

static const struct kvm_io_device_ops speaker_dev_ops = {
	.read = speaker_ioport_read,
	.write = speaker_ioport_write,
};

/* Caller must have writers lock on slots_lock */
struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
{
	struct kvm_pit *pit;
	struct kvm_kpit_state *pit_state;
	int ret;

	pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL);
	if (!pit)
@@ -582,19 +621,6 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
	mutex_lock(&pit->pit_state.lock);
	spin_lock_init(&pit->pit_state.inject_lock);

	/* Initialize PIO device */
	pit->dev.read = pit_ioport_read;
	pit->dev.write = pit_ioport_write;
	pit->dev.in_range = pit_in_range;
	pit->dev.private = pit;
	kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);

	pit->speaker_dev.read = speaker_ioport_read;
	pit->speaker_dev.write = speaker_ioport_write;
	pit->speaker_dev.in_range = speaker_in_range;
	pit->speaker_dev.private = pit;
	kvm_io_bus_register_dev(&kvm->pio_bus, &pit->speaker_dev);

	kvm->arch.vpit = pit;
	pit->kvm = kvm;

@@ -613,7 +639,30 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
	pit->mask_notifier.func = pit_mask_notifer;
	kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);

	kvm_iodevice_init(&pit->dev, &pit_dev_ops);
	ret = __kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
	if (ret < 0)
		goto fail;

	if (flags & KVM_PIT_SPEAKER_DUMMY) {
		kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops);
		ret = __kvm_io_bus_register_dev(&kvm->pio_bus,
						&pit->speaker_dev);
		if (ret < 0)
			goto fail_unregister;
	}

	return pit;

fail_unregister:
	__kvm_io_bus_unregister_dev(&kvm->pio_bus, &pit->dev);

fail:
	if (pit->irq_source_id >= 0)
		kvm_free_irq_source_id(kvm, pit->irq_source_id);

	kfree(pit);
	return NULL;
}

void kvm_free_pit(struct kvm *kvm)
@@ -623,6 +672,8 @@ void kvm_free_pit(struct kvm *kvm)
	if (kvm->arch.vpit) {
		kvm_unregister_irq_mask_notifier(kvm, 0,
					&kvm->arch.vpit->mask_notifier);
		kvm_unregister_irq_ack_notifier(kvm,
				&kvm->arch.vpit->pit_state.irq_ack_notifier);
		mutex_lock(&kvm->arch.vpit->pit_state.lock);
		timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
		hrtimer_cancel(timer);
@@ -637,10 +688,10 @@ static void __inject_pit_timer_intr(struct kvm *kvm)
	struct kvm_vcpu *vcpu;
	int i;

	mutex_lock(&kvm->lock);
	mutex_lock(&kvm->irq_lock);
	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
	mutex_unlock(&kvm->lock);
	mutex_unlock(&kvm->irq_lock);

	/*
	 * Provides NMI watchdog support via Virtual Wire mode.
@@ -652,11 +703,8 @@ static void __inject_pit_timer_intr(struct kvm *kvm)
	 * VCPU0, and only if its LVT0 is in EXTINT mode.
	 */
	if (kvm->arch.vapics_in_nmi_mode > 0)
		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
			vcpu = kvm->vcpus[i];
			if (vcpu)
				kvm_apic_nmi_wd_deliver(vcpu);
		}
		kvm_for_each_vcpu(i, vcpu, kvm)
			kvm_apic_nmi_wd_deliver(vcpu);
}

void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
@@ -665,7 +713,7 @@ void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
	struct kvm *kvm = vcpu->kvm;
	struct kvm_kpit_state *ps;

	if (vcpu && pit) {
	if (pit) {
		int inject = 0;
		ps = &pit->pit_state;

@@ -21,6 +21,7 @@ struct kvm_kpit_channel_state {

struct kvm_kpit_state {
	struct kvm_kpit_channel_state channels[3];
	u32 flags;
	struct kvm_timer pit_timer;
	bool is_periodic;
	u32 speaker_data_on;
@@ -49,8 +50,8 @@ struct kvm_pit {
#define KVM_PIT_CHANNEL_MASK 0x3

void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val);
struct kvm_pit *kvm_create_pit(struct kvm *kvm);
void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start);
struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags);
void kvm_free_pit(struct kvm *kvm);
void kvm_pit_reset(struct kvm_pit *pit);

@ -30,50 +30,24 @@
|
|||
#include "irq.h"
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
|
||||
static void pic_lock(struct kvm_pic *s)
|
||||
__acquires(&s->lock)
|
||||
{
|
||||
spin_lock(&s->lock);
|
||||
}
|
||||
|
||||
static void pic_unlock(struct kvm_pic *s)
|
||||
__releases(&s->lock)
|
||||
{
|
||||
struct kvm *kvm = s->kvm;
|
||||
unsigned acks = s->pending_acks;
|
||||
bool wakeup = s->wakeup_needed;
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
s->pending_acks = 0;
|
||||
s->wakeup_needed = false;
|
||||
|
||||
spin_unlock(&s->lock);
|
||||
|
||||
while (acks) {
|
||||
kvm_notify_acked_irq(kvm, SELECT_PIC(__ffs(acks)),
|
||||
__ffs(acks));
|
||||
acks &= acks - 1;
|
||||
}
|
||||
|
||||
if (wakeup) {
|
||||
vcpu = s->kvm->vcpus[0];
|
||||
if (vcpu)
|
||||
kvm_vcpu_kick(vcpu);
|
||||
}
|
||||
}
|
||||
#include "trace.h"
|
||||
|
||||
static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
|
||||
{
|
||||
s->isr &= ~(1 << irq);
|
||||
s->isr_ack |= (1 << irq);
|
	if (s != &s->pics_state->pics[0])
		irq += 8;
	kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
}

void kvm_pic_clear_isr_ack(struct kvm *kvm)
{
	struct kvm_pic *s = pic_irqchip(kvm);
	spin_lock(&s->lock);
	s->pics[0].isr_ack = 0xff;
	s->pics[1].isr_ack = 0xff;
	spin_unlock(&s->lock);
}

/*

@@ -174,9 +148,9 @@ static void pic_update_irq(struct kvm_pic *s)

void kvm_pic_update_irq(struct kvm_pic *s)
{
	pic_lock(s);
	spin_lock(&s->lock);
	pic_update_irq(s);
	pic_unlock(s);
	spin_unlock(&s->lock);
}

int kvm_pic_set_irq(void *opaque, int irq, int level)

@@ -184,12 +158,14 @@ int kvm_pic_set_irq(void *opaque, int irq, int level)
	struct kvm_pic *s = opaque;
	int ret = -1;

	pic_lock(s);
	spin_lock(&s->lock);
	if (irq >= 0 && irq < PIC_NUM_PINS) {
		ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
		pic_update_irq(s);
		trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr,
				      s->pics[irq >> 3].imr, ret == 0);
	}
	pic_unlock(s);
	spin_unlock(&s->lock);

	return ret;
}

@@ -217,7 +193,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
	int irq, irq2, intno;
	struct kvm_pic *s = pic_irqchip(kvm);

	pic_lock(s);
	spin_lock(&s->lock);
	irq = pic_get_irq(&s->pics[0]);
	if (irq >= 0) {
		pic_intack(&s->pics[0], irq);

@@ -242,8 +218,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
		intno = s->pics[0].irq_base + irq;
	}
	pic_update_irq(s);
	pic_unlock(s);
	kvm_notify_acked_irq(kvm, SELECT_PIC(irq), irq);
	spin_unlock(&s->lock);

	return intno;
}

@@ -252,7 +227,7 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
{
	int irq, irqbase, n;
	struct kvm *kvm = s->pics_state->irq_request_opaque;
	struct kvm_vcpu *vcpu0 = kvm->vcpus[0];
	struct kvm_vcpu *vcpu0 = kvm->bsp_vcpu;

	if (s == &s->pics_state->pics[0])
		irqbase = 0;

@@ -263,7 +238,7 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
		if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0))
			if (s->irr & (1 << irq) || s->isr & (1 << irq)) {
				n = irq + irqbase;
				s->pics_state->pending_acks |= 1 << n;
				kvm_notify_acked_irq(kvm, SELECT_PIC(n), n);
			}
	}
	s->last_irr = 0;

@@ -428,8 +403,7 @@ static u32 elcr_ioport_read(void *opaque, u32 addr1)
	return s->elcr;
}

static int picdev_in_range(struct kvm_io_device *this, gpa_t addr,
			   int len, int is_write)
static int picdev_in_range(gpa_t addr)
{
	switch (addr) {
	case 0x20:

@@ -444,18 +418,25 @@ static int picdev_in_range(struct kvm_io_device *this, gpa_t addr,
	}
}

static void picdev_write(struct kvm_io_device *this,
static inline struct kvm_pic *to_pic(struct kvm_io_device *dev)
{
	return container_of(dev, struct kvm_pic, dev);
}

static int picdev_write(struct kvm_io_device *this,
			gpa_t addr, int len, const void *val)
{
	struct kvm_pic *s = this->private;
	struct kvm_pic *s = to_pic(this);
	unsigned char data = *(unsigned char *)val;
	if (!picdev_in_range(addr))
		return -EOPNOTSUPP;

	if (len != 1) {
		if (printk_ratelimit())
			printk(KERN_ERR "PIC: non byte write\n");
		return;
		return 0;
	}
	pic_lock(s);
	spin_lock(&s->lock);
	switch (addr) {
	case 0x20:
	case 0x21:

@@ -468,21 +449,24 @@ static void picdev_write(struct kvm_io_device *this,
		elcr_ioport_write(&s->pics[addr & 1], addr, data);
		break;
	}
	pic_unlock(s);
	spin_unlock(&s->lock);
	return 0;
}

static void picdev_read(struct kvm_io_device *this,
			gpa_t addr, int len, void *val)
static int picdev_read(struct kvm_io_device *this,
		       gpa_t addr, int len, void *val)
{
	struct kvm_pic *s = this->private;
	struct kvm_pic *s = to_pic(this);
	unsigned char data = 0;
	if (!picdev_in_range(addr))
		return -EOPNOTSUPP;

	if (len != 1) {
		if (printk_ratelimit())
			printk(KERN_ERR "PIC: non byte read\n");
		return;
		return 0;
	}
	pic_lock(s);
	spin_lock(&s->lock);
	switch (addr) {
	case 0x20:
	case 0x21:

@@ -496,7 +480,8 @@ static void picdev_read(struct kvm_io_device *this,
		break;
	}
	*(unsigned char *)val = data;
	pic_unlock(s);
	spin_unlock(&s->lock);
	return 0;
}

/*

@@ -505,20 +490,27 @@ static void picdev_read(struct kvm_io_device *this,
static void pic_irq_request(void *opaque, int level)
{
	struct kvm *kvm = opaque;
	struct kvm_vcpu *vcpu = kvm->vcpus[0];
	struct kvm_vcpu *vcpu = kvm->bsp_vcpu;
	struct kvm_pic *s = pic_irqchip(kvm);
	int irq = pic_get_irq(&s->pics[0]);

	s->output = level;
	if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) {
		s->pics[0].isr_ack &= ~(1 << irq);
		s->wakeup_needed = true;
		kvm_vcpu_kick(vcpu);
	}
}

static const struct kvm_io_device_ops picdev_ops = {
	.read  = picdev_read,
	.write = picdev_write,
};

struct kvm_pic *kvm_create_pic(struct kvm *kvm)
{
	struct kvm_pic *s;
	int ret;

	s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
	if (!s)
		return NULL;

@@ -534,10 +526,12 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
	/*
	 * Initialize PIO device
	 */
	s->dev.read = picdev_read;
	s->dev.write = picdev_write;
	s->dev.in_range = picdev_in_range;
	s->dev.private = s;
	kvm_io_bus_register_dev(&kvm->pio_bus, &s->dev);
	kvm_iodevice_init(&s->dev, &picdev_ops);
	ret = kvm_io_bus_register_dev(kvm, &kvm->pio_bus, &s->dev);
	if (ret < 0) {
		kfree(s);
		return NULL;
	}

	return s;
}
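A note on the hunk above: the io-device rework drops the untyped `private` back-pointer in favour of a `container_of()`-based lookup (`to_pic()`), which recovers the enclosing structure from a pointer to one of its members. A minimal, self-contained sketch of that idiom, with illustrative struct names that are not the kernel's:

#include <stddef.h>
#include <stdio.h>

/* container_of: recover the enclosing struct from a member pointer. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct io_device { int id; };			/* embedded member */
struct pic { int output; struct io_device dev; };

static struct pic *to_pic(struct io_device *dev)
{
	return container_of(dev, struct pic, dev);
}

int main(void)
{
	struct pic s = { .output = 1 };
	/* callbacks receive &s.dev; to_pic() gets back &s */
	printf("%d\n", to_pic(&s.dev)->output);
	return 0;
}

The payoff is type safety: the compiler checks that `dev` really is a member of the named type, whereas a `void *private` field will happily hold anything.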
@@ -63,7 +63,6 @@ struct kvm_kpic_state {

struct kvm_pic {
	spinlock_t lock;
	bool wakeup_needed;
	unsigned pending_acks;
	struct kvm *kvm;
	struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
@@ -29,4 +29,13 @@ static inline void kvm_rip_write(struct kvm_vcpu *vcpu, unsigned long val)
	kvm_register_write(vcpu, VCPU_REGS_RIP, val);
}

static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
{
	if (!test_bit(VCPU_EXREG_PDPTR,
		      (unsigned long *)&vcpu->arch.regs_avail))
		kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR);

	return vcpu->arch.pdptrs[index];
}

#endif
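The `kvm_pdptr_read()` helper added above reads a PDPTR through a lazily filled cache: the value is only pulled from hardware state when the availability bit is still clear. A standalone model of the same read-through-cache pattern; the names here are illustrative, not the kernel's:

#include <stdbool.h>
#include <stdio.h>

/* A register is fetched from slow state on first use, then served
 * from the cache until someone clears 'valid'. */
struct regcache {
	bool valid;
	unsigned long value;
};

static unsigned long slow_fetch(void)
{
	puts("fetching from hardware state");
	return 0x1000;
}

static unsigned long read_cached(struct regcache *c)
{
	if (!c->valid) {		/* mirrors the test_bit() check */
		c->value = slow_fetch();
		c->valid = true;
	}
	return c->value;
}

int main(void)
{
	struct regcache c = { 0 };
	printf("%lx\n", read_cached(&c));	/* fetches */
	printf("%lx\n", read_cached(&c));	/* served from cache */
	return 0;
}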
@@ -1,51 +0,0 @@
#ifndef __KVM_SVM_H
#define __KVM_SVM_H

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/kvm_host.h>
#include <asm/msr.h>

#include <asm/svm.h>

static const u32 host_save_user_msrs[] = {
#ifdef CONFIG_X86_64
	MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
	MSR_FS_BASE,
#endif
	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
};

#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)

struct kvm_vcpu;

struct vcpu_svm {
	struct kvm_vcpu vcpu;
	struct vmcb *vmcb;
	unsigned long vmcb_pa;
	struct svm_cpu_data *svm_data;
	uint64_t asid_generation;

	u64 next_rip;

	u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
	u64 host_gs_base;
	unsigned long host_cr2;

	u32 *msrpm;
	struct vmcb *hsave;
	u64 hsave_msr;

	u64 nested_vmcb;

	/* These are the merged vectors */
	u32 *nested_msrpm;

	/* gpa pointers to the real vectors */
	u64 nested_vmcb_msrpm;
};

#endif
@@ -6,7 +6,7 @@ struct kvm_timer {
	bool reinject;
	struct kvm_timer_ops *t_ops;
	struct kvm *kvm;
	int vcpu_id;
	struct kvm_vcpu *vcpu;
};

struct kvm_timer_ops {
@@ -32,8 +32,11 @@
#include <asm/current.h>
#include <asm/apicdef.h>
#include <asm/atomic.h>
#include <asm/apicdef.h>
#include "kvm_cache_regs.h"
#include "irq.h"
#include "trace.h"
#include "x86.h"

#ifndef CONFIG_X86_64
#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))

@@ -141,6 +144,26 @@ static inline int apic_lvt_nmi_mode(u32 lvt_val)
	return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
}

void kvm_apic_set_version(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	struct kvm_cpuid_entry2 *feat;
	u32 v = APIC_VERSION;

	if (!irqchip_in_kernel(vcpu->kvm))
		return;

	feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
	if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))))
		v |= APIC_LVR_DIRECTED_EOI;
	apic_set_reg(apic, APIC_LVR, v);
}

static inline int apic_x2apic_mode(struct kvm_lapic *apic)
{
	return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
}

static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
	LVT_MASK | APIC_LVT_TIMER_PERIODIC,	/* LVTT */
	LVT_MASK | APIC_MODE_MASK,	/* LVTTHMR */

@@ -165,36 +188,52 @@ static int find_highest_vector(void *bitmap)

static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic)
{
	apic->irr_pending = true;
	return apic_test_and_set_vector(vec, apic->regs + APIC_IRR);
}

static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
static inline int apic_search_irr(struct kvm_lapic *apic)
{
	apic_clear_vector(vec, apic->regs + APIC_IRR);
	return find_highest_vector(apic->regs + APIC_IRR);
}

static inline int apic_find_highest_irr(struct kvm_lapic *apic)
{
	int result;

	result = find_highest_vector(apic->regs + APIC_IRR);
	if (!apic->irr_pending)
		return -1;

	result = apic_search_irr(apic);
	ASSERT(result == -1 || result >= 16);

	return result;
}

static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
{
	apic->irr_pending = false;
	apic_clear_vector(vec, apic->regs + APIC_IRR);
	if (apic_search_irr(apic) != -1)
		apic->irr_pending = true;
}

int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	int highest_irr;

	/* This may race with setting of irr in __apic_accept_irq() and
	 * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq
	 * will cause vmexit immediately and the value will be recalculated
	 * on the next vmentry.
	 */
	if (!apic)
		return 0;
	highest_irr = apic_find_highest_irr(apic);

	return highest_irr;
}
EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);

static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
			     int vector, int level, int trig_mode);

@@ -251,7 +290,12 @@ int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
{
	int result = 0;
	u8 logical_id;
	u32 logical_id;

	if (apic_x2apic_mode(apic)) {
		logical_id = apic_get_reg(apic, APIC_LDR);
		return logical_id & mda;
	}

	logical_id = GET_APIC_LOGICAL_ID(apic_get_reg(apic, APIC_LDR));

@@ -331,6 +375,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
			break;

		result = !apic_test_and_set_irr(vector, apic);
		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
					  trig_mode, vector, !result);
		if (!result) {
			if (trig_mode)
				apic_debug("level trig mode repeatedly for "

@@ -425,7 +471,11 @@ static void apic_set_eoi(struct kvm_lapic *apic)
		trigger_mode = IOAPIC_LEVEL_TRIG;
	else
		trigger_mode = IOAPIC_EDGE_TRIG;
	kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
	if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) {
		mutex_lock(&apic->vcpu->kvm->irq_lock);
		kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
		mutex_unlock(&apic->vcpu->kvm->irq_lock);
	}
}

static void apic_send_ipi(struct kvm_lapic *apic)

@@ -440,7 +490,12 @@ static void apic_send_ipi(struct kvm_lapic *apic)
	irq.level = icr_low & APIC_INT_ASSERT;
	irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
	irq.shorthand = icr_low & APIC_SHORT_MASK;
	irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
	if (apic_x2apic_mode(apic))
		irq.dest_id = icr_high;
	else
		irq.dest_id = GET_APIC_DEST_FIELD(icr_high);

	trace_kvm_apic_ipi(icr_low, irq.dest_id);

	apic_debug("icr_high 0x%x, icr_low 0x%x, "
		   "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "

@@ -449,7 +504,9 @@ static void apic_send_ipi(struct kvm_lapic *apic)
		   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
		   irq.vector);

	mutex_lock(&apic->vcpu->kvm->irq_lock);
	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
	mutex_unlock(&apic->vcpu->kvm->irq_lock);
}

static u32 apic_get_tmcct(struct kvm_lapic *apic)

@@ -495,12 +552,16 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
{
	u32 val = 0;

	KVMTRACE_1D(APIC_ACCESS, apic->vcpu, (u32)offset, handler);

	if (offset >= LAPIC_MMIO_LENGTH)
		return 0;

	switch (offset) {
	case APIC_ID:
		if (apic_x2apic_mode(apic))
			val = kvm_apic_id(apic);
		else
			val = kvm_apic_id(apic) << 24;
		break;
	case APIC_ARBPRI:
		printk(KERN_WARNING "Access APIC ARBPRI register "
		       "which is for P6\n");

@@ -522,21 +583,35 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
	return val;
}

static void apic_mmio_read(struct kvm_io_device *this,
			   gpa_t address, int len, void *data)
static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
{
	return container_of(dev, struct kvm_lapic, dev);
}

static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
		void *data)
{
	struct kvm_lapic *apic = (struct kvm_lapic *)this->private;
	unsigned int offset = address - apic->base_address;
	unsigned char alignment = offset & 0xf;
	u32 result;
	/* this bitmask has a bit cleared for each reserved register */
	static const u64 rmask = 0x43ff01ffffffe70cULL;

	if ((alignment + len) > 4) {
		printk(KERN_ERR "KVM_APIC_READ: alignment error %lx %d",
		       (unsigned long)address, len);
		return;
		apic_debug("KVM_APIC_READ: alignment error %x %d\n",
			   offset, len);
		return 1;
	}

	if (offset > 0x3f0 || !(rmask & (1ULL << (offset >> 4)))) {
		apic_debug("KVM_APIC_READ: read reserved register %x\n",
			   offset);
		return 1;
	}

	result = __apic_read(apic, offset & ~0xf);

	trace_kvm_apic_read(offset, result);

	switch (len) {
	case 1:
	case 2:

@@ -548,6 +623,28 @@ static void apic_mmio_read(struct kvm_io_device *this,
			   "should be 1,2, or 4 instead\n", len);
		break;
	}
	return 0;
}

static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
{
	return apic_hw_enabled(apic) &&
	    addr >= apic->base_address &&
	    addr < apic->base_address + LAPIC_MMIO_LENGTH;
}

static int apic_mmio_read(struct kvm_io_device *this,
			  gpa_t address, int len, void *data)
{
	struct kvm_lapic *apic = to_lapic(this);
	u32 offset = address - apic->base_address;

	if (!apic_mmio_in_range(apic, address))
		return -EOPNOTSUPP;

	apic_reg_read(apic, offset, len, data);

	return 0;
}

static void update_divide_count(struct kvm_lapic *apic)

@@ -573,6 +670,15 @@ static void start_apic_timer(struct kvm_lapic *apic)

	if (!apic->lapic_timer.period)
		return;
	/*
	 * Do not allow the guest to program periodic timers with small
	 * interval, since the hrtimers are not throttled by the host
	 * scheduler.
	 */
	if (apic_lvtt_period(apic)) {
		if (apic->lapic_timer.period < NSEC_PER_MSEC/2)
			apic->lapic_timer.period = NSEC_PER_MSEC/2;
	}

	hrtimer_start(&apic->lapic_timer.timer,
		      ktime_add_ns(now, apic->lapic_timer.period),

@@ -603,40 +709,18 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
		apic->vcpu->kvm->arch.vapics_in_nmi_mode--;
}

static void apic_mmio_write(struct kvm_io_device *this,
			    gpa_t address, int len, const void *data)
static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
{
	struct kvm_lapic *apic = (struct kvm_lapic *)this->private;
	unsigned int offset = address - apic->base_address;
	unsigned char alignment = offset & 0xf;
	u32 val;
	int ret = 0;

	/*
	 * APIC register must be aligned on 128-bits boundary.
	 * 32/64/128 bits registers must be accessed thru 32 bits.
	 * Refer SDM 8.4.1
	 */
	if (len != 4 || alignment) {
		/* Don't shout loud, $infamous_os would cause only noise. */
		apic_debug("apic write: bad size=%d %lx\n",
			   len, (long)address);
		return;
	}
	trace_kvm_apic_write(reg, val);

	val = *(u32 *) data;

	/* too common printing */
	if (offset != APIC_EOI)
		apic_debug("%s: offset 0x%x with length 0x%x, and value is "
			   "0x%x\n", __func__, offset, len, val);

	offset &= 0xff0;

	KVMTRACE_1D(APIC_ACCESS, apic->vcpu, (u32)offset, handler);

	switch (offset) {
	switch (reg) {
	case APIC_ID:	/* Local APIC ID */
		apic_set_reg(apic, APIC_ID, val);
		if (!apic_x2apic_mode(apic))
			apic_set_reg(apic, APIC_ID, val);
		else
			ret = 1;
		break;

	case APIC_TASKPRI:

@@ -649,15 +733,24 @@ static void apic_mmio_write(struct kvm_io_device *this,
		break;

	case APIC_LDR:
		apic_set_reg(apic, APIC_LDR, val & APIC_LDR_MASK);
		if (!apic_x2apic_mode(apic))
			apic_set_reg(apic, APIC_LDR, val & APIC_LDR_MASK);
		else
			ret = 1;
		break;

	case APIC_DFR:
		apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
		if (!apic_x2apic_mode(apic))
			apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
		else
			ret = 1;
		break;

	case APIC_SPIV:
		apic_set_reg(apic, APIC_SPIV, val & 0x3ff);
	case APIC_SPIV: {
		u32 mask = 0x3ff;
		if (apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
			mask |= APIC_SPIV_DIRECTED_EOI;
		apic_set_reg(apic, APIC_SPIV, val & mask);
		if (!(val & APIC_SPIV_APIC_ENABLED)) {
			int i;
			u32 lvt_val;

@@ -672,7 +765,7 @@ static void apic_mmio_write(struct kvm_io_device *this,

		}
		break;

	}
	case APIC_ICR:
		/* No delay here, so we always clear the pending bit */
		apic_set_reg(apic, APIC_ICR, val & ~(1 << 12));

@@ -680,7 +773,9 @@ static void apic_mmio_write(struct kvm_io_device *this,
		break;

	case APIC_ICR2:
		apic_set_reg(apic, APIC_ICR2, val & 0xff000000);
		if (!apic_x2apic_mode(apic))
			val &= 0xff000000;
		apic_set_reg(apic, APIC_ICR2, val);
		break;

	case APIC_LVT0:

@@ -694,8 +789,8 @@ static void apic_mmio_write(struct kvm_io_device *this,
		if (!apic_sw_enabled(apic))
			val |= APIC_LVT_MASKED;

		val &= apic_lvt_mask[(offset - APIC_LVTT) >> 4];
		apic_set_reg(apic, offset, val);
		val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
		apic_set_reg(apic, reg, val);

		break;

@@ -703,7 +798,7 @@ static void apic_mmio_write(struct kvm_io_device *this,
		hrtimer_cancel(&apic->lapic_timer.timer);
		apic_set_reg(apic, APIC_TMICT, val);
		start_apic_timer(apic);
		return;
		break;

	case APIC_TDCR:
		if (val & 4)

@@ -712,27 +807,59 @@ static void apic_mmio_write(struct kvm_io_device *this,
		update_divide_count(apic);
		break;

	case APIC_ESR:
		if (apic_x2apic_mode(apic) && val != 0) {
			printk(KERN_ERR "KVM_WRITE:ESR not zero %x\n", val);
			ret = 1;
		}
		break;

	case APIC_SELF_IPI:
		if (apic_x2apic_mode(apic)) {
			apic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
		} else
			ret = 1;
		break;
	default:
		apic_debug("Local APIC Write to read-only register %x\n",
			   offset);
		ret = 1;
		break;
	}

	if (ret)
		apic_debug("Local APIC Write to read-only register %x\n", reg);
	return ret;
}

static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr,
			   int len, int size)
static int apic_mmio_write(struct kvm_io_device *this,
			   gpa_t address, int len, const void *data)
{
	struct kvm_lapic *apic = (struct kvm_lapic *)this->private;
	int ret = 0;
	struct kvm_lapic *apic = to_lapic(this);
	unsigned int offset = address - apic->base_address;
	u32 val;

	if (!apic_mmio_in_range(apic, address))
		return -EOPNOTSUPP;

	if (apic_hw_enabled(apic) &&
	    (addr >= apic->base_address) &&
	    (addr < (apic->base_address + LAPIC_MMIO_LENGTH)))
		ret = 1;
	/*
	 * APIC register must be aligned on 128-bits boundary.
	 * 32/64/128 bits registers must be accessed thru 32 bits.
	 * Refer SDM 8.4.1
	 */
	if (len != 4 || (offset & 0xf)) {
		/* Don't shout loud, $infamous_os would cause only noise. */
		apic_debug("apic write: bad size=%d %lx\n", len, (long)address);
		return 0;
	}

	return ret;
	val = *(u32*)data;

	/* too common printing */
	if (offset != APIC_EOI)
		apic_debug("%s: offset 0x%x with length 0x%x, and value is "
			   "0x%x\n", __func__, offset, len, val);

	apic_reg_write(apic, offset & 0xff0, val);

	return 0;
}

void kvm_free_lapic(struct kvm_vcpu *vcpu)

@@ -763,7 +890,6 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
	apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
		     | (apic_get_reg(apic, APIC_TASKPRI) & 4));
}
EXPORT_SYMBOL_GPL(kvm_lapic_set_tpr);

u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
{

@@ -776,7 +902,6 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)

	return (tpr & 0xf0) >> 4;
}
EXPORT_SYMBOL_GPL(kvm_lapic_get_cr8);

void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
{

@@ -787,10 +912,16 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
		vcpu->arch.apic_base = value;
		return;
	}
	if (apic->vcpu->vcpu_id)

	if (!kvm_vcpu_is_bsp(apic->vcpu))
		value &= ~MSR_IA32_APICBASE_BSP;

	vcpu->arch.apic_base = value;
	if (apic_x2apic_mode(apic)) {
		u32 id = kvm_apic_id(apic);
		u32 ldr = ((id & ~0xf) << 16) | (1 << (id & 0xf));
		apic_set_reg(apic, APIC_LDR, ldr);
	}
	apic->base_address = apic->vcpu->arch.apic_base &
			     MSR_IA32_APICBASE_BASE;

@@ -800,12 +931,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)

}

u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.apic_base;
}
EXPORT_SYMBOL_GPL(kvm_lapic_get_base);

void kvm_lapic_reset(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic;

@@ -821,7 +946,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
	hrtimer_cancel(&apic->lapic_timer.timer);

	apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24);
	apic_set_reg(apic, APIC_LVR, APIC_VERSION);
	kvm_apic_set_version(apic->vcpu);

	for (i = 0; i < APIC_LVT_NUM; i++)
		apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);

@@ -842,9 +967,10 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
		apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
		apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
	}
	apic->irr_pending = false;
	update_divide_count(apic);
	atomic_set(&apic->lapic_timer.pending, 0);
	if (vcpu->vcpu_id == 0)
	if (kvm_vcpu_is_bsp(vcpu))
		vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
	apic_update_ppr(apic);

@@ -855,7 +981,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
		   vcpu, kvm_apic_id(apic),
		   vcpu->arch.apic_base, apic->base_address);
}
EXPORT_SYMBOL_GPL(kvm_lapic_reset);

bool kvm_apic_present(struct kvm_vcpu *vcpu)
{

@@ -866,7 +991,6 @@ int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
{
	return kvm_apic_present(vcpu) && apic_sw_enabled(vcpu->arch.apic);
}
EXPORT_SYMBOL_GPL(kvm_lapic_enabled);

/*
 *----------------------------------------------------------------------

@@ -917,6 +1041,11 @@ static struct kvm_timer_ops lapic_timer_ops = {
	.is_periodic = lapic_is_periodic,
};

static const struct kvm_io_device_ops apic_mmio_ops = {
	.read  = apic_mmio_read,
	.write = apic_mmio_write,
};

int kvm_create_lapic(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic;

@@ -945,16 +1074,13 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
	apic->lapic_timer.timer.function = kvm_timer_fn;
	apic->lapic_timer.t_ops = &lapic_timer_ops;
	apic->lapic_timer.kvm = vcpu->kvm;
	apic->lapic_timer.vcpu_id = vcpu->vcpu_id;
	apic->lapic_timer.vcpu = vcpu;

	apic->base_address = APIC_DEFAULT_PHYS_BASE;
	vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE;

	kvm_lapic_reset(vcpu);
	apic->dev.read = apic_mmio_read;
	apic->dev.write = apic_mmio_write;
	apic->dev.in_range = apic_mmio_range;
	apic->dev.private = apic;
	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);

	return 0;
nomem_free_apic:

@@ -962,7 +1088,6 @@ nomem_free_apic:
nomem:
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(kvm_create_lapic);

int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
{

@@ -985,7 +1110,7 @@ int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
	u32 lvt0 = apic_get_reg(vcpu->arch.apic, APIC_LVT0);
	int r = 0;

	if (vcpu->vcpu_id == 0) {
	if (kvm_vcpu_is_bsp(vcpu)) {
		if (!apic_hw_enabled(vcpu->arch.apic))
			r = 1;
		if ((lvt0 & APIC_LVT_MASKED) == 0 &&

@@ -1025,7 +1150,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)

	apic->base_address = vcpu->arch.apic_base &
			     MSR_IA32_APICBASE_BASE;
	apic_set_reg(apic, APIC_LVR, APIC_VERSION);
	kvm_apic_set_version(vcpu);

	apic_update_ppr(apic);
	hrtimer_cancel(&apic->lapic_timer.timer);
	update_divide_count(apic);

@@ -1092,3 +1218,35 @@ void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)

	vcpu->arch.apic->vapic_addr = vapic_addr;
}

int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 reg = (msr - APIC_BASE_MSR) << 4;

	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
		return 1;

	/* if this is ICR write vector before command */
	if (msr == 0x830)
		apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
	return apic_reg_write(apic, reg, (u32)data);
}

int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;

	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
		return 1;

	if (apic_reg_read(apic, reg, 4, &low))
		return 1;
	if (msr == 0x830)
		apic_reg_read(apic, APIC_ICR2, 4, &high);

	*data = (((u64)high) << 32) | low;

	return 0;
}
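The `irr_pending` flag introduced above lets `apic_find_highest_irr()` skip scanning the 256-bit IRR entirely when nothing is pending; only when the flag is set does `apic_search_irr()` walk the bitmap. A hypothetical, standalone version of the highest-vector scan being avoided (a sketch, not the kernel's `find_highest_vector()`):

#include <stdint.h>
#include <stdio.h>

/* Scan a 256-bit pending-interrupt bitmap from the top and return the
 * highest set vector, or -1 if none is pending. */
static int highest_pending_vector(const uint32_t irr[8])
{
	int word, bit;

	for (word = 7; word >= 0; word--)
		if (irr[word])
			for (bit = 31; bit >= 0; bit--)
				if (irr[word] & (1u << bit))
					return word * 32 + bit;
	return -1;
}

int main(void)
{
	uint32_t irr[8] = { 0 };
	irr[1] |= 1u << 5;				/* vector 37 pending */
	printf("%d\n", highest_pending_vector(irr));	/* prints 37 */
	return 0;
}

Since the empty case is by far the most common on interrupt-quiet vCPUs, a one-bit cache in front of the scan is a cheap win.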
@@ -12,6 +12,7 @@ struct kvm_lapic {
	struct kvm_timer lapic_timer;
	u32 divide_count;
	struct kvm_vcpu *vcpu;
	bool irr_pending;
	struct page *regs_page;
	void *regs;
	gpa_t vapic_addr;

@@ -28,6 +29,7 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
void kvm_apic_set_version(struct kvm_vcpu *vcpu);

int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);

@@ -44,4 +46,6 @@ void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);

int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
#endif
File diff suppressed because it is too large
@@ -37,6 +37,8 @@
#define PT32_ROOT_LEVEL 2
#define PT32E_ROOT_LEVEL 3

int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);

static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
{
	if (unlikely(vcpu->kvm->arch.n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))

@@ -75,7 +77,7 @@ static inline int is_paging(struct kvm_vcpu *vcpu)
	return vcpu->arch.cr0 & X86_CR0_PG;
}

static inline int is_present_pte(unsigned long pte)
static inline int is_present_gpte(unsigned long pte)
{
	return pte & PT_PRESENT_MASK;
}
arch/x86/kvm/mmutrace.h (new file, 220 lines)
@@ -0,0 +1,220 @@
#if !defined(_TRACE_KVMMMU_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_KVMMMU_H

#include <linux/tracepoint.h>
#include <linux/ftrace_event.h>

#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvmmmu
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE mmutrace

#define KVM_MMU_PAGE_FIELDS \
	__field(__u64, gfn) \
	__field(__u32, role) \
	__field(__u32, root_count) \
	__field(__u32, unsync)

#define KVM_MMU_PAGE_ASSIGN(sp) \
	__entry->gfn = sp->gfn; \
	__entry->role = sp->role.word; \
	__entry->root_count = sp->root_count; \
	__entry->unsync = sp->unsync;

#define KVM_MMU_PAGE_PRINTK() ({ \
	const char *ret = p->buffer + p->len; \
	static const char *access_str[] = { \
		"---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux" \
	}; \
	union kvm_mmu_page_role role; \
	\
	role.word = __entry->role; \
	\
	trace_seq_printf(p, "sp gfn %llx %u/%u q%u%s %s%s %spge" \
			 " %snxe root %u %s%c", \
			 __entry->gfn, role.level, role.glevels, \
			 role.quadrant, \
			 role.direct ? " direct" : "", \
			 access_str[role.access], \
			 role.invalid ? " invalid" : "", \
			 role.cr4_pge ? "" : "!", \
			 role.nxe ? "" : "!", \
			 __entry->root_count, \
			 __entry->unsync ? "unsync" : "sync", 0); \
	ret; \
	})

#define kvm_mmu_trace_pferr_flags \
	{ PFERR_PRESENT_MASK, "P" }, \
	{ PFERR_WRITE_MASK, "W" }, \
	{ PFERR_USER_MASK, "U" }, \
	{ PFERR_RSVD_MASK, "RSVD" }, \
	{ PFERR_FETCH_MASK, "F" }

/*
 * A pagetable walk has started
 */
TRACE_EVENT(
	kvm_mmu_pagetable_walk,
	TP_PROTO(u64 addr, int write_fault, int user_fault, int fetch_fault),
	TP_ARGS(addr, write_fault, user_fault, fetch_fault),

	TP_STRUCT__entry(
		__field(__u64, addr)
		__field(__u32, pferr)
	),

	TP_fast_assign(
		__entry->addr = addr;
		__entry->pferr = (!!write_fault << 1) | (!!user_fault << 2)
				 | (!!fetch_fault << 4);
	),

	TP_printk("addr %llx pferr %x %s", __entry->addr, __entry->pferr,
		  __print_flags(__entry->pferr, "|", kvm_mmu_trace_pferr_flags))
);


/* We just walked a paging element */
TRACE_EVENT(
	kvm_mmu_paging_element,
	TP_PROTO(u64 pte, int level),
	TP_ARGS(pte, level),

	TP_STRUCT__entry(
		__field(__u64, pte)
		__field(__u32, level)
	),

	TP_fast_assign(
		__entry->pte = pte;
		__entry->level = level;
	),

	TP_printk("pte %llx level %u", __entry->pte, __entry->level)
);

/* We set a pte accessed bit */
TRACE_EVENT(
	kvm_mmu_set_accessed_bit,
	TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size),
	TP_ARGS(table_gfn, index, size),

	TP_STRUCT__entry(
		__field(__u64, gpa)
	),

	TP_fast_assign(
		__entry->gpa = ((u64)table_gfn << PAGE_SHIFT)
				+ index * size;
	),

	TP_printk("gpa %llx", __entry->gpa)
);

/* We set a pte dirty bit */
TRACE_EVENT(
	kvm_mmu_set_dirty_bit,
	TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size),
	TP_ARGS(table_gfn, index, size),

	TP_STRUCT__entry(
		__field(__u64, gpa)
	),

	TP_fast_assign(
		__entry->gpa = ((u64)table_gfn << PAGE_SHIFT)
				+ index * size;
	),

	TP_printk("gpa %llx", __entry->gpa)
);

TRACE_EVENT(
	kvm_mmu_walker_error,
	TP_PROTO(u32 pferr),
	TP_ARGS(pferr),

	TP_STRUCT__entry(
		__field(__u32, pferr)
	),

	TP_fast_assign(
		__entry->pferr = pferr;
	),

	TP_printk("pferr %x %s", __entry->pferr,
		  __print_flags(__entry->pferr, "|", kvm_mmu_trace_pferr_flags))
);

TRACE_EVENT(
	kvm_mmu_get_page,
	TP_PROTO(struct kvm_mmu_page *sp, bool created),
	TP_ARGS(sp, created),

	TP_STRUCT__entry(
		KVM_MMU_PAGE_FIELDS
		__field(bool, created)
	),

	TP_fast_assign(
		KVM_MMU_PAGE_ASSIGN(sp)
		__entry->created = created;
	),

	TP_printk("%s %s", KVM_MMU_PAGE_PRINTK(),
		  __entry->created ? "new" : "existing")
);

TRACE_EVENT(
	kvm_mmu_sync_page,
	TP_PROTO(struct kvm_mmu_page *sp),
	TP_ARGS(sp),

	TP_STRUCT__entry(
		KVM_MMU_PAGE_FIELDS
	),

	TP_fast_assign(
		KVM_MMU_PAGE_ASSIGN(sp)
	),

	TP_printk("%s", KVM_MMU_PAGE_PRINTK())
);

TRACE_EVENT(
	kvm_mmu_unsync_page,
	TP_PROTO(struct kvm_mmu_page *sp),
	TP_ARGS(sp),

	TP_STRUCT__entry(
		KVM_MMU_PAGE_FIELDS
	),

	TP_fast_assign(
		KVM_MMU_PAGE_ASSIGN(sp)
	),

	TP_printk("%s", KVM_MMU_PAGE_PRINTK())
);

TRACE_EVENT(
	kvm_mmu_zap_page,
	TP_PROTO(struct kvm_mmu_page *sp),
	TP_ARGS(sp),

	TP_STRUCT__entry(
		KVM_MMU_PAGE_FIELDS
	),

	TP_fast_assign(
		KVM_MMU_PAGE_ASSIGN(sp)
	),

	TP_printk("%s", KVM_MMU_PAGE_PRINTK())
);

#endif /* _TRACE_KVMMMU_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
@@ -27,7 +27,8 @@
	#define guest_walker guest_walker64
	#define FNAME(name) paging##64_##name
	#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
	#define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK
	#define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
	#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
	#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
	#define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
	#define PT_LEVEL_BITS PT64_LEVEL_BITS

@@ -43,7 +44,8 @@
	#define guest_walker guest_walker32
	#define FNAME(name) paging##32_##name
	#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
	#define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK
	#define PT_LVL_ADDR_MASK(lvl) PT32_LVL_ADDR_MASK(lvl)
	#define PT_LVL_OFFSET_MASK(lvl) PT32_LVL_OFFSET_MASK(lvl)
	#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
	#define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
	#define PT_LEVEL_BITS PT32_LEVEL_BITS

@@ -53,8 +55,8 @@
	#error Invalid PTTYPE value
#endif

#define gpte_to_gfn FNAME(gpte_to_gfn)
#define gpte_to_gfn_pde FNAME(gpte_to_gfn_pde)
#define gpte_to_gfn_lvl FNAME(gpte_to_gfn_lvl)
#define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PT_PAGE_TABLE_LEVEL)

/*
 * The guest_walker structure emulates the behavior of the hardware page

@@ -71,14 +73,9 @@ struct guest_walker {
	u32 error_code;
};

static gfn_t gpte_to_gfn(pt_element_t gpte)
static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
{
	return (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
}

static gfn_t gpte_to_gfn_pde(pt_element_t gpte)
{
	return (gpte & PT_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT;
	return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT;
}

static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,

@@ -125,14 +122,16 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
	gpa_t pte_gpa;
	int rsvd_fault = 0;

	pgprintk("%s: addr %lx\n", __func__, addr);
	trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault,
				     fetch_fault);
walk:
	walker->level = vcpu->arch.mmu.root_level;
	pte = vcpu->arch.cr3;
#if PTTYPE == 64
	if (!is_long_mode(vcpu)) {
		pte = vcpu->arch.pdptrs[(addr >> 30) & 3];
		if (!is_present_pte(pte))
		pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3);
		trace_kvm_mmu_paging_element(pte, walker->level);
		if (!is_present_gpte(pte))
			goto not_present;
		--walker->level;
	}

@@ -150,12 +149,11 @@ walk:
		pte_gpa += index * sizeof(pt_element_t);
		walker->table_gfn[walker->level - 1] = table_gfn;
		walker->pte_gpa[walker->level - 1] = pte_gpa;
		pgprintk("%s: table_gfn[%d] %lx\n", __func__,
			 walker->level - 1, table_gfn);

		kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
		trace_kvm_mmu_paging_element(pte, walker->level);

		if (!is_present_pte(pte))
		if (!is_present_gpte(pte))
			goto not_present;

		rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);

@@ -175,6 +173,8 @@ walk:
#endif

		if (!(pte & PT_ACCESSED_MASK)) {
			trace_kvm_mmu_set_accessed_bit(table_gfn, index,
						       sizeof(pte));
			mark_page_dirty(vcpu->kvm, table_gfn);
			if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn,
			    index, pte, pte|PT_ACCESSED_MASK))

@@ -186,18 +186,24 @@ walk:

		walker->ptes[walker->level - 1] = pte;

		if (walker->level == PT_PAGE_TABLE_LEVEL) {
			walker->gfn = gpte_to_gfn(pte);
			break;
		}
		if ((walker->level == PT_PAGE_TABLE_LEVEL) ||
		    ((walker->level == PT_DIRECTORY_LEVEL) &&
		    (pte & PT_PAGE_SIZE_MASK) &&
		    (PTTYPE == 64 || is_pse(vcpu))) ||
		    ((walker->level == PT_PDPE_LEVEL) &&
		    (pte & PT_PAGE_SIZE_MASK) &&
		    is_long_mode(vcpu))) {
			int lvl = walker->level;

		if (walker->level == PT_DIRECTORY_LEVEL
		    && (pte & PT_PAGE_SIZE_MASK)
		    && (PTTYPE == 64 || is_pse(vcpu))) {
			walker->gfn = gpte_to_gfn_pde(pte);
			walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL);
			if (PTTYPE == 32 && is_cpuid_PSE36())
			walker->gfn = gpte_to_gfn_lvl(pte, lvl);
			walker->gfn += (addr & PT_LVL_OFFSET_MASK(lvl))
					>> PAGE_SHIFT;

			if (PTTYPE == 32 &&
			    walker->level == PT_DIRECTORY_LEVEL &&
			    is_cpuid_PSE36())
				walker->gfn += pse36_gfn_delta(pte);

			break;
		}

@@ -205,9 +211,10 @@ walk:
		--walker->level;
	}

	if (write_fault && !is_dirty_pte(pte)) {
	if (write_fault && !is_dirty_gpte(pte)) {
		bool ret;

		trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
		mark_page_dirty(vcpu->kvm, table_gfn);
		ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte,
			    pte|PT_DIRTY_MASK);

@@ -239,6 +246,7 @@ err:
		walker->error_code |= PFERR_FETCH_MASK;
	if (rsvd_fault)
		walker->error_code |= PFERR_RSVD_MASK;
	trace_kvm_mmu_walker_error(walker->error_code);
	return 0;
}

@@ -248,12 +256,11 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
	pt_element_t gpte;
	unsigned pte_access;
	pfn_t pfn;
	int largepage = vcpu->arch.update_pte.largepage;

	gpte = *(const pt_element_t *)pte;
	if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
		if (!is_present_pte(gpte))
			set_shadow_pte(spte, shadow_notrap_nonpresent_pte);
		if (!is_present_gpte(gpte))
			__set_spte(spte, shadow_notrap_nonpresent_pte);
		return;
	}
	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);

@@ -267,7 +274,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
		return;
	kvm_get_pfn(pfn);
	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
		     gpte & PT_DIRTY_MASK, NULL, largepage,
		     gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL,
		     gpte_to_gfn(gpte), pfn, true);
}

@@ -276,7 +283,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 */
static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
			 struct guest_walker *gw,
			 int user_fault, int write_fault, int largepage,
			 int user_fault, int write_fault, int hlevel,
			 int *ptwrite, pfn_t pfn)
{
	unsigned access = gw->pt_access;

@@ -289,19 +296,18 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
	pt_element_t curr_pte;
	struct kvm_shadow_walk_iterator iterator;

	if (!is_present_pte(gw->ptes[gw->level - 1]))
	if (!is_present_gpte(gw->ptes[gw->level - 1]))
		return NULL;

	for_each_shadow_entry(vcpu, addr, iterator) {
		level = iterator.level;
		sptep = iterator.sptep;
		if (level == PT_PAGE_TABLE_LEVEL
		    || (largepage && level == PT_DIRECTORY_LEVEL)) {
		if (iterator.level == hlevel) {
			mmu_set_spte(vcpu, sptep, access,
				     gw->pte_access & access,
				     user_fault, write_fault,
				     gw->ptes[gw->level-1] & PT_DIRTY_MASK,
				     ptwrite, largepage,
				     ptwrite, level,
				     gw->gfn, pfn, false);
			break;
		}

@@ -311,16 +317,19 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,

		if (is_large_pte(*sptep)) {
			rmap_remove(vcpu->kvm, sptep);
			set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
			__set_spte(sptep, shadow_trap_nonpresent_pte);
			kvm_flush_remote_tlbs(vcpu->kvm);
		}

		if (level == PT_DIRECTORY_LEVEL
		    && gw->level == PT_DIRECTORY_LEVEL) {
		if (level <= gw->level) {
			int delta = level - gw->level + 1;
			direct = 1;
			if (!is_dirty_pte(gw->ptes[level - 1]))
			if (!is_dirty_gpte(gw->ptes[level - delta]))
				access &= ~ACC_WRITE_MASK;
			table_gfn = gpte_to_gfn(gw->ptes[level - 1]);
			table_gfn = gpte_to_gfn(gw->ptes[level - delta]);
			/* advance table_gfn when emulating 1gb pages with 4k */
			if (delta == 0)
				table_gfn += PT_INDEX(addr, level);
		} else {
			direct = 0;
			table_gfn = gw->table_gfn[level - 2];

@@ -369,11 +378,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
	int user_fault = error_code & PFERR_USER_MASK;
	int fetch_fault = error_code & PFERR_FETCH_MASK;
	struct guest_walker walker;
	u64 *shadow_pte;
	u64 *sptep;
	int write_pt = 0;
	int r;
	pfn_t pfn;
	int largepage = 0;
	int level = PT_PAGE_TABLE_LEVEL;
	unsigned long mmu_seq;

	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);

@@ -399,14 +408,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
		return 0;
	}

	if (walker.level == PT_DIRECTORY_LEVEL) {
		gfn_t large_gfn;
		large_gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE-1);
		if (is_largepage_backed(vcpu, large_gfn)) {
			walker.gfn = large_gfn;
			largepage = 1;
		}
	if (walker.level >= PT_DIRECTORY_LEVEL) {
		level = min(walker.level, mapping_level(vcpu, walker.gfn));
		walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1);
	}

	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	smp_rmb();
	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);

@@ -422,11 +428,10 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
	if (mmu_notifier_retry(vcpu, mmu_seq))
		goto out_unlock;
	kvm_mmu_free_some_pages(vcpu);
	shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
				  largepage, &write_pt, pfn);

	sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
			     level, &write_pt, pfn);
	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
		 shadow_pte, *shadow_pte, write_pt);
		 sptep, *sptep, write_pt);

	if (!write_pt)
		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */

@@ -459,8 +464,9 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
		sptep = iterator.sptep;

		/* FIXME: properly handle invlpg on large guest pages */
		if (level == PT_PAGE_TABLE_LEVEL ||
		    ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) {
		if (level == PT_PAGE_TABLE_LEVEL ||
		    ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) ||
		    ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) {
			struct kvm_mmu_page *sp = page_header(__pa(sptep));

			pte_gpa = (sp->gfn << PAGE_SHIFT);

@@ -472,7 +478,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
				--vcpu->kvm->stat.lpages;
				need_flush = 1;
			}
			set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
			__set_spte(sptep, shadow_trap_nonpresent_pte);
			break;
		}

@@ -489,7 +495,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
	if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
				  sizeof(pt_element_t)))
		return;
	if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) {
	if (is_present_gpte(gpte) && (gpte & PT_ACCESSED_MASK)) {
		if (mmu_topup_memory_caches(vcpu))
			return;
		kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)&gpte,

@@ -536,7 +542,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
		r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, pt, sizeof pt);
		pte_gpa += ARRAY_SIZE(pt) * sizeof(pt_element_t);
		for (j = 0; j < ARRAY_SIZE(pt); ++j)
			if (r || is_present_pte(pt[j]))
			if (r || is_present_gpte(pt[j]))
				sp->spt[i+j] = shadow_trap_nonpresent_pte;
			else
				sp->spt[i+j] = shadow_notrap_nonpresent_pte;

@@ -574,23 +580,23 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
				  sizeof(pt_element_t)))
			return -EINVAL;

		if (gpte_to_gfn(gpte) != gfn || !is_present_pte(gpte) ||
		if (gpte_to_gfn(gpte) != gfn || !is_present_gpte(gpte) ||
		    !(gpte & PT_ACCESSED_MASK)) {
			u64 nonpresent;

			rmap_remove(vcpu->kvm, &sp->spt[i]);
			if (is_present_pte(gpte))
			if (is_present_gpte(gpte))
				nonpresent = shadow_trap_nonpresent_pte;
			else
				nonpresent = shadow_notrap_nonpresent_pte;
			set_shadow_pte(&sp->spt[i], nonpresent);
			__set_spte(&sp->spt[i], nonpresent);
			continue;
		}

		nr_present++;
		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
		set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
			 is_dirty_pte(gpte), 0, gfn,
			 is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn,
			 spte_to_pfn(sp->spt[i]), true, false);
	}

@@ -603,9 +609,10 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
#undef PT_BASE_ADDR_MASK
#undef PT_INDEX
#undef PT_LEVEL_MASK
#undef PT_DIR_BASE_ADDR_MASK
#undef PT_LVL_ADDR_MASK
#undef PT_LVL_OFFSET_MASK
#undef PT_LEVEL_BITS
#undef PT_MAX_FULL_LEVELS
#undef gpte_to_gfn
#undef gpte_to_gfn_pde
#undef gpte_to_gfn_lvl
#undef CMPXCHG
File diff suppressed because it is too large
@@ -9,12 +9,16 @@ static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer)
	int restart_timer = 0;
	wait_queue_head_t *q = &vcpu->wq;

	/* FIXME: this code should not know anything about vcpus */
	if (!atomic_inc_and_test(&ktimer->pending))
	/*
	 * There is a race window between reading and incrementing, but we do
	 * not care about potentially losing timer events in the !reinject
	 * case anyway.
	 */
	if (ktimer->reinject || !atomic_read(&ktimer->pending)) {
		atomic_inc(&ktimer->pending);
		/* FIXME: this code should not know anything about vcpus */
		set_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);

	if (!ktimer->reinject)
		atomic_set(&ktimer->pending, 1);
	}

	if (waitqueue_active(q))
		wake_up_interruptible(q);

@@ -33,7 +37,7 @@ enum hrtimer_restart kvm_timer_fn(struct hrtimer *data)
	struct kvm_vcpu *vcpu;
	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);

	vcpu = ktimer->kvm->vcpus[ktimer->vcpu_id];
	vcpu = ktimer->vcpu;
	if (!vcpu)
		return HRTIMER_NORESTART;
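The reworked tick accounting above queues every expired tick when reinjection is enabled, but coalesces ticks that fire while one is still pending when it is not. A toy model of that rule, with illustrative names that are not the kernel's:

#include <stdio.h>

/* With reinject, every tick is queued; without it, ticks that fire
 * while one is already pending collapse into a single event. */
struct timer_model { int reinject; int pending; };

static void tick(struct timer_model *t)
{
	if (t->reinject || t->pending == 0)
		t->pending++;		/* non-reinject caps at one */
}

int main(void)
{
	struct timer_model a = { 1, 0 }, b = { 0, 0 };
	for (int i = 0; i < 3; i++) {
		tick(&a);
		tick(&b);
	}
	printf("reinject=%d coalesced=%d\n", a.pending, b.pending); /* 3 1 */
	return 0;
}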
arch/x86/kvm/trace.h (new file, 355 lines)
@ -0,0 +1,355 @@
|
|||
#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
|
||||
#define _TRACE_KVM_H
|
||||
|
||||
#include <linux/tracepoint.h>
|
||||
|
||||
#undef TRACE_SYSTEM
|
||||
#define TRACE_SYSTEM kvm
|
||||
#define TRACE_INCLUDE_PATH arch/x86/kvm
|
||||
#define TRACE_INCLUDE_FILE trace
|
||||
|
||||
/*
|
||||
* Tracepoint for guest mode entry.
|
||||
*/
|
||||
TRACE_EVENT(kvm_entry,
|
||||
TP_PROTO(unsigned int vcpu_id),
|
||||
TP_ARGS(vcpu_id),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned int, vcpu_id )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->vcpu_id = vcpu_id;
|
||||
),
|
||||
|
||||
TP_printk("vcpu %u", __entry->vcpu_id)
|
||||
);
|
||||
|
||||
/*
|
||||
* Tracepoint for hypercall.
|
||||
*/
|
||||
TRACE_EVENT(kvm_hypercall,
|
||||
TP_PROTO(unsigned long nr, unsigned long a0, unsigned long a1,
|
||||
unsigned long a2, unsigned long a3),
|
||||
TP_ARGS(nr, a0, a1, a2, a3),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned long, nr )
|
||||
__field( unsigned long, a0 )
|
||||
__field( unsigned long, a1 )
|
||||
__field( unsigned long, a2 )
|
||||
__field( unsigned long, a3 )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->nr = nr;
|
||||
__entry->a0 = a0;
|
||||
__entry->a1 = a1;
|
||||
__entry->a2 = a2;
|
||||
__entry->a3 = a3;
|
||||
),
|
||||
|
||||
TP_printk("nr 0x%lx a0 0x%lx a1 0x%lx a2 0x%lx a3 0x%lx",
|
||||
__entry->nr, __entry->a0, __entry->a1, __entry->a2,
|
||||
__entry->a3)
|
||||
);
|
||||
|
||||
/*
|
||||
* Tracepoint for PIO.
|
||||
*/
|
||||
TRACE_EVENT(kvm_pio,
|
||||
TP_PROTO(unsigned int rw, unsigned int port, unsigned int size,
|
||||
unsigned int count),
|
||||
TP_ARGS(rw, port, size, count),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned int, rw )
|
||||
__field( unsigned int, port )
|
||||
__field( unsigned int, size )
|
||||
__field( unsigned int, count )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->rw = rw;
|
||||
__entry->port = port;
|
||||
__entry->size = size;
|
||||
__entry->count = count;
|
||||
),
|
||||
|
||||
TP_printk("pio_%s at 0x%x size %d count %d",
|
||||
__entry->rw ? "write" : "read",
|
||||
__entry->port, __entry->size, __entry->count)
|
||||
);
|
||||
|
||||
/*
|
||||
* Tracepoint for cpuid.
|
||||
*/
|
||||
TRACE_EVENT(kvm_cpuid,
|
||||
TP_PROTO(unsigned int function, unsigned long rax, unsigned long rbx,
|
||||
unsigned long rcx, unsigned long rdx),
|
||||
TP_ARGS(function, rax, rbx, rcx, rdx),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned int, function )
|
||||
__field( unsigned long, rax )
|
||||
__field( unsigned long, rbx )
|
||||
__field( unsigned long, rcx )
|
||||
__field( unsigned long, rdx )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->function = function;
|
||||
__entry->rax = rax;
|
||||
__entry->rbx = rbx;
|
||||
__entry->rcx = rcx;
|
||||
__entry->rdx = rdx;
|
||||
),
|
||||
|
||||
TP_printk("func %x rax %lx rbx %lx rcx %lx rdx %lx",
|
||||
__entry->function, __entry->rax,
|
||||
__entry->rbx, __entry->rcx, __entry->rdx)
|
||||
);
|
||||
|
||||
#define AREG(x) { APIC_##x, "APIC_" #x }
|
||||
|
||||
#define kvm_trace_symbol_apic \
|
||||
AREG(ID), AREG(LVR), AREG(TASKPRI), AREG(ARBPRI), AREG(PROCPRI), \
|
||||
AREG(EOI), AREG(RRR), AREG(LDR), AREG(DFR), AREG(SPIV), AREG(ISR), \
|
||||
AREG(TMR), AREG(IRR), AREG(ESR), AREG(ICR), AREG(ICR2), AREG(LVTT), \
|
||||
AREG(LVTTHMR), AREG(LVTPC), AREG(LVT0), AREG(LVT1), AREG(LVTERR), \
|
||||
AREG(TMICT), AREG(TMCCT), AREG(TDCR), AREG(SELF_IPI), AREG(EFEAT), \
|
||||
AREG(ECTRL)
|
||||
/*
|
||||
* Tracepoint for apic access.
|
||||
*/
|
||||
TRACE_EVENT(kvm_apic,
|
||||
TP_PROTO(unsigned int rw, unsigned int reg, unsigned int val),
|
||||
TP_ARGS(rw, reg, val),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned int, rw )
|
||||
__field( unsigned int, reg )
|
||||
__field( unsigned int, val )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->rw = rw;
|
||||
__entry->reg = reg;
|
||||
__entry->val = val;
|
||||
),
|
||||
|
||||
TP_printk("apic_%s %s = 0x%x",
|
||||
__entry->rw ? "write" : "read",
|
||||
__print_symbolic(__entry->reg, kvm_trace_symbol_apic),
|
||||
__entry->val)
|
||||
);
|
||||
|
||||
#define trace_kvm_apic_read(reg, val) trace_kvm_apic(0, reg, val)
|
||||
#define trace_kvm_apic_write(reg, val) trace_kvm_apic(1, reg, val)
|
||||
|
||||
/*
|
||||
* Tracepoint for kvm guest exit:
|
||||
*/
|
||||
TRACE_EVENT(kvm_exit,
|
||||
TP_PROTO(unsigned int exit_reason, unsigned long guest_rip),
|
||||
TP_ARGS(exit_reason, guest_rip),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned int, exit_reason )
|
||||
__field( unsigned long, guest_rip )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->exit_reason = exit_reason;
|
||||
__entry->guest_rip = guest_rip;
|
||||
),
|
||||
|
||||
TP_printk("reason %s rip 0x%lx",
|
||||
ftrace_print_symbols_seq(p, __entry->exit_reason,
|
||||
kvm_x86_ops->exit_reasons_str),
|
||||
__entry->guest_rip)
|
||||
);
|
||||
|
||||
/*
|
||||
* Tracepoint for kvm interrupt injection:
|
||||
*/
|
||||
TRACE_EVENT(kvm_inj_virq,
|
||||
TP_PROTO(unsigned int irq),
|
||||
TP_ARGS(irq),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned int, irq )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->irq = irq;
|
||||
),
|
||||
|
||||
TP_printk("irq %u", __entry->irq)
|
||||
);
|
||||
|
||||
/*
|
||||
* Tracepoint for page fault.
|
||||
*/
|
||||
TRACE_EVENT(kvm_page_fault,
|
||||
TP_PROTO(unsigned long fault_address, unsigned int error_code),
|
||||
TP_ARGS(fault_address, error_code),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned long, fault_address )
|
||||
__field( unsigned int, error_code )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->fault_address = fault_address;
|
||||
__entry->error_code = error_code;
|
||||
),
|
||||
|
||||
TP_printk("address %lx error_code %x",
|
||||
__entry->fault_address, __entry->error_code)
|
||||
);
|
||||
|
||||
/*
|
||||
* Tracepoint for guest MSR access.
|
||||
*/
|
||||
TRACE_EVENT(kvm_msr,
|
||||
TP_PROTO(unsigned int rw, unsigned int ecx, unsigned long data),
|
||||
TP_ARGS(rw, ecx, data),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned int, rw )
|
||||
__field( unsigned int, ecx )
|
||||
__field( unsigned long, data )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->rw = rw;
|
||||
__entry->ecx = ecx;
|
||||
__entry->data = data;
|
||||
),
|
||||
|
||||
TP_printk("msr_%s %x = 0x%lx",
|
||||
__entry->rw ? "write" : "read",
|
||||
__entry->ecx, __entry->data)
|
||||
);
|
||||
|
||||
#define trace_kvm_msr_read(ecx, data) trace_kvm_msr(0, ecx, data)
|
||||
#define trace_kvm_msr_write(ecx, data) trace_kvm_msr(1, ecx, data)
|
||||
|
||||
/*
|
||||
* Tracepoint for guest CR access.
|
||||
*/
|
||||
TRACE_EVENT(kvm_cr,
|
||||
TP_PROTO(unsigned int rw, unsigned int cr, unsigned long val),
|
||||
TP_ARGS(rw, cr, val),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned int, rw )
|
||||
__field( unsigned int, cr )
|
||||
__field( unsigned long, val )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->rw = rw;
|
||||
__entry->cr = cr;
|
||||
__entry->val = val;
|
||||
),
|
||||
|
||||
TP_printk("cr_%s %x = 0x%lx",
|
||||
__entry->rw ? "write" : "read",
|
||||
__entry->cr, __entry->val)
|
||||
);
|
||||
|
||||
#define trace_kvm_cr_read(cr, val) trace_kvm_cr(0, cr, val)
|
||||
#define trace_kvm_cr_write(cr, val) trace_kvm_cr(1, cr, val)
|
||||
|
TRACE_EVENT(kvm_pic_set_irq,
	TP_PROTO(__u8 chip, __u8 pin, __u8 elcr, __u8 imr, bool coalesced),
	TP_ARGS(chip, pin, elcr, imr, coalesced),

	TP_STRUCT__entry(
		__field(	__u8,	chip		)
		__field(	__u8,	pin		)
		__field(	__u8,	elcr		)
		__field(	__u8,	imr		)
		__field(	bool,	coalesced	)
	),

	TP_fast_assign(
		__entry->chip		= chip;
		__entry->pin		= pin;
		__entry->elcr		= elcr;
		__entry->imr		= imr;
		__entry->coalesced	= coalesced;
	),

	TP_printk("chip %u pin %u (%s%s)%s",
		  __entry->chip, __entry->pin,
		  (__entry->elcr & (1 << __entry->pin)) ? "level":"edge",
		  (__entry->imr & (1 << __entry->pin)) ? "|masked":"",
		  __entry->coalesced ? " (coalesced)" : "")
);

#define kvm_apic_dst_shorthand		\
	{0x0, "dst"},			\
	{0x1, "self"},			\
	{0x2, "all"},			\
	{0x3, "all-but-self"}

TRACE_EVENT(kvm_apic_ipi,
	TP_PROTO(__u32 icr_low, __u32 dest_id),
	TP_ARGS(icr_low, dest_id),

	TP_STRUCT__entry(
		__field(	__u32,	icr_low		)
		__field(	__u32,	dest_id		)
	),

	TP_fast_assign(
		__entry->icr_low	= icr_low;
		__entry->dest_id	= dest_id;
	),

	TP_printk("dst %x vec %u (%s|%s|%s|%s|%s)",
		  __entry->dest_id, (u8)__entry->icr_low,
		  __print_symbolic((__entry->icr_low >> 8 & 0x7),
				   kvm_deliver_mode),
		  (__entry->icr_low & (1<<11)) ? "logical" : "physical",
		  (__entry->icr_low & (1<<14)) ? "assert" : "de-assert",
		  (__entry->icr_low & (1<<15)) ? "level" : "edge",
		  __print_symbolic((__entry->icr_low >> 18 & 0x3),
				   kvm_apic_dst_shorthand))
);

TRACE_EVENT(kvm_apic_accept_irq,
	TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec, bool coalesced),
	TP_ARGS(apicid, dm, tm, vec, coalesced),

	TP_STRUCT__entry(
		__field(	__u32,	apicid		)
		__field(	__u16,	dm		)
		__field(	__u8,	tm		)
		__field(	__u8,	vec		)
		__field(	bool,	coalesced	)
	),

	TP_fast_assign(
		__entry->apicid		= apicid;
		__entry->dm		= dm;
		__entry->tm		= tm;
		__entry->vec		= vec;
		__entry->coalesced	= coalesced;
	),

	TP_printk("apicid %x vec %u (%s|%s)%s",
		  __entry->apicid, __entry->vec,
		  __print_symbolic((__entry->dm >> 8 & 0x7), kvm_deliver_mode),
		  __entry->tm ? "level" : "edge",
		  __entry->coalesced ? " (coalesced)" : "")
);

#endif /* _TRACE_KVM_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
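The kvm_apic_ipi event above fixes the ICR bit layout it reports: vector in bits 0-7, delivery mode in bits 8-10, logical bit 11, assert bit 14, trigger bit 15, destination shorthand in bits 18-19. A standalone sketch of the same decoding, handy for checking trace output by hand (the sample value is illustrative):

#include <stdio.h>

static const char *deliver_mode[8] = {
	"Fixed", "LowPrio", "SMI", "Res3", "NMI", "INIT", "SIPI", "ExtINT"
};
static const char *shorthand[4] = { "dst", "self", "all", "all-but-self" };

/* Decode an APIC ICR low word the way the kvm_apic_ipi
 * tracepoint above formats it. */
static void decode_icr(unsigned int icr_low, unsigned int dest_id)
{
	printf("dst %x vec %u (%s|%s|%s|%s|%s)\n",
	       dest_id, icr_low & 0xff,
	       deliver_mode[(icr_low >> 8) & 0x7],
	       (icr_low & (1 << 11)) ? "logical" : "physical",
	       (icr_low & (1 << 14)) ? "assert" : "de-assert",
	       (icr_low & (1 << 15)) ? "level" : "edge",
	       shorthand[(icr_low >> 18) & 0x3]);
}

int main(void)
{
	decode_icr(0x000c4500, 0);	/* INIT, all-but-self - sample value */
	return 0;
}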
File diff suppressed because it is too large
@@ -31,4 +31,8 @@ static inline bool kvm_exception_is_soft(unsigned int nr)
{
	return (nr == BP_VECTOR) || (nr == OF_VECTOR);
}

struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
					      u32 function, u32 index);

#endif
@@ -104,6 +104,7 @@ EXPORT_SYMBOL(kunmap);
EXPORT_SYMBOL(kmap_atomic);
EXPORT_SYMBOL(kunmap_atomic);
EXPORT_SYMBOL(kmap_atomic_prot);
EXPORT_SYMBOL(kmap_atomic_to_page);

void __init set_highmem_pages_init(void)
{
@@ -3,6 +3,11 @@ ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/kvm.h \
header-y  += kvm.h
endif

ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/kvm_para.h \
		  $(srctree)/include/asm-$(SRCARCH)/kvm_para.h),)
header-y  += kvm_para.h
endif

ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/a.out.h \
		  $(srctree)/include/asm-$(SRCARCH)/a.out.h),)
unifdef-y += a.out.h
@@ -268,6 +268,10 @@ ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/kvm.h \
  $(srctree)/include/asm-$(SRCARCH)/kvm.h),)
unifdef-y += kvm.h
endif
ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/kvm_para.h \
  $(srctree)/include/asm-$(SRCARCH)/kvm_para.h),)
unifdef-y += kvm_para.h
endif
unifdef-y += llc.h
unifdef-y += loop.h
unifdef-y += lp.h
@@ -14,7 +14,7 @@
#define KVM_API_VERSION 12

/* for KVM_TRACE_ENABLE */
/* for KVM_TRACE_ENABLE, deprecated */
struct kvm_user_trace_setup {
	__u32 buf_size; /* sub_buffer size of each per-cpu */
	__u32 buf_nr; /* the number of sub_buffers of each per-cpu */

@@ -70,6 +70,14 @@ struct kvm_irqchip {
	} chip;
};

/* for KVM_CREATE_PIT2 */
struct kvm_pit_config {
	__u32 flags;
	__u32 pad[15];
};

#define KVM_PIT_SPEAKER_DUMMY     1

#define KVM_EXIT_UNKNOWN          0
#define KVM_EXIT_EXCEPTION        1
#define KVM_EXIT_IO               2

@@ -87,6 +95,10 @@ struct kvm_irqchip {
#define KVM_EXIT_S390_RESET       14
#define KVM_EXIT_DCR              15
#define KVM_EXIT_NMI              16
#define KVM_EXIT_INTERNAL_ERROR   17

/* For KVM_EXIT_INTERNAL_ERROR */
#define KVM_INTERNAL_ERROR_EMULATION 1

/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
struct kvm_run {

@@ -173,6 +185,9 @@ struct kvm_run {
			__u32 data;
			__u8  is_write;
		} dcr;
		struct {
			__u32 suberror;
		} internal;
		/* Fix the size of the union. */
		char padding[256];
	};

@@ -292,6 +307,28 @@ struct kvm_guest_debug {
	struct kvm_guest_debug_arch arch;
};

enum {
	kvm_ioeventfd_flag_nr_datamatch,
	kvm_ioeventfd_flag_nr_pio,
	kvm_ioeventfd_flag_nr_deassign,
	kvm_ioeventfd_flag_nr_max,
};

#define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch)
#define KVM_IOEVENTFD_FLAG_PIO       (1 << kvm_ioeventfd_flag_nr_pio)
#define KVM_IOEVENTFD_FLAG_DEASSIGN  (1 << kvm_ioeventfd_flag_nr_deassign)

#define KVM_IOEVENTFD_VALID_FLAG_MASK ((1 << kvm_ioeventfd_flag_nr_max) - 1)

struct kvm_ioeventfd {
	__u64 datamatch;
	__u64 addr;        /* legal pio/mmio address */
	__u32 len;         /* 1, 2, 4, or 8 bytes    */
	__s32 fd;
	__u32 flags;
	__u8  pad[36];
};

#define KVM_TRC_SHIFT           16
/*
 * kvm trace categories

@@ -310,35 +347,6 @@ struct kvm_guest_debug {
#define KVM_TRC_CYCLE_SIZE      8
#define KVM_TRC_EXTRA_MAX       7

/* This structure represents a single trace buffer record. */
struct kvm_trace_rec {
	/* variable rec_val
	 * is split into:
	 * bits 0 - 27  -> event id
	 * bits 28 -30  -> number of extra data args of size u32
	 * bits 31      -> binary indicator for if tsc is in record
	 */
	__u32 rec_val;
	__u32 pid;
	__u32 vcpu_id;
	union {
		struct {
			__u64 timestamp;
			__u32 extra_u32[KVM_TRC_EXTRA_MAX];
		} __attribute__((packed)) timestamp;
		struct {
			__u32 extra_u32[KVM_TRC_EXTRA_MAX];
		} notimestamp;
	} u;
};

#define TRACE_REC_EVENT_ID(val) \
		(0x0fffffff & (val))
#define TRACE_REC_NUM_DATA_ARGS(val) \
		(0x70000000 & ((val) << 28))
#define TRACE_REC_TCS(val) \
		(0x80000000 & ((val) << 31))

#define KVMIO 0xAE

/*

@@ -415,6 +423,19 @@ struct kvm_trace_rec
#define KVM_CAP_ASSIGN_DEV_IRQ 29
/* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
#define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
#ifdef __KVM_HAVE_MCE
#define KVM_CAP_MCE 31
#endif
#define KVM_CAP_IRQFD 32
#ifdef __KVM_HAVE_PIT
#define KVM_CAP_PIT2 33
#endif
#define KVM_CAP_SET_BOOT_CPU_ID 34
#ifdef __KVM_HAVE_PIT_STATE2
#define KVM_CAP_PIT_STATE2 35
#endif
#define KVM_CAP_IOEVENTFD 36
#define KVM_CAP_SET_IDENTITY_MAP_ADDR 37

#ifdef KVM_CAP_IRQ_ROUTING

@@ -454,15 +475,32 @@ struct kvm_irq_routing {
#endif

#ifdef KVM_CAP_MCE
/* x86 MCE */
struct kvm_x86_mce {
	__u64 status;
	__u64 addr;
	__u64 misc;
	__u64 mcg_status;
	__u8  bank;
	__u8  pad1[7];
	__u64 pad2[3];
};
#endif

#define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)

struct kvm_irqfd {
	__u32 fd;
	__u32 gsi;
	__u32 flags;
	__u8  pad[20];
};

/*
 * ioctls for VM fds
 */
#define KVM_SET_MEMORY_REGION     _IOW(KVMIO, 0x40, struct kvm_memory_region)
#define KVM_SET_NR_MMU_PAGES      _IO(KVMIO, 0x44)
#define KVM_GET_NR_MMU_PAGES      _IO(KVMIO, 0x45)
#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \
					struct kvm_userspace_memory_region)
#define KVM_SET_TSS_ADDR          _IO(KVMIO, 0x47)
/*
 * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
 * a vcpu fd.

@@ -470,6 +508,12 @@ struct kvm_irq_routing {
#define KVM_CREATE_VCPU           _IO(KVMIO, 0x41)
#define KVM_GET_DIRTY_LOG         _IOW(KVMIO, 0x42, struct kvm_dirty_log)
#define KVM_SET_MEMORY_ALIAS      _IOW(KVMIO, 0x43, struct kvm_memory_alias)
#define KVM_SET_NR_MMU_PAGES      _IO(KVMIO, 0x44)
#define KVM_GET_NR_MMU_PAGES      _IO(KVMIO, 0x45)
#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \
					struct kvm_userspace_memory_region)
#define KVM_SET_TSS_ADDR          _IO(KVMIO, 0x47)
#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64)
/* Device model IOC */
#define KVM_CREATE_IRQCHIP        _IO(KVMIO, 0x60)
#define KVM_IRQ_LINE              _IOW(KVMIO, 0x61, struct kvm_irq_level)

@@ -498,6 +542,10 @@ struct kvm_irq_routing {
#define KVM_ASSIGN_SET_MSIX_ENTRY \
			_IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry)
#define KVM_DEASSIGN_DEV_IRQ      _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
#define KVM_IRQFD                 _IOW(KVMIO, 0x76, struct kvm_irqfd)
#define KVM_CREATE_PIT2           _IOW(KVMIO, 0x77, struct kvm_pit_config)
#define KVM_SET_BOOT_CPU_ID       _IO(KVMIO, 0x78)
#define KVM_IOEVENTFD             _IOW(KVMIO, 0x79, struct kvm_ioeventfd)

/*
 * ioctls for vcpu fds

@@ -541,6 +589,10 @@ struct kvm_irq_routing {
#define KVM_NMI                   _IO(KVMIO, 0x9a)
/* Available with KVM_CAP_SET_GUEST_DEBUG */
#define KVM_SET_GUEST_DEBUG       _IOW(KVMIO, 0x9b, struct kvm_guest_debug)
/* MCE for x86 */
#define KVM_X86_SETUP_MCE         _IOW(KVMIO, 0x9c, __u64)
#define KVM_X86_GET_MCE_CAP_SUPPORTED _IOR(KVMIO, 0x9d, __u64)
#define KVM_X86_SET_MCE           _IOW(KVMIO, 0x9e, struct kvm_x86_mce)

/*
 * Deprecated interfaces

@@ -563,6 +615,9 @@ struct kvm_debug_guest {
#define KVM_IA64_VCPU_GET_STACK   _IOR(KVMIO, 0x9a, void *)
#define KVM_IA64_VCPU_SET_STACK   _IOW(KVMIO, 0x9b, void *)

#define KVM_GET_PIT2              _IOR(KVMIO, 0x9f, struct kvm_pit_state2)
#define KVM_SET_PIT2              _IOW(KVMIO, 0xa0, struct kvm_pit_state2)

#define KVM_TRC_INJ_VIRQ          (KVM_TRC_HANDLER + 0x02)
#define KVM_TRC_REDELIVER_EVT     (KVM_TRC_HANDLER + 0x03)
#define KVM_TRC_PEND_INTR         (KVM_TRC_HANDLER + 0x04)

@@ -633,7 +688,7 @@ struct kvm_assigned_msix_nr {
	__u16 padding;
};

#define KVM_MAX_MSIX_PER_DEV      512
#define KVM_MAX_MSIX_PER_DEV      256
struct kvm_assigned_msix_entry {
	__u32 assigned_dev_id;
	__u32 gsi;
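Userspace is expected to probe the new capability numbers before relying on the matching ioctls. A hedged sketch of that probe using KVM_CHECK_EXTENSION (defined earlier in this header, outside the hunks shown):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);

	if (kvm < 0) {
		perror("/dev/kvm");
		return 1;
	}
	/* KVM_CHECK_EXTENSION returns > 0 when a capability is present */
	printf("irqfd:     %s\n",
	       ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_IRQFD) > 0 ? "yes" : "no");
	printf("ioeventfd: %s\n",
	       ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_IOEVENTFD) > 0 ? "yes" : "no");
	return 0;
}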
@@ -42,6 +42,7 @@
#define KVM_USERSPACE_IRQ_SOURCE_ID 0

struct kvm;
struct kvm_vcpu;
extern struct kmem_cache *kvm_vcpu_cache;

@@ -59,10 +60,18 @@ struct kvm_io_bus {

void kvm_io_bus_init(struct kvm_io_bus *bus);
void kvm_io_bus_destroy(struct kvm_io_bus *bus);
struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus,
					  gpa_t addr, int len, int is_write);
void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
			     struct kvm_io_device *dev);
int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr, int len,
		     const void *val);
int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len,
		    void *val);
int __kvm_io_bus_register_dev(struct kvm_io_bus *bus,
			      struct kvm_io_device *dev);
int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus,
			    struct kvm_io_device *dev);
void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus,
				 struct kvm_io_device *dev);
void kvm_io_bus_unregister_dev(struct kvm *kvm, struct kvm_io_bus *bus,
			       struct kvm_io_device *dev);

struct kvm_vcpu {
	struct kvm *kvm;

@@ -103,7 +112,7 @@ struct kvm_memory_slot {
	struct {
		unsigned long rmap_pde;
		int write_count;
	} *lpage_info;
	} *lpage_info[KVM_NR_PAGE_SIZES - 1];
	unsigned long userspace_addr;
	int user_alloc;
};

@@ -124,7 +133,6 @@ struct kvm_kernel_irq_routing_entry {
};

struct kvm {
	struct mutex lock; /* protects the vcpus array and APIC accesses */
	spinlock_t mmu_lock;
	spinlock_t requests_lock;
	struct rw_semaphore slots_lock;

@@ -132,10 +140,23 @@ struct kvm {
	int nmemslots;
	struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +
					KVM_PRIVATE_MEM_SLOTS];
#ifdef CONFIG_KVM_APIC_ARCHITECTURE
	u32 bsp_vcpu_id;
	struct kvm_vcpu *bsp_vcpu;
#endif
	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
	atomic_t online_vcpus;
	struct list_head vm_list;
	struct mutex lock;
	struct kvm_io_bus mmio_bus;
	struct kvm_io_bus pio_bus;
#ifdef CONFIG_HAVE_KVM_EVENTFD
	struct {
		spinlock_t        lock;
		struct list_head  items;
	} irqfds;
	struct list_head ioeventfds;
#endif
	struct kvm_vm_stat stat;
	struct kvm_arch arch;
	atomic_t users_count;

@@ -144,6 +165,7 @@ struct kvm {
	struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
#endif

	struct mutex irq_lock;
#ifdef CONFIG_HAVE_KVM_IRQCHIP
	struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
	struct hlist_head mask_notifier_list;

@@ -167,6 +189,17 @@ struct kvm {
#define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt)
#define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt)

static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
{
	smp_rmb();
	return kvm->vcpus[i];
}

#define kvm_for_each_vcpu(idx, vcpup, kvm) \
	for (idx = 0, vcpup = kvm_get_vcpu(kvm, idx); \
	     idx < atomic_read(&kvm->online_vcpus) && vcpup; \
	     vcpup = kvm_get_vcpu(kvm, ++idx))

int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);

@@ -201,6 +234,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
				struct kvm_userspace_memory_region *mem,
				struct kvm_memory_slot old,
				int user_alloc);
void kvm_disable_largepages(void);
void kvm_arch_flush_shadow(struct kvm *kvm);
gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);

@@ -243,8 +277,6 @@ long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg);
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg);
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);

int kvm_dev_ioctl_check_extension(long ext);

@@ -300,7 +332,6 @@ int kvm_arch_hardware_setup(void);
void kvm_arch_hardware_unsetup(void);
void kvm_arch_check_processor_compat(void *rtn);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);

void kvm_free_physmem(struct kvm *kvm);

@@ -309,8 +340,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm);
void kvm_free_all_assigned_devices(struct kvm *kvm);
void kvm_arch_sync_events(struct kvm *kvm);

int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
void kvm_vcpu_kick(struct kvm_vcpu *vcpu);

@@ -366,7 +395,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
void kvm_register_irq_ack_notifier(struct kvm *kvm,
				   struct kvm_irq_ack_notifier *kian);
void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian);
void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
				     struct kvm_irq_ack_notifier *kian);
int kvm_request_irq_source_id(struct kvm *kvm);
void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);

@@ -459,37 +489,6 @@ struct kvm_stats_debugfs_item {
extern struct kvm_stats_debugfs_item debugfs_entries[];
extern struct dentry *kvm_debugfs_dir;

#define KVMTRACE_5D(evt, vcpu, d1, d2, d3, d4, d5, name) \
	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
						vcpu, 5, d1, d2, d3, d4, d5)
#define KVMTRACE_4D(evt, vcpu, d1, d2, d3, d4, name) \
	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
						vcpu, 4, d1, d2, d3, d4, 0)
#define KVMTRACE_3D(evt, vcpu, d1, d2, d3, name) \
	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
						vcpu, 3, d1, d2, d3, 0, 0)
#define KVMTRACE_2D(evt, vcpu, d1, d2, name) \
	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
						vcpu, 2, d1, d2, 0, 0, 0)
#define KVMTRACE_1D(evt, vcpu, d1, name) \
	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
						vcpu, 1, d1, 0, 0, 0, 0)
#define KVMTRACE_0D(evt, vcpu, name) \
	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
						vcpu, 0, 0, 0, 0, 0, 0)

#ifdef CONFIG_KVM_TRACE
int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg);
void kvm_trace_cleanup(void);
#else
static inline
int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg)
{
	return -EINVAL;
}
#define kvm_trace_cleanup() ((void)0)
#endif

#ifdef KVM_ARCH_WANT_MMU_NOTIFIER
static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_seq)
{

@@ -525,4 +524,33 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}

#endif

#ifdef CONFIG_HAVE_KVM_EVENTFD

void kvm_eventfd_init(struct kvm *kvm);
int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags);
void kvm_irqfd_release(struct kvm *kvm);
int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);

#else

static inline void kvm_eventfd_init(struct kvm *kvm) {}
static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
{
	return -EINVAL;
}

static inline void kvm_irqfd_release(struct kvm *kvm) {}
static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	return -ENOSYS;
}

#endif /* CONFIG_HAVE_KVM_EVENTFD */

#ifdef CONFIG_KVM_APIC_ARCHITECTURE
static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
{
	return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id;
}
#endif
#endif
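kvm_get_vcpu() pairs its smp_rmb() with the barrier performed when a vcpu is installed, so kvm_for_each_vcpu only ever observes fully initialized vcpus up to online_vcpus. An illustrative in-kernel sketch of the intended usage pattern (not part of this change; kvm_vcpu_kick is declared above):

/* Illustrative only: walk every online vcpu and kick it.
 * This mirrors how in-tree callers use the new iterator. */
static void example_kick_all_vcpus(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	int i;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_vcpu_kick(vcpu);
}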
@@ -13,6 +13,7 @@
#define KVM_ENOSYS		1000
#define KVM_EFAULT		EFAULT
#define KVM_E2BIG		E2BIG
#define KVM_EPERM		EPERM

#define KVM_HC_VAPIC_POLL_IRQ	1
#define KVM_HC_MMU_OP		2
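On x86 a guest issues these hypercalls with Intel's vmcall instruction (AMD uses vmmcall; in-tree code patches between them), number in rax and result returned in rax. That register ABI lives in asm/kvm_para.h, which is outside this hunk, so treat the details below as an assumption. A rough guest-side sketch:

/* Guest-side sketch of a zero-argument KVM hypercall on x86.
 * Register ABI (nr in rax, result in rax) is an assumption here,
 * taken from asm/kvm_para.h rather than this hunk. */
static inline long example_kvm_hypercall0(unsigned int nr)
{
	long ret;

	asm volatile("vmcall" : "=a"(ret) : "a"(nr) : "memory");
	return ret;
}

/* e.g.: example_kvm_hypercall0(KVM_HC_VAPIC_POLL_IRQ); */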
151	include/trace/events/kvm.h	Normal file
@@ -0,0 +1,151 @@
#if !defined(_TRACE_KVM_MAIN_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_KVM_MAIN_H

#include <linux/tracepoint.h>

#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm
#define TRACE_INCLUDE_FILE kvm

#if defined(__KVM_HAVE_IOAPIC)
TRACE_EVENT(kvm_set_irq,
	TP_PROTO(unsigned int gsi, int level, int irq_source_id),
	TP_ARGS(gsi, level, irq_source_id),

	TP_STRUCT__entry(
		__field(	unsigned int,	gsi		)
		__field(	int,		level		)
		__field(	int,		irq_source_id	)
	),

	TP_fast_assign(
		__entry->gsi		= gsi;
		__entry->level		= level;
		__entry->irq_source_id	= irq_source_id;
	),

	TP_printk("gsi %u level %d source %d",
		  __entry->gsi, __entry->level, __entry->irq_source_id)
);

#define kvm_deliver_mode		\
	{0x0, "Fixed"},			\
	{0x1, "LowPrio"},		\
	{0x2, "SMI"},			\
	{0x3, "Res3"},			\
	{0x4, "NMI"},			\
	{0x5, "INIT"},			\
	{0x6, "SIPI"},			\
	{0x7, "ExtINT"}

TRACE_EVENT(kvm_ioapic_set_irq,
	TP_PROTO(__u64 e, int pin, bool coalesced),
	TP_ARGS(e, pin, coalesced),

	TP_STRUCT__entry(
		__field(	__u64,		e		)
		__field(	int,		pin		)
		__field(	bool,		coalesced	)
	),

	TP_fast_assign(
		__entry->e		= e;
		__entry->pin		= pin;
		__entry->coalesced	= coalesced;
	),

	TP_printk("pin %u dst %x vec=%u (%s|%s|%s%s)%s",
		  __entry->pin, (u8)(__entry->e >> 56), (u8)__entry->e,
		  __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode),
		  (__entry->e & (1<<11)) ? "logical" : "physical",
		  (__entry->e & (1<<15)) ? "level" : "edge",
		  (__entry->e & (1<<16)) ? "|masked" : "",
		  __entry->coalesced ? " (coalesced)" : "")
);

TRACE_EVENT(kvm_msi_set_irq,
	TP_PROTO(__u64 address, __u64 data),
	TP_ARGS(address, data),

	TP_STRUCT__entry(
		__field(	__u64,		address		)
		__field(	__u64,		data		)
	),

	TP_fast_assign(
		__entry->address	= address;
		__entry->data		= data;
	),

	TP_printk("dst %u vec %x (%s|%s|%s%s)",
		  (u8)(__entry->address >> 12), (u8)__entry->data,
		  __print_symbolic((__entry->data >> 8 & 0x7), kvm_deliver_mode),
		  (__entry->address & (1<<2)) ? "logical" : "physical",
		  (__entry->data & (1<<15)) ? "level" : "edge",
		  (__entry->address & (1<<3)) ? "|rh" : "")
);

#define kvm_irqchips						\
	{KVM_IRQCHIP_PIC_MASTER,	"PIC master"},		\
	{KVM_IRQCHIP_PIC_SLAVE,		"PIC slave"},		\
	{KVM_IRQCHIP_IOAPIC,		"IOAPIC"}

TRACE_EVENT(kvm_ack_irq,
	TP_PROTO(unsigned int irqchip, unsigned int pin),
	TP_ARGS(irqchip, pin),

	TP_STRUCT__entry(
		__field(	unsigned int,	irqchip		)
		__field(	unsigned int,	pin		)
	),

	TP_fast_assign(
		__entry->irqchip	= irqchip;
		__entry->pin		= pin;
	),

	TP_printk("irqchip %s pin %u",
		  __print_symbolic(__entry->irqchip, kvm_irqchips),
		  __entry->pin)
);

#endif /* defined(__KVM_HAVE_IOAPIC) */

#define KVM_TRACE_MMIO_READ_UNSATISFIED 0
#define KVM_TRACE_MMIO_READ 1
#define KVM_TRACE_MMIO_WRITE 2

#define kvm_trace_symbol_mmio \
	{ KVM_TRACE_MMIO_READ_UNSATISFIED, "unsatisfied-read" }, \
	{ KVM_TRACE_MMIO_READ, "read" }, \
	{ KVM_TRACE_MMIO_WRITE, "write" }

TRACE_EVENT(kvm_mmio,
	TP_PROTO(int type, int len, u64 gpa, u64 val),
	TP_ARGS(type, len, gpa, val),

	TP_STRUCT__entry(
		__field(	u32,	type		)
		__field(	u32,	len		)
		__field(	u64,	gpa		)
		__field(	u64,	val		)
	),

	TP_fast_assign(
		__entry->type		= type;
		__entry->len		= len;
		__entry->gpa		= gpa;
		__entry->val		= val;
	),

	TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx",
		  __print_symbolic(__entry->type, kvm_trace_symbol_mmio),
		  __entry->len, __entry->gpa, __entry->val)
);

#endif /* _TRACE_KVM_MAIN_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
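kvm_msi_set_irq above documents the MSI encoding it relies on: destination id in address bits 12-19, vector in data bits 0-7, delivery mode in data bits 8-10, and so on. A standalone decoder mirroring that TP_printk, useful for sanity-checking trace lines (sample values are illustrative):

#include <stdio.h>

static const char *deliver_mode[8] = {
	"Fixed", "LowPrio", "SMI", "Res3", "NMI", "INIT", "SIPI", "ExtINT"
};

/* Mirror the kvm_msi_set_irq TP_printk above. */
static void decode_msi(unsigned long long address, unsigned long long data)
{
	printf("dst %u vec %x (%s|%s|%s%s)\n",
	       (unsigned char)(address >> 12), (unsigned char)data,
	       deliver_mode[(data >> 8) & 0x7],
	       (address & (1ULL << 2)) ? "logical" : "physical",
	       (data & (1ULL << 15)) ? "level" : "edge",
	       (address & (1ULL << 3)) ? "|rh" : "");
}

int main(void)
{
	decode_msi(0xfee01000ULL, 0x4041ULL);	/* sample values */
	return 0;
}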
@@ -234,6 +234,7 @@ unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)

	return 1UL << (hstate->order + PAGE_SHIFT);
}
EXPORT_SYMBOL_GPL(vma_kernel_pagesize);

/*
 * Return the page size being used by the MMU to back a VMA. In the majority
14	virt/kvm/Kconfig	Normal file
@@ -0,0 +1,14 @@
# KVM common configuration items and defaults

config HAVE_KVM
       bool

config HAVE_KVM_IRQCHIP
       bool

config HAVE_KVM_EVENTFD
       bool
       select EVENTFD

config KVM_APIC_ARCHITECTURE
       bool
@@ -14,32 +14,28 @@

#include "coalesced_mmio.h"

static int coalesced_mmio_in_range(struct kvm_io_device *this,
				   gpa_t addr, int len, int is_write)
static inline struct kvm_coalesced_mmio_dev *to_mmio(struct kvm_io_device *dev)
{
	return container_of(dev, struct kvm_coalesced_mmio_dev, dev);
}

static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev,
				   gpa_t addr, int len)
{
	struct kvm_coalesced_mmio_dev *dev =
		(struct kvm_coalesced_mmio_dev*)this->private;
	struct kvm_coalesced_mmio_zone *zone;
	int next;
	struct kvm_coalesced_mmio_ring *ring;
	unsigned avail;
	int i;

	if (!is_write)
		return 0;

	/* kvm->lock is taken by the caller and must be not released before
	 * dev.read/write
	 */

	/* Are we able to batch it ? */

	/* last is the first free entry
	 * check if we don't meet the first used entry
	 * there is always one unused entry in the buffer
	 */

	next = (dev->kvm->coalesced_mmio_ring->last + 1) %
		KVM_COALESCED_MMIO_MAX;
	if (next == dev->kvm->coalesced_mmio_ring->first) {
	ring = dev->kvm->coalesced_mmio_ring;
	avail = (ring->first - ring->last - 1) % KVM_COALESCED_MMIO_MAX;
	if (avail < KVM_MAX_VCPUS) {
		/* full */
		return 0;
	}

@@ -60,14 +56,15 @@ static int coalesced_mmio_in_range(struct kvm_io_device *this,
	return 0;
}

static void coalesced_mmio_write(struct kvm_io_device *this,
				 gpa_t addr, int len, const void *val)
static int coalesced_mmio_write(struct kvm_io_device *this,
				gpa_t addr, int len, const void *val)
{
	struct kvm_coalesced_mmio_dev *dev =
		(struct kvm_coalesced_mmio_dev*)this->private;
	struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
	struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
	if (!coalesced_mmio_in_range(dev, addr, len))
		return -EOPNOTSUPP;

	/* kvm->lock must be taken by caller before call to in_range()*/
	spin_lock(&dev->lock);

	/* copy data in first free entry of the ring */

@@ -76,29 +73,40 @@ static void coalesced_mmio_write(struct kvm_io_device *this,
	memcpy(ring->coalesced_mmio[ring->last].data, val, len);
	smp_wmb();
	ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX;
	spin_unlock(&dev->lock);
	return 0;
}

static void coalesced_mmio_destructor(struct kvm_io_device *this)
{
	kfree(this);
	struct kvm_coalesced_mmio_dev *dev = to_mmio(this);

	kfree(dev);
}

static const struct kvm_io_device_ops coalesced_mmio_ops = {
	.write      = coalesced_mmio_write,
	.destructor = coalesced_mmio_destructor,
};

int kvm_coalesced_mmio_init(struct kvm *kvm)
{
	struct kvm_coalesced_mmio_dev *dev;
	int ret;

	dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL);
	if (!dev)
		return -ENOMEM;
	dev->dev.write  = coalesced_mmio_write;
	dev->dev.in_range  = coalesced_mmio_in_range;
	dev->dev.destructor  = coalesced_mmio_destructor;
	dev->dev.private  = dev;
	spin_lock_init(&dev->lock);
	kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops);
	dev->kvm = kvm;
	kvm->coalesced_mmio_dev = dev;
	kvm_io_bus_register_dev(&kvm->mmio_bus, &dev->dev);

	return 0;
	ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &dev->dev);
	if (ret < 0)
		kfree(dev);

	return ret;
}

int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,

@@ -109,16 +117,16 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
	if (dev == NULL)
		return -EINVAL;

	mutex_lock(&kvm->lock);
	down_write(&kvm->slots_lock);
	if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) {
		mutex_unlock(&kvm->lock);
		up_write(&kvm->slots_lock);
		return -ENOBUFS;
	}

	dev->zone[dev->nb_zones] = *zone;
	dev->nb_zones++;

	mutex_unlock(&kvm->lock);
	up_write(&kvm->slots_lock);
	return 0;
}

@@ -132,7 +140,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
	if (dev == NULL)
		return -EINVAL;

	mutex_lock(&kvm->lock);
	down_write(&kvm->slots_lock);

	i = dev->nb_zones;
	while(i) {

@@ -150,7 +158,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
		i--;
	}

	mutex_unlock(&kvm->lock);
	up_write(&kvm->slots_lock);

	return 0;
}
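The ring protocol here is single-producer/single-consumer: the kernel fills an entry and then advances last under dev->lock, userspace drains entries and advances first, and one slot always stays unused so full and empty remain distinguishable (the avail < KVM_MAX_VCPUS check keeps extra headroom). A hedged userspace-side drain loop; the assumption that the ring page is the one mmap()ed from the vcpu fd at KVM_COALESCED_MMIO_PAGE_OFFSET comes from kvm.h outside this hunk, and a real consumer would add the appropriate memory barriers:

#include <linux/kvm.h>

/* Drain entries the kernel batched into the coalesced-MMIO ring. */
static void example_drain_ring(struct kvm_coalesced_mmio_ring *ring,
			       void (*handle)(struct kvm_coalesced_mmio *))
{
	while (ring->first != ring->last) {
		struct kvm_coalesced_mmio *ent =
			&ring->coalesced_mmio[ring->first];

		handle(ent);
		/* publish the slot back to the producer */
		ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
	}
}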
@@ -12,6 +12,7 @@
struct kvm_coalesced_mmio_dev {
	struct kvm_io_device dev;
	struct kvm *kvm;
	spinlock_t lock;
	int nb_zones;
	struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX];
};
578	virt/kvm/eventfd.c	Normal file
@@ -0,0 +1,578 @@
/*
 * kvm eventfd support - use eventfd objects to signal various KVM events
 *
 * Copyright 2009 Novell.  All Rights Reserved.
 *
 * Author:
 *	Gregory Haskins <ghaskins@novell.com>
 *
 * This file is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/workqueue.h>
#include <linux/syscalls.h>
#include <linux/wait.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/list.h>
#include <linux/eventfd.h>
#include <linux/kernel.h>

#include "iodev.h"

/*
 * --------------------------------------------------------------------
 * irqfd: Allows an fd to be used to inject an interrupt to the guest
 *
 * Credit goes to Avi Kivity for the original idea.
 * --------------------------------------------------------------------
 */

struct _irqfd {
	struct kvm               *kvm;
	struct eventfd_ctx       *eventfd;
	int                       gsi;
	struct list_head          list;
	poll_table                pt;
	wait_queue_head_t        *wqh;
	wait_queue_t              wait;
	struct work_struct        inject;
	struct work_struct        shutdown;
};

static struct workqueue_struct *irqfd_cleanup_wq;

static void
irqfd_inject(struct work_struct *work)
{
	struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
	struct kvm *kvm = irqfd->kvm;

	mutex_lock(&kvm->irq_lock);
	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
	mutex_unlock(&kvm->irq_lock);
}

/*
 * Race-free decouple logic (ordering is critical)
 */
static void
irqfd_shutdown(struct work_struct *work)
{
	struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown);

	/*
	 * Synchronize with the wait-queue and unhook ourselves to prevent
	 * further events.
	 */
	remove_wait_queue(irqfd->wqh, &irqfd->wait);

	/*
	 * We know no new events will be scheduled at this point, so block
	 * until all previously outstanding events have completed
	 */
	flush_work(&irqfd->inject);

	/*
	 * It is now safe to release the object's resources
	 */
	eventfd_ctx_put(irqfd->eventfd);
	kfree(irqfd);
}


/* assumes kvm->irqfds.lock is held */
static bool
irqfd_is_active(struct _irqfd *irqfd)
{
	return list_empty(&irqfd->list) ? false : true;
}

/*
 * Mark the irqfd as inactive and schedule it for removal
 *
 * assumes kvm->irqfds.lock is held
 */
static void
irqfd_deactivate(struct _irqfd *irqfd)
{
	BUG_ON(!irqfd_is_active(irqfd));

	list_del_init(&irqfd->list);

	queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
}

/*
 * Called with wqh->lock held and interrupts disabled
 */
static int
irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
	struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
	unsigned long flags = (unsigned long)key;

	if (flags & POLLIN)
		/* An event has been signaled, inject an interrupt */
		schedule_work(&irqfd->inject);

	if (flags & POLLHUP) {
		/* The eventfd is closing, detach from KVM */
		struct kvm *kvm = irqfd->kvm;
		unsigned long flags;

		spin_lock_irqsave(&kvm->irqfds.lock, flags);

		/*
		 * We must check if someone deactivated the irqfd before
		 * we could acquire the irqfds.lock since the item is
		 * deactivated from the KVM side before it is unhooked from
		 * the wait-queue.  If it is already deactivated, we can
		 * simply return knowing the other side will cleanup for us.
		 * We cannot race against the irqfd going away since the
		 * other side is required to acquire wqh->lock, which we hold
		 */
		if (irqfd_is_active(irqfd))
			irqfd_deactivate(irqfd);

		spin_unlock_irqrestore(&kvm->irqfds.lock, flags);
	}

	return 0;
}

static void
irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
			poll_table *pt)
{
	struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt);

	irqfd->wqh = wqh;
	add_wait_queue(wqh, &irqfd->wait);
}

static int
kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
{
	struct _irqfd *irqfd;
	struct file *file = NULL;
	struct eventfd_ctx *eventfd = NULL;
	int ret;
	unsigned int events;

	irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
	if (!irqfd)
		return -ENOMEM;

	irqfd->kvm = kvm;
	irqfd->gsi = gsi;
	INIT_LIST_HEAD(&irqfd->list);
	INIT_WORK(&irqfd->inject, irqfd_inject);
	INIT_WORK(&irqfd->shutdown, irqfd_shutdown);

	file = eventfd_fget(fd);
	if (IS_ERR(file)) {
		ret = PTR_ERR(file);
		goto fail;
	}

	eventfd = eventfd_ctx_fileget(file);
	if (IS_ERR(eventfd)) {
		ret = PTR_ERR(eventfd);
		goto fail;
	}

	irqfd->eventfd = eventfd;

	/*
	 * Install our own custom wake-up handling so we are notified via
	 * a callback whenever someone signals the underlying eventfd
	 */
	init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
	init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);

	events = file->f_op->poll(file, &irqfd->pt);

	spin_lock_irq(&kvm->irqfds.lock);
	list_add_tail(&irqfd->list, &kvm->irqfds.items);
	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * Check if there was an event already pending on the eventfd
	 * before we registered, and trigger it as if we didn't miss it.
	 */
	if (events & POLLIN)
		schedule_work(&irqfd->inject);

	/*
	 * do not drop the file until the irqfd is fully initialized, otherwise
	 * we might race against the POLLHUP
	 */
	fput(file);

	return 0;

fail:
	if (eventfd && !IS_ERR(eventfd))
		eventfd_ctx_put(eventfd);

	if (!IS_ERR(file))
		fput(file);

	kfree(irqfd);
	return ret;
}

void
kvm_eventfd_init(struct kvm *kvm)
{
	spin_lock_init(&kvm->irqfds.lock);
	INIT_LIST_HEAD(&kvm->irqfds.items);
	INIT_LIST_HEAD(&kvm->ioeventfds);
}

/*
 * shutdown any irqfd's that match fd+gsi
 */
static int
kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
{
	struct _irqfd *irqfd, *tmp;
	struct eventfd_ctx *eventfd;

	eventfd = eventfd_ctx_fdget(fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
		if (irqfd->eventfd == eventfd && irqfd->gsi == gsi)
			irqfd_deactivate(irqfd);
	}

	spin_unlock_irq(&kvm->irqfds.lock);
	eventfd_ctx_put(eventfd);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * so that we guarantee there will not be any more interrupts on this
	 * gsi once this deassign function returns.
	 */
	flush_workqueue(irqfd_cleanup_wq);

	return 0;
}

int
kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
{
	if (flags & KVM_IRQFD_FLAG_DEASSIGN)
		return kvm_irqfd_deassign(kvm, fd, gsi);

	return kvm_irqfd_assign(kvm, fd, gsi);
}

/*
 * This function is called as the kvm VM fd is being released. Shutdown all
 * irqfds that still remain open
 */
void
kvm_irqfd_release(struct kvm *kvm)
{
	struct _irqfd *irqfd, *tmp;

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list)
		irqfd_deactivate(irqfd);

	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * since we do not take a kvm* reference.
	 */
	flush_workqueue(irqfd_cleanup_wq);

}

/*
 * create a host-wide workqueue for issuing deferred shutdown requests
 * aggregated from all vm* instances. We need our own isolated single-thread
 * queue to prevent deadlock against flushing the normal work-queue.
 */
static int __init irqfd_module_init(void)
{
	irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup");
	if (!irqfd_cleanup_wq)
		return -ENOMEM;

	return 0;
}

static void __exit irqfd_module_exit(void)
{
	destroy_workqueue(irqfd_cleanup_wq);
}

module_init(irqfd_module_init);
module_exit(irqfd_module_exit);

/*
 * --------------------------------------------------------------------
 * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
 *
 * userspace can register a PIO/MMIO address with an eventfd for receiving
 * notification when the memory has been touched.
 * --------------------------------------------------------------------
 */

struct _ioeventfd {
	struct list_head     list;
	u64                  addr;
	int                  length;
	struct eventfd_ctx  *eventfd;
	u64                  datamatch;
	struct kvm_io_device dev;
	bool                 wildcard;
};

static inline struct _ioeventfd *
to_ioeventfd(struct kvm_io_device *dev)
{
	return container_of(dev, struct _ioeventfd, dev);
}

static void
ioeventfd_release(struct _ioeventfd *p)
{
	eventfd_ctx_put(p->eventfd);
	list_del(&p->list);
	kfree(p);
}

static bool
ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
{
	u64 _val;

	if (!(addr == p->addr && len == p->length))
		/* address-range must be precise for a hit */
		return false;

	if (p->wildcard)
		/* all else equal, wildcard is always a hit */
		return true;

	/* otherwise, we have to actually compare the data */

	BUG_ON(!IS_ALIGNED((unsigned long)val, len));

	switch (len) {
	case 1:
		_val = *(u8 *)val;
		break;
	case 2:
		_val = *(u16 *)val;
		break;
	case 4:
		_val = *(u32 *)val;
		break;
	case 8:
		_val = *(u64 *)val;
		break;
	default:
		return false;
	}

	return _val == p->datamatch ? true : false;
}

/* MMIO/PIO writes trigger an event if the addr/val match */
static int
ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
		const void *val)
{
	struct _ioeventfd *p = to_ioeventfd(this);

	if (!ioeventfd_in_range(p, addr, len, val))
		return -EOPNOTSUPP;

	eventfd_signal(p->eventfd, 1);
	return 0;
}

/*
 * This function is called as KVM is completely shutting down.  We do not
 * need to worry about locking just nuke anything we have as quickly as possible
 */
static void
ioeventfd_destructor(struct kvm_io_device *this)
{
	struct _ioeventfd *p = to_ioeventfd(this);

	ioeventfd_release(p);
}

static const struct kvm_io_device_ops ioeventfd_ops = {
	.write      = ioeventfd_write,
	.destructor = ioeventfd_destructor,
};

/* assumes kvm->slots_lock held */
static bool
ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
{
	struct _ioeventfd *_p;

	list_for_each_entry(_p, &kvm->ioeventfds, list)
		if (_p->addr == p->addr && _p->length == p->length &&
		    (_p->wildcard || p->wildcard ||
		     _p->datamatch == p->datamatch))
			return true;

	return false;
}

static int
kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
	struct kvm_io_bus        *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus;
	struct _ioeventfd        *p;
	struct eventfd_ctx       *eventfd;
	int                       ret;

	/* must be natural-word sized */
	switch (args->len) {
	case 1:
	case 2:
	case 4:
	case 8:
		break;
	default:
		return -EINVAL;
	}

	/* check for range overflow */
	if (args->addr + args->len < args->addr)
		return -EINVAL;

	/* check for extra flags that we don't understand */
	if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
		return -EINVAL;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	p = kzalloc(sizeof(*p), GFP_KERNEL);
	if (!p) {
		ret = -ENOMEM;
		goto fail;
	}

	INIT_LIST_HEAD(&p->list);
	p->addr    = args->addr;
	p->length  = args->len;
	p->eventfd = eventfd;

	/* The datamatch feature is optional, otherwise this is a wildcard */
	if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
		p->datamatch = args->datamatch;
	else
		p->wildcard = true;

	down_write(&kvm->slots_lock);

	/* Verify that there isnt a match already */
	if (ioeventfd_check_collision(kvm, p)) {
		ret = -EEXIST;
		goto unlock_fail;
	}

	kvm_iodevice_init(&p->dev, &ioeventfd_ops);

	ret = __kvm_io_bus_register_dev(bus, &p->dev);
	if (ret < 0)
		goto unlock_fail;

	list_add_tail(&p->list, &kvm->ioeventfds);

	up_write(&kvm->slots_lock);

	return 0;

unlock_fail:
	up_write(&kvm->slots_lock);

fail:
	kfree(p);
	eventfd_ctx_put(eventfd);

	return ret;
}

static int
kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
	struct kvm_io_bus        *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus;
	struct _ioeventfd        *p, *tmp;
	struct eventfd_ctx       *eventfd;
	int                       ret = -ENOENT;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	down_write(&kvm->slots_lock);

	list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
		bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);

		if (p->eventfd != eventfd  ||
		    p->addr != args->addr  ||
		    p->length != args->len ||
		    p->wildcard != wildcard)
			continue;

		if (!p->wildcard && p->datamatch != args->datamatch)
			continue;

		__kvm_io_bus_unregister_dev(bus, &p->dev);
		ioeventfd_release(p);
		ret = 0;
		break;
	}

	up_write(&kvm->slots_lock);

	eventfd_ctx_put(eventfd);

	return ret;
}

int
kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN)
		return kvm_deassign_ioeventfd(kvm, args);

	return kvm_assign_ioeventfd(kvm, args);
}
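Putting the two halves together from userspace: an eventfd is created, handed to the VM with KVM_IRQFD, and from then on every write to the fd injects the chosen GSI; setting KVM_IRQFD_FLAG_DEASSIGN in the same struct tears it down. A hedged sketch (error handling trimmed; KVM_CREATE_VM and KVM_CREATE_IRQCHIP are pre-existing ioctls, not part of this file):

#include <fcntl.h>
#include <stdio.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	int vm  = ioctl(kvm, KVM_CREATE_VM, 0);
	int efd = eventfd(0, 0);
	struct kvm_irqfd irqfd = { .fd = efd, .gsi = 5 };

	if (kvm < 0 || vm < 0 || efd < 0)
		return 1;

	/* irqfd needs an in-kernel irqchip to route GSIs into */
	if (ioctl(vm, KVM_CREATE_IRQCHIP) < 0)
		perror("KVM_CREATE_IRQCHIP");

	if (ioctl(vm, KVM_IRQFD, &irqfd) < 0)
		perror("KVM_IRQFD");

	/* from here on, a write(efd, &(uint64_t){1}, 8) injects GSI 5 */

	irqfd.flags = KVM_IRQFD_FLAG_DEASSIGN;
	if (ioctl(vm, KVM_IRQFD, &irqfd) < 0)
		perror("KVM_IRQFD deassign");
	return 0;
}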
@@ -36,6 +36,7 @@
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/current.h>
#include <trace/events/kvm.h>

#include "ioapic.h"
#include "lapic.h"

@@ -103,6 +104,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
{
	unsigned index;
	bool mask_before, mask_after;
	union kvm_ioapic_redirect_entry *e;

	switch (ioapic->ioregsel) {
	case IOAPIC_REG_VERSION:

@@ -122,19 +124,20 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
		ioapic_debug("change redir index %x val %x\n", index, val);
		if (index >= IOAPIC_NUM_PINS)
			return;
		mask_before = ioapic->redirtbl[index].fields.mask;
		e = &ioapic->redirtbl[index];
		mask_before = e->fields.mask;
		if (ioapic->ioregsel & 1) {
			ioapic->redirtbl[index].bits &= 0xffffffff;
			ioapic->redirtbl[index].bits |= (u64) val << 32;
			e->bits &= 0xffffffff;
			e->bits |= (u64) val << 32;
		} else {
			ioapic->redirtbl[index].bits &= ~0xffffffffULL;
			ioapic->redirtbl[index].bits |= (u32) val;
			ioapic->redirtbl[index].fields.remote_irr = 0;
			e->bits &= ~0xffffffffULL;
			e->bits |= (u32) val;
			e->fields.remote_irr = 0;
		}
		mask_after = ioapic->redirtbl[index].fields.mask;
		mask_after = e->fields.mask;
		if (mask_before != mask_after)
			kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after);
		if (ioapic->redirtbl[index].fields.trig_mode == IOAPIC_LEVEL_TRIG
		if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
		    && ioapic->irr & (1 << index))
			ioapic_service(ioapic, index);
		break;

@@ -164,7 +167,9 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
	/* Always delivery PIT interrupt to vcpu 0 */
	if (irq == 0) {
		irqe.dest_mode = 0; /* Physical mode. */
		irqe.dest_id = ioapic->kvm->vcpus[0]->vcpu_id;
		/* need to read apic_id from apic regiest since
		 * it can be rewritten */
		irqe.dest_id = ioapic->kvm->bsp_vcpu->vcpu_id;
	}
#endif
	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe);

@@ -188,7 +193,10 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
		if ((edge && old_irr != ioapic->irr) ||
		    (!edge && !entry.fields.remote_irr))
			ret = ioapic_service(ioapic, irq);
		else
			ret = 0; /* report coalesced interrupt */
	}
	trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
	}
	return ret;
}

@@ -220,24 +228,29 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
		__kvm_ioapic_update_eoi(ioapic, i, trigger_mode);
}

static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr,
			   int len, int is_write)
static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev)
{
	struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
	return container_of(dev, struct kvm_ioapic, dev);
}

static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr)
{
	return ((addr >= ioapic->base_address &&
		 (addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
}

static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
			     void *val)
static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
			    void *val)
{
	struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
	struct kvm_ioapic *ioapic = to_ioapic(this);
	u32 result;
	if (!ioapic_in_range(ioapic, addr))
		return -EOPNOTSUPP;

	ioapic_debug("addr %lx\n", (unsigned long)addr);
	ASSERT(!(addr & 0xf));	/* check alignment */

	mutex_lock(&ioapic->kvm->irq_lock);
	addr &= 0xff;
	switch (addr) {
	case IOAPIC_REG_SELECT:

@@ -264,22 +277,28 @@ static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
	default:
		printk(KERN_WARNING "ioapic: wrong length %d\n", len);
	}
	mutex_unlock(&ioapic->kvm->irq_lock);
	return 0;
}

static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
			      const void *val)
static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
			     const void *val)
{
	struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
	struct kvm_ioapic *ioapic = to_ioapic(this);
	u32 data;
	if (!ioapic_in_range(ioapic, addr))
		return -EOPNOTSUPP;

	ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n",
		     (void*)addr, len, val);
	ASSERT(!(addr & 0xf));	/* check alignment */

	mutex_lock(&ioapic->kvm->irq_lock);
	if (len == 4 || len == 8)
		data = *(u32 *) val;
	else {
		printk(KERN_WARNING "ioapic: Unsupported size %d\n", len);
		return;
		goto unlock;
	}

	addr &= 0xff;

@@ -300,6 +319,9 @@ static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
	default:
		break;
	}
unlock:
	mutex_unlock(&ioapic->kvm->irq_lock);
	return 0;
}

void kvm_ioapic_reset(struct kvm_ioapic *ioapic)

@@ -314,21 +336,27 @@ void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
	ioapic->id = 0;
}

static const struct kvm_io_device_ops ioapic_mmio_ops = {
	.read     = ioapic_mmio_read,
	.write    = ioapic_mmio_write,
};

int kvm_ioapic_init(struct kvm *kvm)
{
	struct kvm_ioapic *ioapic;
	int ret;

	ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
	if (!ioapic)
		return -ENOMEM;
	kvm->arch.vioapic = ioapic;
	kvm_ioapic_reset(ioapic);
	ioapic->dev.read = ioapic_mmio_read;
	ioapic->dev.write = ioapic_mmio_write;
	ioapic->dev.in_range = ioapic_in_range;
	ioapic->dev.private = ioapic;
	kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
	ioapic->kvm = kvm;
	kvm_io_bus_register_dev(&kvm->mmio_bus, &ioapic->dev);
	return 0;
	ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &ioapic->dev);
	if (ret < 0)
		kfree(ioapic);

	return ret;
}
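ioapic_write_indirect treats each 64-bit redirection entry as two 32-bit halves selected by bit 0 of ioregsel, and the fields it touches (mask, remote_irr, trig_mode) are the same ones the kvm_ioapic_set_irq tracepoint prints. A standalone decoder for eyeballing those entries (the sample value is illustrative):

#include <stdio.h>

/* Decode a 64-bit IOAPIC redirection entry the way the
 * kvm_ioapic_set_irq tracepoint renders it. */
static void decode_redir(unsigned long long e, int pin)
{
	printf("pin %u dst %x vec=%u (%s|%s%s)\n",
	       pin, (unsigned char)(e >> 56), (unsigned char)e,
	       (e & (1ULL << 11)) ? "logical" : "physical",
	       (e & (1ULL << 15)) ? "level" : "edge",
	       (e & (1ULL << 16)) ? "|masked" : "");
}

int main(void)
{
	decode_redir(0x0100000000008930ULL, 4);	/* sample entry */
	return 0;
}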
@@ -17,49 +17,54 @@
#define __KVM_IODEV_H__

#include <linux/kvm_types.h>
#include <asm/errno.h>

struct kvm_io_device {
	void (*read)(struct kvm_io_device *this,
struct kvm_io_device;

/**
 * kvm_io_device_ops are called under kvm slots_lock.
 * read and write handlers return 0 if the transaction has been handled,
 * or non-zero to have it passed to the next device.
 **/
struct kvm_io_device_ops {
	int (*read)(struct kvm_io_device *this,
		    gpa_t addr,
		    int len,
		    void *val);
	int (*write)(struct kvm_io_device *this,
		     gpa_t addr,
		     int len,
		     void *val);
	void (*write)(struct kvm_io_device *this,
		      gpa_t addr,
		      int len,
		      const void *val);
	int (*in_range)(struct kvm_io_device *this, gpa_t addr, int len,
			int is_write);
		     const void *val);
	void (*destructor)(struct kvm_io_device *this);

	void             *private;
};

static inline void kvm_iodevice_read(struct kvm_io_device *dev,
				     gpa_t addr,
				     int len,
				     void *val)

struct kvm_io_device {
	const struct kvm_io_device_ops *ops;
};

static inline void kvm_iodevice_init(struct kvm_io_device *dev,
				     const struct kvm_io_device_ops *ops)
{
	dev->read(dev, addr, len, val);
	dev->ops = ops;
}

static inline void kvm_iodevice_write(struct kvm_io_device *dev,
				      gpa_t addr,
				      int len,
				      const void *val)
static inline int kvm_iodevice_read(struct kvm_io_device *dev,
				    gpa_t addr, int l, void *v)
{
	dev->write(dev, addr, len, val);
	return dev->ops->read ? dev->ops->read(dev, addr, l, v) : -EOPNOTSUPP;
}

static inline int kvm_iodevice_inrange(struct kvm_io_device *dev,
				       gpa_t addr, int len, int is_write)
static inline int kvm_iodevice_write(struct kvm_io_device *dev,
				     gpa_t addr, int l, const void *v)
{
	return dev->in_range(dev, addr, len, is_write);
	return dev->ops->write ? dev->ops->write(dev, addr, l, v) : -EOPNOTSUPP;
}

static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
{
	if (dev->destructor)
		dev->destructor(dev);
	if (dev->ops->destructor)
		dev->ops->destructor(dev);
}

#endif /* __KVM_IODEV_H__ */
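Under the reworked API a device embeds struct kvm_io_device, binds a const ops table with kvm_iodevice_init(), and signals "not mine" with -EOPNOTSUPP so the bus can offer the transaction to the next device; this is exactly the pattern the coalesced-MMIO and ioapic conversions above follow. A minimal write-only sketch against the new interface (names are illustrative, not from this series):

/* Illustrative device using the reworked iodev API. */
struct example_dev {
	struct kvm_io_device dev;
	gpa_t base;
};

static struct example_dev *to_example(struct kvm_io_device *dev)
{
	return container_of(dev, struct example_dev, dev);
}

static int example_write(struct kvm_io_device *this,
			 gpa_t addr, int len, const void *val)
{
	struct example_dev *e = to_example(this);

	if (addr < e->base || addr >= e->base + 8)
		return -EOPNOTSUPP;	/* let the bus try the next device */
	/* ... consume the write ... */
	return 0;
}

static const struct kvm_io_device_ops example_ops = {
	.write = example_write,
};

/* setup: kvm_iodevice_init(&e->dev, &example_ops); */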
|
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -20,6 +20,7 @@
  */
 
 #include <linux/kvm_host.h>
+#include <trace/events/kvm.h>
 
 #include <asm/msidef.h>
 #ifdef CONFIG_IA64
@@ -62,14 +63,14 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 	int i, r = -1;
 	struct kvm_vcpu *vcpu, *lowest = NULL;
 
+	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
+
 	if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
 			kvm_is_dm_lowest_prio(irq))
 		printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
 
-	for (i = 0; i < KVM_MAX_VCPUS; i++) {
-		vcpu = kvm->vcpus[i];
-
-		if (!vcpu || !kvm_apic_present(vcpu))
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (!kvm_apic_present(vcpu))
 			continue;
 
 		if (!kvm_apic_match_dest(vcpu, src, irq->shorthand,
@@ -99,6 +100,8 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 {
 	struct kvm_lapic_irq irq;
 
+	trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
+
 	irq.dest_id = (e->msi.address_lo &
 			MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
 	irq.vector = (e->msi.data &
@@ -113,7 +116,7 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 	return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
 }
 
-/* This should be called with the kvm->lock mutex held
+/* This should be called with the kvm->irq_lock mutex held
  * Return value:
  *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
  *  = 0   Interrupt was coalesced (previous irq is still pending)
@@ -125,6 +128,10 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 	unsigned long *irq_state, sig_level;
 	int ret = -1;
 
+	trace_kvm_set_irq(irq, level, irq_source_id);
+
+	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
+
 	if (irq < KVM_IOAPIC_NUM_PINS) {
 		irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
@@ -134,7 +141,9 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 		else
 			clear_bit(irq_source_id, irq_state);
 		sig_level = !!(*irq_state);
-	} else /* Deal with MSI/MSI-X */
+	} else if (!level)
+		return ret;
+	else /* Deal with MSI/MSI-X */
 		sig_level = 1;
 
 	/* Not possible to detect if the guest uses the PIC or the
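With the comment fixed to name kvm->irq_lock, kvm_set_irq()'s return convention is worth reading together with the new WARN_ON: callers hold irq_lock and can distinguish coalesced from ignored injections. A hedged sketch of a caller (pulse_gsi and its counters are made-up names, not from this commit):

/* Illustrative sketch only: pulsing a GSI and consuming the result. */
static void pulse_gsi(struct kvm *kvm, int gsi, long *coalesced, long *ignored)
{
	int ret;

	mutex_lock(&kvm->irq_lock);
	ret = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, gsi, 1);
	if (ret == 0)
		(*coalesced)++;		/* = 0: previous irq still pending */
	else if (ret < 0)
		(*ignored)++;		/* < 0: masked or not delivered */
	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, gsi, 0);
	mutex_unlock(&kvm->irq_lock);
}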
@@ -159,6 +168,8 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 	struct hlist_node *n;
 	unsigned gsi = pin;
 
+	trace_kvm_ack_irq(irqchip, pin);
+
 	list_for_each_entry(e, &kvm->irq_routing, link)
 		if (e->type == KVM_IRQ_ROUTING_IRQCHIP &&
 		    e->irqchip.irqchip == irqchip &&
@@ -175,19 +186,26 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian)
 {
+	mutex_lock(&kvm->irq_lock);
 	hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
+	mutex_unlock(&kvm->irq_lock);
 }
 
-void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian)
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+				     struct kvm_irq_ack_notifier *kian)
 {
+	mutex_lock(&kvm->irq_lock);
 	hlist_del_init(&kian->link);
+	mutex_unlock(&kvm->irq_lock);
 }
 
-/* The caller must hold kvm->lock mutex */
 int kvm_request_irq_source_id(struct kvm *kvm)
 {
 	unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
-	int irq_source_id = find_first_zero_bit(bitmap,
+	int irq_source_id;
+
+	mutex_lock(&kvm->irq_lock);
+	irq_source_id = find_first_zero_bit(bitmap,
 		sizeof(kvm->arch.irq_sources_bitmap));
 
 	if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
@@ -197,6 +215,7 @@ int kvm_request_irq_source_id(struct kvm *kvm)
 
 	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
 	set_bit(irq_source_id, bitmap);
+	mutex_unlock(&kvm->irq_lock);
 
 	return irq_source_id;
 }
@@ -207,6 +226,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
 
 	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
 
+	mutex_lock(&kvm->irq_lock);
 	if (irq_source_id < 0 ||
 	    irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
 		printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
@@ -215,19 +235,24 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
 	for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
 		clear_bit(irq_source_id, &kvm->arch.irq_states[i]);
 	clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
 				    struct kvm_irq_mask_notifier *kimn)
 {
+	mutex_lock(&kvm->irq_lock);
 	kimn->irq = irq;
 	hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 				      struct kvm_irq_mask_notifier *kimn)
 {
+	mutex_lock(&kvm->irq_lock);
 	hlist_del(&kimn->link);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
@@ -235,6 +260,8 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
 	struct kvm_irq_mask_notifier *kimn;
 	struct hlist_node *n;
 
+	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
+
 	hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
 		if (kimn->irq == irq)
 			kimn->func(kimn, mask);
@@ -250,7 +277,9 @@ static void __kvm_free_irq_routing(struct list_head *irq_routing)
 
 void kvm_free_irq_routing(struct kvm *kvm)
 {
+	mutex_lock(&kvm->irq_lock);
 	__kvm_free_irq_routing(&kvm->irq_routing);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
@@ -325,13 +354,13 @@ int kvm_set_irq_routing(struct kvm *kvm,
 		e = NULL;
 	}
 
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->irq_lock);
 	list_splice(&kvm->irq_routing, &tmp);
 	INIT_LIST_HEAD(&kvm->irq_routing);
 	list_splice(&irq_list, &kvm->irq_routing);
 	INIT_LIST_HEAD(&irq_list);
 	list_splice(&tmp, &irq_list);
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->irq_lock);
 
 	r = 0;
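Because the ack-notifier list is now protected by kvm->irq_lock, kvm_unregister_irq_ack_notifier() grows a struct kvm parameter (the deassign_guest_irq() hunk in kvm_main.c below is updated to match). A hedged sketch of a user of this API; my_ack_notifier and my_gsi_acked are invented names:

/* Illustrative sketch only: an ack notifier under the new locked API. */
static void my_gsi_acked(struct kvm_irq_ack_notifier *kian)
{
	/* invoked from kvm_notify_acked_irq() when the guest ACKs kian->gsi */
}

static struct kvm_irq_ack_notifier my_ack_notifier = {
	.gsi		= 10,
	.irq_acked	= my_gsi_acked,
};

static void my_setup(struct kvm *kvm)
{
	kvm_register_irq_ack_notifier(kvm, &my_ack_notifier);
}

static void my_teardown(struct kvm *kvm)
{
	/* the unregister side now takes the VM pointer too */
	kvm_unregister_irq_ack_notifier(kvm, &my_ack_notifier);
}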
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -59,9 +59,18 @@
 #include "irq.h"
 #endif
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/kvm.h>
+
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
+/*
+ * Ordering of locks:
+ *
+ *		kvm->slots_lock --> kvm->lock --> kvm->irq_lock
+ */
+
 DEFINE_SPINLOCK(kvm_lock);
 LIST_HEAD(vm_list);
 
@@ -79,6 +88,8 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 
 static bool kvm_rebooting;
 
+static bool largepages_enabled = true;
+
 #ifdef KVM_CAP_DEVICE_ASSIGNMENT
 static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
 						      int assigned_dev_id)
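The new lock-ordering comment is the contract the rest of this series enforces. When code needs more than one of these locks it must take them outermost-to-innermost and release in reverse; a minimal sketch (the function name is illustrative):

/* Illustrative sketch only: nesting per the documented order. */
static void ordered_locking_example(struct kvm *kvm)
{
	down_read(&kvm->slots_lock);	/* outermost */
	mutex_lock(&kvm->lock);
	mutex_lock(&kvm->irq_lock);	/* innermost */

	/* ... work that needs all three ... */

	mutex_unlock(&kvm->irq_lock);
	mutex_unlock(&kvm->lock);
	up_read(&kvm->slots_lock);
}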
@@ -120,17 +131,13 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 {
 	struct kvm_assigned_dev_kernel *assigned_dev;
 	struct kvm *kvm;
-	int irq, i;
+	int i;
 
 	assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
 				    interrupt_work);
 	kvm = assigned_dev->kvm;
 
-	/* This is taken to safely inject irq inside the guest. When
-	 * the interrupt injection (or the ioapic code) uses a
-	 * finer-grained lock, update this
-	 */
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->irq_lock);
 	spin_lock_irq(&assigned_dev->assigned_dev_lock);
 	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
 		struct kvm_guest_msix_entry *guest_entries =
@@ -143,23 +150,13 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 			kvm_set_irq(assigned_dev->kvm,
 				    assigned_dev->irq_source_id,
 				    guest_entries[i].vector, 1);
-			irq = assigned_dev->host_msix_entries[i].vector;
-			if (irq != 0)
-				enable_irq(irq);
-			assigned_dev->host_irq_disabled = false;
 		}
-	} else {
+	} else
 		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
 			    assigned_dev->guest_irq, 1);
-		if (assigned_dev->irq_requested_type &
-					KVM_DEV_IRQ_GUEST_MSI) {
-			enable_irq(assigned_dev->host_irq);
-			assigned_dev->host_irq_disabled = false;
-		}
-	}
 
 	spin_unlock_irq(&assigned_dev->assigned_dev_lock);
-	mutex_unlock(&assigned_dev->kvm->lock);
+	mutex_unlock(&assigned_dev->kvm->irq_lock);
 }
 
 static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
@@ -179,8 +176,10 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
 
 	schedule_work(&assigned_dev->interrupt_work);
 
-	disable_irq_nosync(irq);
-	assigned_dev->host_irq_disabled = true;
+	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
+		disable_irq_nosync(irq);
+		assigned_dev->host_irq_disabled = true;
+	}
 
 out:
 	spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags);
@@ -215,7 +214,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
 static void deassign_guest_irq(struct kvm *kvm,
 			       struct kvm_assigned_dev_kernel *assigned_dev)
 {
-	kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
+	kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
 	assigned_dev->ack_notifier.gsi = -1;
 
 	if (assigned_dev->irq_source_id != -1)
@@ -417,6 +416,7 @@ static int assigned_device_enable_guest_msi(struct kvm *kvm,
 {
 	dev->guest_irq = irq->guest_irq;
 	dev->ack_notifier.gsi = -1;
+	dev->host_irq_disabled = false;
 	return 0;
 }
 #endif
@@ -427,6 +427,7 @@ static int assigned_device_enable_guest_msix(struct kvm *kvm,
 {
 	dev->guest_irq = irq->guest_irq;
 	dev->ack_notifier.gsi = -1;
+	dev->host_irq_disabled = false;
 	return 0;
 }
 #endif
@@ -693,11 +694,6 @@ out:
 }
 #endif
 
-static inline int valid_vcpu(int n)
-{
-	return likely(n >= 0 && n < KVM_MAX_VCPUS);
-}
-
 inline int kvm_is_mmio_pfn(pfn_t pfn)
 {
 	if (pfn_valid(pfn)) {
@@ -745,12 +741,9 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 	if (alloc_cpumask_var(&cpus, GFP_ATOMIC))
 		cpumask_clear(cpus);
 
-	me = get_cpu();
 	spin_lock(&kvm->requests_lock);
-	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-		vcpu = kvm->vcpus[i];
-		if (!vcpu)
-			continue;
+	me = smp_processor_id();
+	kvm_for_each_vcpu(i, vcpu, kvm) {
 		if (test_and_set_bit(req, &vcpu->requests))
 			continue;
 		cpu = vcpu->cpu;
@@ -764,7 +757,6 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 	else
 		called = false;
 	spin_unlock(&kvm->requests_lock);
-	put_cpu();
 	free_cpumask_var(cpus);
 	return called;
 }
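Both hunks above lean on kvm_for_each_vcpu(), introduced elsewhere in this merge (include/linux/kvm_host.h): it walks only the densely packed online vcpus and never yields NULL, which is why the !vcpu checks disappear. The helper and macro read approximately as follows (quoted from memory of this merge window, so treat as a sketch rather than the authoritative text):

/* Approximate form of the helpers this code now relies on. */
static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
{
	smp_rmb();	/* pairs with the smp_wmb() in vcpu creation */
	return kvm->vcpus[i];
}

#define kvm_for_each_vcpu(idx, vcpup, kvm) \
	for (idx = 0, vcpup = kvm_get_vcpu(kvm, idx); \
	     idx < atomic_read(&(kvm)->online_vcpus) && vcpup; \
	     vcpup = kvm_get_vcpu(kvm, ++idx))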
@@ -986,7 +978,9 @@ static struct kvm *kvm_create_vm(void)
 	spin_lock_init(&kvm->mmu_lock);
 	spin_lock_init(&kvm->requests_lock);
 	kvm_io_bus_init(&kvm->pio_bus);
+	kvm_eventfd_init(kvm);
 	mutex_init(&kvm->lock);
+	mutex_init(&kvm->irq_lock);
 	kvm_io_bus_init(&kvm->mmio_bus);
 	init_rwsem(&kvm->slots_lock);
 	atomic_set(&kvm->users_count, 1);
@@ -1006,19 +1000,25 @@ out:
 static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
 				  struct kvm_memory_slot *dont)
 {
+	int i;
+
 	if (!dont || free->rmap != dont->rmap)
 		vfree(free->rmap);
 
 	if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
 		vfree(free->dirty_bitmap);
 
-	if (!dont || free->lpage_info != dont->lpage_info)
-		vfree(free->lpage_info);
+	for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
+		if (!dont || free->lpage_info[i] != dont->lpage_info[i]) {
+			vfree(free->lpage_info[i]);
+			free->lpage_info[i] = NULL;
+		}
+	}
 
 	free->npages = 0;
 	free->dirty_bitmap = NULL;
 	free->rmap = NULL;
-	free->lpage_info = NULL;
 }
 
 void kvm_free_physmem(struct kvm *kvm)
@@ -1071,6 +1071,8 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
 {
 	struct kvm *kvm = filp->private_data;
 
+	kvm_irqfd_release(kvm);
+
 	kvm_put_kvm(kvm);
 	return 0;
 }
@@ -1089,8 +1091,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
 {
 	int r;
 	gfn_t base_gfn;
-	unsigned long npages, ugfn;
-	unsigned long largepages, i;
+	unsigned long npages;
+	unsigned long i;
 	struct kvm_memory_slot *memslot;
 	struct kvm_memory_slot old, new;
 
@@ -1164,31 +1166,51 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		else
 			new.userspace_addr = 0;
 	}
-	if (npages && !new.lpage_info) {
-		largepages = 1 + (base_gfn + npages - 1) / KVM_PAGES_PER_HPAGE;
-		largepages -= base_gfn / KVM_PAGES_PER_HPAGE;
 
-		new.lpage_info = vmalloc(largepages * sizeof(*new.lpage_info));
-
-		if (!new.lpage_info)
-			goto out_free;
-
-		memset(new.lpage_info, 0, largepages * sizeof(*new.lpage_info));
-
-		if (base_gfn % KVM_PAGES_PER_HPAGE)
-			new.lpage_info[0].write_count = 1;
-		if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE)
-			new.lpage_info[largepages-1].write_count = 1;
-		ugfn = new.userspace_addr >> PAGE_SHIFT;
-		/*
-		 * If the gfn and userspace address are not aligned wrt each
-		 * other, disable large page support for this slot
-		 */
-		if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE - 1))
-			for (i = 0; i < largepages; ++i)
-				new.lpage_info[i].write_count = 1;
-	}
+	if (!npages)
+		goto skip_lpage;
+
+	for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
+		unsigned long ugfn;
+		unsigned long j;
+		int lpages;
+		int level = i + 2;
+
+		/* Avoid unused variable warning if no large pages */
+		(void)level;
+
+		if (new.lpage_info[i])
+			continue;
+
+		lpages = 1 + (base_gfn + npages - 1) /
+			     KVM_PAGES_PER_HPAGE(level);
+		lpages -= base_gfn / KVM_PAGES_PER_HPAGE(level);
+
+		new.lpage_info[i] = vmalloc(lpages * sizeof(*new.lpage_info[i]));
+
+		if (!new.lpage_info[i])
+			goto out_free;
+
+		memset(new.lpage_info[i], 0,
+		       lpages * sizeof(*new.lpage_info[i]));
+
+		if (base_gfn % KVM_PAGES_PER_HPAGE(level))
+			new.lpage_info[i][0].write_count = 1;
+		if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE(level))
+			new.lpage_info[i][lpages - 1].write_count = 1;
+		ugfn = new.userspace_addr >> PAGE_SHIFT;
+		/*
+		 * If the gfn and userspace address are not aligned wrt each
+		 * other, or if explicitly asked to, disable large page
+		 * support for this slot
+		 */
+		if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
+		    !largepages_enabled)
+			for (j = 0; j < lpages; ++j)
+				new.lpage_info[i][j].write_count = 1;
+	}
+
+skip_lpage:
 
 	/* Allocate page dirty bitmap if needed */
 	if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
 		unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8;
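The per-level sizing arithmetic above is easiest to see with concrete numbers. Assuming x86, where KVM_PAGES_PER_HPAGE(2) == 512 (a 2MB page holds 512 4K frames), a slot with base_gfn = 256 and npages = 768 works out as follows (illustrative numbers, not from the diff):

/*
 *   lpages = 1 + (base_gfn + npages - 1) / 512 - base_gfn / 512
 *          = 1 + (256 + 768 - 1) / 512   - 256 / 512
 *          = 1 + 1 - 0
 *          = 2 large-page frames, covering gfns [0..511] and [512..1023]
 *
 * base_gfn % 512 == 256, so lpage_info[i][0].write_count = 1: the head
 * frame is only partially covered by the slot and must stay 4K-mapped.
 * (base_gfn + npages) % 512 == 0, so the tail frame remains eligible
 * for a large mapping.
 */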
@@ -1200,6 +1222,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		if (old.npages)
 			kvm_arch_flush_shadow(kvm);
 	}
+#else  /* not defined CONFIG_S390 */
+	new.user_alloc = user_alloc;
+	if (user_alloc)
+		new.userspace_addr = mem->userspace_addr;
 #endif /* not defined CONFIG_S390 */
 
 	if (!npages)
@@ -1299,6 +1325,12 @@ out:
 	return r;
 }
 
+void kvm_disable_largepages(void)
+{
+	largepages_enabled = false;
+}
+EXPORT_SYMBOL_GPL(kvm_disable_largepages);
+
 int is_error_page(struct page *page)
 {
 	return page == bad_page;
@@ -1635,9 +1667,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 	for (;;) {
 		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
-		if ((kvm_arch_interrupt_allowed(vcpu) &&
-					kvm_cpu_has_interrupt(vcpu)) ||
-				kvm_arch_vcpu_runnable(vcpu)) {
+		if (kvm_arch_vcpu_runnable(vcpu)) {
 			set_bit(KVM_REQ_UNHALT, &vcpu->requests);
 			break;
 		}
@@ -1714,24 +1744,18 @@ static struct file_operations kvm_vcpu_fops = {
  */
 static int create_vcpu_fd(struct kvm_vcpu *vcpu)
 {
-	int fd = anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, 0);
-	if (fd < 0)
-		kvm_put_kvm(vcpu->kvm);
-	return fd;
+	return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, 0);
 }
 
 /*
  * Creates some virtual cpus.  Good luck creating more than one.
  */
-static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
+static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 {
 	int r;
-	struct kvm_vcpu *vcpu;
+	struct kvm_vcpu *vcpu, *v;
 
-	if (!valid_vcpu(n))
-		return -EINVAL;
-
-	vcpu = kvm_arch_vcpu_create(kvm, n);
+	vcpu = kvm_arch_vcpu_create(kvm, id);
 	if (IS_ERR(vcpu))
 		return PTR_ERR(vcpu);
 
@@ -1742,23 +1766,38 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
 		return r;
 
 	mutex_lock(&kvm->lock);
-	if (kvm->vcpus[n]) {
-		r = -EEXIST;
+	if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) {
+		r = -EINVAL;
 		goto vcpu_destroy;
 	}
-	kvm->vcpus[n] = vcpu;
-	mutex_unlock(&kvm->lock);
+
+	kvm_for_each_vcpu(r, v, kvm)
+		if (v->vcpu_id == id) {
+			r = -EEXIST;
+			goto vcpu_destroy;
+		}
+
+	BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]);
 
 	/* Now it's all set up, let userspace reach it */
 	kvm_get_kvm(kvm);
 	r = create_vcpu_fd(vcpu);
-	if (r < 0)
-		goto unlink;
+	if (r < 0) {
+		kvm_put_kvm(kvm);
+		goto vcpu_destroy;
+	}
+
+	kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu;
+	smp_wmb();
+	atomic_inc(&kvm->online_vcpus);
+
+#ifdef CONFIG_KVM_APIC_ARCHITECTURE
+	if (kvm->bsp_vcpu_id == id)
+		kvm->bsp_vcpu = vcpu;
+#endif
+	mutex_unlock(&kvm->lock);
 	return r;
 
-unlink:
-	mutex_lock(&kvm->lock);
-	kvm->vcpus[n] = NULL;
 vcpu_destroy:
 	mutex_unlock(&kvm->lock);
 	kvm_arch_vcpu_destroy(vcpu);
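With this change the vcpus[] array is filled densely in creation order, the vcpu id is passed through to the architecture code instead of being bounds-checked as a slot index, and creating a duplicate id now fails with -EEXIST. A hedged userspace sketch of that behavior (vm_fd is assumed to be an open VM descriptor; error handling elided):

/* Illustrative userspace sketch only. */
#include <linux/kvm.h>
#include <sys/ioctl.h>

int create_two_vcpus(int vm_fd)
{
	int v0, v1;

	v0 = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
	v1 = ioctl(vm_fd, KVM_CREATE_VCPU, 0);	/* duplicate id: -1, errno == EEXIST */
	return (v0 >= 0 && v1 < 0) ? v0 : -1;
}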
@@ -2199,6 +2238,7 @@ static long kvm_vm_ioctl(struct file *filp,
 		vfree(entries);
 		break;
 	}
+#endif /* KVM_CAP_IRQ_ROUTING */
 #ifdef __KVM_HAVE_MSIX
 	case KVM_ASSIGN_SET_MSIX_NR: {
 		struct kvm_assigned_msix_nr entry_nr;
@@ -2221,7 +2261,35 @@ static long kvm_vm_ioctl(struct file *filp,
 		break;
 	}
 #endif
-#endif /* KVM_CAP_IRQ_ROUTING */
+	case KVM_IRQFD: {
+		struct kvm_irqfd data;
+
+		r = -EFAULT;
+		if (copy_from_user(&data, argp, sizeof data))
+			goto out;
+		r = kvm_irqfd(kvm, data.fd, data.gsi, data.flags);
+		break;
+	}
+	case KVM_IOEVENTFD: {
+		struct kvm_ioeventfd data;
+
+		r = -EFAULT;
+		if (copy_from_user(&data, argp, sizeof data))
+			goto out;
+		r = kvm_ioeventfd(kvm, &data);
+		break;
+	}
+#ifdef CONFIG_KVM_APIC_ARCHITECTURE
+	case KVM_SET_BOOT_CPU_ID:
+		r = 0;
+		mutex_lock(&kvm->lock);
+		if (atomic_read(&kvm->online_vcpus) != 0)
+			r = -EBUSY;
+		else
+			kvm->bsp_vcpu_id = arg;
+		mutex_unlock(&kvm->lock);
+		break;
+#endif
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 	}
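The two new ioctls wire eventfds to guest interrupts and I/O addresses. A hedged userspace sketch of the KVM_IRQFD side; the field layout follows struct kvm_irqfd as used by the handler above, the function name is invented, and error handling is minimal:

/* Illustrative userspace sketch only. */
#include <linux/kvm.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <unistd.h>

int wire_gsi_to_eventfd(int vm_fd, unsigned int gsi)
{
	struct kvm_irqfd irqfd;
	int efd = eventfd(0, 0);

	memset(&irqfd, 0, sizeof(irqfd));
	irqfd.fd  = efd;
	irqfd.gsi = gsi;
	if (ioctl(vm_fd, KVM_IRQFD, &irqfd) < 0) {
		close(efd);
		return -1;
	}
	/* any write to efd now injects an interrupt on this GSI */
	return efd;
}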
@@ -2288,6 +2356,9 @@ static long kvm_dev_ioctl_check_extension_generic(long arg)
 	case KVM_CAP_USER_MEMORY:
 	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
 	case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS:
+#ifdef CONFIG_KVM_APIC_ARCHITECTURE
+	case KVM_CAP_SET_BOOT_CPU_ID:
+#endif
 		return 1;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	case KVM_CAP_IRQ_ROUTING:
@@ -2335,7 +2406,7 @@ static long kvm_dev_ioctl(struct file *filp,
 	case KVM_TRACE_ENABLE:
 	case KVM_TRACE_PAUSE:
 	case KVM_TRACE_DISABLE:
-		r = kvm_trace_ioctl(ioctl, arg);
+		r = -EOPNOTSUPP;
 		break;
 	default:
 		return kvm_arch_dev_ioctl(filp, ioctl, arg);
@@ -2449,26 +2520,71 @@ void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 	}
 }
 
-struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus,
-					  gpa_t addr, int len, int is_write)
-{
-	int i;
-
-	for (i = 0; i < bus->dev_count; i++) {
-		struct kvm_io_device *pos = bus->devs[i];
-
-		if (pos->in_range(pos, addr, len, is_write))
-			return pos;
-	}
-
-	return NULL;
-}
-
-void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev)
-{
-	BUG_ON(bus->dev_count > (NR_IOBUS_DEVS-1));
-
-	bus->devs[bus->dev_count++] = dev;
-}
+/* kvm_io_bus_write - called under kvm->slots_lock */
+int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr,
+		     int len, const void *val)
+{
+	int i;
+	for (i = 0; i < bus->dev_count; i++)
+		if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
+			return 0;
+	return -EOPNOTSUPP;
+}
+
+/* kvm_io_bus_read - called under kvm->slots_lock */
+int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len, void *val)
+{
+	int i;
+	for (i = 0; i < bus->dev_count; i++)
+		if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
+			return 0;
+	return -EOPNOTSUPP;
+}
+
+int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus,
+			    struct kvm_io_device *dev)
+{
+	int ret;
+
+	down_write(&kvm->slots_lock);
+	ret = __kvm_io_bus_register_dev(bus, dev);
+	up_write(&kvm->slots_lock);
+
+	return ret;
+}
+
+/* An unlocked version. Caller must have write lock on slots_lock. */
+int __kvm_io_bus_register_dev(struct kvm_io_bus *bus,
+			      struct kvm_io_device *dev)
+{
+	if (bus->dev_count > NR_IOBUS_DEVS-1)
+		return -ENOSPC;
+
+	bus->devs[bus->dev_count++] = dev;
+
+	return 0;
+}
+
+void kvm_io_bus_unregister_dev(struct kvm *kvm,
+			       struct kvm_io_bus *bus,
+			       struct kvm_io_device *dev)
+{
+	down_write(&kvm->slots_lock);
+	__kvm_io_bus_unregister_dev(bus, dev);
+	up_write(&kvm->slots_lock);
+}
+
+/* An unlocked version. Caller must have write lock on slots_lock. */
+void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus,
+				 struct kvm_io_device *dev)
+{
+	int i;
+
+	for (i = 0; i < bus->dev_count; i++)
+		if (bus->devs[i] == dev) {
+			bus->devs[i] = bus->devs[--bus->dev_count];
+			break;
+		}
+}
 
 static struct notifier_block kvm_cpu_notifier = {
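Registration is now fallible and internally locked: kvm_io_bus_register_dev() takes slots_lock itself and returns -ENOSPC where the old void version would BUG(). Continuing the scratch-device sketch from the iodev.h hunk (still illustrative; scratch_attach is an invented name):

/* Illustrative sketch only: attaching a device to the MMIO bus. */
static int scratch_attach(struct kvm *kvm, struct mmio_scratch *s)
{
	int ret;

	kvm_iodevice_init(&s->dev, &scratch_ops);
	ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &s->dev);
	if (ret < 0)	/* e.g. -ENOSPC once all NR_IOBUS_DEVS slots are used */
		return ret;
	return 0;
}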
@@ -2501,11 +2617,9 @@ static int vcpu_stat_get(void *_offset, u64 *val)
 	*val = 0;
 	spin_lock(&kvm_lock);
 	list_for_each_entry(kvm, &vm_list, vm_list)
-		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-			vcpu = kvm->vcpus[i];
-			if (vcpu)
-				*val += *(u32 *)((void *)vcpu + offset);
-		}
+		kvm_for_each_vcpu(i, vcpu, kvm)
+			*val += *(u32 *)((void *)vcpu + offset);
+
 	spin_unlock(&kvm_lock);
 	return 0;
 }
@@ -2679,15 +2793,15 @@ out_free_0:
 	__free_page(bad_page);
 out:
 	kvm_arch_exit();
-	kvm_exit_debug();
 out_fail:
+	kvm_exit_debug();
 	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_init);
 
 void kvm_exit(void)
 {
-	kvm_trace_cleanup();
+	tracepoint_synchronize_unregister();
 	misc_deregister(&kvm_dev);
 	kmem_cache_destroy(kvm_vcpu_cache);
 	sysdev_unregister(&kvm_sysdev);
--- a/virt/kvm/kvm_trace.c
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- * kvm trace
- *
- * It is designed to allow debugging traces of kvm to be generated
- * on UP / SMP machines.  Each trace entry can be timestamped so that
- * it's possible to reconstruct a chronological record of trace events.
- * The implementation refers to blktrace kernel support.
- *
- * Copyright (c) 2008 Intel Corporation
- * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
- *
- * Authors: Feng(Eric) Liu, eric.e.liu@intel.com
- *
- * Date:    Feb 2008
- */
-
-#include <linux/module.h>
-#include <linux/relay.h>
-#include <linux/debugfs.h>
-#include <linux/ktime.h>
-
-#include <linux/kvm_host.h>
-
-#define KVM_TRACE_STATE_RUNNING		(1 << 0)
-#define KVM_TRACE_STATE_PAUSE		(1 << 1)
-#define KVM_TRACE_STATE_CLEARUP		(1 << 2)
-
-struct kvm_trace {
-	int trace_state;
-	struct rchan *rchan;
-	struct dentry *lost_file;
-	atomic_t lost_records;
-};
-static struct kvm_trace *kvm_trace;
-
-struct kvm_trace_probe {
-	const char *name;
-	const char *format;
-	u32 timestamp_in;
-	marker_probe_func *probe_func;
-};
-
-static inline int calc_rec_size(int timestamp, int extra)
-{
-	int rec_size = KVM_TRC_HEAD_SIZE;
-
-	rec_size += extra;
-	return timestamp ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size;
-}
-
-static void kvm_add_trace(void *probe_private, void *call_data,
-			  const char *format, va_list *args)
-{
-	struct kvm_trace_probe *p = probe_private;
-	struct kvm_trace *kt = kvm_trace;
-	struct kvm_trace_rec rec;
-	struct kvm_vcpu *vcpu;
-	int    i, size;
-	u32    extra;
-
-	if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING))
-		return;
-
-	rec.rec_val	= TRACE_REC_EVENT_ID(va_arg(*args, u32));
-	vcpu		= va_arg(*args, struct kvm_vcpu *);
-	rec.pid		= current->tgid;
-	rec.vcpu_id	= vcpu->vcpu_id;
-
-	extra		= va_arg(*args, u32);
-	WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX));
-	extra		= min_t(u32, extra, KVM_TRC_EXTRA_MAX);
-
-	rec.rec_val |= TRACE_REC_TCS(p->timestamp_in)
-			| TRACE_REC_NUM_DATA_ARGS(extra);
-
-	if (p->timestamp_in) {
-		rec.u.timestamp.timestamp = ktime_to_ns(ktime_get());
-
-		for (i = 0; i < extra; i++)
-			rec.u.timestamp.extra_u32[i] = va_arg(*args, u32);
-	} else {
-		for (i = 0; i < extra; i++)
-			rec.u.notimestamp.extra_u32[i] = va_arg(*args, u32);
-	}
-
-	size = calc_rec_size(p->timestamp_in, extra * sizeof(u32));
-	relay_write(kt->rchan, &rec, size);
-}
-
-static struct kvm_trace_probe kvm_trace_probes[] = {
-	{ "kvm_trace_entryexit", "%u %p %u %u %u %u %u %u", 1, kvm_add_trace },
-	{ "kvm_trace_handler", "%u %p %u %u %u %u %u %u", 0, kvm_add_trace },
-};
-
-static int lost_records_get(void *data, u64 *val)
-{
-	struct kvm_trace *kt = data;
-
-	*val = atomic_read(&kt->lost_records);
-	return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(kvm_trace_lost_ops, lost_records_get, NULL, "%llu\n");
-
-/*
- *  The relay channel is used in "no-overwrite" mode, it keeps trace of how
- *  many times we encountered a full subbuffer, to tell user space app the
- *  lost records there were.
- */
-static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
-				     void *prev_subbuf, size_t prev_padding)
-{
-	struct kvm_trace *kt;
-
-	if (!relay_buf_full(buf)) {
-		if (!prev_subbuf) {
-			/*
-			 * executed only once when the channel is opened
-			 * save metadata as first record
-			 */
-			subbuf_start_reserve(buf, sizeof(u32));
-			*(u32 *)subbuf = 0x12345678;
-		}
-
-		return 1;
-	}
-
-	kt = buf->chan->private_data;
-	atomic_inc(&kt->lost_records);
-
-	return 0;
-}
-
-static struct dentry *kvm_create_buf_file_callack(const char *filename,
-						 struct dentry *parent,
-						 int mode,
-						 struct rchan_buf *buf,
-						 int *is_global)
-{
-	return debugfs_create_file(filename, mode, parent, buf,
-				   &relay_file_operations);
-}
-
-static int kvm_remove_buf_file_callback(struct dentry *dentry)
-{
-	debugfs_remove(dentry);
-	return 0;
-}
-
-static struct rchan_callbacks kvm_relay_callbacks = {
-	.subbuf_start		= kvm_subbuf_start_callback,
-	.create_buf_file	= kvm_create_buf_file_callack,
-	.remove_buf_file	= kvm_remove_buf_file_callback,
-};
-
-static int do_kvm_trace_enable(struct kvm_user_trace_setup *kuts)
-{
-	struct kvm_trace *kt;
-	int i, r = -ENOMEM;
-
-	if (!kuts->buf_size || !kuts->buf_nr)
-		return -EINVAL;
-
-	kt = kzalloc(sizeof(*kt), GFP_KERNEL);
-	if (!kt)
-		goto err;
-
-	r = -EIO;
-	atomic_set(&kt->lost_records, 0);
-	kt->lost_file = debugfs_create_file("lost_records", 0444, kvm_debugfs_dir,
-					    kt, &kvm_trace_lost_ops);
-	if (!kt->lost_file)
-		goto err;
-
-	kt->rchan = relay_open("trace", kvm_debugfs_dir, kuts->buf_size,
-				kuts->buf_nr, &kvm_relay_callbacks, kt);
-	if (!kt->rchan)
-		goto err;
-
-	kvm_trace = kt;
-
-	for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
-		struct kvm_trace_probe *p = &kvm_trace_probes[i];
-
-		r = marker_probe_register(p->name, p->format, p->probe_func, p);
-		if (r)
-			printk(KERN_INFO "Unable to register probe %s\n",
-			       p->name);
-	}
-
-	kvm_trace->trace_state = KVM_TRACE_STATE_RUNNING;
-
-	return 0;
-err:
-	if (kt) {
-		if (kt->lost_file)
-			debugfs_remove(kt->lost_file);
-		if (kt->rchan)
-			relay_close(kt->rchan);
-		kfree(kt);
-	}
-	return r;
-}
-
-static int kvm_trace_enable(char __user *arg)
-{
-	struct kvm_user_trace_setup kuts;
-	int ret;
-
-	ret = copy_from_user(&kuts, arg, sizeof(kuts));
-	if (ret)
-		return -EFAULT;
-
-	ret = do_kvm_trace_enable(&kuts);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-static int kvm_trace_pause(void)
-{
-	struct kvm_trace *kt = kvm_trace;
-	int r = -EINVAL;
-
-	if (kt == NULL)
-		return r;
-
-	if (kt->trace_state == KVM_TRACE_STATE_RUNNING) {
-		kt->trace_state = KVM_TRACE_STATE_PAUSE;
-		relay_flush(kt->rchan);
-		r = 0;
-	}
-
-	return r;
-}
-
-void kvm_trace_cleanup(void)
-{
-	struct kvm_trace *kt = kvm_trace;
-	int i;
-
-	if (kt == NULL)
-		return;
-
-	if (kt->trace_state == KVM_TRACE_STATE_RUNNING ||
-	    kt->trace_state == KVM_TRACE_STATE_PAUSE) {
-
-		kt->trace_state = KVM_TRACE_STATE_CLEARUP;
-
-		for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
-			struct kvm_trace_probe *p = &kvm_trace_probes[i];
-			marker_probe_unregister(p->name, p->probe_func, p);
-		}
-		marker_synchronize_unregister();
-
-		relay_close(kt->rchan);
-		debugfs_remove(kt->lost_file);
-		kfree(kt);
-	}
-}
-
-int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg)
-{
-	void __user *argp = (void __user *)arg;
-	long r = -EINVAL;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
-	switch (ioctl) {
-	case KVM_TRACE_ENABLE:
-		r = kvm_trace_enable(argp);
-		break;
-	case KVM_TRACE_PAUSE:
-		r = kvm_trace_pause();
-		break;
-	case KVM_TRACE_DISABLE:
-		r = 0;
-		kvm_trace_cleanup();
-		break;
-	}
-
-	return r;
-}
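The relay/marker tracer deleted here is superseded by the generic tracepoints seen earlier in this merge: kvm_main.c instantiates them once with CREATE_TRACE_POINTS, and call sites fire the events directly. A sketch of the replacement pattern, using only calls that appear in the hunks above:

/* Instantiate the events once (as kvm_main.c now does): */
#define CREATE_TRACE_POINTS
#include <trace/events/kvm.h>

/* ...then call sites simply fire them, as irq_comm.c now does: */
	trace_kvm_set_irq(irq, level, irq_source_id);
	trace_kvm_ack_irq(irqchip, pin);
	trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);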