Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (106 commits)
  KVM: Replace enum by #define
  KVM: Skip pio instruction when it is emulated, not executed
  KVM: x86 emulator: popf
  KVM: x86 emulator: fix src, dst value initialization
  KVM: x86 emulator: jmp abs
  KVM: x86 emulator: lea
  KVM: X86 emulator: jump conditional short
  KVM: x86 emulator: imlpement jump conditional relative
  KVM: x86 emulator: sort opcodes into ascending order
  KVM: Improve emulation failure reporting
  KVM: x86 emulator: pushf
  KVM: x86 emulator: call near
  KVM: x86 emulator: push imm8
  KVM: VMX: Fix exit qualification width on i386
  KVM: Move main vcpu loop into subarch independent code
  KVM: VMX: Move vm entry failure handling to the exit handler
  KVM: MMU: Don't do GFP_NOWAIT allocations
  KVM: Rename kvm_arch_ops to kvm_x86_ops
  KVM: Simplify memory allocation
  KVM: Hoist SVM's get_cs_db_l_bits into core code.
  ...
This commit is contained in:
Linus Torvalds 2007-10-13 10:02:11 -07:00
commit 3749c66c67
20 changed files with 4861 additions and 1888 deletions

View file

@ -17,6 +17,7 @@ if VIRTUALIZATION
config KVM
tristate "Kernel-based Virtual Machine (KVM) support"
depends on X86 && EXPERIMENTAL
select PREEMPT_NOTIFIERS
select ANON_INODES
---help---
Support hosting fully virtualized guest machines using hardware

View file

@ -2,7 +2,7 @@
# Makefile for Kernel-based Virtual Machine module
#
kvm-objs := kvm_main.o mmu.o x86_emulate.o
kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o lapic.o ioapic.o
obj-$(CONFIG_KVM) += kvm.o
kvm-intel-objs = vmx.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o

450
drivers/kvm/i8259.c Normal file
View file

@ -0,0 +1,450 @@
/*
* 8259 interrupt controller emulation
*
* Copyright (c) 2003-2004 Fabrice Bellard
* Copyright (c) 2007 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Authors:
* Yaozu (Eddie) Dong <Eddie.dong@intel.com>
* Port from Qemu.
*/
#include <linux/mm.h>
#include "irq.h"
/*
* set irq level. If an edge is detected, then the IRR is set to 1
*/
static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level)
{
int mask;
mask = 1 << irq;
if (s->elcr & mask) /* level triggered */
if (level) {
s->irr |= mask;
s->last_irr |= mask;
} else {
s->irr &= ~mask;
s->last_irr &= ~mask;
}
else /* edge triggered */
if (level) {
if ((s->last_irr & mask) == 0)
s->irr |= mask;
s->last_irr |= mask;
} else
s->last_irr &= ~mask;
}
/*
* return the highest priority found in mask (highest = smallest
* number). Return 8 if no irq
*/
static inline int get_priority(struct kvm_kpic_state *s, int mask)
{
int priority;
if (mask == 0)
return 8;
priority = 0;
while ((mask & (1 << ((priority + s->priority_add) & 7))) == 0)
priority++;
return priority;
}
/*
* return the pic wanted interrupt. return -1 if none
*/
static int pic_get_irq(struct kvm_kpic_state *s)
{
int mask, cur_priority, priority;
mask = s->irr & ~s->imr;
priority = get_priority(s, mask);
if (priority == 8)
return -1;
/*
* compute current priority. If special fully nested mode on the
* master, the IRQ coming from the slave is not taken into account
* for the priority computation.
*/
mask = s->isr;
if (s->special_fully_nested_mode && s == &s->pics_state->pics[0])
mask &= ~(1 << 2);
cur_priority = get_priority(s, mask);
if (priority < cur_priority)
/*
* higher priority found: an irq should be generated
*/
return (priority + s->priority_add) & 7;
else
return -1;
}
/*
* raise irq to CPU if necessary. must be called every time the active
* irq may change
*/
static void pic_update_irq(struct kvm_pic *s)
{
int irq2, irq;
irq2 = pic_get_irq(&s->pics[1]);
if (irq2 >= 0) {
/*
* if irq request by slave pic, signal master PIC
*/
pic_set_irq1(&s->pics[0], 2, 1);
pic_set_irq1(&s->pics[0], 2, 0);
}
irq = pic_get_irq(&s->pics[0]);
if (irq >= 0)
s->irq_request(s->irq_request_opaque, 1);
else
s->irq_request(s->irq_request_opaque, 0);
}
void kvm_pic_update_irq(struct kvm_pic *s)
{
pic_update_irq(s);
}
void kvm_pic_set_irq(void *opaque, int irq, int level)
{
struct kvm_pic *s = opaque;
pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
pic_update_irq(s);
}
/*
* acknowledge interrupt 'irq'
*/
static inline void pic_intack(struct kvm_kpic_state *s, int irq)
{
if (s->auto_eoi) {
if (s->rotate_on_auto_eoi)
s->priority_add = (irq + 1) & 7;
} else
s->isr |= (1 << irq);
/*
* We don't clear a level sensitive interrupt here
*/
if (!(s->elcr & (1 << irq)))
s->irr &= ~(1 << irq);
}
int kvm_pic_read_irq(struct kvm_pic *s)
{
int irq, irq2, intno;
irq = pic_get_irq(&s->pics[0]);
if (irq >= 0) {
pic_intack(&s->pics[0], irq);
if (irq == 2) {
irq2 = pic_get_irq(&s->pics[1]);
if (irq2 >= 0)
pic_intack(&s->pics[1], irq2);
else
/*
* spurious IRQ on slave controller
*/
irq2 = 7;
intno = s->pics[1].irq_base + irq2;
irq = irq2 + 8;
} else
intno = s->pics[0].irq_base + irq;
} else {
/*
* spurious IRQ on host controller
*/
irq = 7;
intno = s->pics[0].irq_base + irq;
}
pic_update_irq(s);
return intno;
}
static void pic_reset(void *opaque)
{
struct kvm_kpic_state *s = opaque;
s->last_irr = 0;
s->irr = 0;
s->imr = 0;
s->isr = 0;
s->priority_add = 0;
s->irq_base = 0;
s->read_reg_select = 0;
s->poll = 0;
s->special_mask = 0;
s->init_state = 0;
s->auto_eoi = 0;
s->rotate_on_auto_eoi = 0;
s->special_fully_nested_mode = 0;
s->init4 = 0;
}
static void pic_ioport_write(void *opaque, u32 addr, u32 val)
{
struct kvm_kpic_state *s = opaque;
int priority, cmd, irq;
addr &= 1;
if (addr == 0) {
if (val & 0x10) {
pic_reset(s); /* init */
/*
* deassert a pending interrupt
*/
s->pics_state->irq_request(s->pics_state->
irq_request_opaque, 0);
s->init_state = 1;
s->init4 = val & 1;
if (val & 0x02)
printk(KERN_ERR "single mode not supported");
if (val & 0x08)
printk(KERN_ERR
"level sensitive irq not supported");
} else if (val & 0x08) {
if (val & 0x04)
s->poll = 1;
if (val & 0x02)
s->read_reg_select = val & 1;
if (val & 0x40)
s->special_mask = (val >> 5) & 1;
} else {
cmd = val >> 5;
switch (cmd) {
case 0:
case 4:
s->rotate_on_auto_eoi = cmd >> 2;
break;
case 1: /* end of interrupt */
case 5:
priority = get_priority(s, s->isr);
if (priority != 8) {
irq = (priority + s->priority_add) & 7;
s->isr &= ~(1 << irq);
if (cmd == 5)
s->priority_add = (irq + 1) & 7;
pic_update_irq(s->pics_state);
}
break;
case 3:
irq = val & 7;
s->isr &= ~(1 << irq);
pic_update_irq(s->pics_state);
break;
case 6:
s->priority_add = (val + 1) & 7;
pic_update_irq(s->pics_state);
break;
case 7:
irq = val & 7;
s->isr &= ~(1 << irq);
s->priority_add = (irq + 1) & 7;
pic_update_irq(s->pics_state);
break;
default:
break; /* no operation */
}
}
} else
switch (s->init_state) {
case 0: /* normal mode */
s->imr = val;
pic_update_irq(s->pics_state);
break;
case 1:
s->irq_base = val & 0xf8;
s->init_state = 2;
break;
case 2:
if (s->init4)
s->init_state = 3;
else
s->init_state = 0;
break;
case 3:
s->special_fully_nested_mode = (val >> 4) & 1;
s->auto_eoi = (val >> 1) & 1;
s->init_state = 0;
break;
}
}
static u32 pic_poll_read(struct kvm_kpic_state *s, u32 addr1)
{
int ret;
ret = pic_get_irq(s);
if (ret >= 0) {
if (addr1 >> 7) {
s->pics_state->pics[0].isr &= ~(1 << 2);
s->pics_state->pics[0].irr &= ~(1 << 2);
}
s->irr &= ~(1 << ret);
s->isr &= ~(1 << ret);
if (addr1 >> 7 || ret != 2)
pic_update_irq(s->pics_state);
} else {
ret = 0x07;
pic_update_irq(s->pics_state);
}
return ret;
}
static u32 pic_ioport_read(void *opaque, u32 addr1)
{
struct kvm_kpic_state *s = opaque;
unsigned int addr;
int ret;
addr = addr1;
addr &= 1;
if (s->poll) {
ret = pic_poll_read(s, addr1);
s->poll = 0;
} else
if (addr == 0)
if (s->read_reg_select)
ret = s->isr;
else
ret = s->irr;
else
ret = s->imr;
return ret;
}
static void elcr_ioport_write(void *opaque, u32 addr, u32 val)
{
struct kvm_kpic_state *s = opaque;
s->elcr = val & s->elcr_mask;
}
static u32 elcr_ioport_read(void *opaque, u32 addr1)
{
struct kvm_kpic_state *s = opaque;
return s->elcr;
}
static int picdev_in_range(struct kvm_io_device *this, gpa_t addr)
{
switch (addr) {
case 0x20:
case 0x21:
case 0xa0:
case 0xa1:
case 0x4d0:
case 0x4d1:
return 1;
default:
return 0;
}
}
static void picdev_write(struct kvm_io_device *this,
gpa_t addr, int len, const void *val)
{
struct kvm_pic *s = this->private;
unsigned char data = *(unsigned char *)val;
if (len != 1) {
if (printk_ratelimit())
printk(KERN_ERR "PIC: non byte write\n");
return;
}
switch (addr) {
case 0x20:
case 0x21:
case 0xa0:
case 0xa1:
pic_ioport_write(&s->pics[addr >> 7], addr, data);
break;
case 0x4d0:
case 0x4d1:
elcr_ioport_write(&s->pics[addr & 1], addr, data);
break;
}
}
static void picdev_read(struct kvm_io_device *this,
gpa_t addr, int len, void *val)
{
struct kvm_pic *s = this->private;
unsigned char data = 0;
if (len != 1) {
if (printk_ratelimit())
printk(KERN_ERR "PIC: non byte read\n");
return;
}
switch (addr) {
case 0x20:
case 0x21:
case 0xa0:
case 0xa1:
data = pic_ioport_read(&s->pics[addr >> 7], addr);
break;
case 0x4d0:
case 0x4d1:
data = elcr_ioport_read(&s->pics[addr & 1], addr);
break;
}
*(unsigned char *)val = data;
}
/*
* callback when PIC0 irq status changed
*/
static void pic_irq_request(void *opaque, int level)
{
struct kvm *kvm = opaque;
struct kvm_vcpu *vcpu = kvm->vcpus[0];
pic_irqchip(kvm)->output = level;
if (vcpu)
kvm_vcpu_kick(vcpu);
}
struct kvm_pic *kvm_create_pic(struct kvm *kvm)
{
struct kvm_pic *s;
s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
if (!s)
return NULL;
s->pics[0].elcr_mask = 0xf8;
s->pics[1].elcr_mask = 0xde;
s->irq_request = pic_irq_request;
s->irq_request_opaque = kvm;
s->pics[0].pics_state = s;
s->pics[1].pics_state = s;
/*
* Initialize PIO device
*/
s->dev.read = picdev_read;
s->dev.write = picdev_write;
s->dev.in_range = picdev_in_range;
s->dev.private = s;
kvm_io_bus_register_dev(&kvm->pio_bus, &s->dev);
return s;
}

388
drivers/kvm/ioapic.c Normal file
View file

@ -0,0 +1,388 @@
/*
* Copyright (C) 2001 MandrakeSoft S.A.
*
* MandrakeSoft S.A.
* 43, rue d'Aboukir
* 75002 Paris - France
* http://www.linux-mandrake.com/
* http://www.mandrakesoft.com/
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Yunhong Jiang <yunhong.jiang@intel.com>
* Yaozu (Eddie) Dong <eddie.dong@intel.com>
* Based on Xen 3.1 code.
*/
#include "kvm.h"
#include <linux/kvm.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/smp.h>
#include <linux/hrtimer.h>
#include <linux/io.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/page.h>
#include <asm/current.h>
#include <asm/apicdef.h>
#include <asm/io_apic.h>
#include "irq.h"
/* #define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
#define ioapic_debug(fmt, arg...)
static void ioapic_deliver(struct kvm_ioapic *vioapic, int irq);
static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
unsigned long addr,
unsigned long length)
{
unsigned long result = 0;
switch (ioapic->ioregsel) {
case IOAPIC_REG_VERSION:
result = ((((IOAPIC_NUM_PINS - 1) & 0xff) << 16)
| (IOAPIC_VERSION_ID & 0xff));
break;
case IOAPIC_REG_APIC_ID:
case IOAPIC_REG_ARB_ID:
result = ((ioapic->id & 0xf) << 24);
break;
default:
{
u32 redir_index = (ioapic->ioregsel - 0x10) >> 1;
u64 redir_content;
ASSERT(redir_index < IOAPIC_NUM_PINS);
redir_content = ioapic->redirtbl[redir_index].bits;
result = (ioapic->ioregsel & 0x1) ?
(redir_content >> 32) & 0xffffffff :
redir_content & 0xffffffff;
break;
}
}
return result;
}
static void ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
{
union ioapic_redir_entry *pent;
pent = &ioapic->redirtbl[idx];
if (!pent->fields.mask) {
ioapic_deliver(ioapic, idx);
if (pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
pent->fields.remote_irr = 1;
}
if (!pent->fields.trig_mode)
ioapic->irr &= ~(1 << idx);
}
static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
{
unsigned index;
switch (ioapic->ioregsel) {
case IOAPIC_REG_VERSION:
/* Writes are ignored. */
break;
case IOAPIC_REG_APIC_ID:
ioapic->id = (val >> 24) & 0xf;
break;
case IOAPIC_REG_ARB_ID:
break;
default:
index = (ioapic->ioregsel - 0x10) >> 1;
ioapic_debug("change redir index %x val %x", index, val);
if (index >= IOAPIC_NUM_PINS)
return;
if (ioapic->ioregsel & 1) {
ioapic->redirtbl[index].bits &= 0xffffffff;
ioapic->redirtbl[index].bits |= (u64) val << 32;
} else {
ioapic->redirtbl[index].bits &= ~0xffffffffULL;
ioapic->redirtbl[index].bits |= (u32) val;
ioapic->redirtbl[index].fields.remote_irr = 0;
}
if (ioapic->irr & (1 << index))
ioapic_service(ioapic, index);
break;
}
}
static void ioapic_inj_irq(struct kvm_ioapic *ioapic,
struct kvm_lapic *target,
u8 vector, u8 trig_mode, u8 delivery_mode)
{
ioapic_debug("irq %d trig %d deliv %d", vector, trig_mode,
delivery_mode);
ASSERT((delivery_mode == dest_Fixed) ||
(delivery_mode == dest_LowestPrio));
kvm_apic_set_irq(target, vector, trig_mode);
}
static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
u8 dest_mode)
{
u32 mask = 0;
int i;
struct kvm *kvm = ioapic->kvm;
struct kvm_vcpu *vcpu;
ioapic_debug("dest %d dest_mode %d", dest, dest_mode);
if (dest_mode == 0) { /* Physical mode. */
if (dest == 0xFF) { /* Broadcast. */
for (i = 0; i < KVM_MAX_VCPUS; ++i)
if (kvm->vcpus[i] && kvm->vcpus[i]->apic)
mask |= 1 << i;
return mask;
}
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
vcpu = kvm->vcpus[i];
if (!vcpu)
continue;
if (kvm_apic_match_physical_addr(vcpu->apic, dest)) {
if (vcpu->apic)
mask = 1 << i;
break;
}
}
} else if (dest != 0) /* Logical mode, MDA non-zero. */
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
vcpu = kvm->vcpus[i];
if (!vcpu)
continue;
if (vcpu->apic &&
kvm_apic_match_logical_addr(vcpu->apic, dest))
mask |= 1 << vcpu->vcpu_id;
}
ioapic_debug("mask %x", mask);
return mask;
}
static void ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
{
u8 dest = ioapic->redirtbl[irq].fields.dest_id;
u8 dest_mode = ioapic->redirtbl[irq].fields.dest_mode;
u8 delivery_mode = ioapic->redirtbl[irq].fields.delivery_mode;
u8 vector = ioapic->redirtbl[irq].fields.vector;
u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode;
u32 deliver_bitmask;
struct kvm_lapic *target;
struct kvm_vcpu *vcpu;
int vcpu_id;
ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
"vector=%x trig_mode=%x",
dest, dest_mode, delivery_mode, vector, trig_mode);
deliver_bitmask = ioapic_get_delivery_bitmask(ioapic, dest, dest_mode);
if (!deliver_bitmask) {
ioapic_debug("no target on destination");
return;
}
switch (delivery_mode) {
case dest_LowestPrio:
target =
kvm_apic_round_robin(ioapic->kvm, vector, deliver_bitmask);
if (target != NULL)
ioapic_inj_irq(ioapic, target, vector,
trig_mode, delivery_mode);
else
ioapic_debug("null round robin: "
"mask=%x vector=%x delivery_mode=%x",
deliver_bitmask, vector, dest_LowestPrio);
break;
case dest_Fixed:
for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
if (!(deliver_bitmask & (1 << vcpu_id)))
continue;
deliver_bitmask &= ~(1 << vcpu_id);
vcpu = ioapic->kvm->vcpus[vcpu_id];
if (vcpu) {
target = vcpu->apic;
ioapic_inj_irq(ioapic, target, vector,
trig_mode, delivery_mode);
}
}
break;
/* TODO: NMI */
default:
printk(KERN_WARNING "Unsupported delivery mode %d\n",
delivery_mode);
break;
}
}
void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
{
u32 old_irr = ioapic->irr;
u32 mask = 1 << irq;
union ioapic_redir_entry entry;
if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
entry = ioapic->redirtbl[irq];
level ^= entry.fields.polarity;
if (!level)
ioapic->irr &= ~mask;
else {
ioapic->irr |= mask;
if ((!entry.fields.trig_mode && old_irr != ioapic->irr)
|| !entry.fields.remote_irr)
ioapic_service(ioapic, irq);
}
}
}
static int get_eoi_gsi(struct kvm_ioapic *ioapic, int vector)
{
int i;
for (i = 0; i < IOAPIC_NUM_PINS; i++)
if (ioapic->redirtbl[i].fields.vector == vector)
return i;
return -1;
}
void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
{
struct kvm_ioapic *ioapic = kvm->vioapic;
union ioapic_redir_entry *ent;
int gsi;
gsi = get_eoi_gsi(ioapic, vector);
if (gsi == -1) {
printk(KERN_WARNING "Can't find redir item for %d EOI\n",
vector);
return;
}
ent = &ioapic->redirtbl[gsi];
ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
ent->fields.remote_irr = 0;
if (!ent->fields.mask && (ioapic->irr & (1 << gsi)))
ioapic_deliver(ioapic, gsi);
}
static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr)
{
struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
return ((addr >= ioapic->base_address &&
(addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
}
static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
void *val)
{
struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
u32 result;
ioapic_debug("addr %lx", (unsigned long)addr);
ASSERT(!(addr & 0xf)); /* check alignment */
addr &= 0xff;
switch (addr) {
case IOAPIC_REG_SELECT:
result = ioapic->ioregsel;
break;
case IOAPIC_REG_WINDOW:
result = ioapic_read_indirect(ioapic, addr, len);
break;
default:
result = 0;
break;
}
switch (len) {
case 8:
*(u64 *) val = result;
break;
case 1:
case 2:
case 4:
memcpy(val, (char *)&result, len);
break;
default:
printk(KERN_WARNING "ioapic: wrong length %d\n", len);
}
}
static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
const void *val)
{
struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
u32 data;
ioapic_debug("ioapic_mmio_write addr=%lx len=%d val=%p\n",
addr, len, val);
ASSERT(!(addr & 0xf)); /* check alignment */
if (len == 4 || len == 8)
data = *(u32 *) val;
else {
printk(KERN_WARNING "ioapic: Unsupported size %d\n", len);
return;
}
addr &= 0xff;
switch (addr) {
case IOAPIC_REG_SELECT:
ioapic->ioregsel = data;
break;
case IOAPIC_REG_WINDOW:
ioapic_write_indirect(ioapic, data);
break;
default:
break;
}
}
int kvm_ioapic_init(struct kvm *kvm)
{
struct kvm_ioapic *ioapic;
int i;
ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
if (!ioapic)
return -ENOMEM;
kvm->vioapic = ioapic;
for (i = 0; i < IOAPIC_NUM_PINS; i++)
ioapic->redirtbl[i].fields.mask = 1;
ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
ioapic->dev.read = ioapic_mmio_read;
ioapic->dev.write = ioapic_mmio_write;
ioapic->dev.in_range = ioapic_in_range;
ioapic->dev.private = ioapic;
ioapic->kvm = kvm;
kvm_io_bus_register_dev(&kvm->mmio_bus, &ioapic->dev);
return 0;
}

98
drivers/kvm/irq.c Normal file
View file

@ -0,0 +1,98 @@
/*
* irq.c: API for in kernel interrupt controller
* Copyright (c) 2007, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59 Temple
* Place - Suite 330, Boston, MA 02111-1307 USA.
* Authors:
* Yaozu (Eddie) Dong <Eddie.dong@intel.com>
*
*/
#include <linux/module.h>
#include "kvm.h"
#include "irq.h"
/*
* check if there is pending interrupt without
* intack.
*/
int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
{
struct kvm_pic *s;
if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */
if (kvm_apic_accept_pic_intr(v)) {
s = pic_irqchip(v->kvm); /* PIC */
return s->output;
} else
return 0;
}
return 1;
}
EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
/*
* Read pending interrupt vector and intack.
*/
int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
{
struct kvm_pic *s;
int vector;
vector = kvm_get_apic_interrupt(v); /* APIC */
if (vector == -1) {
if (kvm_apic_accept_pic_intr(v)) {
s = pic_irqchip(v->kvm);
s->output = 0; /* PIC */
vector = kvm_pic_read_irq(s);
}
}
return vector;
}
EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
static void vcpu_kick_intr(void *info)
{
#ifdef DEBUG
struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu);
#endif
}
void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
{
int ipi_pcpu = vcpu->cpu;
if (waitqueue_active(&vcpu->wq)) {
wake_up_interruptible(&vcpu->wq);
++vcpu->stat.halt_wakeup;
}
if (vcpu->guest_mode)
smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0);
}
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
{
kvm_inject_apic_timer_irqs(vcpu);
/* TODO: PIT, RTC etc. */
}
EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
{
kvm_apic_timer_intr_post(vcpu, vec);
/* TODO: PIT, RTC etc. */
}
EXPORT_SYMBOL_GPL(kvm_timer_intr_post);

165
drivers/kvm/irq.h Normal file
View file

@ -0,0 +1,165 @@
/*
* irq.h: in kernel interrupt controller related definitions
* Copyright (c) 2007, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59 Temple
* Place - Suite 330, Boston, MA 02111-1307 USA.
* Authors:
* Yaozu (Eddie) Dong <Eddie.dong@intel.com>
*
*/
#ifndef __IRQ_H
#define __IRQ_H
#include "kvm.h"
typedef void irq_request_func(void *opaque, int level);
struct kvm_kpic_state {
u8 last_irr; /* edge detection */
u8 irr; /* interrupt request register */
u8 imr; /* interrupt mask register */
u8 isr; /* interrupt service register */
u8 priority_add; /* highest irq priority */
u8 irq_base;
u8 read_reg_select;
u8 poll;
u8 special_mask;
u8 init_state;
u8 auto_eoi;
u8 rotate_on_auto_eoi;
u8 special_fully_nested_mode;
u8 init4; /* true if 4 byte init */
u8 elcr; /* PIIX edge/trigger selection */
u8 elcr_mask;
struct kvm_pic *pics_state;
};
struct kvm_pic {
struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
irq_request_func *irq_request;
void *irq_request_opaque;
int output; /* intr from master PIC */
struct kvm_io_device dev;
};
struct kvm_pic *kvm_create_pic(struct kvm *kvm);
void kvm_pic_set_irq(void *opaque, int irq, int level);
int kvm_pic_read_irq(struct kvm_pic *s);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
void kvm_pic_update_irq(struct kvm_pic *s);
#define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS
#define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */
#define IOAPIC_EDGE_TRIG 0
#define IOAPIC_LEVEL_TRIG 1
#define IOAPIC_DEFAULT_BASE_ADDRESS 0xfec00000
#define IOAPIC_MEM_LENGTH 0x100
/* Direct registers. */
#define IOAPIC_REG_SELECT 0x00
#define IOAPIC_REG_WINDOW 0x10
#define IOAPIC_REG_EOI 0x40 /* IA64 IOSAPIC only */
/* Indirect registers. */
#define IOAPIC_REG_APIC_ID 0x00 /* x86 IOAPIC only */
#define IOAPIC_REG_VERSION 0x01
#define IOAPIC_REG_ARB_ID 0x02 /* x86 IOAPIC only */
struct kvm_ioapic {
u64 base_address;
u32 ioregsel;
u32 id;
u32 irr;
u32 pad;
union ioapic_redir_entry {
u64 bits;
struct {
u8 vector;
u8 delivery_mode:3;
u8 dest_mode:1;
u8 delivery_status:1;
u8 polarity:1;
u8 remote_irr:1;
u8 trig_mode:1;
u8 mask:1;
u8 reserve:7;
u8 reserved[4];
u8 dest_id;
} fields;
} redirtbl[IOAPIC_NUM_PINS];
struct kvm_io_device dev;
struct kvm *kvm;
};
struct kvm_lapic {
unsigned long base_address;
struct kvm_io_device dev;
struct {
atomic_t pending;
s64 period; /* unit: ns */
u32 divide_count;
ktime_t last_update;
struct hrtimer dev;
} timer;
struct kvm_vcpu *vcpu;
struct page *regs_page;
void *regs;
};
#ifdef DEBUG
#define ASSERT(x) \
do { \
if (!(x)) { \
printk(KERN_EMERG "assertion failed %s: %d: %s\n", \
__FILE__, __LINE__, #x); \
BUG(); \
} \
} while (0)
#else
#define ASSERT(x) do { } while (0)
#endif
void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
int kvm_create_lapic(struct kvm_vcpu *vcpu);
void kvm_lapic_reset(struct kvm_vcpu *vcpu);
void kvm_free_apic(struct kvm_lapic *apic);
u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
unsigned long bitmap);
u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
void kvm_ioapic_update_eoi(struct kvm *kvm, int vector);
int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
int kvm_apic_set_irq(struct kvm_lapic *apic, u8 vec, u8 trig);
void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
int kvm_ioapic_init(struct kvm *kvm);
void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
#endif

View file

@ -13,60 +13,38 @@
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/preempt.h>
#include <asm/signal.h>
#include "vmx.h"
#include <linux/kvm.h>
#include <linux/kvm_para.h>
#define CR0_PE_MASK (1ULL << 0)
#define CR0_MP_MASK (1ULL << 1)
#define CR0_TS_MASK (1ULL << 3)
#define CR0_NE_MASK (1ULL << 5)
#define CR0_WP_MASK (1ULL << 16)
#define CR0_NW_MASK (1ULL << 29)
#define CR0_CD_MASK (1ULL << 30)
#define CR0_PG_MASK (1ULL << 31)
#define CR3_WPT_MASK (1ULL << 3)
#define CR3_PCD_MASK (1ULL << 4)
#define CR3_RESEVED_BITS 0x07ULL
#define CR3_L_MODE_RESEVED_BITS (~((1ULL << 40) - 1) | 0x0fe7ULL)
#define CR3_FLAGS_MASK ((1ULL << 5) - 1)
#define CR4_VME_MASK (1ULL << 0)
#define CR4_PSE_MASK (1ULL << 4)
#define CR4_PAE_MASK (1ULL << 5)
#define CR4_PGE_MASK (1ULL << 7)
#define CR4_VMXE_MASK (1ULL << 13)
#define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
#define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD))
#define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS|0xFFFFFF0000000000ULL)
#define KVM_GUEST_CR0_MASK \
(CR0_PG_MASK | CR0_PE_MASK | CR0_WP_MASK | CR0_NE_MASK \
| CR0_NW_MASK | CR0_CD_MASK)
(X86_CR0_PG | X86_CR0_PE | X86_CR0_WP | X86_CR0_NE \
| X86_CR0_NW | X86_CR0_CD)
#define KVM_VM_CR0_ALWAYS_ON \
(CR0_PG_MASK | CR0_PE_MASK | CR0_WP_MASK | CR0_NE_MASK | CR0_TS_MASK \
| CR0_MP_MASK)
(X86_CR0_PG | X86_CR0_PE | X86_CR0_WP | X86_CR0_NE | X86_CR0_TS \
| X86_CR0_MP)
#define KVM_GUEST_CR4_MASK \
(CR4_PSE_MASK | CR4_PAE_MASK | CR4_PGE_MASK | CR4_VMXE_MASK | CR4_VME_MASK)
#define KVM_PMODE_VM_CR4_ALWAYS_ON (CR4_VMXE_MASK | CR4_PAE_MASK)
#define KVM_RMODE_VM_CR4_ALWAYS_ON (CR4_VMXE_MASK | CR4_PAE_MASK | CR4_VME_MASK)
(X86_CR4_VME | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_PGE | X86_CR4_VMXE)
#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
#define INVALID_PAGE (~(hpa_t)0)
#define UNMAPPED_GVA (~(gpa_t)0)
#define KVM_MAX_VCPUS 4
#define KVM_ALIAS_SLOTS 4
#define KVM_MEMORY_SLOTS 4
#define KVM_MEMORY_SLOTS 8
#define KVM_NUM_MMU_PAGES 1024
#define KVM_MIN_FREE_MMU_PAGES 5
#define KVM_REFILL_PAGES 25
#define KVM_MAX_CPUID_ENTRIES 40
#define FX_IMAGE_SIZE 512
#define FX_IMAGE_ALIGN 16
#define FX_BUF_SIZE (2 * FX_IMAGE_SIZE + FX_IMAGE_ALIGN)
#define DE_VECTOR 0
#define NM_VECTOR 7
#define DF_VECTOR 8
@ -158,15 +136,8 @@ struct kvm_mmu_page {
};
};
struct vmcs {
u32 revision_id;
u32 abort;
char data[0];
};
#define vmx_msr_entry kvm_msr_entry
struct kvm_vcpu;
extern struct kmem_cache *kvm_vcpu_cache;
/*
* x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
@ -260,6 +231,7 @@ struct kvm_stat {
u32 signal_exits;
u32 irq_window_exits;
u32 halt_exits;
u32 halt_wakeup;
u32 request_irq_exits;
u32 irq_exits;
u32 light_exits;
@ -328,21 +300,17 @@ void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
struct kvm_vcpu {
struct kvm *kvm;
union {
struct vmcs *vmcs;
struct vcpu_svm *svm;
};
struct preempt_notifier preempt_notifier;
int vcpu_id;
struct mutex mutex;
int cpu;
int launched;
u64 host_tsc;
struct kvm_run *run;
int interrupt_window_open;
int guest_mode;
unsigned long requests;
unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
unsigned long irq_pending[NR_IRQ_WORDS];
DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS);
unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */
unsigned long rip; /* needs vcpu_load_rsp_rip() */
@ -357,15 +325,15 @@ struct kvm_vcpu {
u64 pdptrs[4]; /* pae */
u64 shadow_efer;
u64 apic_base;
struct kvm_lapic *apic; /* kernel irqchip context */
#define VCPU_MP_STATE_RUNNABLE 0
#define VCPU_MP_STATE_UNINITIALIZED 1
#define VCPU_MP_STATE_INIT_RECEIVED 2
#define VCPU_MP_STATE_SIPI_RECEIVED 3
#define VCPU_MP_STATE_HALTED 4
int mp_state;
int sipi_vector;
u64 ia32_misc_enable_msr;
int nmsrs;
int save_nmsrs;
int msr_offset_efer;
#ifdef CONFIG_X86_64
int msr_offset_kernel_gs_base;
#endif
struct vmx_msr_entry *guest_msrs;
struct vmx_msr_entry *host_msrs;
struct kvm_mmu mmu;
@ -379,16 +347,10 @@ struct kvm_vcpu {
struct kvm_guest_debug guest_debug;
char fx_buf[FX_BUF_SIZE];
char *host_fx_image;
char *guest_fx_image;
struct i387_fxsave_struct host_fx_image;
struct i387_fxsave_struct guest_fx_image;
int fpu_active;
int guest_fpu_loaded;
struct vmx_host_state {
int loaded;
u16 fs_sel, gs_sel, ldt_sel;
int fs_gs_ldt_reload_needed;
} vmx_host_state;
int mmio_needed;
int mmio_read_completed;
@ -399,6 +361,7 @@ struct kvm_vcpu {
gva_t mmio_fault_cr2;
struct kvm_pio_request pio;
void *pio_data;
wait_queue_head_t wq;
int sigset_active;
sigset_t sigset;
@ -436,7 +399,7 @@ struct kvm_memory_slot {
};
struct kvm {
spinlock_t lock; /* protects everything except vcpus */
struct mutex lock; /* protects everything except vcpus */
int naliases;
struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
int nmemslots;
@ -447,39 +410,59 @@ struct kvm {
struct list_head active_mmu_pages;
int n_free_mmu_pages;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
int nvcpus;
struct kvm_vcpu vcpus[KVM_MAX_VCPUS];
int memory_config_version;
int busy;
struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
unsigned long rmap_overflow;
struct list_head vm_list;
struct file *filp;
struct kvm_io_bus mmio_bus;
struct kvm_io_bus pio_bus;
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
int round_robin_prev_vcpu;
};
static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
{
return kvm->vpic;
}
static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
{
return kvm->vioapic;
}
static inline int irqchip_in_kernel(struct kvm *kvm)
{
return pic_irqchip(kvm) != 0;
}
struct descriptor_table {
u16 limit;
unsigned long base;
} __attribute__((packed));
struct kvm_arch_ops {
struct kvm_x86_ops {
int (*cpu_has_kvm_support)(void); /* __init */
int (*disabled_by_bios)(void); /* __init */
void (*hardware_enable)(void *dummy); /* __init */
void (*hardware_disable)(void *dummy);
void (*check_processor_compatibility)(void *rtn);
int (*hardware_setup)(void); /* __init */
void (*hardware_unsetup)(void); /* __exit */
int (*vcpu_create)(struct kvm_vcpu *vcpu);
/* Create, but do not attach this VCPU */
struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
void (*vcpu_free)(struct kvm_vcpu *vcpu);
void (*vcpu_reset)(struct kvm_vcpu *vcpu);
void (*vcpu_load)(struct kvm_vcpu *vcpu);
void (*prepare_guest_switch)(struct kvm_vcpu *vcpu);
void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
void (*vcpu_put)(struct kvm_vcpu *vcpu);
void (*vcpu_decache)(struct kvm_vcpu *vcpu);
int (*set_guest_debug)(struct kvm_vcpu *vcpu,
struct kvm_debug_guest *dbg);
void (*guest_debug_pre)(struct kvm_vcpu *vcpu);
int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
@ -505,27 +488,43 @@ struct kvm_arch_ops {
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
void (*invlpg)(struct kvm_vcpu *vcpu, gva_t addr);
void (*tlb_flush)(struct kvm_vcpu *vcpu);
void (*inject_page_fault)(struct kvm_vcpu *vcpu,
unsigned long addr, u32 err_code);
void (*inject_gp)(struct kvm_vcpu *vcpu, unsigned err_code);
int (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
int (*vcpu_setup)(struct kvm_vcpu *vcpu);
void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu);
void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
void (*patch_hypercall)(struct kvm_vcpu *vcpu,
unsigned char *hypercall_addr);
int (*get_irq)(struct kvm_vcpu *vcpu);
void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
void (*inject_pending_irq)(struct kvm_vcpu *vcpu);
void (*inject_pending_vectors)(struct kvm_vcpu *vcpu,
struct kvm_run *run);
};
extern struct kvm_arch_ops *kvm_arch_ops;
extern struct kvm_x86_ops *kvm_x86_ops;
/* The guest did something we don't support. */
#define pr_unimpl(vcpu, fmt, ...) \
do { \
if (printk_ratelimit()) \
printk(KERN_ERR "kvm: %i: cpu%i " fmt, \
current->tgid, (vcpu)->vcpu_id , ## __VA_ARGS__); \
} while(0)
#define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt)
#define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt)
int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module);
void kvm_exit_arch(void);
int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
int kvm_init_x86(struct kvm_x86_ops *ops, unsigned int vcpu_size,
struct module *module);
void kvm_exit_x86(void);
int kvm_mmu_module_init(void);
void kvm_mmu_module_exit(void);
@ -545,8 +544,6 @@ static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva);
struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva);
void kvm_emulator_want_group7_invlpg(void);
extern hpa_t bad_page_address;
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
@ -561,6 +558,7 @@ enum emulation_result {
int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run,
unsigned long cr2, u16 error_code);
void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context);
void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
@ -574,9 +572,11 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
struct x86_emulate_ctxt;
int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
int size, unsigned long count, int string, int down,
gva_t address, int rep, unsigned port);
int kvm_emulate_pio (struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
int size, unsigned port);
int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
int size, unsigned long count, int down,
gva_t address, int rep, unsigned port);
void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
int kvm_emulate_halt(struct kvm_vcpu *vcpu);
int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address);
@ -590,34 +590,33 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr0);
void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr0);
void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr0);
unsigned long get_cr8(struct kvm_vcpu *vcpu);
void lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data);
void fx_init(struct kvm_vcpu *vcpu);
void load_msrs(struct vmx_msr_entry *e, int n);
void save_msrs(struct vmx_msr_entry *e, int n);
void kvm_resched(struct kvm_vcpu *vcpu);
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
void kvm_flush_remote_tlbs(struct kvm *kvm);
int kvm_read_guest(struct kvm_vcpu *vcpu,
gva_t addr,
unsigned long size,
void *dest);
int kvm_write_guest(struct kvm_vcpu *vcpu,
gva_t addr,
unsigned long size,
void *data);
int emulator_read_std(unsigned long addr,
void *val,
unsigned int bytes,
struct kvm_vcpu *vcpu);
int emulator_write_emulated(unsigned long addr,
const void *val,
unsigned int bytes,
struct kvm_vcpu *vcpu);
unsigned long segment_base(u16 selector);
void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
const u8 *old, const u8 *new, int bytes);
const u8 *new, int bytes);
int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
int kvm_mmu_load(struct kvm_vcpu *vcpu);
@ -656,17 +655,17 @@ static inline int is_long_mode(struct kvm_vcpu *vcpu)
static inline int is_pae(struct kvm_vcpu *vcpu)
{
return vcpu->cr4 & CR4_PAE_MASK;
return vcpu->cr4 & X86_CR4_PAE;
}
static inline int is_pse(struct kvm_vcpu *vcpu)
{
return vcpu->cr4 & CR4_PSE_MASK;
return vcpu->cr4 & X86_CR4_PSE;
}
static inline int is_paging(struct kvm_vcpu *vcpu)
{
return vcpu->cr0 & CR0_PG_MASK;
return vcpu->cr0 & X86_CR0_PG;
}
static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot)
@ -746,12 +745,12 @@ static inline unsigned long read_msr(unsigned long msr)
}
#endif
static inline void fx_save(void *image)
static inline void fx_save(struct i387_fxsave_struct *image)
{
asm ("fxsave (%0)":: "r" (image));
}
static inline void fx_restore(void *image)
static inline void fx_restore(struct i387_fxsave_struct *image)
{
asm ("fxrstor (%0)":: "r" (image));
}

File diff suppressed because it is too large Load diff

View file

@ -20,7 +20,10 @@ static const u32 host_save_user_msrs[] = {
#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
#define NUM_DB_REGS 4
struct kvm_vcpu;
struct vcpu_svm {
struct kvm_vcpu vcpu;
struct vmcb *vmcb;
unsigned long vmcb_pa;
struct svm_cpu_data *svm_data;

1064
drivers/kvm/lapic.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -158,7 +158,7 @@ static struct kmem_cache *mmu_page_header_cache;
static int is_write_protection(struct kvm_vcpu *vcpu)
{
return vcpu->cr0 & CR0_WP_MASK;
return vcpu->cr0 & X86_CR0_WP;
}
static int is_cpuid_PSE36(void)
@ -202,15 +202,14 @@ static void set_shadow_pte(u64 *sptep, u64 spte)
}
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
struct kmem_cache *base_cache, int min,
gfp_t gfp_flags)
struct kmem_cache *base_cache, int min)
{
void *obj;
if (cache->nobjs >= min)
return 0;
while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
obj = kmem_cache_zalloc(base_cache, gfp_flags);
obj = kmem_cache_zalloc(base_cache, GFP_KERNEL);
if (!obj)
return -ENOMEM;
cache->objects[cache->nobjs++] = obj;
@ -225,14 +224,14 @@ static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
}
static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache,
int min, gfp_t gfp_flags)
int min)
{
struct page *page;
if (cache->nobjs >= min)
return 0;
while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
page = alloc_page(gfp_flags);
page = alloc_page(GFP_KERNEL);
if (!page)
return -ENOMEM;
set_page_private(page, 0);
@ -247,41 +246,25 @@ static void mmu_free_memory_cache_page(struct kvm_mmu_memory_cache *mc)
free_page((unsigned long)mc->objects[--mc->nobjs]);
}
static int __mmu_topup_memory_caches(struct kvm_vcpu *vcpu, gfp_t gfp_flags)
{
int r;
r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache,
pte_chain_cache, 4, gfp_flags);
if (r)
goto out;
r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache,
rmap_desc_cache, 1, gfp_flags);
if (r)
goto out;
r = mmu_topup_memory_cache_page(&vcpu->mmu_page_cache, 4, gfp_flags);
if (r)
goto out;
r = mmu_topup_memory_cache(&vcpu->mmu_page_header_cache,
mmu_page_header_cache, 4, gfp_flags);
out:
return r;
}
static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
{
int r;
r = __mmu_topup_memory_caches(vcpu, GFP_NOWAIT);
kvm_mmu_free_some_pages(vcpu);
if (r < 0) {
spin_unlock(&vcpu->kvm->lock);
kvm_arch_ops->vcpu_put(vcpu);
r = __mmu_topup_memory_caches(vcpu, GFP_KERNEL);
kvm_arch_ops->vcpu_load(vcpu);
spin_lock(&vcpu->kvm->lock);
kvm_mmu_free_some_pages(vcpu);
}
r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache,
pte_chain_cache, 4);
if (r)
goto out;
r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache,
rmap_desc_cache, 1);
if (r)
goto out;
r = mmu_topup_memory_cache_page(&vcpu->mmu_page_cache, 4);
if (r)
goto out;
r = mmu_topup_memory_cache(&vcpu->mmu_page_header_cache,
mmu_page_header_cache, 4);
out:
return r;
}
@ -969,7 +952,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu)
static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
{
++vcpu->stat.tlb_flush;
kvm_arch_ops->tlb_flush(vcpu);
kvm_x86_ops->tlb_flush(vcpu);
}
static void paging_new_cr3(struct kvm_vcpu *vcpu)
@ -982,7 +965,7 @@ static void inject_page_fault(struct kvm_vcpu *vcpu,
u64 addr,
u32 err_code)
{
kvm_arch_ops->inject_page_fault(vcpu, addr, err_code);
kvm_x86_ops->inject_page_fault(vcpu, addr, err_code);
}
static void paging_free(struct kvm_vcpu *vcpu)
@ -1071,15 +1054,15 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
{
int r;
spin_lock(&vcpu->kvm->lock);
mutex_lock(&vcpu->kvm->lock);
r = mmu_topup_memory_caches(vcpu);
if (r)
goto out;
mmu_alloc_roots(vcpu);
kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
kvm_x86_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
kvm_mmu_flush_tlb(vcpu);
out:
spin_unlock(&vcpu->kvm->lock);
mutex_unlock(&vcpu->kvm->lock);
return r;
}
EXPORT_SYMBOL_GPL(kvm_mmu_load);
@ -1124,7 +1107,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
}
void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
const u8 *old, const u8 *new, int bytes)
const u8 *new, int bytes)
{
gfn_t gfn = gpa >> PAGE_SHIFT;
struct kvm_mmu_page *page;

View file

@ -58,7 +58,10 @@ struct guest_walker {
int level;
gfn_t table_gfn[PT_MAX_FULL_LEVELS];
pt_element_t *table;
pt_element_t pte;
pt_element_t *ptep;
struct page *page;
int index;
pt_element_t inherited_ar;
gfn_t gfn;
u32 error_code;
@ -80,11 +83,14 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
pgprintk("%s: addr %lx\n", __FUNCTION__, addr);
walker->level = vcpu->mmu.root_level;
walker->table = NULL;
walker->page = NULL;
walker->ptep = NULL;
root = vcpu->cr3;
#if PTTYPE == 64
if (!is_long_mode(vcpu)) {
walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3];
root = *walker->ptep;
walker->pte = root;
if (!(root & PT_PRESENT_MASK))
goto not_present;
--walker->level;
@ -96,10 +102,11 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
walker->level - 1, table_gfn);
slot = gfn_to_memslot(vcpu->kvm, table_gfn);
hpa = safe_gpa_to_hpa(vcpu, root & PT64_BASE_ADDR_MASK);
walker->table = kmap_atomic(pfn_to_page(hpa >> PAGE_SHIFT), KM_USER0);
walker->page = pfn_to_page(hpa >> PAGE_SHIFT);
walker->table = kmap_atomic(walker->page, KM_USER0);
ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
(vcpu->cr3 & ~(PAGE_MASK | CR3_FLAGS_MASK)) == 0);
(vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK;
@ -108,6 +115,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
hpa_t paddr;
ptep = &walker->table[index];
walker->index = index;
ASSERT(((unsigned long)walker->table & PAGE_MASK) ==
((unsigned long)ptep & PAGE_MASK));
@ -148,16 +156,20 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
walker->inherited_ar &= walker->table[index];
table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
paddr = safe_gpa_to_hpa(vcpu, *ptep & PT_BASE_ADDR_MASK);
kunmap_atomic(walker->table, KM_USER0);
walker->table = kmap_atomic(pfn_to_page(paddr >> PAGE_SHIFT),
KM_USER0);
paddr = safe_gpa_to_hpa(vcpu, table_gfn << PAGE_SHIFT);
walker->page = pfn_to_page(paddr >> PAGE_SHIFT);
walker->table = kmap_atomic(walker->page, KM_USER0);
--walker->level;
walker->table_gfn[walker->level - 1 ] = table_gfn;
pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
walker->level - 1, table_gfn);
}
walker->ptep = ptep;
walker->pte = *ptep;
if (walker->page)
walker->ptep = NULL;
if (walker->table)
kunmap_atomic(walker->table, KM_USER0);
pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep);
return 1;
@ -175,13 +187,9 @@ err:
walker->error_code |= PFERR_USER_MASK;
if (fetch_fault)
walker->error_code |= PFERR_FETCH_MASK;
return 0;
}
static void FNAME(release_walker)(struct guest_walker *walker)
{
if (walker->table)
kunmap_atomic(walker->table, KM_USER0);
return 0;
}
static void FNAME(mark_pagetable_dirty)(struct kvm *kvm,
@ -193,7 +201,7 @@ static void FNAME(mark_pagetable_dirty)(struct kvm *kvm,
static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu,
u64 *shadow_pte,
gpa_t gaddr,
pt_element_t *gpte,
pt_element_t gpte,
u64 access_bits,
int user_fault,
int write_fault,
@ -202,23 +210,34 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu,
gfn_t gfn)
{
hpa_t paddr;
int dirty = *gpte & PT_DIRTY_MASK;
int dirty = gpte & PT_DIRTY_MASK;
u64 spte = *shadow_pte;
int was_rmapped = is_rmap_pte(spte);
pgprintk("%s: spte %llx gpte %llx access %llx write_fault %d"
" user_fault %d gfn %lx\n",
__FUNCTION__, spte, (u64)*gpte, access_bits,
__FUNCTION__, spte, (u64)gpte, access_bits,
write_fault, user_fault, gfn);
if (write_fault && !dirty) {
*gpte |= PT_DIRTY_MASK;
pt_element_t *guest_ent, *tmp = NULL;
if (walker->ptep)
guest_ent = walker->ptep;
else {
tmp = kmap_atomic(walker->page, KM_USER0);
guest_ent = &tmp[walker->index];
}
*guest_ent |= PT_DIRTY_MASK;
if (!walker->ptep)
kunmap_atomic(tmp, KM_USER0);
dirty = 1;
FNAME(mark_pagetable_dirty)(vcpu->kvm, walker);
}
spte |= PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK;
spte |= *gpte & PT64_NX_MASK;
spte |= gpte & PT64_NX_MASK;
if (!dirty)
access_bits &= ~PT_WRITABLE_MASK;
@ -255,7 +274,7 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu,
access_bits &= ~PT_WRITABLE_MASK;
if (is_writeble_pte(spte)) {
spte &= ~PT_WRITABLE_MASK;
kvm_arch_ops->tlb_flush(vcpu);
kvm_x86_ops->tlb_flush(vcpu);
}
if (write_fault)
*ptwrite = 1;
@ -273,13 +292,13 @@ unshadowed:
rmap_add(vcpu, shadow_pte);
}
static void FNAME(set_pte)(struct kvm_vcpu *vcpu, pt_element_t *gpte,
static void FNAME(set_pte)(struct kvm_vcpu *vcpu, pt_element_t gpte,
u64 *shadow_pte, u64 access_bits,
int user_fault, int write_fault, int *ptwrite,
struct guest_walker *walker, gfn_t gfn)
{
access_bits &= *gpte;
FNAME(set_pte_common)(vcpu, shadow_pte, *gpte & PT_BASE_ADDR_MASK,
access_bits &= gpte;
FNAME(set_pte_common)(vcpu, shadow_pte, gpte & PT_BASE_ADDR_MASK,
gpte, access_bits, user_fault, write_fault,
ptwrite, walker, gfn);
}
@ -295,22 +314,22 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK))
return;
pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte);
FNAME(set_pte)(vcpu, &gpte, spte, PT_USER_MASK | PT_WRITABLE_MASK, 0,
FNAME(set_pte)(vcpu, gpte, spte, PT_USER_MASK | PT_WRITABLE_MASK, 0,
0, NULL, NULL,
(gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT);
}
static void FNAME(set_pde)(struct kvm_vcpu *vcpu, pt_element_t *gpde,
static void FNAME(set_pde)(struct kvm_vcpu *vcpu, pt_element_t gpde,
u64 *shadow_pte, u64 access_bits,
int user_fault, int write_fault, int *ptwrite,
struct guest_walker *walker, gfn_t gfn)
{
gpa_t gaddr;
access_bits &= *gpde;
access_bits &= gpde;
gaddr = (gpa_t)gfn << PAGE_SHIFT;
if (PTTYPE == 32 && is_cpuid_PSE36())
gaddr |= (*gpde & PT32_DIR_PSE36_MASK) <<
gaddr |= (gpde & PT32_DIR_PSE36_MASK) <<
(32 - PT32_DIR_PSE36_SHIFT);
FNAME(set_pte_common)(vcpu, shadow_pte, gaddr,
gpde, access_bits, user_fault, write_fault,
@ -328,9 +347,8 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
int level;
u64 *shadow_ent;
u64 *prev_shadow_ent = NULL;
pt_element_t *guest_ent = walker->ptep;
if (!is_present_pte(*guest_ent))
if (!is_present_pte(walker->pte))
return NULL;
shadow_addr = vcpu->mmu.root_hpa;
@ -364,12 +382,12 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
if (level - 1 == PT_PAGE_TABLE_LEVEL
&& walker->level == PT_DIRECTORY_LEVEL) {
metaphysical = 1;
hugepage_access = *guest_ent;
hugepage_access = walker->pte;
hugepage_access &= PT_USER_MASK | PT_WRITABLE_MASK;
if (*guest_ent & PT64_NX_MASK)
if (walker->pte & PT64_NX_MASK)
hugepage_access |= (1 << 2);
hugepage_access >>= PT_WRITABLE_SHIFT;
table_gfn = (*guest_ent & PT_BASE_ADDR_MASK)
table_gfn = (walker->pte & PT_BASE_ADDR_MASK)
>> PAGE_SHIFT;
} else {
metaphysical = 0;
@ -386,12 +404,12 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
}
if (walker->level == PT_DIRECTORY_LEVEL) {
FNAME(set_pde)(vcpu, guest_ent, shadow_ent,
FNAME(set_pde)(vcpu, walker->pte, shadow_ent,
walker->inherited_ar, user_fault, write_fault,
ptwrite, walker, walker->gfn);
} else {
ASSERT(walker->level == PT_PAGE_TABLE_LEVEL);
FNAME(set_pte)(vcpu, guest_ent, shadow_ent,
FNAME(set_pte)(vcpu, walker->pte, shadow_ent,
walker->inherited_ar, user_fault, write_fault,
ptwrite, walker, walker->gfn);
}
@ -442,7 +460,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
if (!r) {
pgprintk("%s: guest page fault\n", __FUNCTION__);
inject_page_fault(vcpu, addr, walker.error_code);
FNAME(release_walker)(&walker);
vcpu->last_pt_write_count = 0; /* reset fork detector */
return 0;
}
@ -452,8 +469,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__,
shadow_pte, *shadow_pte, write_pt);
FNAME(release_walker)(&walker);
if (!write_pt)
vcpu->last_pt_write_count = 0; /* reset fork detector */
@ -482,7 +497,6 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
gpa |= vaddr & ~PAGE_MASK;
}
FNAME(release_walker)(&walker);
return gpa;
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -25,29 +25,36 @@
*
*/
#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004
#define CPU_BASED_USE_TSC_OFFSETING 0x00000008
#define CPU_BASED_HLT_EXITING 0x00000080
#define CPU_BASED_INVDPG_EXITING 0x00000200
#define CPU_BASED_MWAIT_EXITING 0x00000400
#define CPU_BASED_RDPMC_EXITING 0x00000800
#define CPU_BASED_RDTSC_EXITING 0x00001000
#define CPU_BASED_CR8_LOAD_EXITING 0x00080000
#define CPU_BASED_CR8_STORE_EXITING 0x00100000
#define CPU_BASED_TPR_SHADOW 0x00200000
#define CPU_BASED_MOV_DR_EXITING 0x00800000
#define CPU_BASED_UNCOND_IO_EXITING 0x01000000
#define CPU_BASED_ACTIVATE_IO_BITMAP 0x02000000
#define CPU_BASED_MSR_BITMAPS 0x10000000
#define CPU_BASED_MONITOR_EXITING 0x20000000
#define CPU_BASED_PAUSE_EXITING 0x40000000
#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004
#define CPU_BASED_USE_TSC_OFFSETING 0x00000008
#define CPU_BASED_HLT_EXITING 0x00000080
#define CPU_BASED_INVLPG_EXITING 0x00000200
#define CPU_BASED_MWAIT_EXITING 0x00000400
#define CPU_BASED_RDPMC_EXITING 0x00000800
#define CPU_BASED_RDTSC_EXITING 0x00001000
#define CPU_BASED_CR8_LOAD_EXITING 0x00080000
#define CPU_BASED_CR8_STORE_EXITING 0x00100000
#define CPU_BASED_TPR_SHADOW 0x00200000
#define CPU_BASED_MOV_DR_EXITING 0x00800000
#define CPU_BASED_UNCOND_IO_EXITING 0x01000000
#define CPU_BASED_USE_IO_BITMAPS 0x02000000
#define CPU_BASED_USE_MSR_BITMAPS 0x10000000
#define CPU_BASED_MONITOR_EXITING 0x20000000
#define CPU_BASED_PAUSE_EXITING 0x40000000
#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000
#define PIN_BASED_EXT_INTR_MASK 0x1
#define PIN_BASED_NMI_EXITING 0x8
#define PIN_BASED_EXT_INTR_MASK 0x00000001
#define PIN_BASED_NMI_EXITING 0x00000008
#define PIN_BASED_VIRTUAL_NMIS 0x00000020
#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000
#define VM_EXIT_HOST_ADD_SPACE_SIZE 0x00000200
#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200
#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000
#define VM_ENTRY_IA32E_MODE 0x00000200
#define VM_ENTRY_SMM 0x00000400
#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
/* VMCS Encodings */
enum vmcs_field {
@ -206,6 +213,7 @@ enum vmcs_field {
#define EXIT_REASON_MSR_READ 31
#define EXIT_REASON_MSR_WRITE 32
#define EXIT_REASON_MWAIT_INSTRUCTION 36
#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
/*
* Interruption-information format
@ -261,9 +269,6 @@ enum vmcs_field {
/* segment AR */
#define SEGMENT_AR_L_MASK (1 << 13)
/* entry controls */
#define VM_ENTRY_CONTROLS_IA32E_MASK (1 << 9)
#define AR_TYPE_ACCESSES_MASK 1
#define AR_TYPE_READABLE_MASK (1 << 1)
#define AR_TYPE_WRITEABLE_MASK (1 << 2)
@ -285,13 +290,21 @@ enum vmcs_field {
#define AR_RESERVD_MASK 0xfffe0f00
#define CR4_VMXE 0x2000
#define MSR_IA32_VMX_BASIC 0x480
#define MSR_IA32_VMX_PINBASED_CTLS 0x481
#define MSR_IA32_VMX_PROCBASED_CTLS 0x482
#define MSR_IA32_VMX_EXIT_CTLS 0x483
#define MSR_IA32_VMX_ENTRY_CTLS 0x484
#define MSR_IA32_VMX_MISC 0x485
#define MSR_IA32_VMX_CR0_FIXED0 0x486
#define MSR_IA32_VMX_CR0_FIXED1 0x487
#define MSR_IA32_VMX_CR4_FIXED0 0x488
#define MSR_IA32_VMX_CR4_FIXED1 0x489
#define MSR_IA32_VMX_VMCS_ENUM 0x48a
#define MSR_IA32_VMX_PROCBASED_CTLS2 0x48b
#define MSR_IA32_VMX_BASIC 0x480
#define MSR_IA32_FEATURE_CONTROL 0x03a
#define MSR_IA32_VMX_PINBASED_CTLS 0x481
#define MSR_IA32_VMX_PROCBASED_CTLS 0x482
#define MSR_IA32_VMX_EXIT_CTLS 0x483
#define MSR_IA32_VMX_ENTRY_CTLS 0x484
#define MSR_IA32_FEATURE_CONTROL 0x3a
#define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1
#define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4
#endif

View file

@ -6,7 +6,7 @@
* Copyright (c) 2005 Keir Fraser
*
* Linux coding style, mod r/m decoder, segment base fixes, real-mode
* privieged instructions:
* privileged instructions:
*
* Copyright (C) 2006 Qumranet
*
@ -83,7 +83,7 @@ static u8 opcode_table[256] = {
/* 0x20 - 0x27 */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
0, 0, 0, 0,
SrcImmByte, SrcImm, 0, 0,
/* 0x28 - 0x2F */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
@ -99,15 +99,24 @@ static u8 opcode_table[256] = {
/* 0x40 - 0x4F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0x50 - 0x57 */
0, 0, 0, 0, 0, 0, 0, 0,
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
/* 0x58 - 0x5F */
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
/* 0x60 - 0x6F */
/* 0x60 - 0x67 */
0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0x70 - 0x7F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0,
/* 0x68 - 0x6F */
0, 0, ImplicitOps|Mov, 0,
SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */
SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */
/* 0x70 - 0x77 */
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
/* 0x78 - 0x7F */
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
/* 0x80 - 0x87 */
ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
@ -116,9 +125,9 @@ static u8 opcode_table[256] = {
/* 0x88 - 0x8F */
ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
0, 0, 0, DstMem | SrcNone | ModRM | Mov,
0, ModRM | DstReg, 0, DstMem | SrcNone | ModRM | Mov,
/* 0x90 - 0x9F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps, ImplicitOps, 0, 0,
/* 0xA0 - 0xA7 */
ByteOp | DstReg | SrcMem | Mov, DstReg | SrcMem | Mov,
ByteOp | DstMem | SrcReg | Mov, DstMem | SrcReg | Mov,
@ -142,8 +151,10 @@ static u8 opcode_table[256] = {
0, 0, 0, 0,
/* 0xD8 - 0xDF */
0, 0, 0, 0, 0, 0, 0, 0,
/* 0xE0 - 0xEF */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0xE0 - 0xE7 */
0, 0, 0, 0, 0, 0, 0, 0,
/* 0xE8 - 0xEF */
ImplicitOps, SrcImm|ImplicitOps, 0, SrcImmByte|ImplicitOps, 0, 0, 0, 0,
/* 0xF0 - 0xF7 */
0, 0, 0, 0,
ImplicitOps, 0,
@ -181,7 +192,10 @@ static u16 twobyte_table[256] = {
/* 0x70 - 0x7F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0x80 - 0x8F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
/* 0x90 - 0x9F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0xA0 - 0xA7 */
@ -207,19 +221,6 @@ static u16 twobyte_table[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
/*
* Tell the emulator that of the Group 7 instructions (sgdt, lidt, etc.) we
* are interested only in invlpg and not in any of the rest.
*
* invlpg is a special instruction in that the data it references may not
* be mapped.
*/
void kvm_emulator_want_group7_invlpg(void)
{
twobyte_table[1] &= ~SrcMem;
}
EXPORT_SYMBOL_GPL(kvm_emulator_want_group7_invlpg);
/* Type, address-of, and value of an instruction's operand. */
struct operand {
enum { OP_REG, OP_MEM, OP_IMM } type;
@ -420,7 +421,7 @@ struct operand {
#define insn_fetch(_type, _size, _eip) \
({ unsigned long _x; \
rc = ops->read_std((unsigned long)(_eip) + ctxt->cs_base, &_x, \
(_size), ctxt); \
(_size), ctxt->vcpu); \
if ( rc != 0 ) \
goto done; \
(_eip) += (_size); \
@ -428,10 +429,11 @@ struct operand {
})
/* Access/update address held in a register, based on addressing mode. */
#define address_mask(reg) \
((ad_bytes == sizeof(unsigned long)) ? \
(reg) : ((reg) & ((1UL << (ad_bytes << 3)) - 1)))
#define register_address(base, reg) \
((base) + ((ad_bytes == sizeof(unsigned long)) ? (reg) : \
((reg) & ((1UL << (ad_bytes << 3)) - 1))))
((base) + address_mask(reg))
#define register_address_increment(reg, inc) \
do { \
/* signed type ensures sign extension to long */ \
@ -443,8 +445,19 @@ struct operand {
(((reg) + _inc) & ((1UL << (ad_bytes << 3)) - 1)); \
} while (0)
void *decode_register(u8 modrm_reg, unsigned long *regs,
int highbyte_regs)
#define JMP_REL(rel) \
do { \
_eip += (int)(rel); \
_eip = ((op_bytes == 2) ? (uint16_t)_eip : (uint32_t)_eip); \
} while (0)
/*
* Given the 'reg' portion of a ModRM byte, and a register block, return a
* pointer into the block that addresses the relevant register.
* @highbyte_regs specifies whether to decode AH,CH,DH,BH.
*/
static void *decode_register(u8 modrm_reg, unsigned long *regs,
int highbyte_regs)
{
void *p;
@ -464,13 +477,50 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt,
if (op_bytes == 2)
op_bytes = 3;
*address = 0;
rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, ctxt);
rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2,
ctxt->vcpu);
if (rc)
return rc;
rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, ctxt);
rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes,
ctxt->vcpu);
return rc;
}
static int test_cc(unsigned int condition, unsigned int flags)
{
int rc = 0;
switch ((condition & 15) >> 1) {
case 0: /* o */
rc |= (flags & EFLG_OF);
break;
case 1: /* b/c/nae */
rc |= (flags & EFLG_CF);
break;
case 2: /* z/e */
rc |= (flags & EFLG_ZF);
break;
case 3: /* be/na */
rc |= (flags & (EFLG_CF|EFLG_ZF));
break;
case 4: /* s */
rc |= (flags & EFLG_SF);
break;
case 5: /* p/pe */
rc |= (flags & EFLG_PF);
break;
case 7: /* le/ng */
rc |= (flags & EFLG_ZF);
/* fall through */
case 6: /* l/nge */
rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF));
break;
}
/* Odd condition identifiers (lsb == 1) have inverted sense. */
return (!!rc ^ (condition & 1));
}
int
x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
{
@ -771,11 +821,15 @@ done_prefixes:
goto srcmem_common;
case SrcMem:
src.bytes = (d & ByteOp) ? 1 : op_bytes;
/* Don't fetch the address for invlpg: it could be unmapped. */
if (twobyte && b == 0x01 && modrm_reg == 7)
break;
srcmem_common:
src.type = OP_MEM;
src.ptr = (unsigned long *)cr2;
src.val = 0;
if ((rc = ops->read_emulated((unsigned long)src.ptr,
&src.val, src.bytes, ctxt)) != 0)
&src.val, src.bytes, ctxt->vcpu)) != 0)
goto done;
src.orig_val = src.val;
break;
@ -814,7 +868,7 @@ done_prefixes:
case DstReg:
dst.type = OP_REG;
if ((d & ByteOp)
&& !(twobyte_table && (b == 0xb6 || b == 0xb7))) {
&& !(twobyte && (b == 0xb6 || b == 0xb7))) {
dst.ptr = decode_register(modrm_reg, _regs,
(rex_prefix == 0));
dst.val = *(u8 *) dst.ptr;
@ -838,6 +892,7 @@ done_prefixes:
dst.type = OP_MEM;
dst.ptr = (unsigned long *)cr2;
dst.bytes = (d & ByteOp) ? 1 : op_bytes;
dst.val = 0;
if (d & BitOp) {
unsigned long mask = ~(dst.bytes * 8 - 1);
@ -845,7 +900,7 @@ done_prefixes:
}
if (!(d & Mov) && /* optimisation - avoid slow emulated read */
((rc = ops->read_emulated((unsigned long)dst.ptr,
&dst.val, dst.bytes, ctxt)) != 0))
&dst.val, dst.bytes, ctxt->vcpu)) != 0))
goto done;
break;
}
@ -871,10 +926,27 @@ done_prefixes:
sbb: /* sbb */
emulate_2op_SrcV("sbb", src, dst, _eflags);
break;
case 0x20 ... 0x25:
case 0x20 ... 0x23:
and: /* and */
emulate_2op_SrcV("and", src, dst, _eflags);
break;
case 0x24: /* and al imm8 */
dst.type = OP_REG;
dst.ptr = &_regs[VCPU_REGS_RAX];
dst.val = *(u8 *)dst.ptr;
dst.bytes = 1;
dst.orig_val = dst.val;
goto and;
case 0x25: /* and ax imm16, or eax imm32 */
dst.type = OP_REG;
dst.bytes = op_bytes;
dst.ptr = &_regs[VCPU_REGS_RAX];
if (op_bytes == 2)
dst.val = *(u16 *)dst.ptr;
else
dst.val = *(u32 *)dst.ptr;
dst.orig_val = dst.val;
goto and;
case 0x28 ... 0x2d:
sub: /* sub */
emulate_2op_SrcV("sub", src, dst, _eflags);
@ -892,6 +964,17 @@ done_prefixes:
goto cannot_emulate;
dst.val = (s32) src.val;
break;
case 0x6a: /* push imm8 */
src.val = 0L;
src.val = insn_fetch(s8, 1, _eip);
push:
dst.type = OP_MEM;
dst.bytes = op_bytes;
dst.val = src.val;
register_address_increment(_regs[VCPU_REGS_RSP], -op_bytes);
dst.ptr = (void *) register_address(ctxt->ss_base,
_regs[VCPU_REGS_RSP]);
break;
case 0x80 ... 0x83: /* Grp1 */
switch (modrm_reg) {
case 0:
@ -939,6 +1022,21 @@ done_prefixes:
dst.val = src.val;
lock_prefix = 1;
break;
case 0x88 ... 0x8b: /* mov */
goto mov;
case 0x8d: /* lea r16/r32, m */
dst.val = modrm_val;
break;
case 0x8f: /* pop (sole member of Grp1a) */
/* 64-bit mode: POP always pops a 64-bit operand. */
if (mode == X86EMUL_MODE_PROT64)
dst.bytes = 8;
if ((rc = ops->read_std(register_address(ctxt->ss_base,
_regs[VCPU_REGS_RSP]),
&dst.val, dst.bytes, ctxt->vcpu)) != 0)
goto done;
register_address_increment(_regs[VCPU_REGS_RSP], dst.bytes);
break;
case 0xa0 ... 0xa1: /* mov */
dst.ptr = (unsigned long *)&_regs[VCPU_REGS_RAX];
dst.val = src.val;
@ -948,20 +1046,6 @@ done_prefixes:
dst.val = (unsigned long)_regs[VCPU_REGS_RAX];
_eip += ad_bytes; /* skip dst displacement */
break;
case 0x88 ... 0x8b: /* mov */
case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */
dst.val = src.val;
break;
case 0x8f: /* pop (sole member of Grp1a) */
/* 64-bit mode: POP always pops a 64-bit operand. */
if (mode == X86EMUL_MODE_PROT64)
dst.bytes = 8;
if ((rc = ops->read_std(register_address(ctxt->ss_base,
_regs[VCPU_REGS_RSP]),
&dst.val, dst.bytes, ctxt)) != 0)
goto done;
register_address_increment(_regs[VCPU_REGS_RSP], dst.bytes);
break;
case 0xc0 ... 0xc1:
grp2: /* Grp2 */
switch (modrm_reg) {
@ -989,12 +1073,41 @@ done_prefixes:
break;
}
break;
case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */
mov:
dst.val = src.val;
break;
case 0xd0 ... 0xd1: /* Grp2 */
src.val = 1;
goto grp2;
case 0xd2 ... 0xd3: /* Grp2 */
src.val = _regs[VCPU_REGS_RCX];
goto grp2;
case 0xe8: /* call (near) */ {
long int rel;
switch (op_bytes) {
case 2:
rel = insn_fetch(s16, 2, _eip);
break;
case 4:
rel = insn_fetch(s32, 4, _eip);
break;
case 8:
rel = insn_fetch(s64, 8, _eip);
break;
default:
DPRINTF("Call: Invalid op_bytes\n");
goto cannot_emulate;
}
src.val = (unsigned long) _eip;
JMP_REL(rel);
goto push;
}
case 0xe9: /* jmp rel */
case 0xeb: /* jmp rel short */
JMP_REL(src.val);
no_wb = 1; /* Disable writeback. */
break;
case 0xf6 ... 0xf7: /* Grp3 */
switch (modrm_reg) {
case 0 ... 1: /* test */
@ -1037,13 +1150,19 @@ done_prefixes:
case 1: /* dec */
emulate_1op("dec", dst, _eflags);
break;
case 4: /* jmp abs */
if (b == 0xff)
_eip = dst.val;
else
goto cannot_emulate;
break;
case 6: /* push */
/* 64-bit mode: PUSH always pushes a 64-bit operand. */
if (mode == X86EMUL_MODE_PROT64) {
dst.bytes = 8;
if ((rc = ops->read_std((unsigned long)dst.ptr,
&dst.val, 8,
ctxt)) != 0)
ctxt->vcpu)) != 0)
goto done;
}
register_address_increment(_regs[VCPU_REGS_RSP],
@ -1051,7 +1170,7 @@ done_prefixes:
if ((rc = ops->write_std(
register_address(ctxt->ss_base,
_regs[VCPU_REGS_RSP]),
&dst.val, dst.bytes, ctxt)) != 0)
&dst.val, dst.bytes, ctxt->vcpu)) != 0)
goto done;
no_wb = 1;
break;
@ -1086,11 +1205,11 @@ writeback:
rc = ops->cmpxchg_emulated((unsigned long)dst.
ptr, &dst.orig_val,
&dst.val, dst.bytes,
ctxt);
ctxt->vcpu);
else
rc = ops->write_emulated((unsigned long)dst.ptr,
&dst.val, dst.bytes,
ctxt);
ctxt->vcpu);
if (rc != 0)
goto done;
default:
@ -1109,6 +1228,81 @@ done:
special_insn:
if (twobyte)
goto twobyte_special_insn;
switch(b) {
case 0x50 ... 0x57: /* push reg */
if (op_bytes == 2)
src.val = (u16) _regs[b & 0x7];
else
src.val = (u32) _regs[b & 0x7];
dst.type = OP_MEM;
dst.bytes = op_bytes;
dst.val = src.val;
register_address_increment(_regs[VCPU_REGS_RSP], -op_bytes);
dst.ptr = (void *) register_address(
ctxt->ss_base, _regs[VCPU_REGS_RSP]);
break;
case 0x58 ... 0x5f: /* pop reg */
dst.ptr = (unsigned long *)&_regs[b & 0x7];
pop_instruction:
if ((rc = ops->read_std(register_address(ctxt->ss_base,
_regs[VCPU_REGS_RSP]), dst.ptr, op_bytes, ctxt->vcpu))
!= 0)
goto done;
register_address_increment(_regs[VCPU_REGS_RSP], op_bytes);
no_wb = 1; /* Disable writeback. */
break;
case 0x6c: /* insb */
case 0x6d: /* insw/insd */
if (kvm_emulate_pio_string(ctxt->vcpu, NULL,
1, /* in */
(d & ByteOp) ? 1 : op_bytes, /* size */
rep_prefix ?
address_mask(_regs[VCPU_REGS_RCX]) : 1, /* count */
(_eflags & EFLG_DF), /* down */
register_address(ctxt->es_base,
_regs[VCPU_REGS_RDI]), /* address */
rep_prefix,
_regs[VCPU_REGS_RDX] /* port */
) == 0)
return -1;
return 0;
case 0x6e: /* outsb */
case 0x6f: /* outsw/outsd */
if (kvm_emulate_pio_string(ctxt->vcpu, NULL,
0, /* in */
(d & ByteOp) ? 1 : op_bytes, /* size */
rep_prefix ?
address_mask(_regs[VCPU_REGS_RCX]) : 1, /* count */
(_eflags & EFLG_DF), /* down */
register_address(override_base ?
*override_base : ctxt->ds_base,
_regs[VCPU_REGS_RSI]), /* address */
rep_prefix,
_regs[VCPU_REGS_RDX] /* port */
) == 0)
return -1;
return 0;
case 0x70 ... 0x7f: /* jcc (short) */ {
int rel = insn_fetch(s8, 1, _eip);
if (test_cc(b, _eflags))
JMP_REL(rel);
break;
}
case 0x9c: /* pushf */
src.val = (unsigned long) _eflags;
goto push;
case 0x9d: /* popf */
dst.ptr = (unsigned long *) &_eflags;
goto pop_instruction;
case 0xc3: /* ret */
dst.ptr = &_eip;
goto pop_instruction;
case 0xf4: /* hlt */
ctxt->vcpu->halt_request = 1;
goto done;
}
if (rep_prefix) {
if (_regs[VCPU_REGS_RCX] == 0) {
ctxt->vcpu->rip = _eip;
@ -1125,7 +1319,7 @@ special_insn:
_regs[VCPU_REGS_RDI]);
if ((rc = ops->read_emulated(register_address(
override_base ? *override_base : ctxt->ds_base,
_regs[VCPU_REGS_RSI]), &dst.val, dst.bytes, ctxt)) != 0)
_regs[VCPU_REGS_RSI]), &dst.val, dst.bytes, ctxt->vcpu)) != 0)
goto done;
register_address_increment(_regs[VCPU_REGS_RSI],
(_eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
@ -1147,7 +1341,8 @@ special_insn:
dst.type = OP_REG;
dst.bytes = (d & ByteOp) ? 1 : op_bytes;
dst.ptr = (unsigned long *)&_regs[VCPU_REGS_RAX];
if ((rc = ops->read_emulated(cr2, &dst.val, dst.bytes, ctxt)) != 0)
if ((rc = ops->read_emulated(cr2, &dst.val, dst.bytes,
ctxt->vcpu)) != 0)
goto done;
register_address_increment(_regs[VCPU_REGS_RSI],
(_eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
@ -1155,23 +1350,7 @@ special_insn:
case 0xae ... 0xaf: /* scas */
DPRINTF("Urk! I don't handle SCAS.\n");
goto cannot_emulate;
case 0xf4: /* hlt */
ctxt->vcpu->halt_request = 1;
goto done;
case 0xc3: /* ret */
dst.ptr = &_eip;
goto pop_instruction;
case 0x58 ... 0x5f: /* pop reg */
dst.ptr = (unsigned long *)&_regs[b & 0x7];
pop_instruction:
if ((rc = ops->read_std(register_address(ctxt->ss_base,
_regs[VCPU_REGS_RSP]), dst.ptr, op_bytes, ctxt)) != 0)
goto done;
register_address_increment(_regs[VCPU_REGS_RSP], op_bytes);
no_wb = 1; /* Disable writeback. */
break;
}
goto writeback;
@ -1230,40 +1409,50 @@ twobyte_insn:
break;
case 0x40 ... 0x4f: /* cmov */
dst.val = dst.orig_val = src.val;
d &= ~Mov; /* default to no move */
no_wb = 1;
/*
* First, assume we're decoding an even cmov opcode
* (lsb == 0).
*/
switch ((b & 15) >> 1) {
case 0: /* cmovo */
d |= (_eflags & EFLG_OF) ? Mov : 0;
no_wb = (_eflags & EFLG_OF) ? 0 : 1;
break;
case 1: /* cmovb/cmovc/cmovnae */
d |= (_eflags & EFLG_CF) ? Mov : 0;
no_wb = (_eflags & EFLG_CF) ? 0 : 1;
break;
case 2: /* cmovz/cmove */
d |= (_eflags & EFLG_ZF) ? Mov : 0;
no_wb = (_eflags & EFLG_ZF) ? 0 : 1;
break;
case 3: /* cmovbe/cmovna */
d |= (_eflags & (EFLG_CF | EFLG_ZF)) ? Mov : 0;
no_wb = (_eflags & (EFLG_CF | EFLG_ZF)) ? 0 : 1;
break;
case 4: /* cmovs */
d |= (_eflags & EFLG_SF) ? Mov : 0;
no_wb = (_eflags & EFLG_SF) ? 0 : 1;
break;
case 5: /* cmovp/cmovpe */
d |= (_eflags & EFLG_PF) ? Mov : 0;
no_wb = (_eflags & EFLG_PF) ? 0 : 1;
break;
case 7: /* cmovle/cmovng */
d |= (_eflags & EFLG_ZF) ? Mov : 0;
no_wb = (_eflags & EFLG_ZF) ? 0 : 1;
/* fall through */
case 6: /* cmovl/cmovnge */
d |= (!(_eflags & EFLG_SF) !=
!(_eflags & EFLG_OF)) ? Mov : 0;
no_wb &= (!(_eflags & EFLG_SF) !=
!(_eflags & EFLG_OF)) ? 0 : 1;
break;
}
/* Odd cmov opcodes (lsb == 1) have inverted sense. */
d ^= (b & 1) ? Mov : 0;
no_wb ^= b & 1;
break;
case 0xa3:
bt: /* bt */
src.val &= (dst.bytes << 3) - 1; /* only subword offset */
emulate_2op_SrcV_nobyte("bt", src, dst, _eflags);
break;
case 0xab:
bts: /* bts */
src.val &= (dst.bytes << 3) - 1; /* only subword offset */
emulate_2op_SrcV_nobyte("bts", src, dst, _eflags);
break;
case 0xb0 ... 0xb1: /* cmpxchg */
/*
@ -1273,8 +1462,6 @@ twobyte_insn:
src.orig_val = src.val;
src.val = _regs[VCPU_REGS_RAX];
emulate_2op_SrcV("cmp", src, dst, _eflags);
/* Always write back. The question is: where to? */
d |= Mov;
if (_eflags & EFLG_ZF) {
/* Success: write back to memory. */
dst.val = src.orig_val;
@ -1284,30 +1471,15 @@ twobyte_insn:
dst.ptr = (unsigned long *)&_regs[VCPU_REGS_RAX];
}
break;
case 0xa3:
bt: /* bt */
src.val &= (dst.bytes << 3) - 1; /* only subword offset */
emulate_2op_SrcV_nobyte("bt", src, dst, _eflags);
break;
case 0xb3:
btr: /* btr */
src.val &= (dst.bytes << 3) - 1; /* only subword offset */
emulate_2op_SrcV_nobyte("btr", src, dst, _eflags);
break;
case 0xab:
bts: /* bts */
src.val &= (dst.bytes << 3) - 1; /* only subword offset */
emulate_2op_SrcV_nobyte("bts", src, dst, _eflags);
break;
case 0xb6 ... 0xb7: /* movzx */
dst.bytes = op_bytes;
dst.val = (d & ByteOp) ? (u8) src.val : (u16) src.val;
break;
case 0xbb:
btc: /* btc */
src.val &= (dst.bytes << 3) - 1; /* only subword offset */
emulate_2op_SrcV_nobyte("btc", src, dst, _eflags);
break;
case 0xba: /* Grp8 */
switch (modrm_reg & 3) {
case 0:
@ -1320,6 +1492,11 @@ twobyte_insn:
goto btc;
}
break;
case 0xbb:
btc: /* btc */
src.val &= (dst.bytes << 3) - 1; /* only subword offset */
emulate_2op_SrcV_nobyte("btc", src, dst, _eflags);
break;
case 0xbe ... 0xbf: /* movsx */
dst.bytes = op_bytes;
dst.val = (d & ByteOp) ? (s8) src.val : (s16) src.val;
@ -1331,14 +1508,14 @@ twobyte_special_insn:
/* Disable writeback. */
no_wb = 1;
switch (b) {
case 0x06:
emulate_clts(ctxt->vcpu);
break;
case 0x09: /* wbinvd */
break;
case 0x0d: /* GrpP (prefetch) */
case 0x18: /* Grp16 (prefetch/nop) */
break;
case 0x06:
emulate_clts(ctxt->vcpu);
break;
case 0x20: /* mov cr, reg */
if (modrm_mod != 3)
goto cannot_emulate;
@ -1355,7 +1532,7 @@ twobyte_special_insn:
| ((u64)_regs[VCPU_REGS_RDX] << 32);
rc = kvm_set_msr(ctxt->vcpu, _regs[VCPU_REGS_RCX], msr_data);
if (rc) {
kvm_arch_ops->inject_gp(ctxt->vcpu, 0);
kvm_x86_ops->inject_gp(ctxt->vcpu, 0);
_eip = ctxt->vcpu->rip;
}
rc = X86EMUL_CONTINUE;
@ -1364,7 +1541,7 @@ twobyte_special_insn:
/* rdmsr */
rc = kvm_get_msr(ctxt->vcpu, _regs[VCPU_REGS_RCX], &msr_data);
if (rc) {
kvm_arch_ops->inject_gp(ctxt->vcpu, 0);
kvm_x86_ops->inject_gp(ctxt->vcpu, 0);
_eip = ctxt->vcpu->rip;
} else {
_regs[VCPU_REGS_RAX] = (u32)msr_data;
@ -1372,10 +1549,32 @@ twobyte_special_insn:
}
rc = X86EMUL_CONTINUE;
break;
case 0x80 ... 0x8f: /* jnz rel, etc*/ {
long int rel;
switch (op_bytes) {
case 2:
rel = insn_fetch(s16, 2, _eip);
break;
case 4:
rel = insn_fetch(s32, 4, _eip);
break;
case 8:
rel = insn_fetch(s64, 8, _eip);
break;
default:
DPRINTF("jnz: Invalid op_bytes\n");
goto cannot_emulate;
}
if (test_cc(b, _eflags))
JMP_REL(rel);
break;
}
case 0xc7: /* Grp9 (cmpxchg8b) */
{
u64 old, new;
if ((rc = ops->read_emulated(cr2, &old, 8, ctxt)) != 0)
if ((rc = ops->read_emulated(cr2, &old, 8, ctxt->vcpu))
!= 0)
goto done;
if (((u32) (old >> 0) != (u32) _regs[VCPU_REGS_RAX]) ||
((u32) (old >> 32) != (u32) _regs[VCPU_REGS_RDX])) {
@ -1386,7 +1585,7 @@ twobyte_special_insn:
new = ((u64)_regs[VCPU_REGS_RCX] << 32)
| (u32) _regs[VCPU_REGS_RBX];
if ((rc = ops->cmpxchg_emulated(cr2, &old,
&new, 8, ctxt)) != 0)
&new, 8, ctxt->vcpu)) != 0)
goto done;
_eflags |= EFLG_ZF;
}

View file

@ -60,7 +60,7 @@ struct x86_emulate_ops {
* @bytes: [IN ] Number of bytes to read from memory.
*/
int (*read_std)(unsigned long addr, void *val,
unsigned int bytes, struct x86_emulate_ctxt * ctxt);
unsigned int bytes, struct kvm_vcpu *vcpu);
/*
* write_std: Write bytes of standard (non-emulated/special) memory.
@ -71,7 +71,7 @@ struct x86_emulate_ops {
* @bytes: [IN ] Number of bytes to write to memory.
*/
int (*write_std)(unsigned long addr, const void *val,
unsigned int bytes, struct x86_emulate_ctxt * ctxt);
unsigned int bytes, struct kvm_vcpu *vcpu);
/*
* read_emulated: Read bytes from emulated/special memory area.
@ -82,7 +82,7 @@ struct x86_emulate_ops {
int (*read_emulated) (unsigned long addr,
void *val,
unsigned int bytes,
struct x86_emulate_ctxt * ctxt);
struct kvm_vcpu *vcpu);
/*
* write_emulated: Read bytes from emulated/special memory area.
@ -94,7 +94,7 @@ struct x86_emulate_ops {
int (*write_emulated) (unsigned long addr,
const void *val,
unsigned int bytes,
struct x86_emulate_ctxt * ctxt);
struct kvm_vcpu *vcpu);
/*
* cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an
@ -108,12 +108,10 @@ struct x86_emulate_ops {
const void *old,
const void *new,
unsigned int bytes,
struct x86_emulate_ctxt * ctxt);
struct kvm_vcpu *vcpu);
};
struct cpu_user_regs;
struct x86_emulate_ctxt {
/* Register state before/after emulation. */
struct kvm_vcpu *vcpu;
@ -154,12 +152,4 @@ struct x86_emulate_ctxt {
int x86_emulate_memop(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops);
/*
* Given the 'reg' portion of a ModRM byte, and a register block, return a
* pointer into the block that addresses the relevant register.
* @highbyte_regs specifies whether to decode AH,CH,DH,BH.
*/
void *decode_register(u8 modrm_reg, unsigned long *regs,
int highbyte_regs);
#endif /* __X86_EMULATE_H__ */

View file

@ -11,8 +11,6 @@
* Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar
*/
#ifdef CONFIG_X86_IO_APIC
/*
* The structure of the IO-APIC:
*/
@ -55,12 +53,6 @@ union IO_APIC_reg_03 {
} __attribute__ ((packed)) bits;
};
/*
* # of IO-APICs and # of IRQ routing registers
*/
extern int nr_ioapics;
extern int nr_ioapic_registers[MAX_IO_APICS];
enum ioapic_irq_destination_types {
dest_Fixed = 0,
dest_LowestPrio = 1,
@ -100,6 +92,14 @@ struct IO_APIC_route_entry {
} __attribute__ ((packed));
#ifdef CONFIG_X86_IO_APIC
/*
* # of IO-APICs and # of IRQ routing registers
*/
extern int nr_ioapics;
extern int nr_ioapic_registers[MAX_IO_APICS];
/*
* MP-BIOS irq configuration table structures:
*/

View file

@ -63,7 +63,7 @@
/*
* x86-64 Task Priority Register, CR8
*/
#define X86_CR8_TPR 0x00000007 /* task priority register */
#define X86_CR8_TPR 0x0000000F /* task priority register */
/*
* AMD and Transmeta use MSRs for configuration; see <asm/msr-index.h>

View file

@ -4,8 +4,7 @@
/*
* Userspace interface for /dev/kvm - kernel based virtual machine
*
* Note: this interface is considered experimental and may change without
* notice.
* Note: you must update KVM_API_VERSION if you change this interface.
*/
#include <asm/types.h>
@ -13,14 +12,8 @@
#define KVM_API_VERSION 12
/*
* Architectural interrupt line count, and the size of the bitmap needed
* to hold them.
*/
/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256
#define KVM_IRQ_BITMAP_SIZE_BYTES ((KVM_NR_INTERRUPTS + 7) / 8)
#define KVM_IRQ_BITMAP_SIZE(type) (KVM_IRQ_BITMAP_SIZE_BYTES / sizeof(type))
/* for KVM_CREATE_MEMORY_REGION */
struct kvm_memory_region {
@ -41,20 +34,89 @@ struct kvm_memory_alias {
__u64 target_phys_addr;
};
enum kvm_exit_reason {
KVM_EXIT_UNKNOWN = 0,
KVM_EXIT_EXCEPTION = 1,
KVM_EXIT_IO = 2,
KVM_EXIT_HYPERCALL = 3,
KVM_EXIT_DEBUG = 4,
KVM_EXIT_HLT = 5,
KVM_EXIT_MMIO = 6,
KVM_EXIT_IRQ_WINDOW_OPEN = 7,
KVM_EXIT_SHUTDOWN = 8,
KVM_EXIT_FAIL_ENTRY = 9,
KVM_EXIT_INTR = 10,
/* for KVM_IRQ_LINE */
struct kvm_irq_level {
/*
* ACPI gsi notion of irq.
* For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47..
* For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23..
*/
__u32 irq;
__u32 level;
};
/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */
struct kvm_pic_state {
__u8 last_irr; /* edge detection */
__u8 irr; /* interrupt request register */
__u8 imr; /* interrupt mask register */
__u8 isr; /* interrupt service register */
__u8 priority_add; /* highest irq priority */
__u8 irq_base;
__u8 read_reg_select;
__u8 poll;
__u8 special_mask;
__u8 init_state;
__u8 auto_eoi;
__u8 rotate_on_auto_eoi;
__u8 special_fully_nested_mode;
__u8 init4; /* true if 4 byte init */
__u8 elcr; /* PIIX edge/trigger selection */
__u8 elcr_mask;
};
#define KVM_IOAPIC_NUM_PINS 24
struct kvm_ioapic_state {
__u64 base_address;
__u32 ioregsel;
__u32 id;
__u32 irr;
__u32 pad;
union {
__u64 bits;
struct {
__u8 vector;
__u8 delivery_mode:3;
__u8 dest_mode:1;
__u8 delivery_status:1;
__u8 polarity:1;
__u8 remote_irr:1;
__u8 trig_mode:1;
__u8 mask:1;
__u8 reserve:7;
__u8 reserved[4];
__u8 dest_id;
} fields;
} redirtbl[KVM_IOAPIC_NUM_PINS];
};
#define KVM_IRQCHIP_PIC_MASTER 0
#define KVM_IRQCHIP_PIC_SLAVE 1
#define KVM_IRQCHIP_IOAPIC 2
struct kvm_irqchip {
__u32 chip_id;
__u32 pad;
union {
char dummy[512]; /* reserving space */
struct kvm_pic_state pic;
struct kvm_ioapic_state ioapic;
} chip;
};
#define KVM_EXIT_UNKNOWN 0
#define KVM_EXIT_EXCEPTION 1
#define KVM_EXIT_IO 2
#define KVM_EXIT_HYPERCALL 3
#define KVM_EXIT_DEBUG 4
#define KVM_EXIT_HLT 5
#define KVM_EXIT_MMIO 6
#define KVM_EXIT_IRQ_WINDOW_OPEN 7
#define KVM_EXIT_SHUTDOWN 8
#define KVM_EXIT_FAIL_ENTRY 9
#define KVM_EXIT_INTR 10
#define KVM_EXIT_SET_TPR 11
/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
struct kvm_run {
/* in */
@ -106,11 +168,14 @@ struct kvm_run {
} mmio;
/* KVM_EXIT_HYPERCALL */
struct {
__u64 nr;
__u64 args[6];
__u64 ret;
__u32 longmode;
__u32 pad;
} hypercall;
/* Fix the size of the union. */
char padding[256];
};
};
@ -139,6 +204,12 @@ struct kvm_fpu {
__u32 pad2;
};
/* for KVM_GET_LAPIC and KVM_SET_LAPIC */
#define KVM_APIC_REG_SIZE 0x400
struct kvm_lapic_state {
char regs[KVM_APIC_REG_SIZE];
};
struct kvm_segment {
__u64 base;
__u32 limit;
@ -164,7 +235,7 @@ struct kvm_sregs {
__u64 cr0, cr2, cr3, cr4, cr8;
__u64 efer;
__u64 apic_base;
__u64 interrupt_bitmap[KVM_IRQ_BITMAP_SIZE(__u64)];
__u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
};
struct kvm_msr_entry {
@ -271,6 +342,12 @@ struct kvm_signal_mask {
*/
#define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */
/*
* Extension capability list.
*/
#define KVM_CAP_IRQCHIP 0
#define KVM_CAP_HLT 1
/*
* ioctls for VM fds
*/
@ -282,6 +359,11 @@ struct kvm_signal_mask {
#define KVM_CREATE_VCPU _IO(KVMIO, 0x41)
#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log)
#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias)
/* Device model IOC */
#define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60)
#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level)
#define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip)
#define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip)
/*
* ioctls for vcpu fds
@ -300,5 +382,7 @@ struct kvm_signal_mask {
#define KVM_SET_SIGNAL_MASK _IOW(KVMIO, 0x8b, struct kvm_signal_mask)
#define KVM_GET_FPU _IOR(KVMIO, 0x8c, struct kvm_fpu)
#define KVM_SET_FPU _IOW(KVMIO, 0x8d, struct kvm_fpu)
#define KVM_GET_LAPIC _IOR(KVMIO, 0x8e, struct kvm_lapic_state)
#define KVM_SET_LAPIC _IOW(KVMIO, 0x8f, struct kvm_lapic_state)
#endif