Merge branch 'kvm-updates/3.2' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/3.2' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  - KVM: PPC: e500: include linux/export.h
  - KVM: PPC: fix kvmppc_start_thread() for CONFIG_SMP=N
  - KVM: PPC: protect use of kvmppc_h_pr
  - KVM: PPC: move compute_tlbie_rb to book3s_64 common header
  - KVM: Don't automatically expose the TSC deadline timer in cpuid
  - KVM: Device assignment permission checks
  - KVM: Remove ability to assign a device without iommu support
  - KVM: x86: Prevent starting PIT timers in the absence of irqchip support
This commit is contained in:
commit
7f54492fbc
10 changed files with 154 additions and 56 deletions
|
@ -1100,6 +1100,15 @@ emulate them efficiently. The fields in each entry are defined as follows:
|
|||
eax, ebx, ecx, edx: the values returned by the cpuid instruction for
|
||||
this function/index combination
|
||||
|
||||
The TSC deadline timer feature (CPUID leaf 1, ecx[24]) is always returned
|
||||
as false, since the feature depends on KVM_CREATE_IRQCHIP for local APIC
|
||||
support. Instead it is reported via
|
||||
|
||||
ioctl(KVM_CHECK_EXTENSION, KVM_CAP_TSC_DEADLINE_TIMER)
|
||||
|
||||
If that returns true and you use KVM_CREATE_IRQCHIP, or if you emulate the
|
||||
feature in userspace, then you can enable the feature for KVM_SET_CPUID2.
|
||||
|
||||
4.47 KVM_PPC_GET_PVINFO
|
||||
|
||||
Capability: KVM_CAP_PPC_GET_PVINFO
|
||||
|
@ -1151,6 +1160,13 @@ following flags are specified:
|
|||
/* Depends on KVM_CAP_IOMMU */
|
||||
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
|
||||
|
||||
The KVM_DEV_ASSIGN_ENABLE_IOMMU flag is a mandatory option to ensure
|
||||
isolation of the device. Usages not specifying this flag are deprecated.
|
||||
|
||||
Only PCI header type 0 devices with PCI BAR resources are supported by
|
||||
device assignment. The user requesting this ioctl must have read/write
|
||||
access to the PCI sysfs resource files associated with the device.
|
||||
|
||||
4.49 KVM_DEASSIGN_PCI_DEVICE
|
||||
|
||||
Capability: KVM_CAP_DEVICE_DEASSIGNMENT
|
||||
|
|
|
@ -381,39 +381,6 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
#endif
|
||||
|
||||
/*
 * compute_tlbie_rb - assemble an "rb" value from v, r and pte_index.
 *
 * NOTE(review): judging by the name and the HPTE_V_* flags tested on v,
 * v and r are presumably the two words of a hashed page table entry and
 * the result is the RB operand for a tlbie instruction -- confirm with
 * the callers.
 *
 * The result is built as follows:
 *  - v with its low 7 bits cleared, shifted left by 16 (AVA field);
 *  - va_low, an 11-bit value derived from pte_index >> 3 (inverted when
 *    HPTE_V_SECONDARY is set) XORed with v >> 12, or with v >> 24 when
 *    HPTE_V_1TB_SEG is set;
 *  - for HPTE_V_LARGE entries the L bit is set, and the LP/AVAL bits are
 *    added only when CPU_FTR_ARCH_206 is present and (r & 0xff000) is
 *    non-zero; otherwise all 11 bits of va_low are placed at bit 12;
 *  - (v >> 54) & 0x300 is ORed in last (B field).
 *
 * Returns the assembled rb value; the function has no side effects.
 */
static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
|
||||
unsigned long pte_index)
|
||||
{
|
||||
unsigned long rb, va_low;
|
||||
|
||||
rb = (v & ~0x7fUL) << 16; /* AVA field */
|
||||
va_low = pte_index >> 3; /* presumably the hash group index (8 entries per group) -- confirm */
|
||||
if (v & HPTE_V_SECONDARY)
|
||||
va_low = ~va_low;
|
||||
/* xor vsid from AVA */
|
||||
if (!(v & HPTE_V_1TB_SEG))
|
||||
va_low ^= v >> 12;
|
||||
else
|
||||
va_low ^= v >> 24;
|
||||
va_low &= 0x7ff; /* only the low 11 bits are used below */
|
||||
if (v & HPTE_V_LARGE) {
|
||||
rb |= 1; /* L field */
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_206) &&
|
||||
(r & 0xff000)) {
|
||||
/* non-16MB large page, must be 64k */
|
||||
/* (masks depend on page size) */
|
||||
rb |= 0x1000; /* page encoding in LP field */
|
||||
rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */
|
||||
rb |= (va_low & 0xfe); /* AVAL field (P7 doesn't seem to care) */
|
||||
}
|
||||
} else {
|
||||
/* 4kB page */
|
||||
rb |= (va_low & 0x7ff) << 12; /* remaining 11b of VA */
|
||||
}
|
||||
rb |= (v >> 54) & 0x300; /* B field */
|
||||
return rb;
|
||||
}
|
||||
|
||||
/* Magic register values loaded into r3 and r4 before the 'sc' assembly
|
||||
* instruction for the OSI hypercalls */
|
||||
#define OSI_SC_MAGIC_R3 0x113724FA
|
||||
|
|
|
@ -29,4 +29,37 @@ static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu)
|
|||
|
||||
#define SPAPR_TCE_SHIFT 12
|
||||
|
||||
/*
 * compute_tlbie_rb - assemble an "rb" value from v, r and pte_index.
 *
 * NOTE(review): judging by the name and the HPTE_V_* flags tested on v,
 * v and r are presumably the two words of a hashed page table entry and
 * the result is the RB operand for a tlbie instruction -- confirm with
 * the callers.
 *
 * The result is built as follows:
 *  - v with its low 7 bits cleared, shifted left by 16 (AVA field);
 *  - va_low, an 11-bit value derived from pte_index >> 3 (inverted when
 *    HPTE_V_SECONDARY is set) XORed with v >> 12, or with v >> 24 when
 *    HPTE_V_1TB_SEG is set;
 *  - for HPTE_V_LARGE entries the L bit is set, and the LP/AVAL bits are
 *    added only when CPU_FTR_ARCH_206 is present and (r & 0xff000) is
 *    non-zero; otherwise all 11 bits of va_low are placed at bit 12;
 *  - (v >> 54) & 0x300 is ORed in last (B field).
 *
 * Returns the assembled rb value; the function has no side effects.
 */
static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
|
||||
unsigned long pte_index)
|
||||
{
|
||||
unsigned long rb, va_low;
|
||||
|
||||
rb = (v & ~0x7fUL) << 16; /* AVA field */
|
||||
va_low = pte_index >> 3; /* presumably the hash group index (8 entries per group) -- confirm */
|
||||
if (v & HPTE_V_SECONDARY)
|
||||
va_low = ~va_low;
|
||||
/* xor vsid from AVA */
|
||||
if (!(v & HPTE_V_1TB_SEG))
|
||||
va_low ^= v >> 12;
|
||||
else
|
||||
va_low ^= v >> 24;
|
||||
va_low &= 0x7ff; /* only the low 11 bits are used below */
|
||||
if (v & HPTE_V_LARGE) {
|
||||
rb |= 1; /* L field */
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_206) &&
|
||||
(r & 0xff000)) {
|
||||
/* non-16MB large page, must be 64k */
|
||||
/* (masks depend on page size) */
|
||||
rb |= 0x1000; /* page encoding in LP field */
|
||||
rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */
|
||||
rb |= (va_low & 0xfe); /* AVAL field (P7 doesn't seem to care) */
|
||||
}
|
||||
} else {
|
||||
/* 4kB page */
|
||||
rb |= (va_low & 0x7ff) << 12; /* remaining 11b of VA */
|
||||
}
|
||||
rb |= (v >> 54) & 0x300; /* B field */
|
||||
return rb;
|
||||
}
|
||||
|
||||
#endif /* __ASM_KVM_BOOK3S_64_H__ */
|
||||
|
|
|
@ -538,7 +538,7 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
|
|||
tpaca->kvm_hstate.napping = 0;
|
||||
vcpu->cpu = vc->pcpu;
|
||||
smp_wmb();
|
||||
#ifdef CONFIG_PPC_ICP_NATIVE
|
||||
#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
|
||||
if (vcpu->arch.ptid) {
|
||||
tpaca->cpu_start = 0x80;
|
||||
wmb();
|
||||
|
|
|
@ -658,10 +658,12 @@ program_interrupt:
|
|||
ulong cmd = kvmppc_get_gpr(vcpu, 3);
|
||||
int i;
|
||||
|
||||
#ifdef CONFIG_KVM_BOOK3S_64_PR
|
||||
if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) {
|
||||
r = RESUME_GUEST;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
||||
run->papr_hcall.nr = cmd;
|
||||
for (i = 0; i < 9; ++i) {
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include <linux/kvm_host.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/export.h>
|
||||
|
||||
#include <asm/reg.h>
|
||||
#include <asm/cputable.h>
|
||||
|
|
|
@ -338,11 +338,15 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
|
|||
return HRTIMER_NORESTART;
|
||||
}
|
||||
|
||||
static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
|
||||
static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
|
||||
{
|
||||
struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
|
||||
struct kvm_timer *pt = &ps->pit_timer;
|
||||
s64 interval;
|
||||
|
||||
if (!irqchip_in_kernel(kvm))
|
||||
return;
|
||||
|
||||
interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
|
||||
|
||||
pr_debug("create pit timer, interval is %llu nsec\n", interval);
|
||||
|
@ -394,13 +398,13 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
|
|||
/* FIXME: enhance mode 4 precision */
|
||||
case 4:
|
||||
if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)) {
|
||||
create_pit_timer(ps, val, 0);
|
||||
create_pit_timer(kvm, val, 0);
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
case 3:
|
||||
if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)){
|
||||
create_pit_timer(ps, val, 1);
|
||||
create_pit_timer(kvm, val, 1);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -602,7 +602,6 @@ static void update_cpuid(struct kvm_vcpu *vcpu)
|
|||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
u32 timer_mode_mask;
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, 1, 0);
|
||||
if (!best)
|
||||
|
@ -615,15 +614,12 @@ static void update_cpuid(struct kvm_vcpu *vcpu)
|
|||
best->ecx |= bit(X86_FEATURE_OSXSAVE);
|
||||
}
|
||||
|
||||
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
|
||||
best->function == 0x1) {
|
||||
best->ecx |= bit(X86_FEATURE_TSC_DEADLINE_TIMER);
|
||||
timer_mode_mask = 3 << 17;
|
||||
} else
|
||||
timer_mode_mask = 1 << 17;
|
||||
|
||||
if (apic)
|
||||
apic->lapic_timer.timer_mode_mask = timer_mode_mask;
|
||||
if (apic) {
|
||||
if (best->ecx & bit(X86_FEATURE_TSC_DEADLINE_TIMER))
|
||||
apic->lapic_timer.timer_mode_mask = 3 << 17;
|
||||
else
|
||||
apic->lapic_timer.timer_mode_mask = 1 << 17;
|
||||
}
|
||||
}
|
||||
|
||||
int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
|
||||
|
@ -2135,6 +2131,9 @@ int kvm_dev_ioctl_check_extension(long ext)
|
|||
case KVM_CAP_TSC_CONTROL:
|
||||
r = kvm_has_tsc_control;
|
||||
break;
|
||||
case KVM_CAP_TSC_DEADLINE_TIMER:
|
||||
r = boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER);
|
||||
break;
|
||||
default:
|
||||
r = 0;
|
||||
break;
|
||||
|
|
|
@ -557,6 +557,7 @@ struct kvm_ppc_pvinfo {
|
|||
#define KVM_CAP_MAX_VCPUS 66 /* returns max vcpus per vm */
|
||||
#define KVM_CAP_PPC_PAPR 68
|
||||
#define KVM_CAP_S390_GMAP 71
|
||||
#define KVM_CAP_TSC_DEADLINE_TIMER 72
|
||||
|
||||
#ifdef KVM_CAP_IRQ_ROUTING
|
||||
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
#include <linux/pci.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/namei.h>
|
||||
#include <linux/fs.h>
|
||||
#include "irq.h"
|
||||
|
||||
static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
|
||||
|
@ -480,12 +482,76 @@ out:
|
|||
return r;
|
||||
}
|
||||
|
||||
/*
|
||||
* We want to test whether the caller has been granted permissions to
|
||||
* use this device. To be able to configure and control the device,
|
||||
* the user needs access to PCI configuration space and BAR resources.
|
||||
* These are accessed through PCI sysfs. PCI config space is often
|
||||
* passed to the process calling this ioctl via file descriptor, so we
|
||||
* can't rely on access to that file. We can check for permissions
|
||||
* on each of the BAR resource files, which is a pretty clear
|
||||
* indicator that the user has been granted access to the device.
|
||||
*
* Returns 0 when at least one standard resource has a non-zero length
* and every such resource's "/sys<kobject path>/resourceN" file passes
* the read/write permission check. Otherwise returns:
*   -ENOMEM  if building either path string fails,
*   the error from kern_path() or inode_permission() for the first
*            resource file that cannot be looked up or accessed,
*   -EPERM   if no standard resource with a non-zero length exists,
*   -EINVAL  when built without CONFIG_SYSFS.
|
||||
*/
|
||||
static int probe_sysfs_permissions(struct pci_dev *dev)
|
||||
{
|
||||
#ifdef CONFIG_SYSFS
|
||||
int i;
|
||||
bool bar_found = false; /* set once any resource passes the check */
|
||||
|
||||
for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
|
||||
char *kpath, *syspath;
|
||||
struct path path;
|
||||
struct inode *inode;
|
||||
int r;
|
||||
|
||||
if (!pci_resource_len(dev, i))
|
||||
continue; /* zero-length resource: nothing to check */
|
||||
|
||||
kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
|
||||
if (!kpath)
|
||||
return -ENOMEM;
|
||||
|
||||
/* Per sysfs-rules, sysfs is always at /sys */
|
||||
syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
|
||||
kfree(kpath); /* freed before the NULL check so neither outcome leaks it */
|
||||
if (!syspath)
|
||||
return -ENOMEM;
|
||||
|
||||
r = kern_path(syspath, LOOKUP_FOLLOW, &path);
|
||||
kfree(syspath); /* the string is not needed after the lookup, success or not */
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
inode = path.dentry->d_inode;
|
||||
|
||||
r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
|
||||
path_put(&path); /* release the looked-up path before acting on r */
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
bar_found = true;
|
||||
}
|
||||
|
||||
/* If no resources, probably something special */
|
||||
if (!bar_found)
|
||||
return -EPERM;
|
||||
|
||||
return 0;
|
||||
#else
|
||||
return -EINVAL; /* No way to control the device without sysfs */
|
||||
#endif
|
||||
}
|
||||
|
||||
static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
|
||||
struct kvm_assigned_pci_dev *assigned_dev)
|
||||
{
|
||||
int r = 0, idx;
|
||||
struct kvm_assigned_dev_kernel *match;
|
||||
struct pci_dev *dev;
|
||||
u8 header_type;
|
||||
|
||||
if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
idx = srcu_read_lock(&kvm->srcu);
|
||||
|
@ -513,6 +579,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
|
|||
r = -EINVAL;
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
/* Don't allow bridges to be assigned */
|
||||
pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
|
||||
if ((header_type & PCI_HEADER_TYPE) != PCI_HEADER_TYPE_NORMAL) {
|
||||
r = -EPERM;
|
||||
goto out_put;
|
||||
}
|
||||
|
||||
r = probe_sysfs_permissions(dev);
|
||||
if (r)
|
||||
goto out_put;
|
||||
|
||||
if (pci_enable_device(dev)) {
|
||||
printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
|
||||
r = -EBUSY;
|
||||
|
@ -544,16 +622,14 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
|
|||
|
||||
list_add(&match->list, &kvm->arch.assigned_dev_head);
|
||||
|
||||
if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
|
||||
if (!kvm->arch.iommu_domain) {
|
||||
r = kvm_iommu_map_guest(kvm);
|
||||
if (r)
|
||||
goto out_list_del;
|
||||
}
|
||||
r = kvm_assign_device(kvm, match);
|
||||
if (!kvm->arch.iommu_domain) {
|
||||
r = kvm_iommu_map_guest(kvm);
|
||||
if (r)
|
||||
goto out_list_del;
|
||||
}
|
||||
r = kvm_assign_device(kvm, match);
|
||||
if (r)
|
||||
goto out_list_del;
|
||||
|
||||
out:
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
|
@ -593,8 +669,7 @@ static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
|
|||
goto out;
|
||||
}
|
||||
|
||||
if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)
|
||||
kvm_deassign_device(kvm, match);
|
||||
kvm_deassign_device(kvm, match);
|
||||
|
||||
kvm_free_assigned_device(kvm, match);
|
||||
|
||||
|
|
Loading…
Reference in a new issue