KVM: propagate fault r/w information to gup(), allow read-only memory

As suggested by Andrea, pass the r/w bit of the fault error code to gup(),
upgrading a read fault to a writable mapping if the host pte allows it.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
This commit is contained in:
Marcelo Tosatti 2010-10-22 14:18:18 -02:00 committed by Avi Kivity
parent 7905d9a5ad
commit 612819c3c6
4 changed files with 71 additions and 25 deletions

View file

@ -2216,7 +2216,7 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
} }
static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
int level, gfn_t gfn, pfn_t pfn) int map_writable, int level, gfn_t gfn, pfn_t pfn)
{ {
struct kvm_shadow_walk_iterator iterator; struct kvm_shadow_walk_iterator iterator;
struct kvm_mmu_page *sp; struct kvm_mmu_page *sp;
@ -2225,9 +2225,13 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
if (iterator.level == level) { if (iterator.level == level) {
mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, unsigned pte_access = ACC_ALL;
if (!map_writable)
pte_access &= ~ACC_WRITE_MASK;
mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, pte_access,
0, write, 1, &pt_write, 0, write, 1, &pt_write,
level, gfn, pfn, false, true); level, gfn, pfn, false, map_writable);
direct_pte_prefetch(vcpu, iterator.sptep); direct_pte_prefetch(vcpu, iterator.sptep);
++vcpu->stat.pf_fixed; ++vcpu->stat.pf_fixed;
break; break;
@ -2288,6 +2292,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
int level; int level;
pfn_t pfn; pfn_t pfn;
unsigned long mmu_seq; unsigned long mmu_seq;
bool map_writable;
level = mapping_level(vcpu, gfn); level = mapping_level(vcpu, gfn);
@ -2302,7 +2307,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
mmu_seq = vcpu->kvm->mmu_notifier_seq; mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb(); smp_rmb();
pfn = gfn_to_pfn(vcpu->kvm, gfn); pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write, &map_writable);
/* mmio */ /* mmio */
if (is_error_pfn(pfn)) if (is_error_pfn(pfn))
@ -2312,7 +2317,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
if (mmu_notifier_retry(vcpu, mmu_seq)) if (mmu_notifier_retry(vcpu, mmu_seq))
goto out_unlock; goto out_unlock;
kvm_mmu_free_some_pages(vcpu); kvm_mmu_free_some_pages(vcpu);
r = __direct_map(vcpu, v, write, level, gfn, pfn); r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn);
spin_unlock(&vcpu->kvm->mmu_lock); spin_unlock(&vcpu->kvm->mmu_lock);
@ -2611,11 +2616,11 @@ static bool can_do_async_pf(struct kvm_vcpu *vcpu)
} }
static bool try_async_pf(struct kvm_vcpu *vcpu, bool no_apf, gfn_t gfn, static bool try_async_pf(struct kvm_vcpu *vcpu, bool no_apf, gfn_t gfn,
gva_t gva, pfn_t *pfn) gva_t gva, pfn_t *pfn, bool write, bool *writable)
{ {
bool async; bool async;
*pfn = gfn_to_pfn_async(vcpu->kvm, gfn, &async); *pfn = gfn_to_pfn_async(vcpu->kvm, gfn, &async, write, writable);
if (!async) if (!async)
return false; /* *pfn has correct page already */ return false; /* *pfn has correct page already */
@ -2632,7 +2637,7 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool no_apf, gfn_t gfn,
return true; return true;
} }
*pfn = gfn_to_pfn(vcpu->kvm, gfn); *pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write, writable);
return false; return false;
} }
@ -2645,6 +2650,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
int level; int level;
gfn_t gfn = gpa >> PAGE_SHIFT; gfn_t gfn = gpa >> PAGE_SHIFT;
unsigned long mmu_seq; unsigned long mmu_seq;
int write = error_code & PFERR_WRITE_MASK;
bool map_writable;
ASSERT(vcpu); ASSERT(vcpu);
ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
@ -2660,7 +2667,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
mmu_seq = vcpu->kvm->mmu_notifier_seq; mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb(); smp_rmb();
if (try_async_pf(vcpu, no_apf, gfn, gpa, &pfn)) if (try_async_pf(vcpu, no_apf, gfn, gpa, &pfn, write, &map_writable))
return 0; return 0;
/* mmio */ /* mmio */
@ -2670,7 +2677,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
if (mmu_notifier_retry(vcpu, mmu_seq)) if (mmu_notifier_retry(vcpu, mmu_seq))
goto out_unlock; goto out_unlock;
kvm_mmu_free_some_pages(vcpu); kvm_mmu_free_some_pages(vcpu);
r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, r = __direct_map(vcpu, gpa, write, map_writable,
level, gfn, pfn); level, gfn, pfn);
spin_unlock(&vcpu->kvm->mmu_lock); spin_unlock(&vcpu->kvm->mmu_lock);

View file

@ -427,7 +427,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
struct guest_walker *gw, struct guest_walker *gw,
int user_fault, int write_fault, int hlevel, int user_fault, int write_fault, int hlevel,
int *ptwrite, pfn_t pfn) int *ptwrite, pfn_t pfn, bool map_writable)
{ {
unsigned access = gw->pt_access; unsigned access = gw->pt_access;
struct kvm_mmu_page *sp = NULL; struct kvm_mmu_page *sp = NULL;
@ -501,7 +501,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access, mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access,
user_fault, write_fault, dirty, ptwrite, it.level, user_fault, write_fault, dirty, ptwrite, it.level,
gw->gfn, pfn, false, true); gw->gfn, pfn, false, map_writable);
FNAME(pte_prefetch)(vcpu, gw, it.sptep); FNAME(pte_prefetch)(vcpu, gw, it.sptep);
return it.sptep; return it.sptep;
@ -539,6 +539,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
pfn_t pfn; pfn_t pfn;
int level = PT_PAGE_TABLE_LEVEL; int level = PT_PAGE_TABLE_LEVEL;
unsigned long mmu_seq; unsigned long mmu_seq;
bool map_writable;
pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
@ -569,13 +570,17 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
mmu_seq = vcpu->kvm->mmu_notifier_seq; mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb(); smp_rmb();
if (try_async_pf(vcpu, no_apf, walker.gfn, addr, &pfn)) if (try_async_pf(vcpu, no_apf, walker.gfn, addr, &pfn, write_fault,
&map_writable))
return 0; return 0;
/* mmio */ /* mmio */
if (is_error_pfn(pfn)) if (is_error_pfn(pfn))
return kvm_handle_bad_page(vcpu->kvm, walker.gfn, pfn); return kvm_handle_bad_page(vcpu->kvm, walker.gfn, pfn);
if (!map_writable)
walker.pte_access &= ~ACC_WRITE_MASK;
spin_lock(&vcpu->kvm->mmu_lock); spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu, mmu_seq)) if (mmu_notifier_retry(vcpu, mmu_seq))
goto out_unlock; goto out_unlock;
@ -583,7 +588,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
kvm_mmu_free_some_pages(vcpu); kvm_mmu_free_some_pages(vcpu);
sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
level, &write_pt, pfn); level, &write_pt, pfn, map_writable);
(void)sptep; (void)sptep;
pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__, pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
sptep, *sptep, write_pt); sptep, *sptep, write_pt);

View file

@ -334,8 +334,11 @@ void kvm_set_page_accessed(struct page *page);
pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr); pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr);
pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn); pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async); pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
bool write_fault, bool *writable);
pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
bool *writable);
pfn_t gfn_to_pfn_memslot(struct kvm *kvm, pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn); struct kvm_memory_slot *slot, gfn_t gfn);
int memslot_id(struct kvm *kvm, gfn_t gfn); int memslot_id(struct kvm *kvm, gfn_t gfn);

View file

@ -959,7 +959,7 @@ static pfn_t get_fault_pfn(void)
} }
static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic, static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
bool *async) bool *async, bool write_fault, bool *writable)
{ {
struct page *page[1]; struct page *page[1];
int npages = 0; int npages = 0;
@ -968,12 +968,34 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
/* we can do it either atomically or asynchronously, not both */ /* we can do it either atomically or asynchronously, not both */
BUG_ON(atomic && async); BUG_ON(atomic && async);
BUG_ON(!write_fault && !writable);
if (writable)
*writable = true;
if (atomic || async) if (atomic || async)
npages = __get_user_pages_fast(addr, 1, 1, page); npages = __get_user_pages_fast(addr, 1, 1, page);
if (unlikely(npages != 1) && !atomic) { if (unlikely(npages != 1) && !atomic) {
might_sleep(); might_sleep();
npages = get_user_pages_fast(addr, 1, 1, page);
if (writable)
*writable = write_fault;
npages = get_user_pages_fast(addr, 1, write_fault, page);
/* map read fault as writable if possible */
if (unlikely(!write_fault) && npages == 1) {
struct page *wpage[1];
npages = __get_user_pages_fast(addr, 1, 1, wpage);
if (npages == 1) {
*writable = true;
put_page(page[0]);
page[0] = wpage[0];
}
npages = 1;
}
} }
if (unlikely(npages != 1)) { if (unlikely(npages != 1)) {
@ -1011,11 +1033,12 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr) pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
{ {
return hva_to_pfn(kvm, addr, true, NULL); return hva_to_pfn(kvm, addr, true, NULL, true, NULL);
} }
EXPORT_SYMBOL_GPL(hva_to_pfn_atomic); EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);
static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async) static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async,
bool write_fault, bool *writable)
{ {
unsigned long addr; unsigned long addr;
@ -1028,32 +1051,40 @@ static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async)
return page_to_pfn(bad_page); return page_to_pfn(bad_page);
} }
return hva_to_pfn(kvm, addr, atomic, async); return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable);
} }
pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn) pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
{ {
return __gfn_to_pfn(kvm, gfn, true, NULL); return __gfn_to_pfn(kvm, gfn, true, NULL, true, NULL);
} }
EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic); EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic);
pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async) pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
bool write_fault, bool *writable)
{ {
return __gfn_to_pfn(kvm, gfn, false, async); return __gfn_to_pfn(kvm, gfn, false, async, write_fault, writable);
} }
EXPORT_SYMBOL_GPL(gfn_to_pfn_async); EXPORT_SYMBOL_GPL(gfn_to_pfn_async);
pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
{ {
return __gfn_to_pfn(kvm, gfn, false, NULL); return __gfn_to_pfn(kvm, gfn, false, NULL, true, NULL);
} }
EXPORT_SYMBOL_GPL(gfn_to_pfn); EXPORT_SYMBOL_GPL(gfn_to_pfn);
/*
 * Translate a guest frame number to a host pfn, forwarding the fault type.
 *
 * Calls __gfn_to_pfn() with atomic == false and async == NULL, and passes
 * write_fault and writable through unchanged.
 *
 * hva_to_pfn(), which __gfn_to_pfn() calls, BUG()s when write_fault is
 * false and writable is NULL, so read-fault callers must supply writable.
 */
pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
bool *writable)
{
return __gfn_to_pfn(kvm, gfn, false, NULL, write_fault, writable);
}
EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
pfn_t gfn_to_pfn_memslot(struct kvm *kvm, pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn) struct kvm_memory_slot *slot, gfn_t gfn)
{ {
unsigned long addr = gfn_to_hva_memslot(slot, gfn); unsigned long addr = gfn_to_hva_memslot(slot, gfn);
return hva_to_pfn(kvm, addr, false, NULL); return hva_to_pfn(kvm, addr, false, NULL, true, NULL);
} }
int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,