mm: extract code to fault in a page from __get_user_pages()
The nesting level in __get_user_pages() is just insane. Let's try to fix it a bit by moving the code that faults in a page into its own helper.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
69e68b4f03
commit
1674448345
1 changed files with 71 additions and 67 deletions
136
mm/gup.c
136
mm/gup.c
|
@ -214,12 +214,6 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
|
|||
return follow_page_pte(vma, address, pmd, flags);
|
||||
}
|
||||
|
||||
static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
|
||||
{
|
||||
return stack_guard_page_start(vma, addr) ||
|
||||
stack_guard_page_end(vma, addr+PAGE_SIZE);
|
||||
}
|
||||
|
||||
static int get_gate_page(struct mm_struct *mm, unsigned long address,
|
||||
unsigned int gup_flags, struct vm_area_struct **vma,
|
||||
struct page **page)
|
||||
|
@ -264,6 +258,63 @@ unmap:
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
|
||||
unsigned long address, unsigned int *flags, int *nonblocking)
|
||||
{
|
||||
struct mm_struct *mm = vma->vm_mm;
|
||||
unsigned int fault_flags = 0;
|
||||
int ret;
|
||||
|
||||
/* For mlock, just skip the stack guard page. */
|
||||
if ((*flags & FOLL_MLOCK) &&
|
||||
(stack_guard_page_start(vma, address) ||
|
||||
stack_guard_page_end(vma, address + PAGE_SIZE)))
|
||||
return -ENOENT;
|
||||
if (*flags & FOLL_WRITE)
|
||||
fault_flags |= FAULT_FLAG_WRITE;
|
||||
if (nonblocking)
|
||||
fault_flags |= FAULT_FLAG_ALLOW_RETRY;
|
||||
if (*flags & FOLL_NOWAIT)
|
||||
fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
|
||||
|
||||
ret = handle_mm_fault(mm, vma, address, fault_flags);
|
||||
if (ret & VM_FAULT_ERROR) {
|
||||
if (ret & VM_FAULT_OOM)
|
||||
return -ENOMEM;
|
||||
if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
|
||||
return *flags & FOLL_HWPOISON ? -EHWPOISON : -EFAULT;
|
||||
if (ret & VM_FAULT_SIGBUS)
|
||||
return -EFAULT;
|
||||
BUG();
|
||||
}
|
||||
|
||||
if (tsk) {
|
||||
if (ret & VM_FAULT_MAJOR)
|
||||
tsk->maj_flt++;
|
||||
else
|
||||
tsk->min_flt++;
|
||||
}
|
||||
|
||||
if (ret & VM_FAULT_RETRY) {
|
||||
if (nonblocking)
|
||||
*nonblocking = 0;
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
/*
|
||||
* The VM_FAULT_WRITE bit tells us that do_wp_page has broken COW when
|
||||
* necessary, even if maybe_mkwrite decided not to set pte_write. We
|
||||
* can thus safely do subsequent page lookups as if they were reads.
|
||||
* But only do so when looping for pte_write is futile: in some cases
|
||||
* userspace may also be wanting to write to the gotten user page,
|
||||
* which a read fault here might prevent (a readonly page might get
|
||||
* reCOWed by userspace write).
|
||||
*/
|
||||
if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
|
||||
*flags &= ~FOLL_WRITE;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* __get_user_pages() - pin user pages in memory
|
||||
* @tsk: task_struct of target task
|
||||
|
@ -410,69 +461,22 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
|
|||
while (!(page = follow_page_mask(vma, start,
|
||||
foll_flags, &page_mask))) {
|
||||
int ret;
|
||||
unsigned int fault_flags = 0;
|
||||
|
||||
/* For mlock, just skip the stack guard page. */
|
||||
if (foll_flags & FOLL_MLOCK) {
|
||||
if (stack_guard_page(vma, start))
|
||||
goto next_page;
|
||||
}
|
||||
if (foll_flags & FOLL_WRITE)
|
||||
fault_flags |= FAULT_FLAG_WRITE;
|
||||
if (nonblocking)
|
||||
fault_flags |= FAULT_FLAG_ALLOW_RETRY;
|
||||
if (foll_flags & FOLL_NOWAIT)
|
||||
fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);
|
||||
|
||||
ret = handle_mm_fault(mm, vma, start,
|
||||
fault_flags);
|
||||
|
||||
if (ret & VM_FAULT_ERROR) {
|
||||
if (ret & VM_FAULT_OOM)
|
||||
return i ? i : -ENOMEM;
|
||||
if (ret & (VM_FAULT_HWPOISON |
|
||||
VM_FAULT_HWPOISON_LARGE)) {
|
||||
if (i)
|
||||
ret = faultin_page(tsk, vma, start, &foll_flags,
|
||||
nonblocking);
|
||||
switch (ret) {
|
||||
case 0:
|
||||
break;
|
||||
case -EFAULT:
|
||||
case -ENOMEM:
|
||||
case -EHWPOISON:
|
||||
return i ? i : ret;
|
||||
case -EBUSY:
|
||||
return i;
|
||||
else if (gup_flags & FOLL_HWPOISON)
|
||||
return -EHWPOISON;
|
||||
else
|
||||
return -EFAULT;
|
||||
}
|
||||
if (ret & VM_FAULT_SIGBUS)
|
||||
goto efault;
|
||||
case -ENOENT:
|
||||
goto next_page;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
if (tsk) {
|
||||
if (ret & VM_FAULT_MAJOR)
|
||||
tsk->maj_flt++;
|
||||
else
|
||||
tsk->min_flt++;
|
||||
}
|
||||
|
||||
if (ret & VM_FAULT_RETRY) {
|
||||
if (nonblocking)
|
||||
*nonblocking = 0;
|
||||
return i;
|
||||
}
|
||||
|
||||
/*
|
||||
* The VM_FAULT_WRITE bit tells us that
|
||||
* do_wp_page has broken COW when necessary,
|
||||
* even if maybe_mkwrite decided not to set
|
||||
* pte_write. We can thus safely do subsequent
|
||||
* page lookups as if they were reads. But only
|
||||
* do so when looping for pte_write is futile:
|
||||
* in some cases userspace may also be wanting
|
||||
* to write to the gotten user page, which a
|
||||
* read fault here might prevent (a readonly
|
||||
* page might get reCOWed by userspace write).
|
||||
*/
|
||||
if ((ret & VM_FAULT_WRITE) &&
|
||||
!(vma->vm_flags & VM_WRITE))
|
||||
foll_flags &= ~FOLL_WRITE;
|
||||
|
||||
cond_resched();
|
||||
}
|
||||
if (IS_ERR(page))
|
||||
|
|
Loading…
Reference in a new issue