de60f5f10c
The cost of faulting in all memory to be locked can be very high when working with large mappings. If only portions of the mapping will be used, this can incur a high penalty for locking. For the example of a large file, this is the usage pattern for a large statistical language model (and probably applies to other statistical or graphical models as well). For the security example, this applies to any application transacting in data that cannot be swapped out (credit card data, medical records, etc). This patch introduces the ability to request that pages are not pre-faulted, but are placed on the unevictable LRU when they are finally faulted in. The VM_LOCKONFAULT flag will be used together with VM_LOCKED and has no effect when set without VM_LOCKED. Setting the VM_LOCKONFAULT flag for a VMA will cause pages faulted into that VMA to be added to the unevictable LRU when they are faulted or if they are already present, but will not cause any missing pages to be faulted in. Exposing this new lock state means that we cannot overload the meaning of the FOLL_POPULATE flag any longer. Prior to this patch it was used to mean that the VMA for a fault was locked. This means we need the new FOLL_MLOCK flag to communicate the locked state of a VMA. FOLL_POPULATE will now only control if the VMA should be populated and in the case of VM_LOCKONFAULT, it will not be set.
Signed-off-by: Eric B Munson <emunson@akamai.com> Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Acked-by: Vlastimil Babka <vbabka@suse.cz> Cc: Michal Hocko <mhocko@suse.cz> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Guenter Roeck <linux@roeck-us.net> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Michael Kerrisk <mtk.manpages@gmail.com> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Shuah Khan <shuahkh@osg.samsung.com> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
245 lines
6.7 KiB
C
245 lines
6.7 KiB
C
/*
|
|
* mm/debug.c
|
|
*
|
|
* mm/ specific debug routines.
|
|
*
|
|
*/
|
|
|
|
#include <linux/kernel.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/trace_events.h>
|
|
#include <linux/memcontrol.h>
|
|
|
|
static const struct trace_print_flags pageflag_names[] = {
|
|
{1UL << PG_locked, "locked" },
|
|
{1UL << PG_error, "error" },
|
|
{1UL << PG_referenced, "referenced" },
|
|
{1UL << PG_uptodate, "uptodate" },
|
|
{1UL << PG_dirty, "dirty" },
|
|
{1UL << PG_lru, "lru" },
|
|
{1UL << PG_active, "active" },
|
|
{1UL << PG_slab, "slab" },
|
|
{1UL << PG_owner_priv_1, "owner_priv_1" },
|
|
{1UL << PG_arch_1, "arch_1" },
|
|
{1UL << PG_reserved, "reserved" },
|
|
{1UL << PG_private, "private" },
|
|
{1UL << PG_private_2, "private_2" },
|
|
{1UL << PG_writeback, "writeback" },
|
|
#ifdef CONFIG_PAGEFLAGS_EXTENDED
|
|
{1UL << PG_head, "head" },
|
|
{1UL << PG_tail, "tail" },
|
|
#else
|
|
{1UL << PG_compound, "compound" },
|
|
#endif
|
|
{1UL << PG_swapcache, "swapcache" },
|
|
{1UL << PG_mappedtodisk, "mappedtodisk" },
|
|
{1UL << PG_reclaim, "reclaim" },
|
|
{1UL << PG_swapbacked, "swapbacked" },
|
|
{1UL << PG_unevictable, "unevictable" },
|
|
#ifdef CONFIG_MMU
|
|
{1UL << PG_mlocked, "mlocked" },
|
|
#endif
|
|
#ifdef CONFIG_ARCH_USES_PG_UNCACHED
|
|
{1UL << PG_uncached, "uncached" },
|
|
#endif
|
|
#ifdef CONFIG_MEMORY_FAILURE
|
|
{1UL << PG_hwpoison, "hwpoison" },
|
|
#endif
|
|
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
|
{1UL << PG_compound_lock, "compound_lock" },
|
|
#endif
|
|
#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT)
|
|
{1UL << PG_young, "young" },
|
|
{1UL << PG_idle, "idle" },
|
|
#endif
|
|
};
|
|
|
|
static void dump_flags(unsigned long flags,
|
|
const struct trace_print_flags *names, int count)
|
|
{
|
|
const char *delim = "";
|
|
unsigned long mask;
|
|
int i;
|
|
|
|
pr_emerg("flags: %#lx(", flags);
|
|
|
|
/* remove zone id */
|
|
flags &= (1UL << NR_PAGEFLAGS) - 1;
|
|
|
|
for (i = 0; i < count && flags; i++) {
|
|
|
|
mask = names[i].mask;
|
|
if ((flags & mask) != mask)
|
|
continue;
|
|
|
|
flags &= ~mask;
|
|
pr_cont("%s%s", delim, names[i].name);
|
|
delim = "|";
|
|
}
|
|
|
|
/* check for left over flags */
|
|
if (flags)
|
|
pr_cont("%s%#lx", delim, flags);
|
|
|
|
pr_cont(")\n");
|
|
}
|
|
|
|
void dump_page_badflags(struct page *page, const char *reason,
|
|
unsigned long badflags)
|
|
{
|
|
pr_emerg("page:%p count:%d mapcount:%d mapping:%p index:%#lx\n",
|
|
page, atomic_read(&page->_count), page_mapcount(page),
|
|
page->mapping, page->index);
|
|
BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS);
|
|
dump_flags(page->flags, pageflag_names, ARRAY_SIZE(pageflag_names));
|
|
if (reason)
|
|
pr_alert("page dumped because: %s\n", reason);
|
|
if (page->flags & badflags) {
|
|
pr_alert("bad because of flags:\n");
|
|
dump_flags(page->flags & badflags,
|
|
pageflag_names, ARRAY_SIZE(pageflag_names));
|
|
}
|
|
#ifdef CONFIG_MEMCG
|
|
if (page->mem_cgroup)
|
|
pr_alert("page->mem_cgroup:%p\n", page->mem_cgroup);
|
|
#endif
|
|
}
|
|
|
|
/*
 * Dump @page to the kernel log without singling out any flags as bad.
 *
 * @page:   page to describe.
 * @reason: optional explanation forwarded to dump_page_badflags().
 */
void dump_page(struct page *page, const char *reason)
{
	/* an empty bad-flags mask means no flag is reported as bad */
	const unsigned long no_badflags = 0;

	dump_page_badflags(page, reason, no_badflags);
}
EXPORT_SYMBOL(dump_page);
|
|
|
|
#ifdef CONFIG_DEBUG_VM
|
|
|
|
static const struct trace_print_flags vmaflags_names[] = {
|
|
{VM_READ, "read" },
|
|
{VM_WRITE, "write" },
|
|
{VM_EXEC, "exec" },
|
|
{VM_SHARED, "shared" },
|
|
{VM_MAYREAD, "mayread" },
|
|
{VM_MAYWRITE, "maywrite" },
|
|
{VM_MAYEXEC, "mayexec" },
|
|
{VM_MAYSHARE, "mayshare" },
|
|
{VM_GROWSDOWN, "growsdown" },
|
|
{VM_PFNMAP, "pfnmap" },
|
|
{VM_DENYWRITE, "denywrite" },
|
|
{VM_LOCKONFAULT, "lockonfault" },
|
|
{VM_LOCKED, "locked" },
|
|
{VM_IO, "io" },
|
|
{VM_SEQ_READ, "seqread" },
|
|
{VM_RAND_READ, "randread" },
|
|
{VM_DONTCOPY, "dontcopy" },
|
|
{VM_DONTEXPAND, "dontexpand" },
|
|
{VM_ACCOUNT, "account" },
|
|
{VM_NORESERVE, "noreserve" },
|
|
{VM_HUGETLB, "hugetlb" },
|
|
#if defined(CONFIG_X86)
|
|
{VM_PAT, "pat" },
|
|
#elif defined(CONFIG_PPC)
|
|
{VM_SAO, "sao" },
|
|
#elif defined(CONFIG_PARISC) || defined(CONFIG_METAG) || defined(CONFIG_IA64)
|
|
{VM_GROWSUP, "growsup" },
|
|
#elif !defined(CONFIG_MMU)
|
|
{VM_MAPPED_COPY, "mappedcopy" },
|
|
#else
|
|
{VM_ARCH_1, "arch_1" },
|
|
#endif
|
|
{VM_DONTDUMP, "dontdump" },
|
|
#ifdef CONFIG_MEM_SOFT_DIRTY
|
|
{VM_SOFTDIRTY, "softdirty" },
|
|
#endif
|
|
{VM_MIXEDMAP, "mixedmap" },
|
|
{VM_HUGEPAGE, "hugepage" },
|
|
{VM_NOHUGEPAGE, "nohugepage" },
|
|
{VM_MERGEABLE, "mergeable" },
|
|
};
|
|
|
|
void dump_vma(const struct vm_area_struct *vma)
|
|
{
|
|
pr_emerg("vma %p start %p end %p\n"
|
|
"next %p prev %p mm %p\n"
|
|
"prot %lx anon_vma %p vm_ops %p\n"
|
|
"pgoff %lx file %p private_data %p\n",
|
|
vma, (void *)vma->vm_start, (void *)vma->vm_end, vma->vm_next,
|
|
vma->vm_prev, vma->vm_mm,
|
|
(unsigned long)pgprot_val(vma->vm_page_prot),
|
|
vma->anon_vma, vma->vm_ops, vma->vm_pgoff,
|
|
vma->vm_file, vma->vm_private_data);
|
|
dump_flags(vma->vm_flags, vmaflags_names, ARRAY_SIZE(vmaflags_names));
|
|
}
|
|
EXPORT_SYMBOL(dump_vma);
|
|
|
|
void dump_mm(const struct mm_struct *mm)
|
|
{
|
|
pr_emerg("mm %p mmap %p seqnum %d task_size %lu\n"
|
|
#ifdef CONFIG_MMU
|
|
"get_unmapped_area %p\n"
|
|
#endif
|
|
"mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n"
|
|
"pgd %p mm_users %d mm_count %d nr_ptes %lu nr_pmds %lu map_count %d\n"
|
|
"hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n"
|
|
"pinned_vm %lx shared_vm %lx exec_vm %lx stack_vm %lx\n"
|
|
"start_code %lx end_code %lx start_data %lx end_data %lx\n"
|
|
"start_brk %lx brk %lx start_stack %lx\n"
|
|
"arg_start %lx arg_end %lx env_start %lx env_end %lx\n"
|
|
"binfmt %p flags %lx core_state %p\n"
|
|
#ifdef CONFIG_AIO
|
|
"ioctx_table %p\n"
|
|
#endif
|
|
#ifdef CONFIG_MEMCG
|
|
"owner %p "
|
|
#endif
|
|
"exe_file %p\n"
|
|
#ifdef CONFIG_MMU_NOTIFIER
|
|
"mmu_notifier_mm %p\n"
|
|
#endif
|
|
#ifdef CONFIG_NUMA_BALANCING
|
|
"numa_next_scan %lu numa_scan_offset %lu numa_scan_seq %d\n"
|
|
#endif
|
|
#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
|
|
"tlb_flush_pending %d\n"
|
|
#endif
|
|
"%s", /* This is here to hold the comma */
|
|
|
|
mm, mm->mmap, mm->vmacache_seqnum, mm->task_size,
|
|
#ifdef CONFIG_MMU
|
|
mm->get_unmapped_area,
|
|
#endif
|
|
mm->mmap_base, mm->mmap_legacy_base, mm->highest_vm_end,
|
|
mm->pgd, atomic_read(&mm->mm_users),
|
|
atomic_read(&mm->mm_count),
|
|
atomic_long_read((atomic_long_t *)&mm->nr_ptes),
|
|
mm_nr_pmds((struct mm_struct *)mm),
|
|
mm->map_count,
|
|
mm->hiwater_rss, mm->hiwater_vm, mm->total_vm, mm->locked_vm,
|
|
mm->pinned_vm, mm->shared_vm, mm->exec_vm, mm->stack_vm,
|
|
mm->start_code, mm->end_code, mm->start_data, mm->end_data,
|
|
mm->start_brk, mm->brk, mm->start_stack,
|
|
mm->arg_start, mm->arg_end, mm->env_start, mm->env_end,
|
|
mm->binfmt, mm->flags, mm->core_state,
|
|
#ifdef CONFIG_AIO
|
|
mm->ioctx_table,
|
|
#endif
|
|
#ifdef CONFIG_MEMCG
|
|
mm->owner,
|
|
#endif
|
|
mm->exe_file,
|
|
#ifdef CONFIG_MMU_NOTIFIER
|
|
mm->mmu_notifier_mm,
|
|
#endif
|
|
#ifdef CONFIG_NUMA_BALANCING
|
|
mm->numa_next_scan, mm->numa_scan_offset, mm->numa_scan_seq,
|
|
#endif
|
|
#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
|
|
mm->tlb_flush_pending,
|
|
#endif
|
|
"" /* This is here to not have a comma! */
|
|
);
|
|
|
|
dump_flags(mm->def_flags, vmaflags_names,
|
|
ARRAY_SIZE(vmaflags_names));
|
|
}
|
|
|
|
#endif /* CONFIG_DEBUG_VM */
|