Merge branch 'booke-hugetlb' into next

This commit is contained in:
Benjamin Herrenschmidt 2011-12-08 13:20:34 +11:00
commit faa8bf8878
14 changed files with 155 additions and 87 deletions

View file

@ -155,6 +155,7 @@ CONFIG_VFAT_FS=y
CONFIG_NTFS_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_HUGETLBFS=y
CONFIG_JFFS2_FS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y

View file

@ -81,6 +81,7 @@ CONFIG_EXT3_FS=y
# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_HUGETLBFS=y
# CONFIG_MISC_FILESYSTEMS is not set
CONFIG_PARTITION_ADVANCED=y
CONFIG_MAC_PARTITION=y

View file

@ -182,6 +182,7 @@ CONFIG_VFAT_FS=y
CONFIG_NTFS_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_HUGETLBFS=y
CONFIG_ADFS_FS=m
CONFIG_AFFS_FS=m
CONFIG_HFS_FS=m

View file

@ -183,6 +183,7 @@ CONFIG_VFAT_FS=y
CONFIG_NTFS_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_HUGETLBFS=y
CONFIG_ADFS_FS=m
CONFIG_AFFS_FS=m
CONFIG_HFS_FS=m

View file

@ -5,7 +5,6 @@
#include <asm/page.h>
extern struct kmem_cache *hugepte_cache;
extern void __init reserve_hugetlb_gpages(void);
static inline pte_t *hugepd_page(hugepd_t hpd)
{
@ -22,14 +21,14 @@ static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
unsigned pdshift)
{
/*
* On 32-bit, we have multiple higher-level table entries that point to
* the same hugepte. Just use the first one since they're all
* On FSL BookE, we have multiple higher-level table entries that
* point to the same hugepte. Just use the first one since they're all
* identical. So for that case, idx=0.
*/
unsigned long idx = 0;
pte_t *dir = hugepd_page(*hpdp);
#ifdef CONFIG_PPC64
#ifndef CONFIG_PPC_FSL_BOOK3E
idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp);
#endif
@ -53,7 +52,8 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
}
#endif
void book3e_hugetlb_preload(struct mm_struct *mm, unsigned long ea, pte_t pte);
void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
pte_t pte);
void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
@ -124,7 +124,17 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t pte, int dirty)
{
#ifdef HUGETLB_NEED_PRELOAD
/*
* The "return 1" forces a call of update_mmu_cache, which will write a
* TLB entry. Without this, platforms that don't do a write of the TLB
* entry in the TLB miss handler asm will fault ad infinitum.
*/
ptep_set_access_flags(vma, addr, ptep, pte, dirty);
return 1;
#else
return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
#endif
}
static inline pte_t huge_ptep_get(pte_t *ptep)
@ -142,14 +152,24 @@ static inline void arch_release_hugepage(struct page *page)
}
#else /* ! CONFIG_HUGETLB_PAGE */
static inline void reserve_hugetlb_gpages(void)
{
pr_err("Cannot reserve gpages without hugetlb enabled\n");
}
static inline void flush_hugetlb_page(struct vm_area_struct *vma,
unsigned long vmaddr)
{
}
#endif /* CONFIG_HUGETLB_PAGE */
/*
* FSL Book3E platforms require special gpage handling - the gpages
* are reserved early in the boot process by memblock instead of via
* the .dts as on IBM platforms.
*/
#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_PPC_FSL_BOOK3E)
extern void __init reserve_hugetlb_gpages(void);
#else
static inline void reserve_hugetlb_gpages(void)
{
}
#endif
#endif /* _ASM_POWERPC_HUGETLB_H */

View file

@ -258,6 +258,13 @@ extern int mmu_vmemmap_psize;
#ifdef CONFIG_PPC64
extern unsigned long linear_map_top;
/*
* 64-bit booke platforms don't load the tlb in the tlb miss handler code.
* HUGETLB_NEED_PRELOAD handles this - it causes huge_ptep_set_access_flags to
* return 1, indicating that the tlb requires preloading.
*/
#define HUGETLB_NEED_PRELOAD
#endif
#endif /* !__ASSEMBLY__ */

View file

@ -130,7 +130,9 @@ do { \
#ifdef CONFIG_HUGETLB_PAGE
#ifdef CONFIG_PPC_MM_SLICES
#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
#endif
#endif /* !CONFIG_HUGETLB_PAGE */

View file

@ -35,6 +35,8 @@
#include <linux/pci.h>
#include <linux/lockdep.h>
#include <linux/memblock.h>
#include <linux/hugetlb.h>
#include <asm/io.h>
#include <asm/kdump.h>
#include <asm/prom.h>
@ -64,6 +66,7 @@
#include <asm/mmu_context.h>
#include <asm/code-patching.h>
#include <asm/kvm_ppc.h>
#include <asm/hugetlb.h>
#include "setup.h"
@ -217,6 +220,13 @@ void __init early_setup(unsigned long dt_ptr)
/* Initialize the hash table or TLB handling */
early_init_mmu();
/*
* Reserve any gigantic pages requested on the command line.
* memblock needs to have been initialized by the time this is
* called since this will reserve memory.
*/
reserve_hugetlb_gpages();
DBG(" <- early_setup()\n");
}

View file

@ -37,31 +37,32 @@ static inline int book3e_tlb_exists(unsigned long ea, unsigned long pid)
return found;
}
void book3e_hugetlb_preload(struct mm_struct *mm, unsigned long ea, pte_t pte)
void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
pte_t pte)
{
unsigned long mas1, mas2;
u64 mas7_3;
unsigned long psize, tsize, shift;
unsigned long flags;
struct mm_struct *mm;
#ifdef CONFIG_PPC_FSL_BOOK3E
int index, lz, ncams;
struct vm_area_struct *vma;
int index, ncams;
#endif
if (unlikely(is_kernel_addr(ea)))
return;
mm = vma->vm_mm;
#ifdef CONFIG_PPC_MM_SLICES
psize = mmu_get_tsize(get_slice_psize(mm, ea));
tsize = mmu_get_psize(psize);
psize = get_slice_psize(mm, ea);
tsize = mmu_get_tsize(psize);
shift = mmu_psize_defs[psize].shift;
#else
vma = find_vma(mm, ea);
psize = vma_mmu_pagesize(vma); /* returns actual size in bytes */
asm (PPC_CNTLZL "%0,%1" : "=r" (lz) : "r" (psize));
shift = 31 - lz;
tsize = 21 - lz;
psize = vma_mmu_pagesize(vma);
shift = __ilog2(psize);
tsize = shift - 10;
#endif
/*

View file

@ -28,22 +28,22 @@ unsigned int HPAGE_SHIFT;
/*
* Tracks gpages after the device tree is scanned and before the
* huge_boot_pages list is ready. On 64-bit implementations, this is
* just used to track 16G pages and so is a single array. 32-bit
* implementations may have more than one gpage size due to limitations
* of the memory allocators, so we need multiple arrays
* huge_boot_pages list is ready. On non-Freescale implementations, this is
* just used to track 16G pages and so is a single array. FSL-based
* implementations may have more than one gpage size, so we need multiple
* arrays
*/
#ifdef CONFIG_PPC64
#define MAX_NUMBER_GPAGES 1024
static u64 gpage_freearray[MAX_NUMBER_GPAGES];
static unsigned nr_gpages;
#else
#ifdef CONFIG_PPC_FSL_BOOK3E
#define MAX_NUMBER_GPAGES 128
struct psize_gpages {
u64 gpage_list[MAX_NUMBER_GPAGES];
unsigned int nr_gpages;
};
static struct psize_gpages gpage_freearray[MMU_PAGE_COUNT];
#else
#define MAX_NUMBER_GPAGES 1024
static u64 gpage_freearray[MAX_NUMBER_GPAGES];
static unsigned nr_gpages;
#endif
static inline int shift_to_mmu_psize(unsigned int shift)
@ -114,12 +114,12 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
struct kmem_cache *cachep;
pte_t *new;
#ifdef CONFIG_PPC64
cachep = PGT_CACHE(pdshift - pshift);
#else
#ifdef CONFIG_PPC_FSL_BOOK3E
int i;
int num_hugepd = 1 << (pshift - pdshift);
cachep = hugepte_cache;
#else
cachep = PGT_CACHE(pdshift - pshift);
#endif
new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT);
@ -131,12 +131,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
return -ENOMEM;
spin_lock(&mm->page_table_lock);
#ifdef CONFIG_PPC64
if (!hugepd_none(*hpdp))
kmem_cache_free(cachep, new);
else
hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
#else
#ifdef CONFIG_PPC_FSL_BOOK3E
/*
* We have multiple higher-level entries that point to the same
* actual pte location. Fill in each as we go and backtrack on error.
@ -155,11 +150,28 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
hpdp->pd = 0;
kmem_cache_free(cachep, new);
}
#else
if (!hugepd_none(*hpdp))
kmem_cache_free(cachep, new);
else
hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
#endif
spin_unlock(&mm->page_table_lock);
return 0;
}
/*
* These macros define how to determine which level of the page table holds
* the hpdp.
*/
#ifdef CONFIG_PPC_FSL_BOOK3E
#define HUGEPD_PGD_SHIFT PGDIR_SHIFT
#define HUGEPD_PUD_SHIFT PUD_SHIFT
#else
#define HUGEPD_PGD_SHIFT PUD_SHIFT
#define HUGEPD_PUD_SHIFT PMD_SHIFT
#endif
pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
{
pgd_t *pg;
@ -172,12 +184,13 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
addr &= ~(sz-1);
pg = pgd_offset(mm, addr);
if (pshift >= PUD_SHIFT) {
if (pshift >= HUGEPD_PGD_SHIFT) {
hpdp = (hugepd_t *)pg;
} else {
pdshift = PUD_SHIFT;
pu = pud_alloc(mm, pg, addr);
if (pshift >= PMD_SHIFT) {
if (pshift >= HUGEPD_PUD_SHIFT) {
hpdp = (hugepd_t *)pu;
} else {
pdshift = PMD_SHIFT;
@ -197,7 +210,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
return hugepte_offset(hpdp, addr, pdshift);
}
#ifdef CONFIG_PPC32
#ifdef CONFIG_PPC_FSL_BOOK3E
/* Build list of addresses of gigantic pages. This function is used in early
* boot before the buddy or bootmem allocator is setup.
*/
@ -317,7 +330,7 @@ void __init reserve_hugetlb_gpages(void)
}
}
#else /* PPC64 */
#else /* !PPC_FSL_BOOK3E */
/* Build list of addresses of gigantic pages. This function is used in early
* boot before the buddy or bootmem allocator is setup.
@ -355,7 +368,7 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
return 0;
}
#ifdef CONFIG_PPC32
#ifdef CONFIG_PPC_FSL_BOOK3E
#define HUGEPD_FREELIST_SIZE \
((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))
@ -415,11 +428,11 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
unsigned long pdmask = ~((1UL << pdshift) - 1);
unsigned int num_hugepd = 1;
#ifdef CONFIG_PPC64
unsigned int shift = hugepd_shift(*hpdp);
#else
/* Note: On 32-bit the hpdp may be the first of several */
#ifdef CONFIG_PPC_FSL_BOOK3E
/* Note: On fsl the hpdp may be the first of several */
num_hugepd = (1 << (hugepd_shift(*hpdp) - pdshift));
#else
unsigned int shift = hugepd_shift(*hpdp);
#endif
start &= pdmask;
@ -437,10 +450,11 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
hpdp->pd = 0;
tlb->need_flush = 1;
#ifdef CONFIG_PPC64
pgtable_free_tlb(tlb, hugepte, pdshift - shift);
#else
#ifdef CONFIG_PPC_FSL_BOOK3E
hugepd_free(tlb, hugepte);
#else
pgtable_free_tlb(tlb, hugepte, pdshift - shift);
#endif
}
@ -453,14 +467,23 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
unsigned long start;
start = addr;
pmd = pmd_offset(pud, addr);
do {
pmd = pmd_offset(pud, addr);
next = pmd_addr_end(addr, end);
if (pmd_none(*pmd))
continue;
#ifdef CONFIG_PPC_FSL_BOOK3E
/*
* Increment next by the size of the huge mapping since
* there may be more than one entry at this level for a
* single hugepage, but all of them point to
* the same kmem cache that holds the hugepte.
*/
next = addr + (1 << hugepd_shift(*(hugepd_t *)pmd));
#endif
free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT,
addr, next, floor, ceiling);
} while (pmd++, addr = next, addr != end);
} while (addr = next, addr != end);
start &= PUD_MASK;
if (start < floor)
@ -487,8 +510,8 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
unsigned long start;
start = addr;
pud = pud_offset(pgd, addr);
do {
pud = pud_offset(pgd, addr);
next = pud_addr_end(addr, end);
if (!is_hugepd(pud)) {
if (pud_none_or_clear_bad(pud))
@ -496,10 +519,19 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
ceiling);
} else {
#ifdef CONFIG_PPC_FSL_BOOK3E
/*
* Increment next by the size of the huge mapping since
* there may be more than one entry at this level for a
* single hugepage, but all of them point to
* the same kmem cache that holds the hugepte.
*/
next = addr + (1 << hugepd_shift(*(hugepd_t *)pud));
#endif
free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT,
addr, next, floor, ceiling);
}
} while (pud++, addr = next, addr != end);
} while (addr = next, addr != end);
start &= PGDIR_MASK;
if (start < floor)
@ -554,12 +586,12 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
continue;
hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
} else {
#ifdef CONFIG_PPC32
#ifdef CONFIG_PPC_FSL_BOOK3E
/*
* Increment next by the size of the huge mapping since
* on 32-bit there may be more than one entry at the pgd
* level for a single hugepage, but all of them point to
* the same kmem cache that holds the hugepte.
* there may be more than one entry at the pgd level
* for a single hugepage, but all of them point to the
* same kmem cache that holds the hugepte.
*/
next = addr + (1 << hugepd_shift(*(hugepd_t *)pgd));
#endif
@ -697,19 +729,17 @@ int gup_hugepd(hugepd_t *hugepd, unsigned pdshift,
return 1;
}
#ifdef CONFIG_PPC_MM_SLICES
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff,
unsigned long flags)
{
#ifdef CONFIG_PPC_MM_SLICES
struct hstate *hstate = hstate_file(file);
int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
#else
return get_unmapped_area(file, addr, len, pgoff, flags);
#endif
}
#endif
unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
{
@ -783,7 +813,7 @@ static int __init hugepage_setup_sz(char *str)
}
__setup("hugepagesz=", hugepage_setup_sz);
#ifdef CONFIG_FSL_BOOKE
#ifdef CONFIG_PPC_FSL_BOOK3E
struct kmem_cache *hugepte_cache;
static int __init hugetlbpage_init(void)
{

View file

@ -553,7 +553,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
#if (defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_FSL_BOOK3E)) \
&& defined(CONFIG_HUGETLB_PAGE)
if (is_vm_hugetlb_page(vma))
book3e_hugetlb_preload(vma->vm_mm, address, *ptep);
book3e_hugetlb_preload(vma, address, *ptep);
#endif
}

View file

@ -94,11 +94,11 @@
srdi r15,r16,60 /* get region */
rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
bne- dtlb_miss_fault_bolted
bne- dtlb_miss_fault_bolted /* Bail if fault addr is invalid */
rlwinm r10,r11,32-19,27,27
rlwimi r10,r11,32-16,19,19
cmpwi r15,0
cmpwi r15,0 /* user vs kernel check */
ori r10,r10,_PAGE_PRESENT
oris r11,r10,_PAGE_ACCESSED@h
@ -120,44 +120,38 @@ tlb_miss_common_bolted:
rldicl r15,r16,64-PGDIR_SHIFT+3,64-PGD_INDEX_SIZE-3
cmpldi cr0,r14,0
clrrdi r15,r15,3
beq tlb_miss_fault_bolted
beq tlb_miss_fault_bolted /* No PGDIR, bail */
BEGIN_MMU_FTR_SECTION
/* Set the TLB reservation and search for existing entry. Then load
* the entry.
*/
PPC_TLBSRX_DOT(0,r16)
ldx r14,r14,r15
beq normal_tlb_miss_done
ldx r14,r14,r15 /* grab pgd entry */
beq normal_tlb_miss_done /* tlb exists already, bail */
MMU_FTR_SECTION_ELSE
ldx r14,r14,r15
ldx r14,r14,r15 /* grab pgd entry */
ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV)
#ifndef CONFIG_PPC_64K_PAGES
rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3
clrrdi r15,r15,3
cmpldi cr0,r14,0
beq tlb_miss_fault_bolted
ldx r14,r14,r15
cmpdi cr0,r14,0
bge tlb_miss_fault_bolted /* Bad pgd entry or hugepage; bail */
ldx r14,r14,r15 /* grab pud entry */
#endif /* CONFIG_PPC_64K_PAGES */
rldicl r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3
clrrdi r15,r15,3
cmpldi cr0,r14,0
beq tlb_miss_fault_bolted
ldx r14,r14,r15
cmpdi cr0,r14,0
bge tlb_miss_fault_bolted
ldx r14,r14,r15 /* Grab pmd entry */
rldicl r15,r16,64-PAGE_SHIFT+3,64-PTE_INDEX_SIZE-3
clrrdi r15,r15,3
cmpldi cr0,r14,0
beq tlb_miss_fault_bolted
ldx r14,r14,r15
cmpdi cr0,r14,0
bge tlb_miss_fault_bolted
ldx r14,r14,r15 /* Grab PTE, normal (!huge) page */
/* Check if required permissions are met */
andc. r15,r11,r14

View file

@ -52,7 +52,7 @@
* indirect page table entries.
*/
#ifdef CONFIG_PPC_BOOK3E_MMU
#ifdef CONFIG_FSL_BOOKE
#ifdef CONFIG_PPC_FSL_BOOK3E
struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
[MMU_PAGE_4K] = {
.shift = 12,

View file

@ -174,7 +174,6 @@ config BOOKE
config FSL_BOOKE
bool
depends on (E200 || E500) && PPC32
select SYS_SUPPORTS_HUGETLBFS if PHYS_64BIT
default y
# this is for common code between PPC32 & PPC64 FSL BOOKE
@ -182,6 +181,7 @@ config PPC_FSL_BOOK3E
bool
select FSL_EMB_PERFMON
select PPC_SMP_MUXED_IPI
select SYS_SUPPORTS_HUGETLBFS if PHYS_64BIT || PPC64
default y if FSL_BOOKE
config PTE_64BIT
@ -309,7 +309,7 @@ config PPC_BOOK3E_MMU
config PPC_MM_SLICES
bool
default y if (PPC64 && HUGETLB_PAGE) || (PPC_STD_MMU_64 && PPC_64K_PAGES)
default y if (!PPC_FSL_BOOK3E && PPC64 && HUGETLB_PAGE) || (PPC_STD_MMU_64 && PPC_64K_PAGES)
default n
config VIRT_CPU_ACCOUNTING