s390/mm: implement 5 level pages tables
Add the logic to upgrade the page table for a 64-bit process to five levels. This increases the TASK_SIZE from 8PB to 16EB-4K. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
This commit is contained in:
parent
16ddcc34b8
commit
1aea9b3f92
15 changed files with 289 additions and 72 deletions
|
@ -184,7 +184,7 @@ config SCHED_OMIT_FRAME_POINTER
|
|||
|
||||
config PGTABLE_LEVELS
|
||||
int
|
||||
default 4
|
||||
default 5
|
||||
|
||||
source "init/Kconfig"
|
||||
|
||||
|
|
|
@ -74,6 +74,7 @@ typedef struct { unsigned long pgste; } pgste_t;
|
|||
typedef struct { unsigned long pte; } pte_t;
|
||||
typedef struct { unsigned long pmd; } pmd_t;
|
||||
typedef struct { unsigned long pud; } pud_t;
|
||||
typedef struct { unsigned long p4d; } p4d_t;
|
||||
typedef struct { unsigned long pgd; } pgd_t;
|
||||
typedef pte_t *pgtable_t;
|
||||
|
||||
|
@ -82,12 +83,14 @@ typedef pte_t *pgtable_t;
|
|||
#define pte_val(x) ((x).pte)
|
||||
#define pmd_val(x) ((x).pmd)
|
||||
#define pud_val(x) ((x).pud)
|
||||
#define p4d_val(x) ((x).p4d)
|
||||
#define pgd_val(x) ((x).pgd)
|
||||
|
||||
#define __pgste(x) ((pgste_t) { (x) } )
|
||||
#define __pte(x) ((pte_t) { (x) } )
|
||||
#define __pmd(x) ((pmd_t) { (x) } )
|
||||
#define __pud(x) ((pud_t) { (x) } )
|
||||
#define __p4d(x) ((p4d_t) { (x) } )
|
||||
#define __pgd(x) ((pgd_t) { (x) } )
|
||||
#define __pgprot(x) ((pgprot_t) { (x) } )
|
||||
|
||||
|
|
|
@ -51,12 +51,24 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm)
|
|||
return _SEGMENT_ENTRY_EMPTY;
|
||||
if (mm->context.asce_limit <= (1UL << 42))
|
||||
return _REGION3_ENTRY_EMPTY;
|
||||
return _REGION2_ENTRY_EMPTY;
|
||||
if (mm->context.asce_limit <= (1UL << 53))
|
||||
return _REGION2_ENTRY_EMPTY;
|
||||
return _REGION1_ENTRY_EMPTY;
|
||||
}
|
||||
|
||||
int crst_table_upgrade(struct mm_struct *);
|
||||
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit);
|
||||
void crst_table_downgrade(struct mm_struct *);
|
||||
|
||||
static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address)
|
||||
{
|
||||
unsigned long *table = crst_table_alloc(mm);
|
||||
|
||||
if (table)
|
||||
crst_table_init(table, _REGION2_ENTRY_EMPTY);
|
||||
return (p4d_t *) table;
|
||||
}
|
||||
#define p4d_free(mm, p4d) crst_table_free(mm, (unsigned long *) p4d)
|
||||
|
||||
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
|
||||
{
|
||||
unsigned long *table = crst_table_alloc(mm);
|
||||
|
@ -86,9 +98,14 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
|
|||
crst_table_free(mm, (unsigned long *) pmd);
|
||||
}
|
||||
|
||||
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
|
||||
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
|
||||
{
|
||||
pgd_val(*pgd) = _REGION2_ENTRY | __pa(pud);
|
||||
pgd_val(*pgd) = _REGION1_ENTRY | __pa(p4d);
|
||||
}
|
||||
|
||||
static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
|
||||
{
|
||||
p4d_val(*p4d) = _REGION2_ENTRY | __pa(pud);
|
||||
}
|
||||
|
||||
static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
|
||||
|
|
|
@ -24,7 +24,6 @@
|
|||
* the S390 page table tree.
|
||||
*/
|
||||
#ifndef __ASSEMBLY__
|
||||
#include <asm-generic/5level-fixup.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/mm_types.h>
|
||||
#include <linux/page-flags.h>
|
||||
|
@ -87,12 +86,15 @@ extern unsigned long zero_page_mask;
|
|||
*/
|
||||
#define PMD_SHIFT 20
|
||||
#define PUD_SHIFT 31
|
||||
#define PGDIR_SHIFT 42
|
||||
#define P4D_SHIFT 42
|
||||
#define PGDIR_SHIFT 53
|
||||
|
||||
#define PMD_SIZE (1UL << PMD_SHIFT)
|
||||
#define PMD_MASK (~(PMD_SIZE-1))
|
||||
#define PUD_SIZE (1UL << PUD_SHIFT)
|
||||
#define PUD_MASK (~(PUD_SIZE-1))
|
||||
#define P4D_SIZE (1UL << P4D_SHIFT)
|
||||
#define P4D_MASK (~(P4D_SIZE-1))
|
||||
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
|
||||
#define PGDIR_MASK (~(PGDIR_SIZE-1))
|
||||
|
||||
|
@ -105,6 +107,7 @@ extern unsigned long zero_page_mask;
|
|||
#define PTRS_PER_PTE 256
|
||||
#define PTRS_PER_PMD 2048
|
||||
#define PTRS_PER_PUD 2048
|
||||
#define PTRS_PER_P4D 2048
|
||||
#define PTRS_PER_PGD 2048
|
||||
|
||||
#define FIRST_USER_ADDRESS 0UL
|
||||
|
@ -115,6 +118,8 @@ extern unsigned long zero_page_mask;
|
|||
printk("%s:%d: bad pmd %p.\n", __FILE__, __LINE__, (void *) pmd_val(e))
|
||||
#define pud_ERROR(e) \
|
||||
printk("%s:%d: bad pud %p.\n", __FILE__, __LINE__, (void *) pud_val(e))
|
||||
#define p4d_ERROR(e) \
|
||||
printk("%s:%d: bad p4d %p.\n", __FILE__, __LINE__, (void *) p4d_val(e))
|
||||
#define pgd_ERROR(e) \
|
||||
printk("%s:%d: bad pgd %p.\n", __FILE__, __LINE__, (void *) pgd_val(e))
|
||||
|
||||
|
@ -310,8 +315,8 @@ static inline int is_module_addr(void *addr)
|
|||
#define _REGION3_ENTRY_SOFT_DIRTY 0x0000 /* SW region soft dirty bit */
|
||||
#endif
|
||||
|
||||
#define _REGION_ENTRY_BITS 0xfffffffffffff227UL
|
||||
#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe27UL
|
||||
#define _REGION_ENTRY_BITS 0xfffffffffffff22fUL
|
||||
#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe2fUL
|
||||
|
||||
/* Bits in the segment table entry */
|
||||
#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL
|
||||
|
@ -564,14 +569,14 @@ static inline void crdte(unsigned long old, unsigned long new,
|
|||
*/
|
||||
static inline int pgd_present(pgd_t pgd)
|
||||
{
|
||||
if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
|
||||
if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R1)
|
||||
return 1;
|
||||
return (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) != 0UL;
|
||||
}
|
||||
|
||||
static inline int pgd_none(pgd_t pgd)
|
||||
{
|
||||
if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
|
||||
if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R1)
|
||||
return 0;
|
||||
return (pgd_val(pgd) & _REGION_ENTRY_INVALID) != 0UL;
|
||||
}
|
||||
|
@ -589,6 +594,28 @@ static inline int pgd_bad(pgd_t pgd)
|
|||
return (pgd_val(pgd) & mask) != 0;
|
||||
}
|
||||
|
||||
static inline int p4d_present(p4d_t p4d)
|
||||
{
|
||||
if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
|
||||
return 1;
|
||||
return (p4d_val(p4d) & _REGION_ENTRY_ORIGIN) != 0UL;
|
||||
}
|
||||
|
||||
static inline int p4d_none(p4d_t p4d)
|
||||
{
|
||||
if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
|
||||
return 0;
|
||||
return p4d_val(p4d) == _REGION2_ENTRY_EMPTY;
|
||||
}
|
||||
|
||||
static inline unsigned long p4d_pfn(p4d_t p4d)
|
||||
{
|
||||
unsigned long origin_mask;
|
||||
|
||||
origin_mask = _REGION_ENTRY_ORIGIN;
|
||||
return (p4d_val(p4d) & origin_mask) >> PAGE_SHIFT;
|
||||
}
|
||||
|
||||
static inline int pud_present(pud_t pud)
|
||||
{
|
||||
if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3)
|
||||
|
@ -641,6 +668,13 @@ static inline int pud_bad(pud_t pud)
|
|||
return (pud_val(pud) & ~_REGION_ENTRY_BITS) != 0;
|
||||
}
|
||||
|
||||
static inline int p4d_bad(p4d_t p4d)
|
||||
{
|
||||
if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
|
||||
return pud_bad(__pud(p4d_val(p4d)));
|
||||
return (p4d_val(p4d) & ~_REGION_ENTRY_BITS) != 0;
|
||||
}
|
||||
|
||||
static inline int pmd_present(pmd_t pmd)
|
||||
{
|
||||
return pmd_val(pmd) != _SEGMENT_ENTRY_EMPTY;
|
||||
|
@ -794,8 +828,14 @@ static inline int pte_unused(pte_t pte)
|
|||
|
||||
static inline void pgd_clear(pgd_t *pgd)
|
||||
{
|
||||
if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
|
||||
pgd_val(*pgd) = _REGION2_ENTRY_EMPTY;
|
||||
if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
|
||||
pgd_val(*pgd) = _REGION1_ENTRY_EMPTY;
|
||||
}
|
||||
|
||||
static inline void p4d_clear(p4d_t *p4d)
|
||||
{
|
||||
if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
|
||||
p4d_val(*p4d) = _REGION2_ENTRY_EMPTY;
|
||||
}
|
||||
|
||||
static inline void pud_clear(pud_t *pud)
|
||||
|
@ -1089,6 +1129,7 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
|
|||
}
|
||||
|
||||
#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
|
||||
#define p4d_index(address) (((address) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
|
||||
#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
|
||||
#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
|
||||
#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1))
|
||||
|
@ -1098,19 +1139,31 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
|
|||
|
||||
#define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN)
|
||||
#define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN)
|
||||
#define p4d_deref(pud) (p4d_val(pud) & _REGION_ENTRY_ORIGIN)
|
||||
#define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN)
|
||||
|
||||
static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
|
||||
static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
|
||||
{
|
||||
pud_t *pud = (pud_t *) pgd;
|
||||
if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
|
||||
pud = (pud_t *) pgd_deref(*pgd);
|
||||
return pud + pud_index(address);
|
||||
p4d_t *p4d = (p4d_t *) pgd;
|
||||
|
||||
if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
|
||||
p4d = (p4d_t *) pgd_deref(*pgd);
|
||||
return p4d + p4d_index(address);
|
||||
}
|
||||
|
||||
static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
|
||||
{
|
||||
pud_t *pud = (pud_t *) p4d;
|
||||
|
||||
if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
|
||||
pud = (pud_t *) p4d_deref(*p4d);
|
||||
return pud + pud_index(address);
|
||||
}
|
||||
|
||||
static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
|
||||
{
|
||||
pmd_t *pmd = (pmd_t *) pud;
|
||||
|
||||
if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
|
||||
pmd = (pmd_t *) pud_deref(*pud);
|
||||
return pmd + pmd_index(address);
|
||||
|
@ -1122,6 +1175,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
|
|||
|
||||
#define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd))
|
||||
#define pud_page(pud) pfn_to_page(pud_pfn(pud))
|
||||
#define p4d_page(pud) pfn_to_page(p4d_pfn(p4d))
|
||||
|
||||
/* Find an entry in the lowest level page table.. */
|
||||
#define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr))
|
||||
|
|
|
@ -92,11 +92,11 @@ extern void execve_tail(void);
|
|||
*/
|
||||
|
||||
#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_31BIT) ? \
|
||||
(1UL << 31) : (1UL << 53))
|
||||
(1UL << 31) : -PAGE_SIZE)
|
||||
#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \
|
||||
(1UL << 30) : (1UL << 41))
|
||||
#define TASK_SIZE TASK_SIZE_OF(current)
|
||||
#define TASK_SIZE_MAX (1UL << 53)
|
||||
#define TASK_SIZE_MAX (-PAGE_SIZE)
|
||||
|
||||
#define STACK_TOP (test_thread_flag(TIF_31BIT) ? \
|
||||
(1UL << 31) : (1UL << 42))
|
||||
|
|
|
@ -136,6 +136,21 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
|
|||
tlb_remove_table(tlb, pmd);
|
||||
}
|
||||
|
||||
/*
|
||||
* p4d_free_tlb frees a pud table and clears the CRSTE for the
|
||||
* region second table entry from the tlb.
|
||||
* If the mm uses a four level page table the single p4d is freed
|
||||
* as the pgd. p4d_free_tlb checks the asce_limit against 8PB
|
||||
* to avoid the double free of the p4d in this case.
|
||||
*/
|
||||
static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
|
||||
unsigned long address)
|
||||
{
|
||||
if (tlb->mm->context.asce_limit <= (1UL << 53))
|
||||
return;
|
||||
tlb_remove_table(tlb, p4d);
|
||||
}
|
||||
|
||||
/*
|
||||
* pud_free_tlb frees a pud table and clears the CRSTE for the
|
||||
* region third table entry from the tlb.
|
||||
|
|
|
@ -149,7 +149,7 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
|
|||
}
|
||||
|
||||
static void walk_pud_level(struct seq_file *m, struct pg_state *st,
|
||||
pgd_t *pgd, unsigned long addr)
|
||||
p4d_t *p4d, unsigned long addr)
|
||||
{
|
||||
unsigned int prot;
|
||||
pud_t *pud;
|
||||
|
@ -157,7 +157,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st,
|
|||
|
||||
for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++) {
|
||||
st->current_address = addr;
|
||||
pud = pud_offset(pgd, addr);
|
||||
pud = pud_offset(p4d, addr);
|
||||
if (!pud_none(*pud))
|
||||
if (pud_large(*pud)) {
|
||||
prot = pud_val(*pud) &
|
||||
|
@ -172,6 +172,23 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st,
|
|||
}
|
||||
}
|
||||
|
||||
static void walk_p4d_level(struct seq_file *m, struct pg_state *st,
|
||||
pgd_t *pgd, unsigned long addr)
|
||||
{
|
||||
p4d_t *p4d;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < PTRS_PER_P4D && addr < max_addr; i++) {
|
||||
st->current_address = addr;
|
||||
p4d = p4d_offset(pgd, addr);
|
||||
if (!p4d_none(*p4d))
|
||||
walk_pud_level(m, st, p4d, addr);
|
||||
else
|
||||
note_page(m, st, _PAGE_INVALID, 2);
|
||||
addr += P4D_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
static void walk_pgd_level(struct seq_file *m)
|
||||
{
|
||||
unsigned long addr = 0;
|
||||
|
@ -184,7 +201,7 @@ static void walk_pgd_level(struct seq_file *m)
|
|||
st.current_address = addr;
|
||||
pgd = pgd_offset_k(addr);
|
||||
if (!pgd_none(*pgd))
|
||||
walk_pud_level(m, &st, pgd, addr);
|
||||
walk_p4d_level(m, &st, pgd, addr);
|
||||
else
|
||||
note_page(m, &st, _PAGE_INVALID, 1);
|
||||
addr += PGDIR_SIZE;
|
||||
|
|
|
@ -537,6 +537,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
|
|||
unsigned long *table;
|
||||
spinlock_t *ptl;
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
int rc;
|
||||
|
@ -573,7 +574,9 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
|
|||
mm = gmap->mm;
|
||||
pgd = pgd_offset(mm, vmaddr);
|
||||
VM_BUG_ON(pgd_none(*pgd));
|
||||
pud = pud_offset(pgd, vmaddr);
|
||||
p4d = p4d_offset(pgd, vmaddr);
|
||||
VM_BUG_ON(p4d_none(*p4d));
|
||||
pud = pud_offset(p4d, vmaddr);
|
||||
VM_BUG_ON(pud_none(*pud));
|
||||
/* large puds cannot yet be handled */
|
||||
if (pud_large(*pud))
|
||||
|
|
|
@ -166,15 +166,15 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
|
|||
return 1;
|
||||
}
|
||||
|
||||
static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
|
||||
static inline int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr,
|
||||
unsigned long end, int write, struct page **pages, int *nr)
|
||||
{
|
||||
unsigned long next;
|
||||
pud_t *pudp, pud;
|
||||
|
||||
pudp = (pud_t *) pgdp;
|
||||
if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
|
||||
pudp = (pud_t *) pgd_deref(pgd);
|
||||
pudp = (pud_t *) p4dp;
|
||||
if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
|
||||
pudp = (pud_t *) p4d_deref(p4d);
|
||||
pudp += pud_index(addr);
|
||||
do {
|
||||
pud = *pudp;
|
||||
|
@ -194,6 +194,29 @@ static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
|
|||
return 1;
|
||||
}
|
||||
|
||||
static inline int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
|
||||
unsigned long end, int write, struct page **pages, int *nr)
|
||||
{
|
||||
unsigned long next;
|
||||
p4d_t *p4dp, p4d;
|
||||
|
||||
p4dp = (p4d_t *) pgdp;
|
||||
if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
|
||||
p4dp = (p4d_t *) pgd_deref(pgd);
|
||||
p4dp += p4d_index(addr);
|
||||
do {
|
||||
p4d = *p4dp;
|
||||
barrier();
|
||||
next = p4d_addr_end(addr, end);
|
||||
if (p4d_none(p4d))
|
||||
return 0;
|
||||
if (!gup_pud_range(p4dp, p4d, addr, next, write, pages, nr))
|
||||
return 0;
|
||||
} while (p4dp++, addr = next, addr != end);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Like get_user_pages_fast() except its IRQ-safe in that it won't fall
|
||||
* back to the regular GUP.
|
||||
|
@ -228,7 +251,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
|
|||
next = pgd_addr_end(addr, end);
|
||||
if (pgd_none(pgd))
|
||||
break;
|
||||
if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr))
|
||||
if (!gup_p4d_range(pgdp, pgd, addr, next, write, pages, &nr))
|
||||
break;
|
||||
} while (pgdp++, addr = next, addr != end);
|
||||
local_irq_restore(flags);
|
||||
|
|
|
@ -162,16 +162,20 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
|
|||
unsigned long addr, unsigned long sz)
|
||||
{
|
||||
pgd_t *pgdp;
|
||||
p4d_t *p4dp;
|
||||
pud_t *pudp;
|
||||
pmd_t *pmdp = NULL;
|
||||
|
||||
pgdp = pgd_offset(mm, addr);
|
||||
pudp = pud_alloc(mm, pgdp, addr);
|
||||
if (pudp) {
|
||||
if (sz == PUD_SIZE)
|
||||
return (pte_t *) pudp;
|
||||
else if (sz == PMD_SIZE)
|
||||
pmdp = pmd_alloc(mm, pudp, addr);
|
||||
p4dp = p4d_alloc(mm, pgdp, addr);
|
||||
if (p4dp) {
|
||||
pudp = pud_alloc(mm, p4dp, addr);
|
||||
if (pudp) {
|
||||
if (sz == PUD_SIZE)
|
||||
return (pte_t *) pudp;
|
||||
else if (sz == PMD_SIZE)
|
||||
pmdp = pmd_alloc(mm, pudp, addr);
|
||||
}
|
||||
}
|
||||
return (pte_t *) pmdp;
|
||||
}
|
||||
|
@ -179,16 +183,20 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
|
|||
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
|
||||
{
|
||||
pgd_t *pgdp;
|
||||
p4d_t *p4dp;
|
||||
pud_t *pudp;
|
||||
pmd_t *pmdp = NULL;
|
||||
|
||||
pgdp = pgd_offset(mm, addr);
|
||||
if (pgd_present(*pgdp)) {
|
||||
pudp = pud_offset(pgdp, addr);
|
||||
if (pud_present(*pudp)) {
|
||||
if (pud_large(*pudp))
|
||||
return (pte_t *) pudp;
|
||||
pmdp = pmd_offset(pudp, addr);
|
||||
p4dp = p4d_offset(pgdp, addr);
|
||||
if (p4d_present(*p4dp)) {
|
||||
pudp = pud_offset(p4dp, addr);
|
||||
if (pud_present(*pudp)) {
|
||||
if (pud_large(*pudp))
|
||||
return (pte_t *) pudp;
|
||||
pmdp = pmd_offset(pudp, addr);
|
||||
}
|
||||
}
|
||||
}
|
||||
return (pte_t *) pmdp;
|
||||
|
|
|
@ -120,7 +120,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
|
|||
|
||||
check_asce_limit:
|
||||
if (addr + len > current->mm->context.asce_limit) {
|
||||
rc = crst_table_upgrade(mm);
|
||||
rc = crst_table_upgrade(mm, addr + len);
|
||||
if (rc)
|
||||
return (unsigned long) rc;
|
||||
}
|
||||
|
@ -184,7 +184,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
|
|||
|
||||
check_asce_limit:
|
||||
if (addr + len > current->mm->context.asce_limit) {
|
||||
rc = crst_table_upgrade(mm);
|
||||
rc = crst_table_upgrade(mm, addr + len);
|
||||
if (rc)
|
||||
return (unsigned long) rc;
|
||||
}
|
||||
|
|
|
@ -229,14 +229,14 @@ static void modify_pud_page(pud_t *pudp, unsigned long addr,
|
|||
pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
|
||||
}
|
||||
|
||||
static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end,
|
||||
static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end,
|
||||
unsigned long flags)
|
||||
{
|
||||
unsigned long next;
|
||||
pud_t *pudp;
|
||||
int rc = 0;
|
||||
|
||||
pudp = pud_offset(pgd, addr);
|
||||
pudp = pud_offset(p4d, addr);
|
||||
do {
|
||||
if (pud_none(*pudp))
|
||||
return -EINVAL;
|
||||
|
@ -259,6 +259,26 @@ static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end,
|
|||
return rc;
|
||||
}
|
||||
|
||||
static int walk_p4d_level(pgd_t *pgd, unsigned long addr, unsigned long end,
|
||||
unsigned long flags)
|
||||
{
|
||||
unsigned long next;
|
||||
p4d_t *p4dp;
|
||||
int rc = 0;
|
||||
|
||||
p4dp = p4d_offset(pgd, addr);
|
||||
do {
|
||||
if (p4d_none(*p4dp))
|
||||
return -EINVAL;
|
||||
next = p4d_addr_end(addr, end);
|
||||
rc = walk_pud_level(p4dp, addr, next, flags);
|
||||
p4dp++;
|
||||
addr = next;
|
||||
cond_resched();
|
||||
} while (addr < end && !rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static DEFINE_MUTEX(cpa_mutex);
|
||||
|
||||
static int change_page_attr(unsigned long addr, unsigned long end,
|
||||
|
@ -278,7 +298,7 @@ static int change_page_attr(unsigned long addr, unsigned long end,
|
|||
if (pgd_none(*pgdp))
|
||||
break;
|
||||
next = pgd_addr_end(addr, end);
|
||||
rc = walk_pud_level(pgdp, addr, next, flags);
|
||||
rc = walk_p4d_level(pgdp, addr, next, flags);
|
||||
if (rc)
|
||||
break;
|
||||
cond_resched();
|
||||
|
@ -319,6 +339,7 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
|
|||
unsigned long address;
|
||||
int nr, i, j;
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
|
@ -326,7 +347,8 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
|
|||
for (i = 0; i < numpages;) {
|
||||
address = page_to_phys(page + i);
|
||||
pgd = pgd_offset_k(address);
|
||||
pud = pud_offset(pgd, address);
|
||||
p4d = p4d_offset(pgd, address);
|
||||
pud = pud_offset(p4d, address);
|
||||
pmd = pmd_offset(pud, address);
|
||||
pte = pte_offset_kernel(pmd, address);
|
||||
nr = (unsigned long)pte >> ilog2(sizeof(long));
|
||||
|
|
|
@ -76,29 +76,46 @@ static void __crst_table_upgrade(void *arg)
|
|||
__tlb_flush_local();
|
||||
}
|
||||
|
||||
int crst_table_upgrade(struct mm_struct *mm)
|
||||
int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
|
||||
{
|
||||
unsigned long *table, *pgd;
|
||||
int rc, notify;
|
||||
|
||||
/* upgrade should only happen from 3 to 4 levels */
|
||||
BUG_ON(mm->context.asce_limit != (1UL << 42));
|
||||
|
||||
table = crst_table_alloc(mm);
|
||||
if (!table)
|
||||
/* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
|
||||
BUG_ON(mm->context.asce_limit < (1UL << 42));
|
||||
if (end >= TASK_SIZE_MAX)
|
||||
return -ENOMEM;
|
||||
|
||||
spin_lock_bh(&mm->page_table_lock);
|
||||
pgd = (unsigned long *) mm->pgd;
|
||||
crst_table_init(table, _REGION2_ENTRY_EMPTY);
|
||||
pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
|
||||
mm->pgd = (pgd_t *) table;
|
||||
mm->context.asce_limit = 1UL << 53;
|
||||
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
|
||||
_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
|
||||
spin_unlock_bh(&mm->page_table_lock);
|
||||
|
||||
on_each_cpu(__crst_table_upgrade, mm, 0);
|
||||
return 0;
|
||||
rc = 0;
|
||||
notify = 0;
|
||||
while (mm->context.asce_limit < end) {
|
||||
table = crst_table_alloc(mm);
|
||||
if (!table) {
|
||||
rc = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
spin_lock_bh(&mm->page_table_lock);
|
||||
pgd = (unsigned long *) mm->pgd;
|
||||
if (mm->context.asce_limit == (1UL << 42)) {
|
||||
crst_table_init(table, _REGION2_ENTRY_EMPTY);
|
||||
p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd);
|
||||
mm->pgd = (pgd_t *) table;
|
||||
mm->context.asce_limit = 1UL << 53;
|
||||
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
|
||||
_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
|
||||
} else {
|
||||
crst_table_init(table, _REGION1_ENTRY_EMPTY);
|
||||
pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd);
|
||||
mm->pgd = (pgd_t *) table;
|
||||
mm->context.asce_limit = -PAGE_SIZE;
|
||||
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
|
||||
_ASCE_USER_BITS | _ASCE_TYPE_REGION1;
|
||||
}
|
||||
notify = 1;
|
||||
spin_unlock_bh(&mm->page_table_lock);
|
||||
}
|
||||
if (notify)
|
||||
on_each_cpu(__crst_table_upgrade, mm, 0);
|
||||
return rc;
|
||||
}
|
||||
|
||||
void crst_table_downgrade(struct mm_struct *mm)
|
||||
|
@ -274,7 +291,7 @@ static void __tlb_remove_table(void *_table)
|
|||
struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
|
||||
|
||||
switch (mask) {
|
||||
case 0: /* pmd or pud */
|
||||
case 0: /* pmd, pud, or p4d */
|
||||
free_pages((unsigned long) table, 2);
|
||||
break;
|
||||
case 1: /* lower 2K of a 4K page table */
|
||||
|
|
|
@ -610,6 +610,7 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
|
|||
{
|
||||
spinlock_t *ptl;
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pgste_t pgste;
|
||||
|
@ -618,7 +619,10 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
|
|||
bool dirty;
|
||||
|
||||
pgd = pgd_offset(mm, addr);
|
||||
pud = pud_alloc(mm, pgd, addr);
|
||||
p4d = p4d_alloc(mm, pgd, addr);
|
||||
if (!p4d)
|
||||
return false;
|
||||
pud = pud_alloc(mm, p4d, addr);
|
||||
if (!pud)
|
||||
return false;
|
||||
pmd = pmd_alloc(mm, pud, addr);
|
||||
|
|
|
@ -38,6 +38,17 @@ static void __ref *vmem_alloc_pages(unsigned int order)
|
|||
return (void *) memblock_alloc(size, size);
|
||||
}
|
||||
|
||||
static inline p4d_t *vmem_p4d_alloc(void)
|
||||
{
|
||||
p4d_t *p4d = NULL;
|
||||
|
||||
p4d = vmem_alloc_pages(2);
|
||||
if (!p4d)
|
||||
return NULL;
|
||||
clear_table((unsigned long *) p4d, _REGION2_ENTRY_EMPTY, PAGE_SIZE * 4);
|
||||
return p4d;
|
||||
}
|
||||
|
||||
static inline pud_t *vmem_pud_alloc(void)
|
||||
{
|
||||
pud_t *pud = NULL;
|
||||
|
@ -85,6 +96,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
|
|||
unsigned long end = start + size;
|
||||
unsigned long address = start;
|
||||
pgd_t *pg_dir;
|
||||
p4d_t *p4_dir;
|
||||
pud_t *pu_dir;
|
||||
pmd_t *pm_dir;
|
||||
pte_t *pt_dir;
|
||||
|
@ -102,12 +114,19 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
|
|||
while (address < end) {
|
||||
pg_dir = pgd_offset_k(address);
|
||||
if (pgd_none(*pg_dir)) {
|
||||
p4_dir = vmem_p4d_alloc();
|
||||
if (!p4_dir)
|
||||
goto out;
|
||||
pgd_populate(&init_mm, pg_dir, p4_dir);
|
||||
}
|
||||
p4_dir = p4d_offset(pg_dir, address);
|
||||
if (p4d_none(*p4_dir)) {
|
||||
pu_dir = vmem_pud_alloc();
|
||||
if (!pu_dir)
|
||||
goto out;
|
||||
pgd_populate(&init_mm, pg_dir, pu_dir);
|
||||
p4d_populate(&init_mm, p4_dir, pu_dir);
|
||||
}
|
||||
pu_dir = pud_offset(pg_dir, address);
|
||||
pu_dir = pud_offset(p4_dir, address);
|
||||
if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
|
||||
!(address & ~PUD_MASK) && (address + PUD_SIZE <= end) &&
|
||||
!debug_pagealloc_enabled()) {
|
||||
|
@ -161,6 +180,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
|
|||
unsigned long end = start + size;
|
||||
unsigned long address = start;
|
||||
pgd_t *pg_dir;
|
||||
p4d_t *p4_dir;
|
||||
pud_t *pu_dir;
|
||||
pmd_t *pm_dir;
|
||||
pte_t *pt_dir;
|
||||
|
@ -172,7 +192,12 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
|
|||
address += PGDIR_SIZE;
|
||||
continue;
|
||||
}
|
||||
pu_dir = pud_offset(pg_dir, address);
|
||||
p4_dir = p4d_offset(pg_dir, address);
|
||||
if (p4d_none(*p4_dir)) {
|
||||
address += P4D_SIZE;
|
||||
continue;
|
||||
}
|
||||
pu_dir = pud_offset(p4_dir, address);
|
||||
if (pud_none(*pu_dir)) {
|
||||
address += PUD_SIZE;
|
||||
continue;
|
||||
|
@ -213,6 +238,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
|
|||
unsigned long pgt_prot, sgt_prot;
|
||||
unsigned long address = start;
|
||||
pgd_t *pg_dir;
|
||||
p4d_t *p4_dir;
|
||||
pud_t *pu_dir;
|
||||
pmd_t *pm_dir;
|
||||
pte_t *pt_dir;
|
||||
|
@ -227,13 +253,21 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
|
|||
for (address = start; address < end;) {
|
||||
pg_dir = pgd_offset_k(address);
|
||||
if (pgd_none(*pg_dir)) {
|
||||
p4_dir = vmem_p4d_alloc();
|
||||
if (!p4_dir)
|
||||
goto out;
|
||||
pgd_populate(&init_mm, pg_dir, p4_dir);
|
||||
}
|
||||
|
||||
p4_dir = p4d_offset(pg_dir, address);
|
||||
if (p4d_none(*p4_dir)) {
|
||||
pu_dir = vmem_pud_alloc();
|
||||
if (!pu_dir)
|
||||
goto out;
|
||||
pgd_populate(&init_mm, pg_dir, pu_dir);
|
||||
p4d_populate(&init_mm, p4_dir, pu_dir);
|
||||
}
|
||||
|
||||
pu_dir = pud_offset(pg_dir, address);
|
||||
pu_dir = pud_offset(p4_dir, address);
|
||||
if (pud_none(*pu_dir)) {
|
||||
pm_dir = vmem_pmd_alloc();
|
||||
if (!pm_dir)
|
||||
|
|
Loading…
Reference in a new issue