Merge branch 'fixes' into next

There's a non-trivial dependency between some commits we want to put in
next and the KVM prefetch work around that went into fixes. So merge
fixes into next.
This commit is contained in:
Michael Ellerman 2017-08-23 22:20:10 +10:00
commit 15c659ff9d
28 changed files with 373 additions and 127 deletions

View file

@ -59,6 +59,19 @@ machine-$(CONFIG_PPC64) += 64
machine-$(CONFIG_CPU_LITTLE_ENDIAN) += le
UTS_MACHINE := $(subst $(space),,$(machine-y))
# XXX This needs to be before we override LD below
ifdef CONFIG_PPC32
KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
else
ifeq ($(call ld-ifversion, -ge, 225000000, y),y)
# Have the linker provide sfpr if possible.
# There is a corresponding test in arch/powerpc/lib/Makefile
KBUILD_LDFLAGS_MODULE += --save-restore-funcs
else
KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
endif
endif
ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
override LD += -EL
LDEMULATION := lppc
@ -190,18 +203,6 @@ else
CHECKFLAGS += -D__LITTLE_ENDIAN__
endif
ifdef CONFIG_PPC32
KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
else
ifeq ($(call ld-ifversion, -ge, 225000000, y),y)
# Have the linker provide sfpr if possible.
# There is a corresponding test in arch/powerpc/lib/Makefile
KBUILD_LDFLAGS_MODULE += --save-restore-funcs
else
KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
endif
endif
ifeq ($(CONFIG_476FPE_ERR46),y)
KBUILD_LDFLAGS_MODULE += --ppc476-workaround \
-T $(srctree)/arch/powerpc/platforms/44x/ppc476_modules.lds

View file

@ -25,12 +25,20 @@ compress-$(CONFIG_KERNEL_XZ) := CONFIG_KERNEL_XZ
BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
-fno-strict-aliasing -Os -msoft-float -pipe \
-fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
-isystem $(shell $(CROSS32CC) -print-file-name=include) \
-D$(compress-y)
BOOTCC := $(CC)
ifdef CONFIG_PPC64_BOOT_WRAPPER
BOOTCFLAGS += -m64
else
BOOTCFLAGS += -m32
ifdef CROSS32_COMPILE
BOOTCC := $(CROSS32_COMPILE)gcc
endif
endif
BOOTCFLAGS += -isystem $(shell $(BOOTCC) -print-file-name=include)
ifdef CONFIG_CPU_BIG_ENDIAN
BOOTCFLAGS += -mbig-endian
else
@ -183,10 +191,10 @@ clean-files := $(zlib-) $(zlibheader-) $(zliblinuxheader-) \
empty.c zImage.coff.lds zImage.ps3.lds zImage.lds
quiet_cmd_bootcc = BOOTCC $@
cmd_bootcc = $(CROSS32CC) -Wp,-MD,$(depfile) $(BOOTCFLAGS) -c -o $@ $<
cmd_bootcc = $(BOOTCC) -Wp,-MD,$(depfile) $(BOOTCFLAGS) -c -o $@ $<
quiet_cmd_bootas = BOOTAS $@
cmd_bootas = $(CROSS32CC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $<
cmd_bootas = $(BOOTCC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $<
quiet_cmd_bootar = BOOTAR $@
cmd_bootar = $(CROSS32AR) -cr$(KBUILD_ARFLAGS) $@.$$$$ $(filter-out FORCE,$^); mv $@.$$$$ $@

View file

@ -293,7 +293,8 @@ CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_LOCKUP_DETECTOR=y
CONFIG_SOFTLOCKUP_DETECTOR=y
CONFIG_HARDLOCKUP_DETECTOR=y
CONFIG_LATENCYTOP=y
CONFIG_SCHED_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y

View file

@ -324,7 +324,8 @@ CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_LOCKUP_DETECTOR=y
CONFIG_SOFTLOCKUP_DETECTOR=y
CONFIG_HARDLOCKUP_DETECTOR=y
CONFIG_DEBUG_MUTEXES=y
CONFIG_LATENCYTOP=y
CONFIG_SCHED_TRACER=y

View file

@ -291,7 +291,8 @@ CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_LOCKUP_DETECTOR=y
CONFIG_SOFTLOCKUP_DETECTOR=y
CONFIG_HARDLOCKUP_DETECTOR=y
CONFIG_LATENCYTOP=y
CONFIG_SCHED_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y

View file

@ -59,13 +59,14 @@ extern struct patb_entry *partition_tb;
#define PRTS_MASK 0x1f /* process table size field */
#define PRTB_MASK 0x0ffffffffffff000UL
/*
* Limit process table to PAGE_SIZE table. This
* also limit the max pid we can support.
* MAX_USER_CONTEXT * 16 bytes of space.
*/
#define PRTB_SIZE_SHIFT (CONTEXT_BITS + 4)
#define PRTB_ENTRIES (1ul << CONTEXT_BITS)
/* Number of supported PID bits */
extern unsigned int mmu_pid_bits;
/* Base PID to allocate from */
extern unsigned int mmu_base_pid;
#define PRTB_SIZE_SHIFT (mmu_pid_bits + 4)
#define PRTB_ENTRIES (1ul << mmu_pid_bits)
/*
* Power9 currently only support 64K partition table size.

View file

@ -614,9 +614,17 @@ static inline pte_t pte_mkdevmap(pte_t pte)
return __pte(pte_val(pte) | _PAGE_SPECIAL|_PAGE_DEVMAP);
}
/*
* This is potentially called with a pmd as the argument, in which case it's not
* safe to check _PAGE_DEVMAP unless we also confirm that _PAGE_PTE is set.
* That's because the bit we use for _PAGE_DEVMAP is not reserved for software
* use in page directory entries (ie. non-ptes).
*/
static inline int pte_devmap(pte_t pte)
{
return !!(pte_raw(pte) & cpu_to_be64(_PAGE_DEVMAP));
u64 mask = cpu_to_be64(_PAGE_DEVMAP | _PAGE_PTE);
return (pte_raw(pte) & mask) == mask;
}
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)

View file

@ -45,7 +45,7 @@ extern void set_context(unsigned long id, pgd_t *pgd);
#ifdef CONFIG_PPC_BOOK3S_64
extern void radix__switch_mmu_context(struct mm_struct *prev,
struct mm_struct *next);
struct mm_struct *next);
static inline void switch_mmu_context(struct mm_struct *prev,
struct mm_struct *next,
struct task_struct *tsk)
@ -67,6 +67,12 @@ extern void __destroy_context(unsigned long context_id);
extern void mmu_context_init(void);
#endif
#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm);
#else
static inline void radix_kvm_prefetch_workaround(struct mm_struct *mm) { }
#endif
extern void switch_cop(struct mm_struct *next);
extern int use_cop(unsigned long acop, struct mm_struct *mm);
extern void drop_cop(unsigned long acop, struct mm_struct *mm);
@ -79,10 +85,32 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev,
struct mm_struct *next,
struct task_struct *tsk)
{
bool new_on_cpu = false;
/* Mark this context has been used on the new CPU */
if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next)))
if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) {
cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
/*
* This full barrier orders the store to the cpumask above vs
* a subsequent operation which allows this CPU to begin loading
* translations for next.
*
* When using the radix MMU that operation is the load of the
* MMU context id, which is then moved to SPRN_PID.
*
* For the hash MMU it is either the first load from slb_cache
* in switch_slb(), and/or the store of paca->mm_ctx_id in
* copy_mm_to_paca().
*
* On the read side the barrier is in pte_xchg(), which orders
* the store to the PTE vs the load of mm_cpumask.
*/
smp_mb();
new_on_cpu = true;
}
/* 32-bit keeps track of the current PGDIR in the thread struct */
#ifdef CONFIG_PPC32
tsk->thread.pgdir = next->pgd;
@ -103,6 +131,10 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev,
if (cpu_has_feature(CPU_FTR_ALTIVEC))
asm volatile ("dssall");
#endif /* CONFIG_ALTIVEC */
if (new_on_cpu)
radix_kvm_prefetch_workaround(next);
/*
* The actual HW switching method differs between the various
* sub architectures. Out of line for now

View file

@ -87,6 +87,7 @@ static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new)
unsigned long *p = (unsigned long *)ptep;
__be64 prev;
/* See comment in switch_mm_irqs_off() */
prev = (__force __be64)__cmpxchg_u64(p, (__force unsigned long)pte_raw(old),
(__force unsigned long)pte_raw(new));

View file

@ -62,6 +62,7 @@ static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new)
{
unsigned long *p = (unsigned long *)ptep;
/* See comment in switch_mm_irqs_off() */
return pte_val(old) == __cmpxchg_u64(p, pte_val(old), pte_val(new));
}
#endif

View file

@ -223,17 +223,27 @@ system_call_exit:
andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
bne- .Lsyscall_exit_work
/* If MSR_FP and MSR_VEC are set in user msr, then no need to restore */
li r7,MSR_FP
andi. r0,r8,MSR_FP
beq 2f
#ifdef CONFIG_ALTIVEC
oris r7,r7,MSR_VEC@h
andis. r0,r8,MSR_VEC@h
bne 3f
#endif
and r0,r8,r7
cmpd r0,r7
bne .Lsyscall_restore_math
.Lsyscall_restore_math_cont:
2: addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_PPC_BOOK3S
li r10,MSR_RI
mtmsrd r10,1 /* Restore RI */
#endif
bl restore_math
#ifdef CONFIG_PPC_BOOK3S
li r11,0
mtmsrd r11,1
#endif
ld r8,_MSR(r1)
ld r3,RESULT(r1)
li r11,-MAX_ERRNO
cmpld r3,r11
3: cmpld r3,r11
ld r5,_CCR(r1)
bge- .Lsyscall_error
.Lsyscall_error_cont:
@ -267,40 +277,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
std r5,_CCR(r1)
b .Lsyscall_error_cont
.Lsyscall_restore_math:
/*
* Some initial tests from restore_math to avoid the heavyweight
* C code entry and MSR manipulations.
*/
LOAD_REG_IMMEDIATE(r0, MSR_TS_MASK)
and. r0,r0,r8
bne 1f
ld r7,PACACURRENT(r13)
lbz r0,THREAD+THREAD_LOAD_FP(r7)
#ifdef CONFIG_ALTIVEC
lbz r6,THREAD+THREAD_LOAD_VEC(r7)
add r0,r0,r6
#endif
cmpdi r0,0
beq .Lsyscall_restore_math_cont
1: addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_PPC_BOOK3S
li r10,MSR_RI
mtmsrd r10,1 /* Restore RI */
#endif
bl restore_math
#ifdef CONFIG_PPC_BOOK3S
li r11,0
mtmsrd r11,1
#endif
/* Restore volatiles, reload MSR from updated one */
ld r8,_MSR(r1)
ld r3,RESULT(r1)
li r11,-MAX_ERRNO
b .Lsyscall_restore_math_cont
/* Traced system call support */
.Lsyscall_dotrace:
bl save_nvgprs

View file

@ -1325,10 +1325,18 @@ EXC_VIRT_NONE(0x5800, 0x100)
std r10,PACA_EXGEN+EX_R13(r13); \
EXCEPTION_PROLOG_PSERIES_1(soft_nmi_common, _H)
/*
* Branch to soft_nmi_interrupt using the emergency stack. The emergency
* stack is one that is usable by maskable interrupts so long as MSR_EE
* remains off. It is used for recovery when something has corrupted the
* normal kernel stack, for example. The "soft NMI" must not use the process
* stack because we want irq disabled sections to avoid touching the stack
* at all (other than PMU interrupts), so use the emergency stack for this,
* and run it entirely with interrupts hard disabled.
*/
EXC_COMMON_BEGIN(soft_nmi_common)
mr r10,r1
ld r1,PACAEMERGSP(r13)
ld r1,PACA_NMI_EMERG_SP(r13)
subi r1,r1,INT_FRAME_SIZE
EXCEPTION_COMMON_NORET_STACK(PACA_EXGEN, 0x900,
system_reset, soft_nmi_interrupt,

View file

@ -514,11 +514,17 @@ pnv_restore_hyp_resource_arch300:
/*
* Workaround for POWER9, if we lost resources, the ERAT
* might have been mixed up and needs flushing. We also need
* to reload MMCR0 (see comment above).
* to reload MMCR0 (see comment above). We also need to set
* then clear bit 60 in MMCRA to ensure the PMU starts running.
*/
blt cr3,1f
PPC_INVALIDATE_ERAT
ld r1,PACAR1(r13)
mfspr r4,SPRN_MMCRA
ori r4,r4,(1 << (63-60))
mtspr SPRN_MMCRA,r4
xori r4,r4,(1 << (63-60))
mtspr SPRN_MMCRA,r4
ld r4,_MMCR0(r1)
mtspr SPRN_MMCR0,r4
1:

View file

@ -145,6 +145,19 @@ notrace unsigned int __check_irq_replay(void)
/* Clear bit 0 which we wouldn't clear otherwise */
local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
if (happened & PACA_IRQ_HARD_DIS) {
/*
* We may have missed a decrementer interrupt if hard disabled.
* Check the decrementer register in case we had a rollover
* while hard disabled.
*/
if (!(happened & PACA_IRQ_DEC)) {
if (decrementer_check_overflow()) {
local_paca->irq_happened |= PACA_IRQ_DEC;
happened |= PACA_IRQ_DEC;
}
}
}
/*
* Force the delivery of pending soft-disabled interrupts on PS3.
@ -170,7 +183,7 @@ notrace unsigned int __check_irq_replay(void)
* in case we also had a rollover while hard disabled
*/
local_paca->irq_happened &= ~PACA_IRQ_DEC;
if ((happened & PACA_IRQ_DEC) || decrementer_check_overflow())
if (happened & PACA_IRQ_DEC)
return 0x900;
/* Finally check if an external interrupt happened */

View file

@ -363,7 +363,8 @@ void enable_kernel_vsx(void)
cpumsr = msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX);
if (current->thread.regs && (current->thread.regs->msr & MSR_VSX)) {
if (current->thread.regs &&
(current->thread.regs->msr & (MSR_VSX|MSR_VEC|MSR_FP))) {
check_if_tm_restore_required(current);
/*
* If a thread has already been reclaimed then the
@ -383,7 +384,7 @@ void flush_vsx_to_thread(struct task_struct *tsk)
{
if (tsk->thread.regs) {
preempt_disable();
if (tsk->thread.regs->msr & MSR_VSX) {
if (tsk->thread.regs->msr & (MSR_VSX|MSR_VEC|MSR_FP)) {
BUG_ON(tsk != current);
giveup_vsx(tsk);
}
@ -505,10 +506,6 @@ void restore_math(struct pt_regs *regs)
{
unsigned long msr;
/*
* Syscall exit makes a similar initial check before branching
* to restore_math. Keep them in synch.
*/
if (!msr_tm_active(regs->msr) &&
!current->thread.load_fp && !loadvec(current->thread))
return;

View file

@ -127,12 +127,19 @@ static void flush_tmregs_to_thread(struct task_struct *tsk)
* If task is not current, it will have been flushed already to
* it's thread_struct during __switch_to().
*
* A reclaim flushes ALL the state.
* A reclaim flushes ALL the state or if not in TM save TM SPRs
* in the appropriate thread structures from live.
*/
if (tsk == current && MSR_TM_SUSPENDED(mfmsr()))
tm_reclaim_current(TM_CAUSE_SIGNAL);
if (tsk != current)
return;
if (MSR_TM_SUSPENDED(mfmsr())) {
tm_reclaim_current(TM_CAUSE_SIGNAL);
} else {
tm_enable();
tm_save_sprs(&(tsk->thread));
}
}
#else
static inline void flush_tmregs_to_thread(struct task_struct *tsk) { }

View file

@ -351,7 +351,7 @@ static void nmi_ipi_lock_start(unsigned long *flags)
hard_irq_disable();
while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
raw_local_irq_restore(*flags);
cpu_relax();
spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0);
raw_local_irq_save(*flags);
hard_irq_disable();
}
@ -360,7 +360,7 @@ static void nmi_ipi_lock_start(unsigned long *flags)
static void nmi_ipi_lock(void)
{
while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
cpu_relax();
spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0);
}
static void nmi_ipi_unlock(void)
@ -475,7 +475,7 @@ int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
nmi_ipi_lock_start(&flags);
while (nmi_ipi_busy_count) {
nmi_ipi_unlock_end(&flags);
cpu_relax();
spin_until_cond(nmi_ipi_busy_count == 0);
nmi_ipi_lock_start(&flags);
}
@ -1003,21 +1003,13 @@ static struct sched_domain_topology_level powerpc_topology[] = {
{ NULL, },
};
static __init long smp_setup_cpu_workfn(void *data __always_unused)
{
smp_ops->setup_cpu(boot_cpuid);
return 0;
}
void __init smp_cpus_done(unsigned int max_cpus)
{
/*
* We want the setup_cpu() here to be called on the boot CPU, but
* init might run on any CPU, so make sure it's invoked on the boot
* CPU.
* We are running pinned to the boot CPU, see rest_init().
*/
if (smp_ops && smp_ops->setup_cpu)
work_on_cpu_safe(boot_cpuid, smp_setup_cpu_workfn, NULL);
smp_ops->setup_cpu(boot_cpuid);
if (smp_ops && smp_ops->bringup_done)
smp_ops->bringup_done();

View file

@ -71,15 +71,20 @@ static inline void wd_smp_lock(unsigned long *flags)
* This may be called from low level interrupt handlers at some
* point in future.
*/
local_irq_save(*flags);
while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock)))
cpu_relax();
raw_local_irq_save(*flags);
hard_irq_disable(); /* Make it soft-NMI safe */
while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock))) {
raw_local_irq_restore(*flags);
spin_until_cond(!test_bit(0, &__wd_smp_lock));
raw_local_irq_save(*flags);
hard_irq_disable();
}
}
static inline void wd_smp_unlock(unsigned long *flags)
{
clear_bit_unlock(0, &__wd_smp_lock);
local_irq_restore(*flags);
raw_local_irq_restore(*flags);
}
static void wd_lockup_ipi(struct pt_regs *regs)
@ -96,10 +101,10 @@ static void wd_lockup_ipi(struct pt_regs *regs)
nmi_panic(regs, "Hard LOCKUP");
}
static void set_cpu_stuck(int cpu, u64 tb)
static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb)
{
cpumask_set_cpu(cpu, &wd_smp_cpus_stuck);
cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask);
cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask);
if (cpumask_empty(&wd_smp_cpus_pending)) {
wd_smp_last_reset_tb = tb;
cpumask_andnot(&wd_smp_cpus_pending,
@ -107,6 +112,10 @@ static void set_cpu_stuck(int cpu, u64 tb)
&wd_smp_cpus_stuck);
}
}
static void set_cpu_stuck(int cpu, u64 tb)
{
set_cpumask_stuck(cpumask_of(cpu), tb);
}
static void watchdog_smp_panic(int cpu, u64 tb)
{
@ -135,11 +144,9 @@ static void watchdog_smp_panic(int cpu, u64 tb)
}
smp_flush_nmi_ipi(1000000);
/* Take the stuck CPU out of the watch group */
for_each_cpu(c, &wd_smp_cpus_pending)
set_cpu_stuck(c, tb);
/* Take the stuck CPUs out of the watch group */
set_cpumask_stuck(&wd_smp_cpus_pending, tb);
out:
wd_smp_unlock(&flags);
printk_safe_flush();
@ -152,6 +159,11 @@ out:
if (hardlockup_panic)
nmi_panic(NULL, "Hard LOCKUP");
return;
out:
wd_smp_unlock(&flags);
}
static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
@ -261,9 +273,11 @@ static void wd_timer_fn(unsigned long data)
void arch_touch_nmi_watchdog(void)
{
unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;
int cpu = smp_processor_id();
watchdog_timer_interrupt(cpu);
if (get_tb() - per_cpu(wd_timer_tb, cpu) >= ticks)
watchdog_timer_interrupt(cpu);
}
EXPORT_SYMBOL(arch_touch_nmi_watchdog);
@ -286,6 +300,8 @@ static void stop_watchdog_timer_on(unsigned int cpu)
static int start_wd_on_cpu(unsigned int cpu)
{
unsigned long flags;
if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) {
WARN_ON(1);
return 0;
@ -300,12 +316,14 @@ static int start_wd_on_cpu(unsigned int cpu)
if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
return 0;
wd_smp_lock(&flags);
cpumask_set_cpu(cpu, &wd_cpus_enabled);
if (cpumask_weight(&wd_cpus_enabled) == 1) {
cpumask_set_cpu(cpu, &wd_smp_cpus_pending);
wd_smp_last_reset_tb = get_tb();
}
smp_wmb();
wd_smp_unlock(&flags);
start_watchdog_timer_on(cpu);
return 0;
@ -313,12 +331,17 @@ static int start_wd_on_cpu(unsigned int cpu)
static int stop_wd_on_cpu(unsigned int cpu)
{
unsigned long flags;
if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
return 0; /* Can happen in CPU unplug case */
stop_watchdog_timer_on(cpu);
wd_smp_lock(&flags);
cpumask_clear_cpu(cpu, &wd_cpus_enabled);
wd_smp_unlock(&flags);
wd_smp_clear_cpu_pending(cpu, get_tb());
return 0;

View file

@ -1443,12 +1443,14 @@ mc_cont:
ori r6,r6,1
mtspr SPRN_CTRLT,r6
4:
/* Read the guest SLB and save it away */
/* Check if we are running hash or radix and store it in cr2 */
ld r5, VCPU_KVM(r9)
lbz r0, KVM_RADIX(r5)
cmpwi r0, 0
cmpwi cr2,r0,0
/* Read the guest SLB and save it away */
li r5, 0
bne 3f /* for radix, save 0 entries */
bne cr2, 3f /* for radix, save 0 entries */
lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
mtctr r0
li r6,0
@ -1712,11 +1714,6 @@ BEGIN_FTR_SECTION_NESTED(96)
END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
22:
/* Clear out SLB */
li r5,0
slbmte r5,r5
slbia
ptesync
/* Restore host values of some registers */
BEGIN_FTR_SECTION
@ -1737,10 +1734,56 @@ BEGIN_FTR_SECTION
mtspr SPRN_PID, r7
mtspr SPRN_IAMR, r8
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
#ifdef CONFIG_PPC_RADIX_MMU
/*
* Are we running hash or radix ?
*/
beq cr2,3f
/* Radix: Handle the case where the guest used an illegal PID */
LOAD_REG_ADDR(r4, mmu_base_pid)
lwz r3, VCPU_GUEST_PID(r9)
lwz r5, 0(r4)
cmpw cr0,r3,r5
blt 2f
/*
* Illegal PID, the HW might have prefetched and cached in the TLB
* some translations for the LPID 0 / guest PID combination which
* Linux doesn't know about, so we need to flush that PID out of
* the TLB. First we need to set LPIDR to 0 so tlbiel applies to
* the right context.
*/
li r0,0
mtspr SPRN_LPID,r0
isync
/* Then do a congruence class local flush */
ld r6,VCPU_KVM(r9)
lwz r0,KVM_TLB_SETS(r6)
mtctr r0
li r7,0x400 /* IS field = 0b01 */
ptesync
sldi r0,r3,32 /* RS has PID */
1: PPC_TLBIEL(7,0,2,1,1) /* RIC=2, PRS=1, R=1 */
addi r7,r7,0x1000
bdnz 1b
ptesync
2: /* Flush the ERAT on radix P9 DD1 guest exit */
BEGIN_FTR_SECTION
PPC_INVALIDATE_ERAT
END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
b 4f
#endif /* CONFIG_PPC_RADIX_MMU */
/* Hash: clear out SLB */
3: li r5,0
slbmte r5,r5
slbia
ptesync
4:
/*
* POWER7/POWER8 guest -> host partition switch code.
* We don't have to lock against tlbies but we do

View file

@ -124,9 +124,10 @@ static int hash__init_new_context(struct mm_struct *mm)
static int radix__init_new_context(struct mm_struct *mm)
{
unsigned long rts_field;
int index;
int index, max_id;
index = alloc_context_id(1, PRTB_ENTRIES - 1);
max_id = (1 << mmu_pid_bits) - 1;
index = alloc_context_id(mmu_base_pid, max_id);
if (index < 0)
return index;

View file

@ -25,6 +25,9 @@
#include <trace/events/thp.h>
unsigned int mmu_pid_bits;
unsigned int mmu_base_pid;
static int native_register_process_table(unsigned long base, unsigned long pg_sz,
unsigned long table_size)
{
@ -265,11 +268,34 @@ static void __init radix_init_pgtable(void)
for_each_memblock(memory, reg)
WARN_ON(create_physical_mapping(reg->base,
reg->base + reg->size));
/* Find out how many PID bits are supported */
if (cpu_has_feature(CPU_FTR_HVMODE)) {
if (!mmu_pid_bits)
mmu_pid_bits = 20;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
* When KVM is possible, we only use the top half of the
* PID space to avoid collisions between host and guest PIDs
* which can cause problems due to prefetch when exiting the
* guest with AIL=3
*/
mmu_base_pid = 1 << (mmu_pid_bits - 1);
#else
mmu_base_pid = 1;
#endif
} else {
/* The guest uses the bottom half of the PID space */
if (!mmu_pid_bits)
mmu_pid_bits = 19;
mmu_base_pid = 1;
}
/*
* Allocate Partition table and process table for the
* host.
*/
BUILD_BUG_ON_MSG((PRTB_SIZE_SHIFT > 36), "Process table size too large.");
BUG_ON(PRTB_SIZE_SHIFT > 36);
process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT);
/*
* Fill in the process table.
@ -343,6 +369,12 @@ static int __init radix_dt_scan_page_sizes(unsigned long node,
if (type == NULL || strcmp(type, "cpu") != 0)
return 0;
/* Find MMU PID size */
prop = of_get_flat_dt_prop(node, "ibm,mmu-pid-bits", &size);
if (prop && size == 4)
mmu_pid_bits = be32_to_cpup(prop);
/* Grab page size encodings */
prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size);
if (!prop)
return 0;

View file

@ -36,7 +36,7 @@ void subpage_prot_free(struct mm_struct *mm)
}
}
addr = 0;
for (i = 0; i < 2; ++i) {
for (i = 0; i < (TASK_SIZE_USER64 >> 43); ++i) {
p = spt->protptrs[i];
if (!p)
continue;

View file

@ -12,12 +12,12 @@
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
#include <asm/ppc-opcode.h>
#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>
#define RIC_FLUSH_TLB 0
#define RIC_FLUSH_PWC 1
@ -478,3 +478,44 @@ void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
else
radix__flush_tlb_page_psize(mm, address, mmu_virtual_psize);
}
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
{
unsigned int pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
return;
/*
* If this context hasn't run on that CPU before and KVM is
* around, there's a slim chance that the guest on another
* CPU just brought in obsolete translation into the TLB of
* this CPU due to a bad prefetch using the guest PID on
* the way into the hypervisor.
*
* We work around this here. If KVM is possible, we check if
* any sibling thread is in KVM. If it is, the window may exist
* and thus we flush that PID from the core.
*
* A potential future improvement would be to mark which PIDs
* have never been used on the system and avoid it if the PID
* is new and the process has no other cpumask bit set.
*/
if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
int cpu = smp_processor_id();
int sib = cpu_first_thread_sibling(cpu);
bool flush = false;
for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
if (sib == cpu)
continue;
if (paca[sib].kvm_hstate.kvm_vcpu)
flush = true;
}
if (flush)
_tlbiel_pid(pid, RIC_FLUSH_ALL);
}
}
EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */

View file

@ -89,7 +89,7 @@ static int __init of_fsl_spi_probe(char *type, char *compatible, u32 sysclk,
goto err;
ret = of_irq_to_resource(np, 0, &res[1]);
if (!ret)
if (ret <= 0)
goto err;
pdev = platform_device_alloc("mpc83xx_spi", i);

View file

@ -56,6 +56,7 @@ u64 pnv_first_deep_stop_state = MAX_STOP_STATE;
*/
static u64 pnv_deepest_stop_psscr_val;
static u64 pnv_deepest_stop_psscr_mask;
static u64 pnv_deepest_stop_flag;
static bool deepest_stop_found;
static int pnv_save_sprs_for_deep_states(void)
@ -185,8 +186,40 @@ static void pnv_alloc_idle_core_states(void)
update_subcore_sibling_mask();
if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
pnv_save_sprs_for_deep_states();
if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
int rc = pnv_save_sprs_for_deep_states();
if (likely(!rc))
return;
/*
* The stop-api is unable to restore hypervisor
* resources on wakeup from platform idle states which
* lose full context. So disable such states.
*/
supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
if (cpu_has_feature(CPU_FTR_ARCH_300) &&
(pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
/*
* Use the default stop state for CPU-Hotplug
* if available.
*/
if (default_stop_found) {
pnv_deepest_stop_psscr_val =
pnv_default_stop_val;
pnv_deepest_stop_psscr_mask =
pnv_default_stop_mask;
pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
pnv_deepest_stop_psscr_val);
} else { /* Fallback to snooze loop for CPU-Hotplug */
deepest_stop_found = false;
pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
}
}
}
}
u32 pnv_get_supported_cpuidle_states(void)
@ -397,7 +430,8 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
pnv_deepest_stop_psscr_val;
srr1 = power9_idle_stop(psscr);
} else if (idle_states & OPAL_PM_WINKLE_ENABLED) {
} else if ((idle_states & OPAL_PM_WINKLE_ENABLED) &&
(idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) {
srr1 = power7_idle_insn(PNV_THREAD_WINKLE);
} else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
(idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
@ -585,6 +619,7 @@ static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags,
max_residency_ns = residency_ns[i];
pnv_deepest_stop_psscr_val = psscr_val[i];
pnv_deepest_stop_psscr_mask = psscr_mask[i];
pnv_deepest_stop_flag = flags[i];
deepest_stop_found = true;
}

View file

@ -1851,6 +1851,14 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
/* 4GB offset bypasses 32-bit space */
set_dma_offset(&pdev->dev, (1ULL << 32));
set_dma_ops(&pdev->dev, &dma_direct_ops);
} else if (dma_mask >> 32 && dma_mask != DMA_BIT_MASK(64)) {
/*
* Fail the request if a DMA mask between 32 and 64 bits
* was requested but couldn't be fulfilled. Ideally we
* would do this for 64-bits but historically we have
* always fallen back to 32-bits.
*/
return -ENOMEM;
} else {
dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
set_dma_ops(&pdev->dev, &dma_iommu_ops);

View file

@ -82,7 +82,6 @@ static int pSeries_reconfig_remove_node(struct device_node *np)
of_detach_node(np);
of_node_put(parent);
of_node_put(np); /* Must decrement the refcount */
return 0;
}

View file

@ -235,6 +235,7 @@ static inline int validate_dt_prop_sizes(const char *prop1, int prop1_len,
return -1;
}
extern u32 pnv_get_supported_cpuidle_states(void);
static int powernv_add_idle_states(void)
{
struct device_node *power_mgt;
@ -248,6 +249,8 @@ static int powernv_add_idle_states(void)
const char *names[CPUIDLE_STATE_MAX];
u32 has_stop_states = 0;
int i, rc;
u32 supported_flags = pnv_get_supported_cpuidle_states();
/* Currently we have snooze statically defined */
@ -362,6 +365,13 @@ static int powernv_add_idle_states(void)
for (i = 0; i < dt_idle_states; i++) {
unsigned int exit_latency, target_residency;
bool stops_timebase = false;
/*
* Skip the platform idle state whose flag isn't in
* the supported_cpuidle_states flag mask.
*/
if ((flags[i] & supported_flags) != flags[i])
continue;
/*
* If an idle state has exit latency beyond
* POWERNV_THRESHOLD_LATENCY_NS then don't use it