arch/tile: finish enabling support for TILE-Gx 64-bit chip

This support was partially present in the existing code (look for
"__tilegx__" ifdefs) but with this change you can build a working
kernel using the TILE-Gx toolchain and ARCH=tilegx.

Most of these files are new, generally adding a foo_64.c file
where previously there was just a foo_32.c file.

The ARCH=tilegx directive redirects to arch/tile, not arch/tilegx,
using the existing SRCARCH mechanism in the top-level Makefile.

Changes to existing files:

- <asm/bitops.h> and <asm/bitops_32.h> changed to factor the
  include of <asm-generic/bitops/non-atomic.h> out into the common header.

- <asm/compat.h> and arch/tile/kernel/compat.c changed to remove
  the "const" markers I had put on compat_sys_execve() when trying
  to match some recent similar changes to the non-compat execve.
  It turns out the compat version wasn't "upgraded" to use const.

- <asm/opcode-tile_64.h> and <asm/opcode_constants_64.h> were
  previously included accidentally, with the 32-bit contents.  Now
  they have the proper 64-bit contents.

Finally, I had to hack the existing hacky drivers/input/input-compat.h
to add yet another "#ifdef" for INPUT_COMPAT_TEST (same as x86_64).

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com> [drivers/input]
This commit is contained in:
Chris Metcalf 2011-05-04 14:38:26 -04:00
parent be84cb4383
commit 18aecc2b64
30 changed files with 9349 additions and 1373 deletions

View file

@ -220,6 +220,11 @@ ifeq ($(ARCH),sh64)
SRCARCH := sh
endif
# Additional ARCH settings for tile:
# ARCH=tilegx is built from the arch/tile source tree, so redirect
# SRCARCH to "tile" rather than letting it default to the ARCH name.
ifeq ($(ARCH),tilegx)
SRCARCH := tile
endif
# Where to locate arch specific headers
hdr-arch := $(SRCARCH)

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,258 @@
/*
* Copyright 2011 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
/*
* @file
* Global header file.
* This header file specifies defines for TILE-Gx.
*/
#ifndef __ARCH_CHIP_H__
#define __ARCH_CHIP_H__
/** Specify chip version.
* When possible, prefer the CHIP_xxx symbols below for future-proofing.
* This is intended for cross-compiling; native compilation should
* use the predefined __tile_chip__ symbol.
*/
#define TILE_CHIP 10
/** Specify chip revision.
* This provides for the case of a respin of a particular chip type;
* the normal value for this symbol is "0".
* This is intended for cross-compiling; native compilation should
* use the predefined __tile_chip_rev__ symbol.
*/
#define TILE_CHIP_REV 0
/** The name of this architecture. */
#define CHIP_ARCH_NAME "tilegx"
/** The ELF e_machine type for binaries for this chip. */
#define CHIP_ELF_TYPE() EM_TILEGX
/** The alternate ELF e_machine type for binaries for this chip. */
#define CHIP_COMPAT_ELF_TYPE() 0x2597
/** What is the native word size of the machine? */
#define CHIP_WORD_SIZE() 64
/** How many bits of a virtual address are used. Extra bits must be
* the sign extension of the low bits.
*/
#define CHIP_VA_WIDTH() 42
/** How many bits are in a physical address? */
#define CHIP_PA_WIDTH() 40
/** Size of the L2 cache, in bytes. */
#define CHIP_L2_CACHE_SIZE() 262144
/** Log size of an L2 cache line in bytes. */
#define CHIP_L2_LOG_LINE_SIZE() 6
/** Size of an L2 cache line, in bytes. */
#define CHIP_L2_LINE_SIZE() (1 << CHIP_L2_LOG_LINE_SIZE())
/** Associativity of the L2 cache. */
#define CHIP_L2_ASSOC() 8
/** Size of the L1 data cache, in bytes. */
#define CHIP_L1D_CACHE_SIZE() 32768
/** Log size of an L1 data cache line in bytes. */
#define CHIP_L1D_LOG_LINE_SIZE() 6
/** Size of an L1 data cache line, in bytes. */
#define CHIP_L1D_LINE_SIZE() (1 << CHIP_L1D_LOG_LINE_SIZE())
/** Associativity of the L1 data cache. */
#define CHIP_L1D_ASSOC() 2
/** Size of the L1 instruction cache, in bytes. */
#define CHIP_L1I_CACHE_SIZE() 32768
/** Log size of an L1 instruction cache line in bytes. */
#define CHIP_L1I_LOG_LINE_SIZE() 6
/** Size of an L1 instruction cache line, in bytes. */
#define CHIP_L1I_LINE_SIZE() (1 << CHIP_L1I_LOG_LINE_SIZE())
/** Associativity of the L1 instruction cache. */
#define CHIP_L1I_ASSOC() 2
/** Stride with which flush instructions must be issued. */
#define CHIP_FLUSH_STRIDE() CHIP_L2_LINE_SIZE()
/** Stride with which inv instructions must be issued. */
#define CHIP_INV_STRIDE() CHIP_L2_LINE_SIZE()
/** Stride with which finv instructions must be issued. */
#define CHIP_FINV_STRIDE() CHIP_L2_LINE_SIZE()
/** Can the local cache coherently cache data that is homed elsewhere? */
#define CHIP_HAS_COHERENT_LOCAL_CACHE() 1
/** How many simultaneous outstanding victims can the L2 cache have? */
#define CHIP_MAX_OUTSTANDING_VICTIMS() 128
/** Does the TLB support the NC and NOALLOC bits? */
#define CHIP_HAS_NC_AND_NOALLOC_BITS() 1
/** Does the chip support hash-for-home caching? */
#define CHIP_HAS_CBOX_HOME_MAP() 1
/** Number of entries in the chip's home map tables. */
#define CHIP_CBOX_HOME_MAP_SIZE() 128
/** Do uncacheable requests miss in the cache regardless of whether
* there is matching data? */
#define CHIP_HAS_ENFORCED_UNCACHEABLE_REQUESTS() 1
/** Does the mf instruction wait for victims? */
#define CHIP_HAS_MF_WAITS_FOR_VICTIMS() 0
/** Does the chip have an "inv" instruction that doesn't also flush? */
#define CHIP_HAS_INV() 1
/** Does the chip have a "wh64" instruction? */
#define CHIP_HAS_WH64() 1
/** Does this chip have a 'dword_align' instruction? */
#define CHIP_HAS_DWORD_ALIGN() 0
/** Number of performance counters. */
#define CHIP_PERFORMANCE_COUNTERS() 4
/** Does this chip have auxiliary performance counters? */
#define CHIP_HAS_AUX_PERF_COUNTERS() 1
/** Is the CBOX_MSR1 SPR supported? */
#define CHIP_HAS_CBOX_MSR1() 0
/** Is the TILE_RTF_HWM SPR supported? */
#define CHIP_HAS_TILE_RTF_HWM() 1
/** Is the TILE_WRITE_PENDING SPR supported? */
#define CHIP_HAS_TILE_WRITE_PENDING() 0
/** Is the PROC_STATUS SPR supported? */
#define CHIP_HAS_PROC_STATUS_SPR() 1
/** Is the DSTREAM_PF SPR supported? */
#define CHIP_HAS_DSTREAM_PF() 1
/** Log of the number of mshims we have. */
#define CHIP_LOG_NUM_MSHIMS() 2
/** Are the bases of the interrupt vector areas fixed? */
#define CHIP_HAS_FIXED_INTVEC_BASE() 0
/** Are the interrupt masks split up into 2 SPRs? */
#define CHIP_HAS_SPLIT_INTR_MASK() 0
/** Is the cycle count split up into 2 SPRs? */
#define CHIP_HAS_SPLIT_CYCLE() 0
/** Does the chip have a static network? */
#define CHIP_HAS_SN() 0
/** Does the chip have a static network processor? */
#define CHIP_HAS_SN_PROC() 0
/** Size of the L1 static network processor instruction cache, in bytes. */
/* #define CHIP_L1SNI_CACHE_SIZE() -- does not apply to chip 10 */
/** Does the chip have DMA support in each tile? */
#define CHIP_HAS_TILE_DMA() 0
/** Does the chip have the second revision of the directly accessible
* dynamic networks? This encapsulates a number of characteristics,
* including the absence of the catch-all, the absence of inline message
* tags, the absence of support for network context-switching, and so on.
*/
#define CHIP_HAS_REV1_XDN() 1
/** Does the chip have cmpexch and similar (fetchadd, exch, etc.)? */
#define CHIP_HAS_CMPEXCH() 1
/** Does the chip have memory-mapped I/O support? */
#define CHIP_HAS_MMIO() 1
/** Does the chip have post-completion interrupts? */
#define CHIP_HAS_POST_COMPLETION_INTERRUPTS() 1
/** Does the chip have native single step support? */
#define CHIP_HAS_SINGLE_STEP() 1
#ifndef __OPEN_SOURCE__ /* features only relevant to hypervisor-level code */
/** How many entries are present in the instruction TLB? */
#define CHIP_ITLB_ENTRIES() 16
/** How many entries are present in the data TLB? */
#define CHIP_DTLB_ENTRIES() 32
/** How many MAF entries does the XAUI shim have? */
#define CHIP_XAUI_MAF_ENTRIES() 32
/** Does the memory shim have a source-id table? */
#define CHIP_HAS_MSHIM_SRCID_TABLE() 0
/** Does the L1 instruction cache clear on reset? */
#define CHIP_HAS_L1I_CLEAR_ON_RESET() 1
/** Does the chip come out of reset with valid coordinates on all tiles?
* Note that if defined, this also implies that the upper left is 1,1.
*/
#define CHIP_HAS_VALID_TILE_COORD_RESET() 1
/** Does the chip have unified packet formats? */
#define CHIP_HAS_UNIFIED_PACKET_FORMATS() 1
/** Does the chip support write reordering? */
#define CHIP_HAS_WRITE_REORDERING() 1
/** Does the chip support Y-X routing as well as X-Y? */
#define CHIP_HAS_Y_X_ROUTING() 1
/** Is INTCTRL_3 managed with the correct MPL? */
#define CHIP_HAS_INTCTRL_3_STATUS_FIX() 1
/** Is it possible to configure the chip to be big-endian? */
#define CHIP_HAS_BIG_ENDIAN_CONFIG() 1
/** Is the CACHE_RED_WAY_OVERRIDDEN SPR supported? */
#define CHIP_HAS_CACHE_RED_WAY_OVERRIDDEN() 0
/** Is the DIAG_TRACE_WAY SPR supported? */
#define CHIP_HAS_DIAG_TRACE_WAY() 0
/** Is the MEM_STRIPE_CONFIG SPR supported? */
#define CHIP_HAS_MEM_STRIPE_CONFIG() 1
/** Are the TLB_PERF SPRs supported? */
#define CHIP_HAS_TLB_PERF() 1
/** Is the VDN_SNOOP_SHIM_CTL SPR supported? */
#define CHIP_HAS_VDN_SNOOP_SHIM_CTL() 0
/** Does the chip support rev1 DMA packets? */
#define CHIP_HAS_REV1_DMA_PACKETS() 1
/** Does the chip have an IPI shim? */
#define CHIP_HAS_IPI() 1
#endif /* !__OPEN_SOURCE__ */
#endif /* __ARCH_CHIP_H__ */

View file

@ -0,0 +1,276 @@
/*
* Copyright 2011 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
#ifndef __ARCH_INTERRUPTS_H__
#define __ARCH_INTERRUPTS_H__
/**
 * Mask for an interrupt: a single bit at position "intno".
 * Interrupt numbers defined in this header go past 31 (and
 * INT_BREAKPOINT is 63), so the C form builds the mask from an
 * unsigned long long (1ULL) constant.  Assembler sources get a plain
 * integer expression instead.
 */
#ifdef __ASSEMBLER__
/* Note: must handle breaking interrupts into high and low words manually. */
#define INT_MASK(intno) (1 << (intno))
#else
#define INT_MASK(intno) (1ULL << (intno))
#endif
/**
 * Where a given interrupt executes: base 0xFC000000, plus "pl" shifted
 * left by 24, plus the interrupt number "i" shifted left by 8 (so
 * consecutive interrupt numbers are 0x100 apart).
 */
#define INTERRUPT_VECTOR(i, pl) (0xFC000000 + ((pl) << 24) + ((i) << 8))
/** Where to store a vector for a given interrupt (INTERRUPT_VECTOR at pl 0). */
#define USER_INTERRUPT_VECTOR(i) INTERRUPT_VECTOR(i, 0)
/** The base address of user-level interrupts (interrupt 0 at pl 0). */
#define USER_INTERRUPT_VECTOR_BASE INTERRUPT_VECTOR(0, 0)
/** Additional synthetic interrupt; its number lies above NUM_INTERRUPTS. */
#define INT_BREAKPOINT (63)
#define INT_MEM_ERROR 0
#define INT_SINGLE_STEP_3 1
#define INT_SINGLE_STEP_2 2
#define INT_SINGLE_STEP_1 3
#define INT_SINGLE_STEP_0 4
#define INT_IDN_COMPLETE 5
#define INT_UDN_COMPLETE 6
#define INT_ITLB_MISS 7
#define INT_ILL 8
#define INT_GPV 9
#define INT_IDN_ACCESS 10
#define INT_UDN_ACCESS 11
#define INT_SWINT_3 12
#define INT_SWINT_2 13
#define INT_SWINT_1 14
#define INT_SWINT_0 15
#define INT_ILL_TRANS 16
#define INT_UNALIGN_DATA 17
#define INT_DTLB_MISS 18
#define INT_DTLB_ACCESS 19
#define INT_IDN_FIREWALL 20
#define INT_UDN_FIREWALL 21
#define INT_TILE_TIMER 22
#define INT_AUX_TILE_TIMER 23
#define INT_IDN_TIMER 24
#define INT_UDN_TIMER 25
#define INT_IDN_AVAIL 26
#define INT_UDN_AVAIL 27
#define INT_IPI_3 28
#define INT_IPI_2 29
#define INT_IPI_1 30
#define INT_IPI_0 31
#define INT_PERF_COUNT 32
#define INT_AUX_PERF_COUNT 33
#define INT_INTCTRL_3 34
#define INT_INTCTRL_2 35
#define INT_INTCTRL_1 36
#define INT_INTCTRL_0 37
#define INT_BOOT_ACCESS 38
#define INT_WORLD_ACCESS 39
#define INT_I_ASID 40
#define INT_D_ASID 41
#define INT_DOUBLE_FAULT 42
#define NUM_INTERRUPTS 43
#ifndef __ASSEMBLER__
/*
 * QUEUED_INTERRUPTS and NONQUEUED_INTERRUPTS are complementary masks:
 * no interrupt is listed in both, and together they list every
 * interrupt number from 0 through NUM_INTERRUPTS - 1.
 */
#define QUEUED_INTERRUPTS ( \
INT_MASK(INT_MEM_ERROR) | \
INT_MASK(INT_IDN_COMPLETE) | \
INT_MASK(INT_UDN_COMPLETE) | \
INT_MASK(INT_IDN_FIREWALL) | \
INT_MASK(INT_UDN_FIREWALL) | \
INT_MASK(INT_TILE_TIMER) | \
INT_MASK(INT_AUX_TILE_TIMER) | \
INT_MASK(INT_IDN_TIMER) | \
INT_MASK(INT_UDN_TIMER) | \
INT_MASK(INT_IDN_AVAIL) | \
INT_MASK(INT_UDN_AVAIL) | \
INT_MASK(INT_IPI_3) | \
INT_MASK(INT_IPI_2) | \
INT_MASK(INT_IPI_1) | \
INT_MASK(INT_IPI_0) | \
INT_MASK(INT_PERF_COUNT) | \
INT_MASK(INT_AUX_PERF_COUNT) | \
INT_MASK(INT_INTCTRL_3) | \
INT_MASK(INT_INTCTRL_2) | \
INT_MASK(INT_INTCTRL_1) | \
INT_MASK(INT_INTCTRL_0) | \
INT_MASK(INT_BOOT_ACCESS) | \
INT_MASK(INT_WORLD_ACCESS) | \
INT_MASK(INT_I_ASID) | \
INT_MASK(INT_D_ASID) | \
INT_MASK(INT_DOUBLE_FAULT) | \
0)
/* Every interrupt that is not in QUEUED_INTERRUPTS. */
#define NONQUEUED_INTERRUPTS ( \
INT_MASK(INT_SINGLE_STEP_3) | \
INT_MASK(INT_SINGLE_STEP_2) | \
INT_MASK(INT_SINGLE_STEP_1) | \
INT_MASK(INT_SINGLE_STEP_0) | \
INT_MASK(INT_ITLB_MISS) | \
INT_MASK(INT_ILL) | \
INT_MASK(INT_GPV) | \
INT_MASK(INT_IDN_ACCESS) | \
INT_MASK(INT_UDN_ACCESS) | \
INT_MASK(INT_SWINT_3) | \
INT_MASK(INT_SWINT_2) | \
INT_MASK(INT_SWINT_1) | \
INT_MASK(INT_SWINT_0) | \
INT_MASK(INT_ILL_TRANS) | \
INT_MASK(INT_UNALIGN_DATA) | \
INT_MASK(INT_DTLB_MISS) | \
INT_MASK(INT_DTLB_ACCESS) | \
0)
/*
 * CRITICAL_MASKED_INTERRUPTS and CRITICAL_UNMASKED_INTERRUPTS are
 * complementary masks: no interrupt is listed in both, and together
 * they list every interrupt number from 0 through NUM_INTERRUPTS - 1.
 */
#define CRITICAL_MASKED_INTERRUPTS ( \
INT_MASK(INT_MEM_ERROR) | \
INT_MASK(INT_SINGLE_STEP_3) | \
INT_MASK(INT_SINGLE_STEP_2) | \
INT_MASK(INT_SINGLE_STEP_1) | \
INT_MASK(INT_SINGLE_STEP_0) | \
INT_MASK(INT_IDN_COMPLETE) | \
INT_MASK(INT_UDN_COMPLETE) | \
INT_MASK(INT_IDN_FIREWALL) | \
INT_MASK(INT_UDN_FIREWALL) | \
INT_MASK(INT_TILE_TIMER) | \
INT_MASK(INT_AUX_TILE_TIMER) | \
INT_MASK(INT_IDN_TIMER) | \
INT_MASK(INT_UDN_TIMER) | \
INT_MASK(INT_IDN_AVAIL) | \
INT_MASK(INT_UDN_AVAIL) | \
INT_MASK(INT_IPI_3) | \
INT_MASK(INT_IPI_2) | \
INT_MASK(INT_IPI_1) | \
INT_MASK(INT_IPI_0) | \
INT_MASK(INT_PERF_COUNT) | \
INT_MASK(INT_AUX_PERF_COUNT) | \
INT_MASK(INT_INTCTRL_3) | \
INT_MASK(INT_INTCTRL_2) | \
INT_MASK(INT_INTCTRL_1) | \
INT_MASK(INT_INTCTRL_0) | \
0)
/* Every interrupt that is not in CRITICAL_MASKED_INTERRUPTS. */
#define CRITICAL_UNMASKED_INTERRUPTS ( \
INT_MASK(INT_ITLB_MISS) | \
INT_MASK(INT_ILL) | \
INT_MASK(INT_GPV) | \
INT_MASK(INT_IDN_ACCESS) | \
INT_MASK(INT_UDN_ACCESS) | \
INT_MASK(INT_SWINT_3) | \
INT_MASK(INT_SWINT_2) | \
INT_MASK(INT_SWINT_1) | \
INT_MASK(INT_SWINT_0) | \
INT_MASK(INT_ILL_TRANS) | \
INT_MASK(INT_UNALIGN_DATA) | \
INT_MASK(INT_DTLB_MISS) | \
INT_MASK(INT_DTLB_ACCESS) | \
INT_MASK(INT_BOOT_ACCESS) | \
INT_MASK(INT_WORLD_ACCESS) | \
INT_MASK(INT_I_ASID) | \
INT_MASK(INT_D_ASID) | \
INT_MASK(INT_DOUBLE_FAULT) | \
0)
/*
 * MASKABLE_INTERRUPTS and UNMASKABLE_INTERRUPTS are complementary
 * masks covering every interrupt number from 0 through
 * NUM_INTERRUPTS - 1.  As written here, they list the same interrupts
 * as CRITICAL_MASKED_INTERRUPTS and CRITICAL_UNMASKED_INTERRUPTS
 * respectively.
 */
#define MASKABLE_INTERRUPTS ( \
INT_MASK(INT_MEM_ERROR) | \
INT_MASK(INT_SINGLE_STEP_3) | \
INT_MASK(INT_SINGLE_STEP_2) | \
INT_MASK(INT_SINGLE_STEP_1) | \
INT_MASK(INT_SINGLE_STEP_0) | \
INT_MASK(INT_IDN_COMPLETE) | \
INT_MASK(INT_UDN_COMPLETE) | \
INT_MASK(INT_IDN_FIREWALL) | \
INT_MASK(INT_UDN_FIREWALL) | \
INT_MASK(INT_TILE_TIMER) | \
INT_MASK(INT_AUX_TILE_TIMER) | \
INT_MASK(INT_IDN_TIMER) | \
INT_MASK(INT_UDN_TIMER) | \
INT_MASK(INT_IDN_AVAIL) | \
INT_MASK(INT_UDN_AVAIL) | \
INT_MASK(INT_IPI_3) | \
INT_MASK(INT_IPI_2) | \
INT_MASK(INT_IPI_1) | \
INT_MASK(INT_IPI_0) | \
INT_MASK(INT_PERF_COUNT) | \
INT_MASK(INT_AUX_PERF_COUNT) | \
INT_MASK(INT_INTCTRL_3) | \
INT_MASK(INT_INTCTRL_2) | \
INT_MASK(INT_INTCTRL_1) | \
INT_MASK(INT_INTCTRL_0) | \
0)
/* Every interrupt that is not in MASKABLE_INTERRUPTS. */
#define UNMASKABLE_INTERRUPTS ( \
INT_MASK(INT_ITLB_MISS) | \
INT_MASK(INT_ILL) | \
INT_MASK(INT_GPV) | \
INT_MASK(INT_IDN_ACCESS) | \
INT_MASK(INT_UDN_ACCESS) | \
INT_MASK(INT_SWINT_3) | \
INT_MASK(INT_SWINT_2) | \
INT_MASK(INT_SWINT_1) | \
INT_MASK(INT_SWINT_0) | \
INT_MASK(INT_ILL_TRANS) | \
INT_MASK(INT_UNALIGN_DATA) | \
INT_MASK(INT_DTLB_MISS) | \
INT_MASK(INT_DTLB_ACCESS) | \
INT_MASK(INT_BOOT_ACCESS) | \
INT_MASK(INT_WORLD_ACCESS) | \
INT_MASK(INT_I_ASID) | \
INT_MASK(INT_D_ASID) | \
INT_MASK(INT_DOUBLE_FAULT) | \
0)
/*
 * SYNC_INTERRUPTS and NON_SYNC_INTERRUPTS are complementary masks.
 * With the numbering in this header, SYNC_INTERRUPTS is exactly
 * interrupts 1 through 19 and NON_SYNC_INTERRUPTS is interrupt 0
 * plus interrupts 20 through 42.
 */
#define SYNC_INTERRUPTS ( \
INT_MASK(INT_SINGLE_STEP_3) | \
INT_MASK(INT_SINGLE_STEP_2) | \
INT_MASK(INT_SINGLE_STEP_1) | \
INT_MASK(INT_SINGLE_STEP_0) | \
INT_MASK(INT_IDN_COMPLETE) | \
INT_MASK(INT_UDN_COMPLETE) | \
INT_MASK(INT_ITLB_MISS) | \
INT_MASK(INT_ILL) | \
INT_MASK(INT_GPV) | \
INT_MASK(INT_IDN_ACCESS) | \
INT_MASK(INT_UDN_ACCESS) | \
INT_MASK(INT_SWINT_3) | \
INT_MASK(INT_SWINT_2) | \
INT_MASK(INT_SWINT_1) | \
INT_MASK(INT_SWINT_0) | \
INT_MASK(INT_ILL_TRANS) | \
INT_MASK(INT_UNALIGN_DATA) | \
INT_MASK(INT_DTLB_MISS) | \
INT_MASK(INT_DTLB_ACCESS) | \
0)
/* Every interrupt that is not in SYNC_INTERRUPTS. */
#define NON_SYNC_INTERRUPTS ( \
INT_MASK(INT_MEM_ERROR) | \
INT_MASK(INT_IDN_FIREWALL) | \
INT_MASK(INT_UDN_FIREWALL) | \
INT_MASK(INT_TILE_TIMER) | \
INT_MASK(INT_AUX_TILE_TIMER) | \
INT_MASK(INT_IDN_TIMER) | \
INT_MASK(INT_UDN_TIMER) | \
INT_MASK(INT_IDN_AVAIL) | \
INT_MASK(INT_UDN_AVAIL) | \
INT_MASK(INT_IPI_3) | \
INT_MASK(INT_IPI_2) | \
INT_MASK(INT_IPI_1) | \
INT_MASK(INT_IPI_0) | \
INT_MASK(INT_PERF_COUNT) | \
INT_MASK(INT_AUX_PERF_COUNT) | \
INT_MASK(INT_INTCTRL_3) | \
INT_MASK(INT_INTCTRL_2) | \
INT_MASK(INT_INTCTRL_1) | \
INT_MASK(INT_INTCTRL_0) | \
INT_MASK(INT_BOOT_ACCESS) | \
INT_MASK(INT_WORLD_ACCESS) | \
INT_MASK(INT_I_ASID) | \
INT_MASK(INT_D_ASID) | \
INT_MASK(INT_DOUBLE_FAULT) | \
0)
#endif /* !__ASSEMBLER__ */
#endif /* !__ARCH_INTERRUPTS_H__ */

View file

@ -0,0 +1,173 @@
/*
* Copyright 2011 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
#ifndef __DOXYGEN__
#ifndef __ARCH_SPR_DEF_H__
#define __ARCH_SPR_DEF_H__
#define SPR_AUX_PERF_COUNT_0 0x2105
#define SPR_AUX_PERF_COUNT_1 0x2106
#define SPR_AUX_PERF_COUNT_CTL 0x2107
#define SPR_AUX_PERF_COUNT_STS 0x2108
#define SPR_CMPEXCH_VALUE 0x2780
#define SPR_CYCLE 0x2781
#define SPR_DONE 0x2705
#define SPR_DSTREAM_PF 0x2706
#define SPR_EVENT_BEGIN 0x2782
#define SPR_EVENT_END 0x2783
#define SPR_EX_CONTEXT_0_0 0x2580
#define SPR_EX_CONTEXT_0_1 0x2581
#define SPR_EX_CONTEXT_0_1__PL_SHIFT 0
#define SPR_EX_CONTEXT_0_1__PL_RMASK 0x3
#define SPR_EX_CONTEXT_0_1__PL_MASK 0x3
#define SPR_EX_CONTEXT_0_1__ICS_SHIFT 2
#define SPR_EX_CONTEXT_0_1__ICS_RMASK 0x1
#define SPR_EX_CONTEXT_0_1__ICS_MASK 0x4
#define SPR_EX_CONTEXT_1_0 0x2480
#define SPR_EX_CONTEXT_1_1 0x2481
#define SPR_EX_CONTEXT_1_1__PL_SHIFT 0
#define SPR_EX_CONTEXT_1_1__PL_RMASK 0x3
#define SPR_EX_CONTEXT_1_1__PL_MASK 0x3
#define SPR_EX_CONTEXT_1_1__ICS_SHIFT 2
#define SPR_EX_CONTEXT_1_1__ICS_RMASK 0x1
#define SPR_EX_CONTEXT_1_1__ICS_MASK 0x4
#define SPR_EX_CONTEXT_2_0 0x2380
#define SPR_EX_CONTEXT_2_1 0x2381
#define SPR_EX_CONTEXT_2_1__PL_SHIFT 0
#define SPR_EX_CONTEXT_2_1__PL_RMASK 0x3
#define SPR_EX_CONTEXT_2_1__PL_MASK 0x3
#define SPR_EX_CONTEXT_2_1__ICS_SHIFT 2
#define SPR_EX_CONTEXT_2_1__ICS_RMASK 0x1
#define SPR_EX_CONTEXT_2_1__ICS_MASK 0x4
#define SPR_FAIL 0x2707
#define SPR_ILL_TRANS_REASON__I_STREAM_VA_RMASK 0x1
#define SPR_INTCTRL_0_STATUS 0x2505
#define SPR_INTCTRL_1_STATUS 0x2405
#define SPR_INTCTRL_2_STATUS 0x2305
#define SPR_INTERRUPT_CRITICAL_SECTION 0x2708
#define SPR_INTERRUPT_MASK_0 0x2506
#define SPR_INTERRUPT_MASK_1 0x2406
#define SPR_INTERRUPT_MASK_2 0x2306
#define SPR_INTERRUPT_MASK_RESET_0 0x2507
#define SPR_INTERRUPT_MASK_RESET_1 0x2407
#define SPR_INTERRUPT_MASK_RESET_2 0x2307
#define SPR_INTERRUPT_MASK_SET_0 0x2508
#define SPR_INTERRUPT_MASK_SET_1 0x2408
#define SPR_INTERRUPT_MASK_SET_2 0x2308
#define SPR_INTERRUPT_VECTOR_BASE_0 0x2509
#define SPR_INTERRUPT_VECTOR_BASE_1 0x2409
#define SPR_INTERRUPT_VECTOR_BASE_2 0x2309
#define SPR_INTERRUPT_VECTOR_BASE_3 0x2209
#define SPR_IPI_EVENT_0 0x1f05
#define SPR_IPI_EVENT_1 0x1e05
#define SPR_IPI_EVENT_2 0x1d05
#define SPR_IPI_EVENT_RESET_0 0x1f06
#define SPR_IPI_EVENT_RESET_1 0x1e06
#define SPR_IPI_EVENT_RESET_2 0x1d06
#define SPR_IPI_EVENT_SET_0 0x1f07
#define SPR_IPI_EVENT_SET_1 0x1e07
#define SPR_IPI_EVENT_SET_2 0x1d07
#define SPR_IPI_MASK_0 0x1f08
#define SPR_IPI_MASK_1 0x1e08
#define SPR_IPI_MASK_2 0x1d08
#define SPR_IPI_MASK_RESET_0 0x1f09
#define SPR_IPI_MASK_RESET_1 0x1e09
#define SPR_IPI_MASK_RESET_2 0x1d09
#define SPR_IPI_MASK_SET_0 0x1f0a
#define SPR_IPI_MASK_SET_1 0x1e0a
#define SPR_IPI_MASK_SET_2 0x1d0a
#define SPR_MPL_AUX_TILE_TIMER_SET_0 0x1700
#define SPR_MPL_AUX_TILE_TIMER_SET_1 0x1701
#define SPR_MPL_AUX_TILE_TIMER_SET_2 0x1702
#define SPR_MPL_INTCTRL_0_SET_0 0x2500
#define SPR_MPL_INTCTRL_0_SET_1 0x2501
#define SPR_MPL_INTCTRL_0_SET_2 0x2502
#define SPR_MPL_INTCTRL_1_SET_0 0x2400
#define SPR_MPL_INTCTRL_1_SET_1 0x2401
#define SPR_MPL_INTCTRL_1_SET_2 0x2402
#define SPR_MPL_INTCTRL_2_SET_0 0x2300
#define SPR_MPL_INTCTRL_2_SET_1 0x2301
#define SPR_MPL_INTCTRL_2_SET_2 0x2302
#define SPR_MPL_UDN_ACCESS_SET_0 0x0b00
#define SPR_MPL_UDN_ACCESS_SET_1 0x0b01
#define SPR_MPL_UDN_ACCESS_SET_2 0x0b02
#define SPR_MPL_UDN_AVAIL_SET_0 0x1b00
#define SPR_MPL_UDN_AVAIL_SET_1 0x1b01
#define SPR_MPL_UDN_AVAIL_SET_2 0x1b02
#define SPR_MPL_UDN_COMPLETE_SET_0 0x0600
#define SPR_MPL_UDN_COMPLETE_SET_1 0x0601
#define SPR_MPL_UDN_COMPLETE_SET_2 0x0602
#define SPR_MPL_UDN_FIREWALL_SET_0 0x1500
#define SPR_MPL_UDN_FIREWALL_SET_1 0x1501
#define SPR_MPL_UDN_FIREWALL_SET_2 0x1502
#define SPR_MPL_UDN_TIMER_SET_0 0x1900
#define SPR_MPL_UDN_TIMER_SET_1 0x1901
#define SPR_MPL_UDN_TIMER_SET_2 0x1902
#define SPR_MPL_WORLD_ACCESS_SET_0 0x2700
#define SPR_MPL_WORLD_ACCESS_SET_1 0x2701
#define SPR_MPL_WORLD_ACCESS_SET_2 0x2702
#define SPR_PASS 0x2709
#define SPR_PERF_COUNT_0 0x2005
#define SPR_PERF_COUNT_1 0x2006
#define SPR_PERF_COUNT_CTL 0x2007
#define SPR_PERF_COUNT_DN_CTL 0x2008
#define SPR_PERF_COUNT_STS 0x2009
#define SPR_PROC_STATUS 0x2784
#define SPR_SIM_CONTROL 0x2785
#define SPR_SINGLE_STEP_CONTROL_0 0x0405
#define SPR_SINGLE_STEP_CONTROL_0__CANCELED_MASK 0x1
#define SPR_SINGLE_STEP_CONTROL_0__INHIBIT_MASK 0x2
#define SPR_SINGLE_STEP_CONTROL_1 0x0305
#define SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK 0x1
#define SPR_SINGLE_STEP_CONTROL_1__INHIBIT_MASK 0x2
#define SPR_SINGLE_STEP_CONTROL_2 0x0205
#define SPR_SINGLE_STEP_CONTROL_2__CANCELED_MASK 0x1
#define SPR_SINGLE_STEP_CONTROL_2__INHIBIT_MASK 0x2
#define SPR_SINGLE_STEP_EN_0_0 0x250a
#define SPR_SINGLE_STEP_EN_0_1 0x240a
#define SPR_SINGLE_STEP_EN_0_2 0x230a
#define SPR_SINGLE_STEP_EN_1_0 0x250b
#define SPR_SINGLE_STEP_EN_1_1 0x240b
#define SPR_SINGLE_STEP_EN_1_2 0x230b
#define SPR_SINGLE_STEP_EN_2_0 0x250c
#define SPR_SINGLE_STEP_EN_2_1 0x240c
#define SPR_SINGLE_STEP_EN_2_2 0x230c
#define SPR_SYSTEM_SAVE_0_0 0x2582
#define SPR_SYSTEM_SAVE_0_1 0x2583
#define SPR_SYSTEM_SAVE_0_2 0x2584
#define SPR_SYSTEM_SAVE_0_3 0x2585
#define SPR_SYSTEM_SAVE_1_0 0x2482
#define SPR_SYSTEM_SAVE_1_1 0x2483
#define SPR_SYSTEM_SAVE_1_2 0x2484
#define SPR_SYSTEM_SAVE_1_3 0x2485
#define SPR_SYSTEM_SAVE_2_0 0x2382
#define SPR_SYSTEM_SAVE_2_1 0x2383
#define SPR_SYSTEM_SAVE_2_2 0x2384
#define SPR_SYSTEM_SAVE_2_3 0x2385
#define SPR_TILE_COORD 0x270b
#define SPR_TILE_RTF_HWM 0x270c
#define SPR_TILE_TIMER_CONTROL 0x1605
#define SPR_UDN_AVAIL_EN 0x1b05
#define SPR_UDN_DATA_AVAIL 0x0b80
#define SPR_UDN_DEADLOCK_TIMEOUT 0x1906
#define SPR_UDN_DEMUX_COUNT_0 0x0b05
#define SPR_UDN_DEMUX_COUNT_1 0x0b06
#define SPR_UDN_DEMUX_COUNT_2 0x0b07
#define SPR_UDN_DEMUX_COUNT_3 0x0b08
#define SPR_UDN_DIRECTION_PROTECT 0x1505
#endif /* !defined(__ARCH_SPR_DEF_H__) */
#endif /* !defined(__DOXYGEN__) */

View file

@ -0,0 +1,169 @@
/*
* Copyright 2011 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*
* Do not include directly; use <asm/atomic.h>.
*/
#ifndef _ASM_TILE_ATOMIC_64_H
#define _ASM_TILE_ATOMIC_64_H
#ifndef __ASSEMBLY__
#include <arch/spr_def.h>
/* First, the 32-bit atomic ops that are "real" on our 64-bit platform. */
#define atomic_set(v, i) ((v)->counter = (i))
/*
* The smp_mb() operations throughout are to support the fact that
* Linux requires memory barriers before and after the operation,
* on any routine which updates memory and returns a value.
*/
/*
 * Compare-and-exchange on the 32-bit counter of "v".
 * The expected value "o" is written to the CMPEXCH_VALUE SPR, then
 * cmpexch4 is issued with the replacement "n".  Returns the result of
 * cmpexch4; callers in this file treat a result equal to "o" as success.
 * A full smp_mb() is issued on each side of the cmpexch4.
 */
static inline int atomic_cmpxchg(atomic_t *v, int o, int n)
{
int val;
__insn_mtspr(SPR_CMPEXCH_VALUE, o); /* stage the comparison value */
smp_mb(); /* barrier for proper semantics */
val = __insn_cmpexch4((void *)&v->counter, n);
smp_mb(); /* barrier for proper semantics */
return val;
}
/*
 * Exchange the 32-bit counter of "v" with "n" using exch4 and return
 * the result of that instruction.  A full memory barrier is placed on
 * both sides of the exchange.
 */
static inline int atomic_xchg(atomic_t *v, int n)
{
	int previous;

	smp_mb();  /* order earlier accesses ahead of the exchange */
	previous = __insn_exch4((void *)&v->counter, n);
	smp_mb();  /* order the exchange ahead of later accesses */

	return previous;
}
/*
 * Add "i" to the 32-bit counter of "v" with fetchadd4.  The fetched
 * value is discarded and no smp_mb() is issued here.
 */
static inline void atomic_add(int i, atomic_t *v)
{
__insn_fetchadd4((void *)&v->counter, i);
}
/*
 * Add "i" to the 32-bit counter of "v" and return the resulting sum.
 * The sum is rebuilt locally by adding "i" to what fetchadd4 yields.
 * smp_mb() precedes the operation; afterwards only a compiler
 * barrier() is used.
 */
static inline int atomic_add_return(int i, atomic_t *v)
{
	int sum;

	smp_mb();  /* barrier for proper semantics */
	sum = __insn_fetchadd4((void *)&v->counter, i) + i;
	barrier();  /* the "+ i" above will wait on memory */

	return sum;
}
/*
 * Add "a" to the 32-bit counter of "v" unless the counter equals "u".
 * Retries atomic_cmpxchg() until it either observes "u" (returns 0,
 * nothing added) or an exchange succeeds (returns 1).
 */
static inline int atomic_add_unless(atomic_t *v, int a, int u)
{
	int expected;
	int seen = v->counter;

	for (;;) {
		if (seen == u)
			return 0;
		expected = seen;
		seen = atomic_cmpxchg(v, expected, expected + a);
		if (seen == expected)
			return 1;  /* exchange took effect; seen != u here */
	}
}
/* Now the true 64-bit operations. */
#define ATOMIC64_INIT(i) { (i) }
#define atomic64_read(v) ((v)->counter)
#define atomic64_set(v, i) ((v)->counter = (i))
/*
 * Compare-and-exchange on the 64-bit counter of "v".
 * The expected value "o" is written to the CMPEXCH_VALUE SPR, then
 * cmpexch is issued with the replacement "n".  Returns the result of
 * cmpexch; callers in this file treat a result equal to "o" as success.
 * A full smp_mb() is issued before the SPR write and after the cmpexch.
 */
static inline long atomic64_cmpxchg(atomic64_t *v, long o, long n)
{
long val;
smp_mb(); /* barrier for proper semantics */
__insn_mtspr(SPR_CMPEXCH_VALUE, o); /* stage the comparison value */
val = __insn_cmpexch((void *)&v->counter, n);
smp_mb(); /* barrier for proper semantics */
return val;
}
/*
 * Exchange the 64-bit counter of "v" with "n" using exch and return
 * the result of that instruction.  A full memory barrier is placed on
 * both sides of the exchange.
 */
static inline long atomic64_xchg(atomic64_t *v, long n)
{
	long previous;

	smp_mb();  /* order earlier accesses ahead of the exchange */
	previous = __insn_exch((void *)&v->counter, n);
	smp_mb();  /* order the exchange ahead of later accesses */

	return previous;
}
/*
 * Add "i" to the 64-bit counter of "v" with fetchadd.  The fetched
 * value is discarded and no smp_mb() is issued here.
 */
static inline void atomic64_add(long i, atomic64_t *v)
{
__insn_fetchadd((void *)&v->counter, i);
}
/*
 * Add "i" to the 64-bit counter of "v" and return the resulting sum.
 * The sum is rebuilt locally by adding "i" to what fetchadd yields.
 * The temporary must be a long: holding it in an int would truncate
 * the 64-bit sum to 32 bits before it is returned.
 * smp_mb() precedes the operation; afterwards only a compiler
 * barrier() is used.
 */
static inline long atomic64_add_return(long i, atomic64_t *v)
{
	long val;

	smp_mb();  /* barrier for proper semantics */
	val = __insn_fetchadd((void *)&v->counter, i) + i;
	barrier();  /* the "+ i" above will wait on memory */
	return val;
}
/*
 * Add "a" to the 64-bit counter of "v" unless the counter equals "u".
 * Retries atomic64_cmpxchg() until it either observes "u" (returns 0,
 * nothing added) or an exchange succeeds (returns 1).
 */
static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
{
	long expected;
	long seen = v->counter;

	for (;;) {
		if (seen == u)
			return 0;
		expected = seen;
		seen = atomic64_cmpxchg(v, expected, expected + a);
		if (seen == expected)
			return 1;  /* exchange took effect; seen != u here */
	}
}
/*
 * Derived 64-bit operations.  Each one is a thin wrapper that expands
 * to atomic64_add(), atomic64_add_return() or atomic64_add_unless().
 */
/* Subtraction is addition of the negated operand. */
#define atomic64_sub_return(i, v) atomic64_add_return(-(i), (v))
#define atomic64_sub(i, v) atomic64_add(-(i), (v))
/* Increment/decrement by one. */
#define atomic64_inc_return(v) atomic64_add_return(1, (v))
#define atomic64_dec_return(v) atomic64_sub_return(1, (v))
#define atomic64_inc(v) atomic64_add(1, (v))
#define atomic64_dec(v) atomic64_sub(1, (v))
/* Predicates on the value produced by the corresponding *_return op. */
#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
#define atomic64_dec_and_test(v) (atomic64_dec_return(v) == 0)
#define atomic64_sub_and_test(i, v) (atomic64_sub_return((i), (v)) == 0)
#define atomic64_add_negative(i, v) (atomic64_add_return((i), (v)) < 0)
/* Increment unless the counter is currently zero. */
#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
/*
 * Atomic dec and inc don't implement barrier, so provide them if needed.
 * (In this header atomic64_inc/atomic64_dec expand to atomic64_add(),
 * which issues no smp_mb().)  All four hooks are a full smp_mb().
 */
#define smp_mb__before_atomic_dec() smp_mb()
#define smp_mb__after_atomic_dec() smp_mb()
#define smp_mb__before_atomic_inc() smp_mb()
#define smp_mb__after_atomic_inc() smp_mb()
/*
 * Generic exchange on *ptr, dispatched on sizeof(*(ptr)):
 *   - objects the size of atomic_t go through atomic_xchg();
 *   - objects the size of atomic_long_t go through atomic_long_xchg();
 *   - any other size references __xchg_called_with_bad_pointer(),
 *     which is not defined in this header (presumably left undefined
 *     so that misuse fails at link time -- confirm).
 * The result is cast back to the type of *ptr.
 */
#define xchg(ptr, x) \
((typeof(*(ptr))) \
((sizeof(*(ptr)) == sizeof(atomic_t)) ? \
atomic_xchg((atomic_t *)(ptr), (long)(x)) : \
(sizeof(*(ptr)) == sizeof(atomic_long_t)) ? \
atomic_long_xchg((atomic_long_t *)(ptr), (long)(x)) : \
__xchg_called_with_bad_pointer()))
/*
 * Generic compare-and-exchange on *ptr, dispatched on sizeof(*(ptr))
 * in the same way as xchg(): atomic_cmpxchg() for atomic_t-sized
 * objects, atomic_long_cmpxchg() for atomic_long_t-sized objects, and
 * a reference to __cmpxchg_called_with_bad_pointer() otherwise.
 * The result is cast back to the type of *ptr.
 */
#define cmpxchg(ptr, o, n) \
((typeof(*(ptr))) \
((sizeof(*(ptr)) == sizeof(atomic_t)) ? \
atomic_cmpxchg((atomic_t *)(ptr), (long)(o), (long)(n)) : \
(sizeof(*(ptr)) == sizeof(atomic_long_t)) ? \
atomic_long_cmpxchg((atomic_long_t *)(ptr), (long)(o), (long)(n)) : \
__cmpxchg_called_with_bad_pointer()))
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_TILE_ATOMIC_64_H */

View file

@ -122,6 +122,7 @@ static inline unsigned long __arch_hweight64(__u64 w)
#include <asm-generic/bitops/lock.h>
#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/non-atomic.h>
#include <asm-generic/bitops/le.h>
#endif /* _ASM_TILE_BITOPS_H */

View file

@ -126,7 +126,6 @@ static inline int test_and_change_bit(unsigned nr,
#define smp_mb__before_clear_bit() smp_mb()
#define smp_mb__after_clear_bit() do {} while (0)
#include <asm-generic/bitops/non-atomic.h>
#include <asm-generic/bitops/ext2-atomic.h>
#endif /* _ASM_TILE_BITOPS_32_H */

View file

@ -0,0 +1,105 @@
/*
* Copyright 2011 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
#ifndef _ASM_TILE_BITOPS_64_H
#define _ASM_TILE_BITOPS_64_H
#include <linux/compiler.h>
#include <asm/atomic.h>
#include <asm/system.h>
/* See <asm/bitops.h> for API comments. */
/*
 * Set bit "nr" in the bitmap starting at "addr" using a single fetchor
 * on the word that contains the bit.  The fetched value is discarded
 * and no smp_mb() is issued.
 */
static inline void set_bit(unsigned nr, volatile unsigned long *addr)
{
	volatile unsigned long *word = addr + nr / BITS_PER_LONG;
	unsigned long bit = 1UL << (nr % BITS_PER_LONG);

	__insn_fetchor((void *)word, bit);
}
/*
 * Clear bit "nr" in the bitmap starting at "addr" using a single
 * fetchand with the inverted bit mask on the word that contains the
 * bit.  The fetched value is discarded and no smp_mb() is issued.
 */
static inline void clear_bit(unsigned nr, volatile unsigned long *addr)
{
	volatile unsigned long *word = addr + nr / BITS_PER_LONG;
	unsigned long bit = 1UL << (nr % BITS_PER_LONG);

	__insn_fetchand((void *)word, ~bit);
}
/*
 * clear_bit() in this header issues no smp_mb() of its own, so both
 * hooks expand to a full smp_mb().
 */
#define smp_mb__before_clear_bit() smp_mb()
#define smp_mb__after_clear_bit() smp_mb()
/*
 * Atomically toggle bit "nr" in the bitmap starting at "addr".
 * The word holding the bit is read once, then atomic64_cmpxchg() is
 * retried until the exchange is made against the value actually
 * observed in memory.
 */
static inline void change_bit(unsigned nr, volatile unsigned long *addr)
{
	unsigned long mask = (1UL << (nr % BITS_PER_LONG));
	long guess, oldval;

	addr += nr / BITS_PER_LONG;  /* step to the word holding the bit */
	oldval = *addr;  /* seed the loop; it must not start from garbage */
	do {
		guess = oldval;
		oldval = atomic64_cmpxchg((atomic64_t *)addr,
					  guess, guess ^ mask);
	} while (guess != oldval);
}
/*
* The test_and_xxx_bit() routines require a memory fence before we
* start the operation, and after the operation completes. We use
* smp_mb() before, and rely on the "!= 0" comparison, plus a compiler
* barrier(), to block until the atomic op is complete.
*/
/*
 * Set bit "nr" in the bitmap starting at "addr" and report whether it
 * was already set in the value fetched by fetchor (1 if so, else 0).
 * smp_mb() precedes the operation; afterwards only a compiler
 * barrier() is used.
 */
static inline int test_and_set_bit(unsigned nr, volatile unsigned long *addr)
{
	volatile unsigned long *word = addr + nr / BITS_PER_LONG;
	unsigned long bit = 1UL << (nr % BITS_PER_LONG);
	int was_set;

	smp_mb();  /* barrier for proper semantics */
	was_set = (__insn_fetchor((void *)word, bit) & bit) != 0;
	barrier();

	return was_set;
}
/*
 * Clear bit "nr" in the bitmap starting at "addr" and report whether
 * it was set in the value fetched by fetchand (1 if so, else 0).
 * smp_mb() precedes the operation; afterwards only a compiler
 * barrier() is used.
 */
static inline int test_and_clear_bit(unsigned nr, volatile unsigned long *addr)
{
	volatile unsigned long *word = addr + nr / BITS_PER_LONG;
	unsigned long bit = 1UL << (nr % BITS_PER_LONG);
	int was_set;

	smp_mb();  /* barrier for proper semantics */
	was_set = (__insn_fetchand((void *)word, ~bit) & bit) != 0;
	barrier();

	return was_set;
}
/*
 * Atomically toggle bit "nr" in the bitmap starting at "addr" and
 * return 1 if the bit was set beforehand, 0 otherwise.
 * Only the word that holds the bit is read: the bitmap's first word
 * is not touched unless it is the one containing bit "nr".
 */
static inline int test_and_change_bit(unsigned nr,
				      volatile unsigned long *addr)
{
	unsigned long mask = (1UL << (nr % BITS_PER_LONG));
	long guess, oldval;

	addr += nr / BITS_PER_LONG;  /* step to the word holding the bit */
	oldval = *addr;
	do {
		guess = oldval;
		oldval = atomic64_cmpxchg((atomic64_t *)addr,
					  guess, guess ^ mask);
	} while (guess != oldval);
	return (oldval & mask) != 0;
}
/*
 * ext2 atomic bit helpers.  The "lock" argument is ignored; the work is
 * done by test_and_set_bit()/test_and_clear_bit() on "addr" cast to
 * unsigned long *.
 */
#define ext2_set_bit_atomic(lock, nr, addr) \
test_and_set_bit((nr), (unsigned long *)(addr))
#define ext2_clear_bit_atomic(lock, nr, addr) \
test_and_clear_bit((nr), (unsigned long *)(addr))
#endif /* _ASM_TILE_BITOPS_64_H */

View file

@ -215,8 +215,8 @@ struct compat_sigaction;
struct compat_siginfo;
struct compat_sigaltstack;
long compat_sys_execve(const char __user *path,
const compat_uptr_t __user *argv,
const compat_uptr_t __user *envp, struct pt_regs *);
compat_uptr_t __user *argv,
compat_uptr_t __user *envp, struct pt_regs *);
long compat_sys_rt_sigaction(int sig, struct compat_sigaction __user *act,
struct compat_sigaction __user *oact,
size_t sigsetsize);

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,175 @@
/*
* Copyright 2011 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*
*/
#ifndef _ASM_TILE_PGTABLE_64_H
#define _ASM_TILE_PGTABLE_64_H
/* The level-0 page table breaks the address space into 32-bit chunks. */
#define PGDIR_SHIFT HV_LOG2_L1_SPAN
#define PGDIR_SIZE HV_L1_SPAN
/* Clears the offset-within-PGDIR_SIZE bits of an address. */
#define PGDIR_MASK (~(PGDIR_SIZE-1))
#define PTRS_PER_PGD HV_L0_ENTRIES
/* Size in bytes of a complete level-0 table. */
#define SIZEOF_PGD (PTRS_PER_PGD * sizeof(pgd_t))
/*
* The level-1 index is defined by the huge page size. A PMD is composed
* of PTRS_PER_PMD pmd_t's and is the middle level of the page table.
*/
#define PMD_SHIFT HV_LOG2_PAGE_SIZE_LARGE
#define PMD_SIZE HV_PAGE_SIZE_LARGE
/* Clears the offset-within-PMD_SIZE bits of an address. */
#define PMD_MASK (~(PMD_SIZE-1))
/* Number of entries needed for one PMD table to span PGDIR_SIZE. */
#define PTRS_PER_PMD (1 << (PGDIR_SHIFT - PMD_SHIFT))
/* Size in bytes of a complete level-1 (PMD) table. */
#define SIZEOF_PMD (PTRS_PER_PMD * sizeof(pmd_t))
/*
* The level-2 index is defined by the difference between the huge
* page size and the normal page size. A PTE is composed of
* PTRS_PER_PTE pte_t's and is the bottom level of the page table.
* Note that the hypervisor docs use PTE for what we call pte_t, so
* this nomenclature is somewhat confusing.
*/
/* Number of small pages that make up one large page. */
#define PTRS_PER_PTE (1 << (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL))
/* Size in bytes of a complete level-2 (PTE) table. */
#define SIZEOF_PTE (PTRS_PER_PTE * sizeof(pte_t))
/*
* Align the vmalloc area to an L2 page table, and leave a guard page
* at the beginning and end. The vmalloc code also puts in an internal
* guard page between each allocation.
*/
/* The raw vmalloc region ends where the huge-vmap region begins. */
#define _VMALLOC_END HUGE_VMAP_BASE
/* Usable vmalloc range: the raw range shrunk by one guard page per end. */
#define VMALLOC_END (_VMALLOC_END - PAGE_SIZE)
#define VMALLOC_START (_VMALLOC_START + PAGE_SIZE)
/* The huge-vmap region spans exactly one PGDIR_SIZE above its base. */
#define HUGE_VMAP_END (HUGE_VMAP_BASE + PGDIR_SIZE)
#ifndef __ASSEMBLY__
/* We have no pud since we are a three-level page table. */
#include <asm-generic/pgtable-nopud.h>
/* A pud is "none" when its raw value is entirely zero. */
static inline int pud_none(pud_t pud)
{
	return !pud_val(pud);
}
/* Return the _PAGE_PRESENT bit of the pud (nonzero when present). */
static inline int pud_present(pud_t pud)
{
	return _PAGE_PRESENT & pud_val(pud);
}
/* Log a bad pmd value, tagged with the file and line of the caller. */
#define pmd_ERROR(e) \
pr_err("%s:%d: bad pmd 0x%016llx.\n", __FILE__, __LINE__, pmd_val(e))
/* Clear a pud entry by clearing the pgd value embedded in it. */
static inline void pud_clear(pud_t *pudp)
{
__pte_clear(&pudp->pgd);
}
/*
 * A pud is "bad" unless, restricted to the _PAGE_ALL bits, it equals
 * exactly _PAGE_TABLE.
 */
static inline int pud_bad(pud_t pud)
{
return ((pud_val(pud) & _PAGE_ALL) != _PAGE_TABLE);
}
/*
 * Return the page-table frame number (ptfn) that a pud_t points at.
 * The value is read from the pgd embedded in the pud.
 */
#define pud_ptfn(pud) hv_pte_get_ptfn((pud).pgd)
/*
* A given kernel pud_t maps to a kernel pmd_t table at a specific
* virtual address. Since kernel pmd_t tables can be aligned at
* sub-page granularity, this macro can return non-page-aligned
* pointers, despite its name.
*
* The ptfn is scaled by HV_LOG2_PAGE_TABLE_ALIGN to form a physical
* address, which __va() then converts to a kernel virtual address.
*/
#define pud_page_vaddr(pud) \
(__va((phys_addr_t)pud_ptfn(pud) << HV_LOG2_PAGE_TABLE_ALIGN))
/*
* A pud_t points to a pmd_t array. Since we can have multiple per
* page, we don't have a one-to-one mapping of pud_t's to pages.
*/
#define pud_page(pud) pfn_to_page(HV_PTFN_TO_PFN(pud_ptfn(pud)))
/* Index of the pud entry that covers "address" within its pud table. */
static inline unsigned long pud_index(unsigned long address)
{
	unsigned long slot = address >> PUD_SHIFT;

	return slot & (PTRS_PER_PUD - 1);
}
/*
 * Return a pointer to the pmd_t for "address" inside the pmd table
 * that *pud points at.
 */
#define pmd_offset(pud, address) \
((pmd_t *)pud_page_vaddr(*(pud)) + pmd_index(address))
/* Store pmdval into *pmdp using the same primitive as for ptes. */
static inline void __set_pmd(pmd_t *pmdp, pmd_t pmdval)
{
set_pte(pmdp, pmdval);
}
/* Create a pmd from a PTFN and pgprot. */
static inline pmd_t ptfn_pmd(unsigned long ptfn, pgprot_t prot)
{
return hv_pte_set_ptfn(prot, ptfn);
}
/* Return the page-table frame number (ptfn) that a pmd_t points at. */
static inline unsigned long pmd_ptfn(pmd_t pmd)
{
return hv_pte_get_ptfn(pmd);
}
/* Clear a pmd entry using the pte-clearing primitive. */
static inline void pmd_clear(pmd_t *pmdp)
{
__pte_clear(pmdp);
}
/*
 * Normalize an address to having the correct high bits set: shift the
 * CHIP_VA_WIDTH()-bit virtual address up to the top of the word, then
 * arithmetic-shift it back down so the upper bits are a sign extension.
 */
#define pgd_addr_normalize pgd_addr_normalize
static inline unsigned long pgd_addr_normalize(unsigned long addr)
{
	long shifted = (long)addr << (CHIP_WORD_SIZE() - CHIP_VA_WIDTH());

	return shifted >> (CHIP_WORD_SIZE() - CHIP_VA_WIDTH());
}
/*
 * We don't define any pgds for these addresses: anything at or above
 * MEM_HV_START, and the hole strictly between MEM_LOW_END and
 * MEM_HIGH_START.
 */
static inline int pgd_addr_invalid(unsigned long addr)
{
	if (addr >= MEM_HV_START)
		return 1;
	return addr > MEM_LOW_END && addr < MEM_HIGH_START;
}
/*
* Use atomic instructions to provide atomicity against the hypervisor.
*/
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
/*
 * Atomically clear HV_PTE_ACCESSED in *ptep and return the previous
 * state of that bit (0 or 1).  "vma" and "addr" are not used.
 */
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long addr, pte_t *ptep)
{
	unsigned long long before =
		__insn_fetchand(&ptep->val, ~HV_PTE_ACCESSED);

	return (before >> HV_PTE_INDEX_ACCESSED) & 0x1;
}
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
/*
 * Atomically clear HV_PTE_WRITABLE in *ptep; the previous value is
 * discarded.  "mm" and "addr" are not used.
 */
static inline void ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
__insn_fetchand(&ptep->val, ~HV_PTE_WRITABLE);
}
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
/*
 * Atomically exchange *ptep with zero and return the previous contents
 * as a pte_t.  "mm" and "addr" are not used.
 */
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
return hv_pte(__insn_exch(&ptep->val, 0UL));
}
#endif /* __ASSEMBLY__ */
#endif /* _ASM_TILE_PGTABLE_64_H */

View file

@ -0,0 +1,161 @@
/*
* Copyright 2011 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*
* 64-bit SMP ticket spinlocks, allowing only a single CPU anywhere
* (the type definitions are in asm/spinlock_types.h)
*/
#ifndef _ASM_TILE_SPINLOCK_64_H
#define _ASM_TILE_SPINLOCK_64_H
/* Shifts and masks for the various fields in "lock". */
#define __ARCH_SPIN_CURRENT_SHIFT 17
#define __ARCH_SPIN_NEXT_MASK 0x7fff
#define __ARCH_SPIN_NEXT_OVERFLOW 0x8000
/*
* Return the "current" portion of a ticket lock value,
* i.e. the number that currently owns the lock.
* This is everything in "val" from bit __ARCH_SPIN_CURRENT_SHIFT upward.
*/
static inline int arch_spin_current(u32 val)
{
return val >> __ARCH_SPIN_CURRENT_SHIFT;
}
/*
* Return the "next" portion of a ticket lock value,
* i.e. the number that the next task to try to acquire the lock will get.
* The overflow bit is masked off and is not part of the result.
*/
static inline int arch_spin_next(u32 val)
{
return val & __ARCH_SPIN_NEXT_MASK;
}
/*
 * The lock is locked if a task would have to wait to get it, i.e. the
 * "current" and "next" tickets in one snapshot of the lock word differ.
 */
static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
	u32 snapshot = lock->lock;
	int owner = arch_spin_current(snapshot);
	int upcoming = arch_spin_next(snapshot);

	return owner != upcoming;
}
/*
 * Bump the current ticket so the next task owns the lock.
 * The wmb() is issued before the atomic add so that stores made while
 * holding the lock are ordered ahead of the release.
 */
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
wmb(); /* guarantee anything modified under the lock is visible */
__insn_fetchadd4(&lock->lock, 1U << __ARCH_SPIN_CURRENT_SHIFT);
}
/* Out-of-line helpers; their definitions are not in this header. */
void arch_spin_unlock_wait(arch_spinlock_t *lock);
/* Slow path for arch_spin_lock(); "val" is the ticket that was drawn. */
void arch_spin_lock_slow(arch_spinlock_t *lock, u32 val);
/*
 * Grab the "next" ticket number and bump it atomically.
 * If the current ticket is not ours, go to the slow path.
 * We also take the slow path if the "next" value overflows: the ticket
 * compared against "current" deliberately keeps the overflow bit.
 */
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
	u32 val = __insn_fetchadd4(&lock->lock, 1);
	u32 ticket = val & (__ARCH_SPIN_NEXT_MASK | __ARCH_SPIN_NEXT_OVERFLOW);

	if (likely(arch_spin_current(val) == ticket))
		return;
	arch_spin_lock_slow(lock, ticket);
}
/* Try to get the lock, and return whether we succeeded. */
int arch_spin_trylock(arch_spinlock_t *lock);
/*
 * We cannot take an interrupt after getting a ticket, so don't enable them.
 * The "flags" argument is therefore ignored.
 */
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
/*
* Read-write spinlocks, allowing multiple readers
* but only one writer.
*
* We use fetchadd() for readers, and fetchor() with the sign bit
* for writers.
*/
#define __WRITE_LOCK_BIT (1 << 31)
/*
 * Return nonzero if the lock value has the writer bit set.  Because
 * __WRITE_LOCK_BIT is the sign bit of a 32-bit int, a sign test suffices.
 */
static inline int arch_write_val_locked(int val)
{
return val < 0; /* Optimize "val & __WRITE_LOCK_BIT". */
}
/**
* arch_read_can_lock - would read_trylock() succeed?
* @rw: the rwlock in question.
*
* True as long as the writer bit is not set in the lock word.
*/
static inline int arch_read_can_lock(arch_rwlock_t *rw)
{
return !arch_write_val_locked(rw->lock);
}
/**
* arch_write_can_lock - would write_trylock() succeed?
* @rw: the rwlock in question.
*
* True only when the lock word is zero (no readers and no writer).
*/
static inline int arch_write_can_lock(arch_rwlock_t *rw)
{
return rw->lock == 0;
}
extern void __read_lock_failed(arch_rwlock_t *rw);
/*
 * Take the lock for reading.  The old lock value returned by
 * __insn_fetchaddgez4() is checked for the writer bit; if it is set,
 * the out-of-line __read_lock_failed() finishes the acquisition.
 */
static inline void arch_read_lock(arch_rwlock_t *rw)
{
	u32 prev = __insn_fetchaddgez4(&rw->lock, 1);

	if (likely(!arch_write_val_locked(prev)))
		return;
	__read_lock_failed(rw);
}
extern void __write_lock_failed(arch_rwlock_t *rw, u32 val);
/*
 * Take the lock for writing by atomically OR-ing in the writer bit.
 * If the previous value was not zero (readers or another writer were
 * present), hand that value to the out-of-line __write_lock_failed().
 */
static inline void arch_write_lock(arch_rwlock_t *rw)
{
	u32 prev = __insn_fetchor4(&rw->lock, __WRITE_LOCK_BIT);

	if (likely(prev == 0))
		return;
	__write_lock_failed(rw, prev);
}
/* Drop a read hold: memory fence, then atomically decrement the count. */
static inline void arch_read_unlock(arch_rwlock_t *rw)
{
__insn_mf();
__insn_fetchadd4(&rw->lock, -1);
}
/* Drop the write hold: memory fence, then reset the lock word to zero. */
static inline void arch_write_unlock(arch_rwlock_t *rw)
{
__insn_mf();
rw->lock = 0;
}
/*
 * Try to take the lock for reading.  Returns nonzero when the old value
 * returned by __insn_fetchaddgez4() did not have the writer bit set.
 */
static inline int arch_read_trylock(arch_rwlock_t *rw)
{
return !arch_write_val_locked(__insn_fetchaddgez4(&rw->lock, 1));
}
/*
 * Try to take the lock for writing; returns 1 on success, 0 otherwise.
 *
 * The writer bit is OR-ed in unconditionally.  If the lock was held only
 * by readers (old value nonzero but writer bit clear), the bit we just
 * set is removed again; if another writer already held it, the bit is
 * theirs and is left alone.
 */
static inline int arch_write_trylock(arch_rwlock_t *rw)
{
	u32 prev = __insn_fetchor4(&rw->lock, __WRITE_LOCK_BIT);

	if (unlikely(prev != 0)) {
		if (!arch_write_val_locked(prev))
			__insn_fetchand4(&rw->lock, ~__WRITE_LOCK_BIT);
		return 0;
	}
	return 1;
}
#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
#endif /* _ASM_TILE_SPINLOCK_64_H */

View file

@ -0,0 +1,55 @@
/*
* Copyright 2011 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*
* Atomically access user memory, but use MMU to avoid propagating
* kernel exceptions.
*/
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/futex.h>
#include <asm/page.h>
#include <asm/processor.h>
/*
* Provide a set of atomic memory operations supporting <asm/futex.h>.
*
* r0: user address to manipulate
* r1: new value to write, or for cmpxchg, old value to compare against
* r2: (cmpxchg only) new value to write
*
* Return __get_user struct, r0 with value, r1 with error.
*
* FUTEX_OP(name, insns...) emits a routine futex_<name> whose body is
* the given instructions followed by a bundle that clears r1 (no error)
* and returns.  The instruction labelled "1:" inside the body is the
* one registered in __ex_table, with get_user_fault as its fixup.
*/
#define FUTEX_OP(name, ...) \
STD_ENTRY(futex_##name) \
__VA_ARGS__; \
{ \
move r1, zero; \
jrp lr \
}; \
STD_ENDPROC(futex_##name); \
.pushsection __ex_table,"a"; \
.quad 1b, get_user_fault; \
.popsection
/*
 * Shared fixup target for every FUTEX_OP routine: set r1 = -EFAULT and
 * return to the original caller through lr.  r0 is not modified here.
 */
.pushsection .fixup,"ax"
get_user_fault:
{ movei r1, -EFAULT; jrp lr }
ENDPROC(get_user_fault)
.popsection
/* cmpxchg: load the compare value (r1) into CMPEXCH_VALUE, then cmpexch4. */
FUTEX_OP(cmpxchg, mtspr CMPEXCH_VALUE, r1; 1: cmpexch4 r0, r0, r2)
/* set: exchange the word at r0 with r1. */
FUTEX_OP(set, 1: exch4 r0, r0, r1)
/* add: fetch-and-add r1 to the word at r0. */
FUTEX_OP(add, 1: fetchadd4 r0, r0, r1)
/* or: fetch-and-OR r1 into the word at r0. */
FUTEX_OP(or, 1: fetchor4 r0, r0, r1)
/* andn: invert r1 (nor with zero), then fetch-and-AND, i.e. clear r1's bits. */
FUTEX_OP(andn, nor r1, r1, zero; 1: fetchand4 r0, r0, r1)

269
arch/tile/kernel/head_64.S Normal file
View file

@ -0,0 +1,269 @@
/*
* Copyright 2011 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*
* TILE startup code.
*/
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/thread_info.h>
#include <asm/processor.h>
#include <asm/asm-offsets.h>
#include <hv/hypervisor.h>
#include <arch/chip.h>
#include <arch/spr_def.h>
/*
* This module contains the entry code for kernel images. It performs the
* minimal setup needed to call the generic C routines.
*/
__HEAD
/*
 * _start: kernel entry point.  Sequence, as coded below:
 *
 *  1. Call hv_init with r0 = _HV_VERSION, r1 = TILE_CHIP,
 *     r2 = TILE_CHIP_REV.
 *  2. Call hv_inquire_asid with r0 = 0 to get a default ASID.
 *  3. Load swapper_pgprot; unless it already has HV_PTE_GLOBAL set,
 *     write the two L0 slots of swapper_pg_dir (.Lsv_data_pmd and
 *     .Lsv_code_pmd) so they point at temp_data_pmd / temp_code_pmd.
 *  4. Jump to hv_install_context with r0 = swapper_pg_dir - PAGE_OFFSET,
 *     r1 = access pte, r2 = ASID, r3 = 0, and lr preset to label 1:.
 *  5. Set the interrupt vector base, derive the cpu number from
 *     hv_inquire_topology, load pc/sp from boot_pc/boot_sp (plus the
 *     per-cpu offset) and jump there.
 */
ENTRY(_start)
/* Notify the hypervisor of what version of the API we want */
{
movei r1, TILE_CHIP
movei r2, TILE_CHIP_REV
}
{
moveli r0, _HV_VERSION
jal hv_init
}
/* Get a reasonable default ASID in r0 */
{
move r0, zero
jal hv_inquire_asid
}
/*
* Install the default page table. The relocation required to
* statically define the table is a bit too complex, so we have
* to plug in the pointer from the L0 to the L1 table by hand.
* We only do this on the first cpu to boot, though, since the
* other CPUs should see a properly-constructed page table.
*/
{
v4int_l r2, zero, r0 /* ASID for hv_install_context */
moveli r4, hw1_last(swapper_pgprot - PAGE_OFFSET)
}
{
shl16insli r4, r4, hw0(swapper_pgprot - PAGE_OFFSET)
}
{
ld r1, r4 /* access_pte for hv_install_context */
}
{
moveli r0, hw1_last(.Lsv_data_pmd - PAGE_OFFSET)
moveli r6, hw1_last(temp_data_pmd - PAGE_OFFSET)
}
{
/* After initializing swapper_pgprot, HV_PTE_GLOBAL is set. */
bfextu r7, r1, HV_PTE_INDEX_GLOBAL, HV_PTE_INDEX_GLOBAL
/* NOTE(review): presumably drops the just-read line at r4 from the local cache — confirm. */
inv r4
}
/* r7 != 0: the L0 entries were already written, so skip the stores. */
bnez r7, .Lno_write
{
shl16insli r0, r0, hw0(.Lsv_data_pmd - PAGE_OFFSET)
shl16insli r6, r6, hw0(temp_data_pmd - PAGE_OFFSET)
}
{
/* Cut off the low bits of the PT address. */
shrui r6, r6, HV_LOG2_PAGE_TABLE_ALIGN
/* Start with our access pte. */
move r5, r1
}
{
/* Stuff the address into the page table pointer slot of the PTE. */
bfins r5, r6, HV_PTE_INDEX_PTFN, \
HV_PTE_INDEX_PTFN + HV_PTE_PTFN_BITS - 1
}
{
/* Store the L0 data PTE. */
st r0, r5
/* Advance r6 from temp_data_pmd's ptfn to temp_code_pmd's ptfn. */
addli r6, r6, (temp_code_pmd - temp_data_pmd) >> \
HV_LOG2_PAGE_TABLE_ALIGN
}
{
/* Advance r0 from the data slot to the code slot of the L0 table. */
addli r0, r0, .Lsv_code_pmd - .Lsv_data_pmd
bfins r5, r6, HV_PTE_INDEX_PTFN, \
HV_PTE_INDEX_PTFN + HV_PTE_PTFN_BITS - 1
}
/* Store the L0 code PTE. */
st r0, r5
.Lno_write:
/*
 * Build the address of label 1: in lr by hand and use a plain jump, so
 * that hv_install_context returns to 1: rather than to the bundle
 * following the jump.
 */
moveli lr, hw2_last(1f)
{
shl16insli lr, lr, hw1(1f)
moveli r0, hw1_last(swapper_pg_dir - PAGE_OFFSET)
}
{
shl16insli lr, lr, hw0(1f)
shl16insli r0, r0, hw0(swapper_pg_dir - PAGE_OFFSET)
}
{
move r3, zero
j hv_install_context
}
1:
/* Install the interrupt base. */
moveli r0, hw2_last(MEM_SV_START)
shl16insli r0, r0, hw1(MEM_SV_START)
shl16insli r0, r0, hw0(MEM_SV_START)
mtspr SPR_INTERRUPT_VECTOR_BASE_K, r0
/*
* Get our processor number and save it away in SAVE_K_0.
* Extract stuff from the topology structure: r4 = y, r6 = x,
* r5 = width. FIXME: consider whether we want to just make these
* 64-bit values (and if so fix smp_topology write below, too).
*/
jal hv_inquire_topology
{
v4int_l r5, zero, r1 /* r5 = width */
shrui r4, r0, 32 /* r4 = y */
}
{
v4int_l r6, zero, r0 /* r6 = x */
mul_lu_lu r4, r4, r5
}
{
add r4, r4, r6 /* r4 == cpu == y*width + x */
}
#ifdef CONFIG_SMP
/*
* Load up our per-cpu offset. When the first (master) tile
* boots, this value is still zero, so we will load boot_pc
* with start_kernel, and boot_sp with init_stack + THREAD_SIZE.
* The master tile initializes the per-cpu offset array, so that
* when subsequent (secondary) tiles boot, they will instead load
* from their per-cpu versions of boot_sp and boot_pc.
*/
moveli r5, hw2_last(__per_cpu_offset)
shl16insli r5, r5, hw1(__per_cpu_offset)
shl16insli r5, r5, hw0(__per_cpu_offset)
/* r5 = &__per_cpu_offset[cpu] (8-byte entries), then load the entry. */
shl3add r5, r4, r5
ld r5, r5
bnez r5, 1f
/*
* Save the width and height to the smp_topology variable
* for later use.
*/
moveli r0, hw2_last(smp_topology + HV_TOPOLOGY_WIDTH_OFFSET)
shl16insli r0, r0, hw1(smp_topology + HV_TOPOLOGY_WIDTH_OFFSET)
shl16insli r0, r0, hw0(smp_topology + HV_TOPOLOGY_WIDTH_OFFSET)
st r0, r1
1:
#else
move r5, zero
#endif
/* Load and go with the correct pc and sp. */
{
moveli r1, hw2_last(boot_sp)
moveli r0, hw2_last(boot_pc)
}
{
shl16insli r1, r1, hw1(boot_sp)
shl16insli r0, r0, hw1(boot_pc)
}
{
shl16insli r1, r1, hw0(boot_sp)
shl16insli r0, r0, hw0(boot_pc)
}
{
/* r5 is the per-cpu offset (zero on the first cpu or without SMP). */
add r1, r1, r5
add r0, r0, r5
}
ld r0, r0
ld sp, r1
/* Combine the stack pointer with the cpu number in a single word. */
or r4, sp, r4
mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */
addi sp, sp, -STACK_TOP_DELTA
{
move lr, zero /* stop backtraces in the called function */
jr r0
}
ENDPROC(_start)
/* One page-aligned page of zero bytes. */
__PAGE_ALIGNED_BSS
.align PAGE_SIZE
ENTRY(empty_zero_page)
.fill PAGE_SIZE,1,0
END(empty_zero_page)
/*
 * PTE cpa, bits1: emit one 8-byte page table entry.
 *   cpa   - address that HV_CPA_TO_PFN() converts into the PFN field
 *   bits1 - extra permission bits OR-ed into the entry
 * Every entry is PAGE | DIRTY | PRESENT | ACCESSED | GLOBAL with cache
 * mode HV_PTE_MODE_CACHE_NO_L3.
 */
.macro PTE cpa, bits1
.quad HV_PTE_PAGE | HV_PTE_DIRTY | HV_PTE_PRESENT | HV_PTE_ACCESSED |\
HV_PTE_GLOBAL | (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE) |\
(\bits1) | (HV_CPA_TO_PFN(\cpa) << HV_PTE_INDEX_PFN)
.endm
/*
 * The initial L0 page table.  Only two slots are given labels, the ones
 * indexed by PAGE_OFFSET and by MEM_SV_START.  Both are assembled as
 * zero and are filled in at boot by _start; the .org directives place
 * each slot at its index and pad the table out to HV_L0_SIZE.
 */
__PAGE_ALIGNED_DATA
.align PAGE_SIZE
ENTRY(swapper_pg_dir)
.org swapper_pg_dir + HV_L0_INDEX(PAGE_OFFSET) * HV_PTE_SIZE
.Lsv_data_pmd:
.quad 0 /* PTE temp_data_pmd - PAGE_OFFSET, 0 */
.org swapper_pg_dir + HV_L0_INDEX(MEM_SV_START) * HV_PTE_SIZE
.Lsv_code_pmd:
.quad 0 /* PTE temp_code_pmd - PAGE_OFFSET, 0 */
.org swapper_pg_dir + HV_L0_SIZE
END(swapper_pg_dir)
.align HV_PAGE_TABLE_ALIGN
ENTRY(temp_data_pmd)
/*
* We fill the PAGE_OFFSET pmd with huge pages with
* VA = PA + PAGE_OFFSET. We remap things with more precise access
* permissions later.
*
* HV_L1_ENTRIES readable+writable entries are emitted, covering
* physical addresses from 0 upward in HV_PAGE_SIZE_LARGE steps.
*/
.set addr, 0
.rept HV_L1_ENTRIES
PTE addr, HV_PTE_READABLE | HV_PTE_WRITABLE
.set addr, addr + HV_PAGE_SIZE_LARGE
.endr
.org temp_data_pmd + HV_L1_SIZE
END(temp_data_pmd)
.align HV_PAGE_TABLE_ALIGN
ENTRY(temp_code_pmd)
/*
* We fill the MEM_SV_START pmd with huge pages with
* VA = PA + MEM_SV_START. We remap things with more precise access
* permissions later.
*
* HV_L1_ENTRIES readable+executable entries are emitted, covering
* physical addresses from 0 upward in HV_PAGE_SIZE_LARGE steps.
*/
.set addr, 0
.rept HV_L1_ENTRIES
PTE addr, HV_PTE_READABLE | HV_PTE_EXECUTABLE
.set addr, addr + HV_PAGE_SIZE_LARGE
.endr
.org temp_code_pmd + HV_L1_SIZE
END(temp_code_pmd)
/*
* Isolate swapper_pgprot to its own cache line, since each cpu
* starting up will read it using VA-is-PA and local homing.
* This would otherwise likely conflict with other data on the cache
* line, once we have set its permanent home in the page tables.
*
* The assembled initial value has HV_PTE_PRESENT and the cache mode
* only; HV_PTE_GLOBAL is not set here, which is the bit _start tests
* to decide whether the L0 entries still need to be written.
*/
__INITDATA
.align CHIP_L2_LINE_SIZE()
ENTRY(swapper_pgprot)
.quad HV_PTE_PRESENT | (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE)
.align CHIP_L2_LINE_SIZE()
END(swapper_pgprot)

1231
arch/tile/kernel/intvec_64.S Normal file

File diff suppressed because it is too large Load diff

View file

@ -630,8 +630,8 @@ out:
#ifdef CONFIG_COMPAT
long compat_sys_execve(const char __user *path,
const compat_uptr_t __user *argv,
const compat_uptr_t __user *envp,
compat_uptr_t __user *argv,
compat_uptr_t __user *envp,
struct pt_regs *regs)
{
long error;

145
arch/tile/kernel/regs_64.S Normal file
View file

@ -0,0 +1,145 @@
/*
* Copyright 2011 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
#include <linux/linkage.h>
#include <asm/system.h>
#include <asm/ptrace.h>
#include <asm/asm-offsets.h>
#include <arch/spr_def.h>
#include <asm/processor.h>
/*
* See <asm/system.h>; called with prev and next task_struct pointers.
* "prev" is returned in r0 for _switch_to and also for ret_from_fork.
*
* We want to save pc/sp in "prev", and get the new pc/sp from "next".
* We also need to save all the callee-saved registers on the stack.
*
* Intel enables/disables access to the hardware cycle counter in
* seccomp (secure computing) environments if necessary, based on
* has_secure_computing(). We might want to do this at some point,
* though it would require virtualizing the other SPRs under WORLD_ACCESS.
*
* Since we're saving to the stack, we omit sp from this list.
* And for parallels with other architectures, we save lr separately,
* in the thread_struct itself (as the "pc" field).
*
* This code also needs to be aligned with process.c copy_thread()
*/
#if CALLEE_SAVED_REGS_COUNT != 24
# error Mismatch between <asm/system.h> and kernel/entry.S
#endif
/* Frame: two 8-byte header slots plus one slot per counted register. */
#define FRAME_SIZE ((2 + CALLEE_SAVED_REGS_COUNT) * 8)
/* Store/load one register at [r12], then advance r12 by 8 bytes. */
#define SAVE_REG(r) { st r12, r; addi r12, r12, 8 }
#define LOAD_REG(r) { ld r, r12; addi r12, r12, 8 }
/*
 * Apply f() to each register r30 through r52.
 * NOTE(review): that is 23 registers, while the check above requires
 * CALLEE_SAVED_REGS_COUNT == 24 — confirm the extra frame slot is
 * intentional.
 */
#define FOR_EACH_CALLEE_SAVED_REG(f) \
f(r30); f(r31); \
f(r32); f(r33); f(r34); f(r35); f(r36); f(r37); f(r38); f(r39); \
f(r40); f(r41); f(r42); f(r43); f(r44); f(r45); f(r46); f(r47); \
f(r48); f(r49); f(r50); f(r51); f(r52);
/*
 * __switch_to
 *   In:  r0 = prev task_struct, r1 = next task_struct,
 *        r2 = value written to SPR_SYSTEM_SAVE_K_0 for the new task
 *   Out: r0 unchanged (prev)
 *   Uses r3, r4, r10-r13 as scratch.
 */
STD_ENTRY_SECTION(__switch_to, .sched.text)
{
/* Remember the incoming sp and store lr at [sp]. */
move r10, sp
st sp, lr
}
{
/* Allocate the frame; r11 addresses its second 8-byte slot. */
addli r11, sp, -FRAME_SIZE + 8
addli sp, sp, -FRAME_SIZE
}
{
/* Store the old sp in that slot; r4 = &next->thread ksp field. */
st r11, r10
addli r4, r1, TASK_STRUCT_THREAD_KSP_OFFSET
}
{
ld r13, r4 /* Load new sp to a temp register early. */
/* The register save area begins 16 bytes into the frame. */
addi r12, sp, 16
}
FOR_EACH_CALLEE_SAVED_REG(SAVE_REG)
/* Record prev's sp and resume pc (lr) in its thread fields. */
addli r3, r0, TASK_STRUCT_THREAD_KSP_OFFSET
{
st r3, sp
addli r3, r0, TASK_STRUCT_THREAD_PC_OFFSET
}
{
st r3, lr
addli r4, r1, TASK_STRUCT_THREAD_PC_OFFSET
}
{
/* Fetch next's resume pc; r12 = start of next's register save area. */
ld lr, r4
addi r12, r13, 16
}
{
/* Update sp and ksp0 simultaneously to avoid backtracer warnings. */
move sp, r13
mtspr SPR_SYSTEM_SAVE_K_0, r2
}
FOR_EACH_CALLEE_SAVED_REG(LOAD_REG)
.L__switch_to_pc:
{
addli sp, sp, FRAME_SIZE
jrp lr /* r0 is still valid here, so return it */
}
STD_ENDPROC(__switch_to)
/*
 * Return a suitable address for the backtracer for suspended threads.
 * The address of .L__switch_to_pc is formed pc-relatively from the
 * value that "lnk" places in r0.
 * NOTE(review): presumably lnk yields the address of the following
 * bundle, which is where "." is evaluated — confirm.
 */
STD_ENTRY_SECTION(get_switch_to_pc, .sched.text)
lnk r0
{
addli r0, r0, .L__switch_to_pc - .
jrp lr
}
STD_ENDPROC(get_switch_to_pc)
/*
 * get_pt_regs
 *   In:  r0 = base address of the register block to fill in
 *   Out: r0 = the same base address (restored before returning)
 *   Clobbers r1.
 *
 * Stores r0-r52, tp and sp in consecutive 8-byte slots, then lr, a pc
 * obtained with "lnk", an EX1 word built from the ICS state and
 * KERNEL_PL, and zeroes the faultnum and orig_r0 slots.
 */
STD_ENTRY(get_pt_regs)
.irp reg, r0, r1, r2, r3, r4, r5, r6, r7, \
r8, r9, r10, r11, r12, r13, r14, r15, \
r16, r17, r18, r19, r20, r21, r22, r23, \
r24, r25, r26, r27, r28, r29, r30, r31, \
r32, r33, r34, r35, r36, r37, r38, r39, \
r40, r41, r42, r43, r44, r45, r46, r47, \
r48, r49, r50, r51, r52, tp, sp
{
st r0, \reg
addi r0, r0, 8
}
.endr
{
/* r0 now addresses the lr slot; store lr and step to the pc slot. */
st r0, lr
addi r0, r0, PTREGS_OFFSET_PC - PTREGS_OFFSET_LR
}
lnk r1
{
st r0, r1
addi r0, r0, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC
}
/* Build the EX1 value: ICS bit shifted into place, OR-ed with KERNEL_PL. */
mfspr r1, INTERRUPT_CRITICAL_SECTION
shli r1, r1, SPR_EX_CONTEXT_1_1__ICS_SHIFT
ori r1, r1, KERNEL_PL
{
st r0, r1
addi r0, r0, PTREGS_OFFSET_FAULTNUM - PTREGS_OFFSET_EX1
}
{
st r0, zero /* clear faultnum */
addi r0, r0, PTREGS_OFFSET_ORIG_R0 - PTREGS_OFFSET_FAULTNUM
}
{
st r0, zero /* clear orig_r0 */
addli r0, r0, -PTREGS_OFFSET_ORIG_R0 /* restore r0 to base */
}
jrp lr
STD_ENDPROC(get_pt_regs)

File diff suppressed because it is too large Load diff

71
arch/tile/lib/memchr_64.c Normal file
View file

@ -0,0 +1,71 @@
/*
* Copyright 2011 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
#include <linux/types.h>
#include <linux/string.h>
#include <linux/module.h>
/*
 * memchr - locate the first byte equal to (uint8_t)c in the n bytes at s.
 *
 * Returns a pointer to the matching byte, or NULL if there is none
 * (including when n == 0).  The buffer is scanned one aligned 64-bit
 * word at a time using the per-byte compare instruction.
 */
void *memchr(const void *s, int c, size_t n)
{
	const uint64_t *cur;
	const uint64_t *final_word;
	const char *final_byte;
	uintptr_t start;
	uint64_t pattern, lead_mask, word, hits;
	char *found;

	/* Never touch memory for an empty range. */
	if (__builtin_expect(n == 0, 0))
		return NULL;

	/* Round the start address down to its containing aligned word. */
	start = (uintptr_t) s;
	cur = (const uint64_t *)(start & -8);

	/* Replicate the byte we are looking for into all eight lanes. */
	pattern = 0x0101010101010101ULL * (uint8_t) c;

	/*
	 * Load the first word and doctor the bytes that precede the
	 * buffer so that they can never compare equal: they are forced to
	 * all-ones and then XOR-ed with the pattern, leaving the
	 * complement of the target byte in each of those lanes.
	 *
	 * The shift count relies on shift counts being taken mod 64.
	 */
	lead_mask = (1ULL << (start << 3)) - 1;
	word = (*cur | lead_mask) ^ (pattern & lead_mask);

	/* Last byte of the buffer, and the aligned word that holds it. */
	final_byte = (const char *)s + n - 1;
	final_word = (const uint64_t *)((uintptr_t) final_byte & -8);

	for (;;) {
		hits = __insn_v1cmpeq(word, pattern);
		if (hits != 0)
			break;
		/* The word just examined was the last one: no match. */
		if (__builtin_expect(cur == final_word, 0))
			return NULL;
		word = *++cur;
	}

	/*
	 * Some lane matched, but in the final word the match may lie
	 * beyond the end of the buffer; reject it in that case.
	 */
	found = ((char *)cur) + (__insn_ctz(hits) >> 3);
	if (found > final_byte)
		return NULL;
	return found;
}
EXPORT_SYMBOL(memchr);

220
arch/tile/lib/memcpy_64.c Normal file
View file

@ -0,0 +1,220 @@
/*
* Copyright 2011 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
#include <linux/types.h>
#include <linux/string.h>
#include <linux/module.h>
#define __memcpy memcpy
/* EXPORT_SYMBOL() is in arch/tile/lib/exports.c since this should be asm. */
/* Must be 8 bytes in size. */
#define word_t uint64_t
/* The unrolled copy loop below is written for exactly these line sizes. */
#if CHIP_L2_LINE_SIZE() != 64 && CHIP_L2_LINE_SIZE() != 128
#error "Assumes 64 or 128 byte line size"
#endif
/* How many cache lines ahead should we prefetch? */
#define PREFETCH_LINES_AHEAD 3
/*
* Provide "base versions" of load and store for the normal code path.
* The kernel provides other versions for userspace copies.
* ST(p, v) is a plain store through p; LD(p) is a plain load from p.
*/
#define ST(p, v) (*(p) = (v))
#define LD(p) (*(p))
#ifndef USERCOPY_FUNC
#define ST1 ST
#define ST2 ST
#define ST4 ST
#define ST8 ST
#define LD1 LD
#define LD2 LD
#define LD4 LD
#define LD8 LD
#define RETVAL dstv
/*
 * memcpy - copy n bytes from srcv to dstv and return dstv.
 *
 * Every access goes through the LDn/STn macros so that this same body
 * can be compiled again as a user-copy routine (see the #else branch).
 */
void *memcpy(void *__restrict dstv, const void *__restrict srcv, size_t n)
#else
/*
 * Special kernel version will provide implementation of the LDn/STn
 * macros to return a count of uncopied bytes due to mm fault.
 */
#define RETVAL 0
int USERCOPY_FUNC(void *__restrict dstv, const void *__restrict srcv, size_t n)
#endif
{
	char *__restrict dst1 = (char *)dstv;
	const char *__restrict src1 = (const char *)srcv;
	const char *__restrict src1_end;
	const char *__restrict prefetch;
	word_t *__restrict dst8;    /* 8-byte pointer to destination memory. */
	word_t final; /* Final bytes to write to trailing word, if any */
	long i;

	/* Short copies are done a byte at a time. */
	if (n < 16) {
		for (; n; n--)
			ST1(dst1++, LD1(src1++));
		return RETVAL;
	}

	/*
	 * Locate the end of source memory we will copy.  Don't
	 * prefetch past this.
	 */
	src1_end = src1 + n - 1;

	/*
	 * Prefetch ahead a few cache lines, but not past the end: once
	 * the prefetch pointer reaches the end of the source, fall back
	 * to the start of the buffer instead.
	 */
	prefetch = src1;
	for (i = 0; i < PREFETCH_LINES_AHEAD; i++) {
		__insn_prefetch(prefetch);
		prefetch += CHIP_L2_LINE_SIZE();
		prefetch = (prefetch < src1_end) ? prefetch : src1;
	}

	/* Copy bytes until dst is word-aligned. */
	for (; (uintptr_t)dst1 & (sizeof(word_t) - 1); n--)
		ST1(dst1++, LD1(src1++));

	/* 8-byte pointer to destination memory. */
	dst8 = (word_t *)dst1;

	if (__builtin_expect((uintptr_t)src1 & (sizeof(word_t) - 1), 0)) {
		/*
		 * Misaligned copy.  Copy 8 bytes at a time, but don't
		 * bother with other fanciness.
		 *
		 * TODO: Consider prefetching and using wh64 as well.
		 */

		/* Create an aligned src8. */
		const word_t *__restrict src8 =
			(const word_t *)((uintptr_t)src1 & -sizeof(word_t));
		word_t b;

		word_t a = LD8(src8++);
		for (; n >= sizeof(word_t); n -= sizeof(word_t)) {
			b = LD8(src8++);
			a = __insn_dblalign(a, b, src1);
			ST8(dst8++, a);
			a = b;
		}

		if (n == 0)
			return RETVAL;

		/*
		 * Only read the next source word if it still overlaps the
		 * source buffer.  Use LD8 rather than a raw dereference so
		 * that user-copy builds keep their fault handling here too.
		 */
		b = ((const char *)src8 <= src1_end) ? LD8(src8) : 0;

		/*
		 * Final source bytes to write to trailing partial
		 * word, if any.
		 */
		final = __insn_dblalign(a, b, src1);
	} else {
		/* Aligned copy. */

		const word_t* __restrict src8 = (const word_t *)src1;

		/* src8 and dst8 are both word-aligned. */
		if (n >= CHIP_L2_LINE_SIZE()) {
			/* Copy until 'dst' is cache-line-aligned. */
			for (; (uintptr_t)dst8 & (CHIP_L2_LINE_SIZE() - 1);
			     n -= sizeof(word_t))
				ST8(dst8++, LD8(src8++));

			for (; n >= CHIP_L2_LINE_SIZE(); ) {
				__insn_wh64(dst8);

				/*
				 * Prefetch and advance to next line
				 * to prefetch, but don't go past the end:
				 * fall back to the current source line
				 * once the end has been reached.
				 */
				__insn_prefetch(prefetch);
				prefetch += CHIP_L2_LINE_SIZE();
				prefetch = (prefetch < src1_end) ? prefetch :
					(const char *)src8;

				/*
				 * Copy an entire cache line.  Manually
				 * unrolled to avoid idiosyncracies of
				 * compiler unrolling.
				 */
#define COPY_WORD(offset) ({ ST8(dst8+offset, LD8(src8+offset)); n -= 8; })
				COPY_WORD(0);
				COPY_WORD(1);
				COPY_WORD(2);
				COPY_WORD(3);
				COPY_WORD(4);
				COPY_WORD(5);
				COPY_WORD(6);
				COPY_WORD(7);
#if CHIP_L2_LINE_SIZE() == 128
				COPY_WORD(8);
				COPY_WORD(9);
				COPY_WORD(10);
				COPY_WORD(11);
				COPY_WORD(12);
				COPY_WORD(13);
				COPY_WORD(14);
				COPY_WORD(15);
#elif CHIP_L2_LINE_SIZE() != 64
# error Fix code that assumes particular L2 cache line sizes
#endif

				dst8 += CHIP_L2_LINE_SIZE() / sizeof(word_t);
				src8 += CHIP_L2_LINE_SIZE() / sizeof(word_t);
			}
		}

		for (; n >= sizeof(word_t); n -= sizeof(word_t))
			ST8(dst8++, LD8(src8++));

		if (__builtin_expect(n == 0, 1))
			return RETVAL;

		final = LD8(src8);
	}

	/* n != 0 if we get here.  Write out any trailing bytes. */
	dst1 = (char *)dst8;
	if (n & 4) {
		ST4((uint32_t *)dst1, final);
		dst1 += 4;
		final >>= 32;
		n &= 3;
	}
	if (n & 2) {
		ST2((uint16_t *)dst1, final);
		dst1 += 2;
		final >>= 16;
		n &= 1;
	}
	if (n)
		ST1((uint8_t *)dst1, final);

	return RETVAL;
}
/*
 * When built as a user-copy variant, drop the per-variant accessor
 * macros and USERCOPY_FUNC itself so that they can be defined afresh.
 */
#ifdef USERCOPY_FUNC
#undef ST1
#undef ST2
#undef ST4
#undef ST8
#undef LD1
#undef LD2
#undef LD4
#undef LD8
#undef USERCOPY_FUNC
#endif

View file

@ -0,0 +1,86 @@
/*
* Copyright 2011 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*
* Do memcpy(), but trap and return "n" when a load or store faults.
*
* Note: this idiom only works when memcpy() compiles to a leaf function.
* If "sp" is updated during memcpy, the "jrp lr" will be incorrect.
*
* Also note that we are capturing "n" from the containing scope here.
*/
/*
 * _ST(p, inst, v): store v to *p with instruction "inst".
 * The store is labelled 1: and paired in __ex_table with the fixup at
 * 2:, which lives in .coldtext.memcpy, moves "n" (operand %2) into r0
 * and returns through lr.
 */
#define _ST(p, inst, v) \
({ \
asm("1: " #inst " %0, %1;" \
".pushsection .coldtext.memcpy,\"ax\";" \
"2: { move r0, %2; jrp lr };" \
".section __ex_table,\"a\";" \
".quad 1b, 2b;" \
".popsection" \
: "=m" (*(p)) : "r" (v), "r" (n)); \
})
/*
 * _LD(p, inst): load from *p with instruction "inst" and yield the
 * loaded value as an unsigned long.  Fault handling is the same as for
 * _ST: the fixup at 2: moves "n" into r0 and returns through lr.
 */
#define _LD(p, inst) \
({ \
unsigned long __v; \
asm("1: " #inst " %0, %1;" \
".pushsection .coldtext.memcpy,\"ax\";" \
"2: { move r0, %2; jrp lr };" \
".section __ex_table,\"a\";" \
".quad 1b, 2b;" \
".popsection" \
: "=r" (__v) : "m" (*(p)), "r" (n)); \
__v; \
})
/* Copy to user space: stores are fault-protected, loads are plain. */
#define USERCOPY_FUNC __copy_to_user_inatomic
#define ST1(p, v) _ST((p), st1, (v))
#define ST2(p, v) _ST((p), st2, (v))
#define ST4(p, v) _ST((p), st4, (v))
#define ST8(p, v) _ST((p), st, (v))
#define LD1 LD
#define LD2 LD
#define LD4 LD
#define LD8 LD
#include "memcpy_64.c"
/* Copy from user space: loads are fault-protected, stores are plain. */
#define USERCOPY_FUNC __copy_from_user_inatomic
#define ST1 ST
#define ST2 ST
#define ST4 ST
#define ST8 ST
#define LD1(p) _LD((p), ld1u)
#define LD2(p) _LD((p), ld2u)
#define LD4(p) _LD((p), ld4u)
#define LD8(p) _LD((p), ld)
#include "memcpy_64.c"
/* Copy user to user: both loads and stores are fault-protected. */
#define USERCOPY_FUNC __copy_in_user_inatomic
#define ST1(p, v) _ST((p), st1, (v))
#define ST2(p, v) _ST((p), st2, (v))
#define ST4(p, v) _ST((p), st4, (v))
#define ST8(p, v) _ST((p), st, (v))
#define LD1(p) _LD((p), ld1u)
#define LD2(p) _LD((p), ld2u)
#define LD4(p) _LD((p), ld4u)
#define LD8(p) _LD((p), ld)
#include "memcpy_64.c"
/*
 * Copy "n" bytes from user space via __copy_from_user_inatomic() and, if
 * that reports a nonzero residue, zero the last "residue" bytes of the
 * kernel destination buffer.  The residue is returned unchanged.
 */
unsigned long __copy_from_user_zeroing(void *to, const void __user *from,
				       unsigned long n)
{
	unsigned long residue = __copy_from_user_inatomic(to, from, n);

	if (unlikely(residue != 0)) {
		/* First byte of the tail that was left unwritten. */
		char *tail = (char *)to + n - residue;

		memset(tail, 0, residue);
	}

	return residue;
}

145
arch/tile/lib/memset_64.c Normal file
View file

@ -0,0 +1,145 @@
/*
* Copyright 2011 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
#include <arch/chip.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/module.h>
#undef memset
/*
 * Fill the first "n" bytes at "s" with the byte value "c" and return "s".
 *
 * Small requests (n < BYTE_CUTOFF) use a plain byte loop.  Larger ones
 * byte-fill until both the pointer and the remaining length are multiples
 * of 8, then store 64-bit words; when at least one whole cache line is
 * covered, lines are first hinted with wh64 in bounded batches.
 *
 * NOTE(review): n64 is an "int" holding n >> 3, so extremely large "n"
 * would not fit -- confirm callers never pass such sizes.
 */
void *memset(void *s, int c, size_t n)
{
uint64_t *out64;
int n64, to_align64;
uint64_t v64;
uint8_t *out8 = s;
/* Experimentation shows that a trivial tight loop is a win up until
* around a size of 20, where writing a word at a time starts to win.
*/
#define BYTE_CUTOFF 20
#if BYTE_CUTOFF < 7
/* This must be at least this big, or the alignment code later
* on (which may write up to 7 leading bytes) doesn't work.
*/
#error "BYTE_CUTOFF is too small"
#endif
if (n < BYTE_CUTOFF) {
/* Strangely, this turns out to be the tightest way to
* write this loop.
*/
if (n != 0) {
do {
/* Strangely, combining these into one line
* performs worse.
*/
*out8 = c;
out8++;
} while (--n != 0);
}
return s;
}
/* Align 'out8'. We know n >= BYTE_CUTOFF (which is >= 7), so the at
* most 7 bytes written here won't go past the end.
*/
while (((uintptr_t) out8 & 7) != 0) {
*out8++ = c;
--n;
}
/* Align 'n' by filling the trailing (n & 7) bytes from the end. */
while (n & 7)
out8[--n] = c;
out64 = (uint64_t *) out8;
n64 = n >> 3;
/* Tile input byte out to 64 bits: replicate the low byte of 'c'
* into all eight byte lanes by multiplication.
*/
v64 = 0x0101010101010101ULL * (uint8_t)c;
/* This must be at least 8 or the following loop doesn't work. */
#define CACHE_LINE_SIZE_IN_DOUBLEWORDS (CHIP_L2_LINE_SIZE() / 8)
/* Determine how many words we need to emit before the 'out64'
* pointer becomes aligned modulo the cache line size.
*/
to_align64 = (-((uintptr_t)out64 >> 3)) &
(CACHE_LINE_SIZE_IN_DOUBLEWORDS - 1);
/* Only bother aligning and using wh64 if there is at least
* one full cache line to process. This check also prevents
* overrunning the end of the buffer with alignment words.
*/
if (to_align64 <= n64 - CACHE_LINE_SIZE_IN_DOUBLEWORDS) {
int lines_left;
/* Align out64 mod the cache line size so we can use wh64. */
n64 -= to_align64;
for (; to_align64 != 0; to_align64--) {
*out64 = v64;
out64++;
}
/* Use unsigned divide to turn this into a right shift. */
lines_left = (unsigned)n64 / CACHE_LINE_SIZE_IN_DOUBLEWORDS;
do {
/* Only wh64 a few lines at a time, so we don't
* exceed the maximum number of victim lines.
*/
int x = ((lines_left < CHIP_MAX_OUTSTANDING_VICTIMS())
? lines_left
: CHIP_MAX_OUTSTANDING_VICTIMS());
uint64_t *wh = out64;
int i = x;
int j;
lines_left -= x;
/* Issue wh64 on each of the next 'x' cache lines. */
do {
__insn_wh64(wh);
wh += CACHE_LINE_SIZE_IN_DOUBLEWORDS;
} while (--i);
/* Fill those 'x' lines, four doublewords per iteration. */
for (j = x * (CACHE_LINE_SIZE_IN_DOUBLEWORDS / 4);
j != 0; j--) {
*out64++ = v64;
*out64++ = v64;
*out64++ = v64;
*out64++ = v64;
}
} while (lines_left != 0);
/* We processed all full lines above, so only this many
* words remain to be processed.
*/
n64 &= CACHE_LINE_SIZE_IN_DOUBLEWORDS - 1;
}
/* Now handle any leftover values. */
if (n64 != 0) {
do {
*out64 = v64;
out64++;
} while (--n64 != 0);
}
return s;
}
EXPORT_SYMBOL(memset);

104
arch/tile/lib/spinlock_64.c Normal file
View file

@ -0,0 +1,104 @@
/*
* Copyright 2011 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
#include <linux/spinlock.h>
#include <linux/module.h>
#include <asm/processor.h>
#include "spinlock_common.h"
/*
* Read the spinlock value without allocating in our cache and without
* causing an invalidation to another cpu with a copy of the cacheline.
* This is important when we are spinning waiting for the lock.
*/
static inline u32 arch_spin_read_noalloc(void *lock)
{
return atomic_cmpxchg((atomic_t *)lock, -1, -1);
}
/*
* Wait until the high bits (current) match my ticket.
* If we notice the overflow bit set on entry, we clear it.
*/
void arch_spin_lock_slow(arch_spinlock_t *lock, u32 my_ticket)
{
	u32 behind;

	/* Our ticket carried the overflow bit: clear it in the lock and locally. */
	if (unlikely(my_ticket & __ARCH_SPIN_NEXT_OVERFLOW)) {
		__insn_fetchand4(&lock->lock, ~__ARCH_SPIN_NEXT_OVERFLOW);
		my_ticket &= ~__ARCH_SPIN_NEXT_OVERFLOW;
	}

	/*
	 * Poll until "current" reaches our ticket, backing off in proportion
	 * to how many tickets are still ahead of us.
	 */
	do {
		u32 snapshot = arch_spin_read_noalloc(lock);

		behind = my_ticket - arch_spin_current(snapshot);
		if (behind != 0)
			relax((128 / CYCLES_PER_RELAX_LOOP) * behind);
	} while (behind != 0);
}
EXPORT_SYMBOL(arch_spin_lock_slow);
/*
* Check the lock to see if it is plausible, and try to get it with cmpxchg().
*/
int arch_spin_trylock(arch_spinlock_t *lock)
{
	int acquired = 0;
	u32 seen = arch_spin_read_noalloc(lock);

	/*
	 * Only attempt the cmpxchg when "current" equals "next" in the value
	 * we sampled; success means the word still held that value.
	 */
	if (!unlikely(arch_spin_current(seen) != arch_spin_next(seen)))
		acquired = cmpxchg(&lock->lock, seen,
				   (seen + 1) & ~__ARCH_SPIN_NEXT_OVERFLOW)
			== seen;

	return acquired;
}
EXPORT_SYMBOL(arch_spin_trylock);
/* Spin, with delay_backoff() between polls, until the lock is not held. */
void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
	u32 spins;

	for (spins = 0; arch_spin_is_locked(lock); spins++)
		delay_backoff(spins);
}
EXPORT_SYMBOL(arch_spin_unlock_wait);
/*
* If the read lock fails due to a writer, we retry periodically
* until the value is positive and we write our incremented reader count.
*/
void __read_lock_failed(arch_rwlock_t *rw)
{
	int attempt = 0;

	for (;;) {
		u32 prev;

		/* Back off first, then retry the reader-count increment. */
		delay_backoff(attempt++);
		prev = __insn_fetchaddgez4(&rw->lock, 1);
		if (!unlikely(arch_write_val_locked(prev)))
			return;
	}
}
EXPORT_SYMBOL(__read_lock_failed);
/*
* If we failed because there were readers, clear the "writer" bit
* so we don't block additional readers. Otherwise, there was another
* writer anyway, so our "fetchor" made no difference. Then wait,
* issuing periodic fetchor instructions, till we get the lock.
*/
void __write_lock_failed(arch_rwlock_t *rw, u32 val)
{
	int attempt = 0;

	for (;;) {
		/*
		 * The previous value had no writer bit, so the bit now set
		 * is ours: drop it so additional readers are not blocked.
		 */
		if (!arch_write_val_locked(val))
			val = __insn_fetchand4(&rw->lock, ~__WRITE_LOCK_BIT);

		delay_backoff(attempt++);

		/* Retry; a previous value of zero means we now own the lock. */
		val = __insn_fetchor4(&rw->lock, __WRITE_LOCK_BIT);
		if (val == 0)
			return;
	}
}
EXPORT_SYMBOL(__write_lock_failed);

67
arch/tile/lib/strchr_64.c Normal file
View file

@ -0,0 +1,67 @@
/*
* Copyright 2011 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
#include <linux/types.h>
#include <linux/string.h>
#include <linux/module.h>
#undef strchr
/*
 * Return a pointer to the first occurrence of (char)c in the string "s",
 * or NULL if the terminating '\0' is reached first.  Searching for '\0'
 * returns a pointer to the terminator.
 *
 * The string is scanned one aligned 64-bit word at a time.
 */
char *strchr(const char *s, int c)
{
	int z, g;

	/* Get an aligned pointer. */
	const uintptr_t s_int = (uintptr_t) s;
	const uint64_t *p = (const uint64_t *)(s_int & -8);

	/* Create eight copies of the byte for which we are looking. */
	const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c;

	/* Read the first aligned word, but force bytes before the string to
	 * match neither zero nor goal (we make sure the high bit of each
	 * byte is 1, and the low 7 bits are all the opposite of the goal
	 * byte).
	 *
	 * The shift count is reduced to the byte offset within the word
	 * explicitly, so it is always in 0..56 and never relies on the
	 * hardware's mod-64 behaviour (a C shift by >= 64 is undefined).
	 */
	const uint64_t before_mask = (1ULL << ((s_int & 7) << 3)) - 1;

	/* The per-byte shift must be logical: 0xFF >> 1 == 0x7F keeps the
	 * high bit of each masked byte set after the XOR.  An arithmetic
	 * shift would leave 0xFF, turning the masked bytes into ~goal,
	 * which is 0x00 (a false terminator) when c == 0xFF.
	 */
	uint64_t v = (*p | before_mask) ^
		(goal & __insn_v1shrui(before_mask, 1));

	uint64_t zero_matches, goal_matches;

	while (1) {
		/* Look for a terminating '\0'. */
		zero_matches = __insn_v1cmpeqi(v, 0);

		/* Look for the goal byte. */
		goal_matches = __insn_v1cmpeq(v, goal);

		if (__builtin_expect((zero_matches | goal_matches) != 0, 0))
			break;

		v = *++p;
	}

	z = __insn_ctz(zero_matches);
	g = __insn_ctz(goal_matches);

	/* If we found c before '\0' we got a match. Note that if c == '\0'
	 * then g == z, and we correctly return the address of the '\0'
	 * rather than NULL.
	 */
	return (g <= z) ? ((char *)p) + (g >> 3) : NULL;
}
EXPORT_SYMBOL(strchr);

38
arch/tile/lib/strlen_64.c Normal file
View file

@ -0,0 +1,38 @@
/*
* Copyright 2011 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
#include <linux/types.h>
#include <linux/string.h>
#include <linux/module.h>
#undef strlen
/*
 * Return the number of bytes in the string "s" before its terminating
 * '\0'.  The string is scanned one aligned 64-bit word at a time.
 */
size_t strlen(const char *s)
{
	/* Get an aligned pointer. */
	const uintptr_t s_int = (uintptr_t) s;
	const uint64_t *p = (const uint64_t *)(s_int & -8);

	/* Read the first word, but force bytes before the string to be
	 * nonzero.  The shift count is the byte offset within the word
	 * times 8 (0..56); masking it explicitly avoids an undefined C
	 * shift by >= 64 instead of relying on mod-64 hardware behaviour.
	 */
	uint64_t v = *p | ((1ULL << ((s_int & 7) << 3)) - 1);

	uint64_t bits;

	/* Advance a word at a time until some byte compares equal to 0. */
	while ((bits = __insn_v1cmpeqi(v, 0)) == 0)
		v = *++p;

	/* ctz >> 3 is the index of the first zero byte within the word. */
	return ((const char *)p) + (__insn_ctz(bits) >> 3) - s;
}
EXPORT_SYMBOL(strlen);

196
arch/tile/lib/usercopy_64.S Normal file
View file

@ -0,0 +1,196 @@
/*
* Copyright 2011 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cache.h>
#include <arch/chip.h>
/* Access user memory, but use MMU to avoid propagating kernel exceptions. */
/*
 * Fault handlers referenced from the __ex_table entries of the
 * __get_user_N / __put_user_N routines in this file.  They are placed in
 * the .fixup section and return directly to the original caller via lr.
 */
.pushsection .fixup,"ax"
/* Faulting load: r1 = -EFAULT (status), r0 = 0 (value). */
get_user_fault:
{ movei r1, -EFAULT; move r0, zero }
jrp lr
ENDPROC(get_user_fault)
/* Faulting store: r0 = -EFAULT (status). */
put_user_fault:
{ movei r0, -EFAULT; jrp lr }
ENDPROC(put_user_fault)
.popsection
/*
* __get_user_N functions take a pointer in r0, and return 0 in r1
* on success, with the value in r0; or else -EFAULT in r1.
*/
/*
 * Generate __get_user_<bytes>: a single bundle performs "LOAD r0, r0"
 * and clears r1 (the success status), then returns.  The bundle is
 * label 1, and the __ex_table entry redirects a fault there to
 * get_user_fault.
 */
#define __get_user_N(bytes, LOAD) \
STD_ENTRY(__get_user_##bytes); \
1: { LOAD r0, r0; move r1, zero }; \
jrp lr; \
STD_ENDPROC(__get_user_##bytes); \
.pushsection __ex_table,"a"; \
.quad 1b, get_user_fault; \
.popsection
/* One routine per access width (1, 2, 4 and 8 bytes). */
__get_user_N(1, ld1u)
__get_user_N(2, ld2u)
__get_user_N(4, ld4u)
__get_user_N(8, ld)
/*
* __put_user_N functions take a value in r0 and a pointer in r1,
* and return 0 in r0 on success or -EFAULT on failure.
*/
/*
 * Generate __put_user_<bytes>: a single bundle performs "STORE r1, r0"
 * (value r0 to address r1) and clears r0 (the success status), then
 * returns.  The bundle is label 1, and the __ex_table entry redirects a
 * fault there to put_user_fault.
 */
#define __put_user_N(bytes, STORE) \
STD_ENTRY(__put_user_##bytes); \
1: { STORE r1, r0; move r0, zero }; \
jrp lr; \
STD_ENDPROC(__put_user_##bytes); \
.pushsection __ex_table,"a"; \
.quad 1b, put_user_fault; \
.popsection
/* One routine per access width (1, 2, 4 and 8 bytes). */
__put_user_N(1, st1)
__put_user_N(2, st2)
__put_user_N(4, st4)
__put_user_N(8, st)
/*
* strnlen_user_asm takes the pointer in r0, and the length bound in r1.
* It returns the length, including the terminating NUL, or zero on exception.
* If length is greater than the bound, returns one plus the bound.
*/
STD_ENTRY(strnlen_user_asm)
/*
 * r3 = start - 1, so the final "r0 - r3" yields (bytes advanced + 1).
 * A zero bound skips the loop and therefore returns 1.
 */
{ beqz r1, 2f; addi r3, r0, -1 } /* bias down to include NUL */
/* Load one byte (the only instruction that can fault) and count it. */
1: { ld1u r4, r0; addi r1, r1, -1 }
/* NUL found: r0 still points at it, so the result includes the NUL. */
beqz r4, 2f
/* Advance; keep going while the bound is not exhausted. */
{ bnezt r1, 1b; addi r0, r0, 1 }
2: { sub r0, r0, r3; jrp lr }
STD_ENDPROC(strnlen_user_asm)
.pushsection .fixup,"ax"
/* Fault on the load at label 1: return zero. */
strnlen_user_fault:
{ move r0, zero; jrp lr }
ENDPROC(strnlen_user_fault)
.section __ex_table,"a"
.quad 1b, strnlen_user_fault
.popsection
/*
* strncpy_from_user_asm takes the kernel target pointer in r0,
* the userspace source pointer in r1, and the length bound (including
* the trailing NUL) in r2. On success, it returns the string length
* (not including the trailing NUL), or -EFAULT on failure.
*/
/*
 * Register use: r0 = kernel dst cursor, r1 = user src cursor,
 * r2 = bytes still allowed, r3 = original dst, r4 = byte just copied.
 *
 * Return cases:
 *   - NUL copied:            number of bytes copied, not counting the NUL
 *   - bound hit with no NUL: the bound (all bytes copied were non-NUL)
 *   - bound of zero:         0
 *   - fault on the user load: -EFAULT
 *
 * The NUL test must precede the bound test; otherwise a string whose NUL
 * is the last permitted byte would be reported with the NUL counted.
 */
STD_ENTRY(strncpy_from_user_asm)
	{ beqz r2, 2f; move r3, r0 }
1:	{ ld1u r4, r1; addi r1, r1, 1; addi r2, r2, -1 }
	{ st1 r0, r4; addi r0, r0, 1 }
	beqz r4, 3f		/* copied the terminating NUL: done */
	bnezt r2, 1b		/* more room: copy the next byte */
2:	{ sub r0, r0, r3; jrp lr }
3:	addi r0, r0, -1		/* don't count the trailing NUL */
	{ sub r0, r0, r3; jrp lr }
	STD_ENDPROC(strncpy_from_user_asm)
	.pushsection .fixup,"ax"
strncpy_from_user_fault:
	{ movei r0, -EFAULT; jrp lr }
	ENDPROC(strncpy_from_user_fault)
	.section __ex_table,"a"
	.quad 1b, strncpy_from_user_fault
	.popsection
/*
* clear_user_asm takes the user target address in r0 and the
* number of bytes to zero in r1.
* It returns the number of uncopiable bytes (hopefully zero) in r0.
* Note that we don't use a separate .fixup section here since we fall
* through into the "fixup" code as the last straight-line bundle anyway.
*/
STD_ENTRY(clear_user_asm)
/* Nothing to do for a zero count; otherwise r2 = address | count. */
{ beqz r1, 2f; or r2, r0, r1 }
/* If both address and count are multiples of 8, use the 8-byte loop. */
andi r2, r2, 7
beqzt r2, .Lclear_aligned_user_asm
/* Byte loop: store a zero byte, advance the pointer, decrement the count. */
1: { st1 r0, zero; addi r0, r0, 1; addi r1, r1, -1 }
bnezt r1, 1b
/* Return the remaining count; also the fault target for the store above. */
2: { move r0, r1; jrp lr }
.pushsection __ex_table,"a"
.quad 1b, 2b
.popsection
.Lclear_aligned_user_asm:
/* Doubleword loop: same shape as above, eight bytes per iteration. */
1: { st r0, zero; addi r0, r0, 8; addi r1, r1, -8 }
bnezt r1, 1b
/* Return the remaining count; also the fault target for the store above. */
2: { move r0, r1; jrp lr }
STD_ENDPROC(clear_user_asm)
.pushsection __ex_table,"a"
.quad 1b, 2b
.popsection
/*
* flush_user_asm takes the user target address in r0 and the
* number of bytes to flush in r1.
* It returns the number of unflushable bytes (hopefully zero) in r0.
*/
STD_ENTRY(flush_user_asm)
/* Zero length: return r1 (zero) immediately. */
beqz r1, 2f
/* r2 = L2_CACHE_BYTES, r1 = end address (start + length). */
{ movei r2, L2_CACHE_BYTES; add r1, r0, r1 }
/* r2 = -L2_CACHE_BYTES (alignment mask); bias the end for rounding up. */
{ sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 }
/* Round the start down and the end up to L2_CACHE_BYTES boundaries. */
{ and r0, r0, r2; and r1, r1, r2 }
/* r1 = number of bytes in the rounded range. */
{ sub r1, r1, r0 }
/* Flush at r0 and step by CHIP_FLUSH_STRIDE() until r1 reaches zero. */
1: { flush r0; addi r1, r1, -CHIP_FLUSH_STRIDE() }
{ addi r0, r0, CHIP_FLUSH_STRIDE(); bnezt r1, 1b }
/* Return the remaining byte count; also the fault target for label 1. */
2: { move r0, r1; jrp lr }
STD_ENDPROC(flush_user_asm)
.pushsection __ex_table,"a"
.quad 1b, 2b
.popsection
/*
* inv_user_asm takes the user target address in r0 and the
* number of bytes to invalidate in r1.
* It returns the number of not inv'able bytes (hopefully zero) in r0.
*/
STD_ENTRY(inv_user_asm)
/* Zero length: return r1 (zero) immediately. */
beqz r1, 2f
/* r2 = L2_CACHE_BYTES, r1 = end address (start + length). */
{ movei r2, L2_CACHE_BYTES; add r1, r0, r1 }
/* r2 = -L2_CACHE_BYTES (alignment mask); bias the end for rounding up. */
{ sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 }
/* Round the start down and the end up to L2_CACHE_BYTES boundaries. */
{ and r0, r0, r2; and r1, r1, r2 }
/* r1 = number of bytes in the rounded range. */
{ sub r1, r1, r0 }
/* Invalidate at r0 and step by CHIP_INV_STRIDE() until r1 reaches zero. */
1: { inv r0; addi r1, r1, -CHIP_INV_STRIDE() }
{ addi r0, r0, CHIP_INV_STRIDE(); bnezt r1, 1b }
/* Return the remaining byte count; also the fault target for label 1. */
2: { move r0, r1; jrp lr }
STD_ENDPROC(inv_user_asm)
.pushsection __ex_table,"a"
.quad 1b, 2b
.popsection
/*
* finv_user_asm takes the user target address in r0 and the
* number of bytes to flush-invalidate in r1.
* It returns the number of not finv'able bytes (hopefully zero) in r0.
*/
STD_ENTRY(finv_user_asm)
/* Zero length: return r1 (zero) immediately. */
beqz r1, 2f
/* r2 = L2_CACHE_BYTES, r1 = end address (start + length). */
{ movei r2, L2_CACHE_BYTES; add r1, r0, r1 }
/* r2 = -L2_CACHE_BYTES (alignment mask); bias the end for rounding up. */
{ sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 }
/* Round the start down and the end up to L2_CACHE_BYTES boundaries. */
{ and r0, r0, r2; and r1, r1, r2 }
/* r1 = number of bytes in the rounded range. */
{ sub r1, r1, r0 }
/* Flush-invalidate at r0, stepping by CHIP_FINV_STRIDE() until r1 is zero. */
1: { finv r0; addi r1, r1, -CHIP_FINV_STRIDE() }
{ addi r0, r0, CHIP_FINV_STRIDE(); bnezt r1, 1b }
/* Return the remaining byte count; also the fault target for label 1. */
2: { move r0, r1; jrp lr }
STD_ENDPROC(finv_user_asm)
.pushsection __ex_table,"a"
.quad 1b, 2b
.popsection

187
arch/tile/mm/migrate_64.S Normal file
View file

@ -0,0 +1,187 @@
/*
* Copyright 2011 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*
* This routine is a helper for migrating the home of a set of pages to
* a new cpu. See the documentation in homecache.c for more information.
*/
#include <linux/linkage.h>
#include <linux/threads.h>
#include <asm/page.h>
#include <asm/thread_info.h>
#include <asm/types.h>
#include <asm/asm-offsets.h>
#include <hv/hypervisor.h>
.text
/*
* First, some definitions that apply to all the code in the file.
*/
/* Locals (caller-save) */
/* r_tmp: scratch address register; r_save_sp: sp value on entry. */
#define r_tmp r10
#define r_save_sp r11
/* What we save where in the stack frame; must include all callee-saves. */
/*
 * Byte offsets from the new (decremented) sp; each slot is 8 bytes wide.
 * FRAME_SP holds the entry sp, and FRAME_R30..FRAME_R33 hold r30..r33.
 */
#define FRAME_SP 8
#define FRAME_R30 16
#define FRAME_R31 24
#define FRAME_R32 32
#define FRAME_R33 40
#define FRAME_SIZE 48
/*
* On entry:
*
* r0 the new context PA to install (moved to r_context)
* r1 PTE to use for context access (moved to r_access)
* r2 ASID to use for new context (moved to r_asid)
* r3 pointer to cpumask with just this cpu set in it (r_my_cpumask)
*/
/* Arguments (caller-save) */
#define r_context_in r0
#define r_access_in r1
#define r_asid_in r2
#define r_my_cpumask r3
/* Locals (callee-save); must not be more than FRAME_xxx above. */
/* r_save_ics holds the INTERRUPT_CRITICAL_SECTION value read on entry. */
#define r_save_ics r30
#define r_context r31
#define r_access r32
#define r_asid r33
/*
* Caller-save locals and frame constants are the same as
* for homecache_migrate_stack_and_flush.
*/
/*
 * Sequence: save callee-saved registers, enter an interrupt critical
 * section, call hv_flush_remote, hv_install_context and hv_flush_all in
 * turn, then restore state and return.  A nonzero r0 from either of the
 * first two calls skips the remaining calls; r0 is left as set by the
 * last hypervisor call made.
 */
STD_ENTRY(flush_and_install_context)
/*
* Create a stack frame; we can't touch it once we flush the
* cache until we install the new page table and flush the TLB.
*/
/* lr is stored at the entry sp; the frame is allocated below it. */
{
move r_save_sp, sp
st sp, lr
addi sp, sp, -FRAME_SIZE
}
/* Store the entry sp and r30..r33 into their FRAME_xxx slots. */
addi r_tmp, sp, FRAME_SP
{
st r_tmp, r_save_sp
addi r_tmp, sp, FRAME_R30
}
{
st r_tmp, r30
addi r_tmp, sp, FRAME_R31
}
{
st r_tmp, r31
addi r_tmp, sp, FRAME_R32
}
{
st r_tmp, r32
addi r_tmp, sp, FRAME_R33
}
st r_tmp, r33
/* Move some arguments to callee-save registers. */
{
move r_context, r_context_in
move r_access, r_access_in
}
move r_asid, r_asid_in
/* Disable interrupts, since we can't use our stack. */
/* The previous setting is kept in r_save_ics for restoration at exit. */
{
mfspr r_save_ics, INTERRUPT_CRITICAL_SECTION
movei r_tmp, 1
}
mtspr INTERRUPT_CRITICAL_SECTION, r_tmp
/* First, flush our L2 cache. */
/*
 * Build the hv_flush_remote arguments in r0..r8.  HV_FLUSH_EVICT_L2 is
 * assembled into r1 16 bits at a time.  r_my_cpumask (r3) is copied to
 * r2 before r3 is overwritten with tlb_va.
 */
{
move r0, zero /* cache_pa */
moveli r1, hw2_last(HV_FLUSH_EVICT_L2) /* cache_control */
}
{
shl16insli r1, r1, hw1(HV_FLUSH_EVICT_L2)
move r2, r_my_cpumask /* cache_cpumask */
}
{
shl16insli r1, r1, hw0(HV_FLUSH_EVICT_L2)
move r3, zero /* tlb_va */
}
{
move r4, zero /* tlb_length */
move r5, zero /* tlb_pgsize */
}
{
move r6, zero /* tlb_cpumask */
move r7, zero /* asids */
}
{
move r8, zero /* asidcount */
jal hv_flush_remote
}
/* Nonzero result: skip straight to the restore path. */
bnez r0, 1f
/* Now install the new page table. */
{
move r0, r_context
move r1, r_access
}
{
move r2, r_asid
movei r3, HV_CTX_DIRECTIO
}
jal hv_install_context
/* Nonzero result: skip the TLB flush and go to the restore path. */
bnez r0, 1f
/* Finally, flush the TLB. */
{
movei r0, 0 /* preserve_global */
jal hv_flush_all
}
1: /* Reset interrupts back how they were before. */
mtspr INTERRUPT_CRITICAL_SECTION, r_save_ics
/* Restore the callee-saved registers and return. */
/* lr is reloaded from sp + FRAME_SIZE, i.e. the entry sp. */
addli lr, sp, FRAME_SIZE
{
ld lr, lr
addli r_tmp, sp, FRAME_R30
}
{
ld r30, r_tmp
addli r_tmp, sp, FRAME_R31
}
{
ld r31, r_tmp
addli r_tmp, sp, FRAME_R32
}
{
ld r32, r_tmp
addli r_tmp, sp, FRAME_R33
}
/* Pop the frame in the same bundle as the last register reload. */
{
ld r33, r_tmp
addi sp, sp, FRAME_SIZE
}
jrp lr
STD_ENDPROC(flush_and_install_context)

View file

@ -19,7 +19,7 @@
/* Note to the author of this code: did it ever occur to
you why the ifdefs are needed? Think about it again. -AK */
#ifdef CONFIG_X86_64
#if defined(CONFIG_X86_64) || defined(CONFIG_TILE)
# define INPUT_COMPAT_TEST is_compat_task()
#elif defined(CONFIG_S390)
# define INPUT_COMPAT_TEST test_thread_flag(TIF_31BIT)