From ac10f81d94f49f1bd9618680263400d275ddf825 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 25 May 2011 01:00:45 -0400 Subject: [PATCH 001/347] drm/radeon/kms/blit: workaround some hw issues on evergreen+ Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/evergreen_blit_kms.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c index ba06a69c6de8..40867290863c 100644 --- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c +++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c @@ -199,6 +199,16 @@ static void set_scissors(struct radeon_device *rdev, int x1, int y1, int x2, int y2) { + /* workaround some hw bugs */ + if (x2 == 0) + x1 = 1; + if (y2 == 0) + y1 = 1; + if (rdev->family == CHIP_CAYMAN) { + if ((x2 == 1) && (y2 == 1)) + x2 = 2; + } + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); radeon_ring_write(rdev, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2); radeon_ring_write(rdev, (x1 << 0) | (y1 << 16)); From f90e5b5b136ede1f0fd15999e95f13124d6b0dbd Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 24 May 2011 10:44:42 -0400 Subject: [PATCH 002/347] GFS2: Processes waiting on inode glock that no processes are holding This patch fixes a race in the GFS2 glock state machine that may result in lockups. The symptom is that all nodes but one will hang, waiting for a particular glock. All the holder records will have the "W" (Waiting) bit set. The other node will typically have the glock stuck in Exclusive mode (EX) with no holder records, but the dinode will be cached. In other words, an entry with "I:" will appear in the glock dump for that glock, but nothing else. The race has to do with the glock "Pending Demote" bit, which can be set, then immediately reset, thus losing the fact that another node needs the glock. The sequence of events is: 1. Something schedules the glock workqueue (e.g. glock request from fs) 2. The glock workqueue gets to the point between the test of the reply pending bit and the spin lock: if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) { finish_xmote(gl, gl->gl_reply); drop_ref = 1; } down_read(&gfs2_umount_flush_sem); <---- i.e. here spin_lock(&gl->gl_spin); 3. In comes (a) the reply to our EX lock request setting GLF_REPLY_PENDING and (b) the demote request which sets GLF_PENDING_DEMOTE 4. The following test is executed: if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && gl->gl_state != LM_ST_UNLOCKED && gl->gl_demote_state != LM_ST_EXCLUSIVE) { This resets the pending demote flag, and gl->gl_demote_state is not equal to exclusive, however because the reply from the dlm arrived after we checked for the GLF_REPLY_PENDING flag, gl->gl_state is still equal to unlocked, so although we reset the GLF_PENDING_DEMOTE flag, we didn't then set the GLF_DEMOTE flag or reinstate the GLF_PENDING_DEMOTE_FLAG. The patch closes the timing window by only transitioning the "Pending demote" bit to the "demote" flag once we know the other conditions (not unlocked and not exclusive) are met. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index a2a6abbccc07..7137750f17f0 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -663,14 +663,19 @@ static void glock_work_func(struct work_struct *work) drop_ref = 1; } spin_lock(&gl->gl_spin); - if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && + if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && gl->gl_state != LM_ST_UNLOCKED && gl->gl_demote_state != LM_ST_EXCLUSIVE) { unsigned long holdtime, now = jiffies; + holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; if (time_before(now, holdtime)) delay = holdtime - now; - set_bit(delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE, &gl->gl_flags); + + if (!delay) { + clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags); + set_bit(GLF_DEMOTE, &gl->gl_flags); + } } run_queue(gl, 0); spin_unlock(&gl->gl_spin); From cb92d452ba665205ad6bfb424c0ef009cf26587d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 25 May 2011 16:39:00 -0400 Subject: [PATCH 003/347] drm/radeon/kms: add blit support for cayman (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allows us to use the 3D engine for memory management and allows us to use vram beyond the BAR aperture. v2: fix copy paste typo Reported-by: Nils Wallménius Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/cayman_blit_shaders.c | 326 +++++++++++- drivers/gpu/drm/radeon/cayman_blit_shaders.h | 3 + drivers/gpu/drm/radeon/evergreen_blit_kms.c | 503 ++++++++++--------- drivers/gpu/drm/radeon/ni.c | 13 +- drivers/gpu/drm/radeon/radeon_asic.c | 6 +- 5 files changed, 597 insertions(+), 254 deletions(-) diff --git a/drivers/gpu/drm/radeon/cayman_blit_shaders.c b/drivers/gpu/drm/radeon/cayman_blit_shaders.c index e148ab04b80b..7b4eeb7b4a8c 100644 --- a/drivers/gpu/drm/radeon/cayman_blit_shaders.c +++ b/drivers/gpu/drm/radeon/cayman_blit_shaders.c @@ -39,17 +39,335 @@ const u32 cayman_default_state[] = { - /* XXX fill in additional blit state */ + 0xc0066900, + 0x00000000, + 0x00000060, /* DB_RENDER_CONTROL */ + 0x00000000, /* DB_COUNT_CONTROL */ + 0x00000000, /* DB_DEPTH_VIEW */ + 0x0000002a, /* DB_RENDER_OVERRIDE */ + 0x00000000, /* DB_RENDER_OVERRIDE2 */ + 0x00000000, /* DB_HTILE_DATA_BASE */ 0xc0026900, - 0x00000316, - 0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */ - 0x00000010, /* */ + 0x0000000a, + 0x00000000, /* DB_STENCIL_CLEAR */ + 0x00000000, /* DB_DEPTH_CLEAR */ + + 0xc0036900, + 0x0000000f, + 0x00000000, /* DB_DEPTH_INFO */ + 0x00000000, /* DB_Z_INFO */ + 0x00000000, /* DB_STENCIL_INFO */ + + 0xc0016900, + 0x00000080, + 0x00000000, /* PA_SC_WINDOW_OFFSET */ + + 0xc00d6900, + 0x00000083, + 0x0000ffff, /* PA_SC_CLIPRECT_RULE */ + 0x00000000, /* PA_SC_CLIPRECT_0_TL */ + 0x20002000, /* PA_SC_CLIPRECT_0_BR */ + 0x00000000, + 0x20002000, + 0x00000000, + 0x20002000, + 0x00000000, + 0x20002000, + 0xaaaaaaaa, /* PA_SC_EDGERULE */ + 0x00000000, /* PA_SU_HARDWARE_SCREEN_OFFSET */ + 0x0000000f, /* CB_TARGET_MASK */ + 0x0000000f, /* CB_SHADER_MASK */ + + 0xc0226900, + 0x00000094, + 0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */ + 0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */ + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x00000000, /* PA_SC_VPORT_ZMIN_0 */ + 0x3f800000, /* PA_SC_VPORT_ZMAX_0 */ + + 0xc0016900, + 0x000000d4, + 0x00000000, /* SX_MISC */ 0xc0026900, 0x000000d9, 0x00000000, /* CP_RINGID */ 0x00000000, /* CP_VMID */ + + 0xc0096900, + 0x00000100, + 0x00ffffff, /* VGT_MAX_VTX_INDX */ + 0x00000000, /* VGT_MIN_VTX_INDX */ + 0x00000000, /* VGT_INDX_OFFSET */ + 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_INDX */ + 0x00000000, /* SX_ALPHA_TEST_CONTROL */ + 0x00000000, /* CB_BLEND_RED */ + 0x00000000, /* CB_BLEND_GREEN */ + 0x00000000, /* CB_BLEND_BLUE */ + 0x00000000, /* CB_BLEND_ALPHA */ + + 0xc0016900, + 0x00000187, + 0x00000100, /* SPI_VS_OUT_ID_0 */ + + 0xc0026900, + 0x00000191, + 0x00000100, /* SPI_PS_INPUT_CNTL_0 */ + 0x00000101, /* SPI_PS_INPUT_CNTL_1 */ + + 0xc0016900, + 0x000001b1, + 0x00000000, /* SPI_VS_OUT_CONFIG */ + + 0xc0106900, + 0x000001b3, + 0x20000001, /* SPI_PS_IN_CONTROL_0 */ + 0x00000000, /* SPI_PS_IN_CONTROL_1 */ + 0x00000000, /* SPI_INTERP_CONTROL_0 */ + 0x00000000, /* SPI_INPUT_Z */ + 0x00000000, /* SPI_FOG_CNTL */ + 0x00100000, /* SPI_BARYC_CNTL */ + 0x00000000, /* SPI_PS_IN_CONTROL_2 */ + 0x00000000, /* SPI_COMPUTE_INPUT_CNTL */ + 0x00000000, /* SPI_COMPUTE_NUM_THREAD_X */ + 0x00000000, /* SPI_COMPUTE_NUM_THREAD_Y */ + 0x00000000, /* SPI_COMPUTE_NUM_THREAD_Z */ + 0x00000000, /* SPI_GPR_MGMT */ + 0x00000000, /* SPI_LDS_MGMT */ + 0x00000000, /* SPI_STACK_MGMT */ + 0x00000000, /* SPI_WAVE_MGMT_1 */ + 0x00000000, /* SPI_WAVE_MGMT_2 */ + + 0xc0016900, + 0x000001e0, + 0x00000000, /* CB_BLEND0_CONTROL */ + + 0xc00e6900, + 0x00000200, + 0x00000000, /* DB_DEPTH_CONTROL */ + 0x00000000, /* DB_EQAA */ + 0x00cc0010, /* CB_COLOR_CONTROL */ + 0x00000210, /* DB_SHADER_CONTROL */ + 0x00010000, /* PA_CL_CLIP_CNTL */ + 0x00000004, /* PA_SU_SC_MODE_CNTL */ + 0x00000100, /* PA_CL_VTE_CNTL */ + 0x00000000, /* PA_CL_VS_OUT_CNTL */ + 0x00000000, /* PA_CL_NANINF_CNTL */ + 0x00000000, /* PA_SU_LINE_STIPPLE_CNTL */ + 0x00000000, /* PA_SU_LINE_STIPPLE_SCALE */ + 0x00000000, /* PA_SU_PRIM_FILTER_CNTL */ + 0x00000000, /* */ + 0x00000000, /* */ + + 0xc0026900, + 0x00000229, + 0x00000000, /* SQ_PGM_START_FS */ + 0x00000000, + + 0xc0016900, + 0x0000023b, + 0x00000000, /* SQ_LDS_ALLOC_PS */ + + 0xc0066900, + 0x00000240, + 0x00000000, /* SQ_ESGS_RING_ITEMSIZE */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + + 0xc0046900, + 0x00000247, + 0x00000000, /* SQ_GS_VERT_ITEMSIZE */ + 0x00000000, + 0x00000000, + 0x00000000, + + 0xc0116900, + 0x00000280, + 0x00000000, /* PA_SU_POINT_SIZE */ + 0x00000000, /* PA_SU_POINT_MINMAX */ + 0x00000008, /* PA_SU_LINE_CNTL */ + 0x00000000, /* PA_SC_LINE_STIPPLE */ + 0x00000000, /* VGT_OUTPUT_PATH_CNTL */ + 0x00000000, /* VGT_HOS_CNTL */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, /* VGT_GS_MODE */ + + 0xc0026900, + 0x00000292, + 0x00000000, /* PA_SC_MODE_CNTL_0 */ + 0x00000000, /* PA_SC_MODE_CNTL_1 */ + + 0xc0016900, + 0x000002a1, + 0x00000000, /* VGT_PRIMITIVEID_EN */ + + 0xc0016900, + 0x000002a5, + 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_EN */ + + 0xc0026900, + 0x000002a8, + 0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */ + 0x00000000, + + 0xc0026900, + 0x000002ad, + 0x00000000, /* VGT_REUSE_OFF */ + 0x00000000, + + 0xc0016900, + 0x000002d5, + 0x00000000, /* VGT_SHADER_STAGES_EN */ + + 0xc0016900, + 0x000002dc, + 0x0000aa00, /* DB_ALPHA_TO_MASK */ + + 0xc0066900, + 0x000002de, + 0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + + 0xc0026900, + 0x000002e5, + 0x00000000, /* VGT_STRMOUT_CONFIG */ + 0x00000000, + + 0xc01b6900, + 0x000002f5, + 0x76543210, /* PA_SC_CENTROID_PRIORITY_0 */ + 0xfedcba98, /* PA_SC_CENTROID_PRIORITY_1 */ + 0x00000000, /* PA_SC_LINE_CNTL */ + 0x00000000, /* PA_SC_AA_CONFIG */ + 0x00000005, /* PA_SU_VTX_CNTL */ + 0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */ + 0x3f800000, /* PA_CL_GB_VERT_DISC_ADJ */ + 0x3f800000, /* PA_CL_GB_HORZ_CLIP_ADJ */ + 0x3f800000, /* PA_CL_GB_HORZ_DISC_ADJ */ + 0x00000000, /* PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xffffffff, /* PA_SC_AA_MASK_X0Y0_X1Y0 */ + 0xffffffff, + + 0xc0026900, + 0x00000316, + 0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */ + 0x00000010, /* */ }; +const u32 cayman_vs[] = +{ + 0x00000004, + 0x80400400, + 0x0000a03c, + 0x95000688, + 0x00004000, + 0x15000688, + 0x00000000, + 0x88000000, + 0x04000000, + 0x67961001, +#ifdef __BIG_ENDIAN + 0x00020000, +#else + 0x00000000, +#endif + 0x00000000, + 0x04000000, + 0x67961000, +#ifdef __BIG_ENDIAN + 0x00020008, +#else + 0x00000008, +#endif + 0x00000000, +}; + +const u32 cayman_ps[] = +{ + 0x00000004, + 0xa00c0000, + 0x00000008, + 0x80400000, + 0x00000000, + 0x95000688, + 0x00000000, + 0x88000000, + 0x00380400, + 0x00146b10, + 0x00380000, + 0x20146b10, + 0x00380400, + 0x40146b00, + 0x80380000, + 0x60146b00, + 0x00000010, + 0x000d1000, + 0xb0800000, + 0x00000000, +}; + +const u32 cayman_ps_size = ARRAY_SIZE(cayman_ps); +const u32 cayman_vs_size = ARRAY_SIZE(cayman_vs); const u32 cayman_default_size = ARRAY_SIZE(cayman_default_state); diff --git a/drivers/gpu/drm/radeon/cayman_blit_shaders.h b/drivers/gpu/drm/radeon/cayman_blit_shaders.h index 33b75e5d0fa4..f5d0e9a60267 100644 --- a/drivers/gpu/drm/radeon/cayman_blit_shaders.h +++ b/drivers/gpu/drm/radeon/cayman_blit_shaders.h @@ -25,8 +25,11 @@ #ifndef CAYMAN_BLIT_SHADERS_H #define CAYMAN_BLIT_SHADERS_H +extern const u32 cayman_ps[]; +extern const u32 cayman_vs[]; extern const u32 cayman_default_state[]; +extern const u32 cayman_ps_size, cayman_vs_size; extern const u32 cayman_default_size; #endif diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c index 40867290863c..a60ad28b0389 100644 --- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c +++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c @@ -31,6 +31,7 @@ #include "evergreend.h" #include "evergreen_blit_shaders.h" +#include "cayman_blit_shaders.h" #define DI_PT_RECTLIST 0x11 #define DI_INDEX_SIZE_16_BIT 0x0 @@ -265,238 +266,240 @@ set_default_state(struct radeon_device *rdev) u64 gpu_addr; int dwords; - switch (rdev->family) { - case CHIP_CEDAR: - default: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 96; - num_vs_threads = 16; - num_gs_threads = 16; - num_es_threads = 16; - num_hs_threads = 16; - num_ls_threads = 16; - num_ps_stack_entries = 42; - num_vs_stack_entries = 42; - num_gs_stack_entries = 42; - num_es_stack_entries = 42; - num_hs_stack_entries = 42; - num_ls_stack_entries = 42; - break; - case CHIP_REDWOOD: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 128; - num_vs_threads = 20; - num_gs_threads = 20; - num_es_threads = 20; - num_hs_threads = 20; - num_ls_threads = 20; - num_ps_stack_entries = 42; - num_vs_stack_entries = 42; - num_gs_stack_entries = 42; - num_es_stack_entries = 42; - num_hs_stack_entries = 42; - num_ls_stack_entries = 42; - break; - case CHIP_JUNIPER: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 128; - num_vs_threads = 20; - num_gs_threads = 20; - num_es_threads = 20; - num_hs_threads = 20; - num_ls_threads = 20; - num_ps_stack_entries = 85; - num_vs_stack_entries = 85; - num_gs_stack_entries = 85; - num_es_stack_entries = 85; - num_hs_stack_entries = 85; - num_ls_stack_entries = 85; - break; - case CHIP_CYPRESS: - case CHIP_HEMLOCK: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 128; - num_vs_threads = 20; - num_gs_threads = 20; - num_es_threads = 20; - num_hs_threads = 20; - num_ls_threads = 20; - num_ps_stack_entries = 85; - num_vs_stack_entries = 85; - num_gs_stack_entries = 85; - num_es_stack_entries = 85; - num_hs_stack_entries = 85; - num_ls_stack_entries = 85; - break; - case CHIP_PALM: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 96; - num_vs_threads = 16; - num_gs_threads = 16; - num_es_threads = 16; - num_hs_threads = 16; - num_ls_threads = 16; - num_ps_stack_entries = 42; - num_vs_stack_entries = 42; - num_gs_stack_entries = 42; - num_es_stack_entries = 42; - num_hs_stack_entries = 42; - num_ls_stack_entries = 42; - break; - case CHIP_BARTS: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 128; - num_vs_threads = 20; - num_gs_threads = 20; - num_es_threads = 20; - num_hs_threads = 20; - num_ls_threads = 20; - num_ps_stack_entries = 85; - num_vs_stack_entries = 85; - num_gs_stack_entries = 85; - num_es_stack_entries = 85; - num_hs_stack_entries = 85; - num_ls_stack_entries = 85; - break; - case CHIP_TURKS: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 128; - num_vs_threads = 20; - num_gs_threads = 20; - num_es_threads = 20; - num_hs_threads = 20; - num_ls_threads = 20; - num_ps_stack_entries = 42; - num_vs_stack_entries = 42; - num_gs_stack_entries = 42; - num_es_stack_entries = 42; - num_hs_stack_entries = 42; - num_ls_stack_entries = 42; - break; - case CHIP_CAICOS: - num_ps_gprs = 93; - num_vs_gprs = 46; - num_temp_gprs = 4; - num_gs_gprs = 31; - num_es_gprs = 31; - num_hs_gprs = 23; - num_ls_gprs = 23; - num_ps_threads = 128; - num_vs_threads = 10; - num_gs_threads = 10; - num_es_threads = 10; - num_hs_threads = 10; - num_ls_threads = 10; - num_ps_stack_entries = 42; - num_vs_stack_entries = 42; - num_gs_stack_entries = 42; - num_es_stack_entries = 42; - num_hs_stack_entries = 42; - num_ls_stack_entries = 42; - break; - } - - if ((rdev->family == CHIP_CEDAR) || - (rdev->family == CHIP_PALM) || - (rdev->family == CHIP_CAICOS)) - sq_config = 0; - else - sq_config = VC_ENABLE; - - sq_config |= (EXPORT_SRC_C | - CS_PRIO(0) | - LS_PRIO(0) | - HS_PRIO(0) | - PS_PRIO(0) | - VS_PRIO(1) | - GS_PRIO(2) | - ES_PRIO(3)); - - sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) | - NUM_VS_GPRS(num_vs_gprs) | - NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); - sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) | - NUM_ES_GPRS(num_es_gprs)); - sq_gpr_resource_mgmt_3 = (NUM_HS_GPRS(num_hs_gprs) | - NUM_LS_GPRS(num_ls_gprs)); - sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) | - NUM_VS_THREADS(num_vs_threads) | - NUM_GS_THREADS(num_gs_threads) | - NUM_ES_THREADS(num_es_threads)); - sq_thread_resource_mgmt_2 = (NUM_HS_THREADS(num_hs_threads) | - NUM_LS_THREADS(num_ls_threads)); - sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) | - NUM_VS_STACK_ENTRIES(num_vs_stack_entries)); - sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) | - NUM_ES_STACK_ENTRIES(num_es_stack_entries)); - sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) | - NUM_LS_STACK_ENTRIES(num_ls_stack_entries)); - /* set clear context state */ radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0)); radeon_ring_write(rdev, 0); - /* disable dyn gprs */ - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); - radeon_ring_write(rdev, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2); - radeon_ring_write(rdev, 0); + if (rdev->family < CHIP_CAYMAN) { + switch (rdev->family) { + case CHIP_CEDAR: + default: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 96; + num_vs_threads = 16; + num_gs_threads = 16; + num_es_threads = 16; + num_hs_threads = 16; + num_ls_threads = 16; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; + case CHIP_REDWOOD: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 20; + num_gs_threads = 20; + num_es_threads = 20; + num_hs_threads = 20; + num_ls_threads = 20; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; + case CHIP_JUNIPER: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 20; + num_gs_threads = 20; + num_es_threads = 20; + num_hs_threads = 20; + num_ls_threads = 20; + num_ps_stack_entries = 85; + num_vs_stack_entries = 85; + num_gs_stack_entries = 85; + num_es_stack_entries = 85; + num_hs_stack_entries = 85; + num_ls_stack_entries = 85; + break; + case CHIP_CYPRESS: + case CHIP_HEMLOCK: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 20; + num_gs_threads = 20; + num_es_threads = 20; + num_hs_threads = 20; + num_ls_threads = 20; + num_ps_stack_entries = 85; + num_vs_stack_entries = 85; + num_gs_stack_entries = 85; + num_es_stack_entries = 85; + num_hs_stack_entries = 85; + num_ls_stack_entries = 85; + break; + case CHIP_PALM: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 96; + num_vs_threads = 16; + num_gs_threads = 16; + num_es_threads = 16; + num_hs_threads = 16; + num_ls_threads = 16; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; + case CHIP_BARTS: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 20; + num_gs_threads = 20; + num_es_threads = 20; + num_hs_threads = 20; + num_ls_threads = 20; + num_ps_stack_entries = 85; + num_vs_stack_entries = 85; + num_gs_stack_entries = 85; + num_es_stack_entries = 85; + num_hs_stack_entries = 85; + num_ls_stack_entries = 85; + break; + case CHIP_TURKS: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 20; + num_gs_threads = 20; + num_es_threads = 20; + num_hs_threads = 20; + num_ls_threads = 20; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; + case CHIP_CAICOS: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 10; + num_gs_threads = 10; + num_es_threads = 10; + num_hs_threads = 10; + num_ls_threads = 10; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; + } - /* SQ config */ - radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 11)); - radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2); - radeon_ring_write(rdev, sq_config); - radeon_ring_write(rdev, sq_gpr_resource_mgmt_1); - radeon_ring_write(rdev, sq_gpr_resource_mgmt_2); - radeon_ring_write(rdev, sq_gpr_resource_mgmt_3); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, sq_thread_resource_mgmt); - radeon_ring_write(rdev, sq_thread_resource_mgmt_2); - radeon_ring_write(rdev, sq_stack_resource_mgmt_1); - radeon_ring_write(rdev, sq_stack_resource_mgmt_2); - radeon_ring_write(rdev, sq_stack_resource_mgmt_3); + if ((rdev->family == CHIP_CEDAR) || + (rdev->family == CHIP_PALM) || + (rdev->family == CHIP_CAICOS)) + sq_config = 0; + else + sq_config = VC_ENABLE; + + sq_config |= (EXPORT_SRC_C | + CS_PRIO(0) | + LS_PRIO(0) | + HS_PRIO(0) | + PS_PRIO(0) | + VS_PRIO(1) | + GS_PRIO(2) | + ES_PRIO(3)); + + sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) | + NUM_VS_GPRS(num_vs_gprs) | + NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); + sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) | + NUM_ES_GPRS(num_es_gprs)); + sq_gpr_resource_mgmt_3 = (NUM_HS_GPRS(num_hs_gprs) | + NUM_LS_GPRS(num_ls_gprs)); + sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) | + NUM_VS_THREADS(num_vs_threads) | + NUM_GS_THREADS(num_gs_threads) | + NUM_ES_THREADS(num_es_threads)); + sq_thread_resource_mgmt_2 = (NUM_HS_THREADS(num_hs_threads) | + NUM_LS_THREADS(num_ls_threads)); + sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) | + NUM_VS_STACK_ENTRIES(num_vs_stack_entries)); + sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) | + NUM_ES_STACK_ENTRIES(num_es_stack_entries)); + sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) | + NUM_LS_STACK_ENTRIES(num_ls_stack_entries)); + + /* disable dyn gprs */ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(rdev, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2); + radeon_ring_write(rdev, 0); + + /* SQ config */ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 11)); + radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2); + radeon_ring_write(rdev, sq_config); + radeon_ring_write(rdev, sq_gpr_resource_mgmt_1); + radeon_ring_write(rdev, sq_gpr_resource_mgmt_2); + radeon_ring_write(rdev, sq_gpr_resource_mgmt_3); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, sq_thread_resource_mgmt); + radeon_ring_write(rdev, sq_thread_resource_mgmt_2); + radeon_ring_write(rdev, sq_stack_resource_mgmt_1); + radeon_ring_write(rdev, sq_stack_resource_mgmt_2); + radeon_ring_write(rdev, sq_stack_resource_mgmt_3); + } /* CONTEXT_CONTROL */ radeon_ring_write(rdev, 0xc0012800); @@ -570,7 +573,10 @@ int evergreen_blit_init(struct radeon_device *rdev) mutex_init(&rdev->r600_blit.mutex); rdev->r600_blit.state_offset = 0; - rdev->r600_blit.state_len = evergreen_default_size; + if (rdev->family < CHIP_CAYMAN) + rdev->r600_blit.state_len = evergreen_default_size; + else + rdev->r600_blit.state_len = cayman_default_size; dwords = rdev->r600_blit.state_len; while (dwords & 0xf) { @@ -582,11 +588,17 @@ int evergreen_blit_init(struct radeon_device *rdev) obj_size = ALIGN(obj_size, 256); rdev->r600_blit.vs_offset = obj_size; - obj_size += evergreen_vs_size * 4; + if (rdev->family < CHIP_CAYMAN) + obj_size += evergreen_vs_size * 4; + else + obj_size += cayman_vs_size * 4; obj_size = ALIGN(obj_size, 256); rdev->r600_blit.ps_offset = obj_size; - obj_size += evergreen_ps_size * 4; + if (rdev->family < CHIP_CAYMAN) + obj_size += evergreen_ps_size * 4; + else + obj_size += cayman_ps_size * 4; obj_size = ALIGN(obj_size, 256); r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, @@ -609,16 +621,29 @@ int evergreen_blit_init(struct radeon_device *rdev) return r; } - memcpy_toio(ptr + rdev->r600_blit.state_offset, - evergreen_default_state, rdev->r600_blit.state_len * 4); + if (rdev->family < CHIP_CAYMAN) { + memcpy_toio(ptr + rdev->r600_blit.state_offset, + evergreen_default_state, rdev->r600_blit.state_len * 4); - if (num_packet2s) - memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4), - packet2s, num_packet2s * 4); - for (i = 0; i < evergreen_vs_size; i++) - *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(evergreen_vs[i]); - for (i = 0; i < evergreen_ps_size; i++) - *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(evergreen_ps[i]); + if (num_packet2s) + memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4), + packet2s, num_packet2s * 4); + for (i = 0; i < evergreen_vs_size; i++) + *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(evergreen_vs[i]); + for (i = 0; i < evergreen_ps_size; i++) + *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(evergreen_ps[i]); + } else { + memcpy_toio(ptr + rdev->r600_blit.state_offset, + cayman_default_state, rdev->r600_blit.state_len * 4); + + if (num_packet2s) + memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4), + packet2s, num_packet2s * 4); + for (i = 0; i < cayman_vs_size; i++) + *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(cayman_vs[i]); + for (i = 0; i < cayman_ps_size; i++) + *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(cayman_ps[i]); + } radeon_bo_kunmap(rdev->r600_blit.shader_obj); radeon_bo_unreserve(rdev->r600_blit.shader_obj); diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index a0cc7a5ff031..c023b0ad89f4 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1387,14 +1387,12 @@ static int cayman_startup(struct radeon_device *rdev) return r; cayman_gpu_init(rdev); -#if 0 - r = cayman_blit_init(rdev); + r = evergreen_blit_init(rdev); if (r) { - cayman_blit_fini(rdev); + evergreen_blit_fini(rdev); rdev->asic->copy = NULL; dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r); } -#endif /* allocate wb buffer */ r = radeon_wb_init(rdev); @@ -1452,7 +1450,7 @@ int cayman_resume(struct radeon_device *rdev) int cayman_suspend(struct radeon_device *rdev) { - /* int r; */ + int r; /* FIXME: we should wait for ring to be empty */ cayman_cp_enable(rdev, false); @@ -1461,14 +1459,13 @@ int cayman_suspend(struct radeon_device *rdev) radeon_wb_disable(rdev); cayman_pcie_gart_disable(rdev); -#if 0 /* unpin shaders bo */ r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); if (likely(r == 0)) { radeon_bo_unpin(rdev->r600_blit.shader_obj); radeon_bo_unreserve(rdev->r600_blit.shader_obj); } -#endif + return 0; } @@ -1580,7 +1577,7 @@ int cayman_init(struct radeon_device *rdev) void cayman_fini(struct radeon_device *rdev) { - /* cayman_blit_fini(rdev); */ + evergreen_blit_fini(rdev); cayman_cp_fini(rdev); r600_irq_fini(rdev); radeon_wb_fini(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index d948265db87e..b9b3c2a2b119 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -906,9 +906,9 @@ static struct radeon_asic cayman_asic = { .get_vblank_counter = &evergreen_get_vblank_counter, .fence_ring_emit = &r600_fence_ring_emit, .cs_parse = &evergreen_cs_parse, - .copy_blit = NULL, - .copy_dma = NULL, - .copy = NULL, + .copy_blit = &evergreen_copy_blit, + .copy_dma = &evergreen_copy_blit, + .copy = &evergreen_copy_blit, .get_engine_clock = &radeon_atom_get_engine_clock, .set_engine_clock = &radeon_atom_set_engine_clock, .get_memory_clock = &radeon_atom_get_memory_clock, From 457558eda1545c22163574f6dbb883394705e9dd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 25 May 2011 17:49:54 -0400 Subject: [PATCH 004/347] drm/radeon/kms: add missing case for cayman thermal sensor The rest of the code is already in place. Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_pm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 86eda1ea94df..aaa19dc418a0 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -487,6 +487,7 @@ static int radeon_hwmon_init(struct radeon_device *rdev) case THERMAL_TYPE_RV6XX: case THERMAL_TYPE_RV770: case THERMAL_TYPE_EVERGREEN: + case THERMAL_TYPE_NI: case THERMAL_TYPE_SUMO: rdev->pm.int_hwmon_dev = hwmon_device_register(rdev->dev); if (IS_ERR(rdev->pm.int_hwmon_dev)) { From 67b3f823ec78d08aea8835bce2655674237abc1d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 25 May 2011 18:45:37 -0400 Subject: [PATCH 005/347] drm/radeon/kms: fix thermal sensor reading on juniper Uses a different method than other evergreen asics. Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/evergreen.c | 44 ++++++++++++++++++++--------- drivers/gpu/drm/radeon/evergreend.h | 8 +++++- 2 files changed, 38 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 34cd5a878088..8f446aadccd6 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -88,21 +88,39 @@ u32 evergreen_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base) /* get temperature in millidegrees */ int evergreen_get_temp(struct radeon_device *rdev) { - u32 temp = (RREG32(CG_MULT_THERMAL_STATUS) & ASIC_T_MASK) >> - ASIC_T_SHIFT; - u32 actual_temp = 0; + u32 temp, toffset, actual_temp = 0; - if (temp & 0x400) - actual_temp = -256; - else if (temp & 0x200) - actual_temp = 255; - else if (temp & 0x100) { - actual_temp = temp & 0x1ff; - actual_temp |= ~0x1ff; - } else - actual_temp = temp & 0xff; + if (rdev->family == CHIP_JUNIPER) { + toffset = (RREG32(CG_THERMAL_CTRL) & TOFFSET_MASK) >> + TOFFSET_SHIFT; + temp = (RREG32(CG_TS0_STATUS) & TS0_ADC_DOUT_MASK) >> + TS0_ADC_DOUT_SHIFT; - return (actual_temp * 1000) / 2; + if (toffset & 0x100) + actual_temp = temp / 2 - (0x200 - toffset); + else + actual_temp = temp / 2 + toffset; + + actual_temp = actual_temp * 1000; + + } else { + temp = (RREG32(CG_MULT_THERMAL_STATUS) & ASIC_T_MASK) >> + ASIC_T_SHIFT; + + if (temp & 0x400) + actual_temp = -256; + else if (temp & 0x200) + actual_temp = 255; + else if (temp & 0x100) { + actual_temp = temp & 0x1ff; + actual_temp |= ~0x1ff; + } else + actual_temp = temp & 0xff; + + actual_temp = (actual_temp * 1000) / 2; + } + + return actual_temp; } int sumo_get_temp(struct radeon_device *rdev) diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h index 75b57e394f2b..103250ff3cbe 100644 --- a/drivers/gpu/drm/radeon/evergreend.h +++ b/drivers/gpu/drm/radeon/evergreend.h @@ -168,10 +168,16 @@ #define SE_DB_BUSY (1 << 30) #define SE_CB_BUSY (1 << 31) /* evergreen */ +#define CG_THERMAL_CTRL 0x72c +#define TOFFSET_MASK 0x00003FE0 +#define TOFFSET_SHIFT 5 #define CG_MULT_THERMAL_STATUS 0x740 #define ASIC_T(x) ((x) << 16) -#define ASIC_T_MASK 0x7FF0000 +#define ASIC_T_MASK 0x07FF0000 #define ASIC_T_SHIFT 16 +#define CG_TS0_STATUS 0x760 +#define TS0_ADC_DOUT_MASK 0x000003FF +#define TS0_ADC_DOUT_SHIFT 0 /* APU */ #define CG_THERMAL_STATUS 0x678 From 2f2f96d1ea72e6602ae440ac2867a6004edb37a1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 25 May 2011 12:51:44 -0400 Subject: [PATCH 006/347] drm/radeon/kms: clean up the radeon kms Kconfig - no longer in staging - all radeons supported Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/Kconfig | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/gpu/drm/radeon/Kconfig b/drivers/gpu/drm/radeon/Kconfig index 9746fee59f56..ea92bbe3ed37 100644 --- a/drivers/gpu/drm/radeon/Kconfig +++ b/drivers/gpu/drm/radeon/Kconfig @@ -28,11 +28,4 @@ config DRM_RADEON_KMS The kernel will also perform security check on command stream provided by the user, we want to catch and forbid any illegal use of the GPU such as DMA into random system memory or into memory - not owned by the process supplying the command stream. This part - of the code is still incomplete and this why we propose that patch - as a staging driver addition, future security might forbid current - experimental userspace to run. - - This code support the following hardware : R1XX,R2XX,R3XX,R4XX,R5XX - (radeon up to X1950). Works is underway to provide support for R6XX, - R7XX and newer hardware (radeon from HD2XXX to HD4XXX). + not owned by the process supplying the command stream. From 9e3bb6b6f6a0c535eb053fbf0005a8e79e053374 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 24 May 2011 07:51:27 +0200 Subject: [PATCH 007/347] KVM: add missing void __user * cast to access_ok() call fa3d315a "KVM: Validate userspace_addr of memslot when registered" introduced this new warning onn s390: kvm_main.c: In function '__kvm_set_memory_region': kvm_main.c:654:7: warning: passing argument 1 of '__access_ok' makes pointer from integer without a cast arch/s390/include/asm/uaccess.h:53:19: note: expected 'const void *' but argument is of type '__u64' Add the missing cast to get rid of it again... Cc: Takuya Yoshikawa Signed-off-by: Heiko Carstens Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 22cdb960660a..383f492a6603 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -651,7 +651,9 @@ int __kvm_set_memory_region(struct kvm *kvm, /* We can read the guest memory with __xxx_user() later on. */ if (user_alloc && ((mem->userspace_addr & (PAGE_SIZE - 1)) || - !access_ok(VERIFY_WRITE, mem->userspace_addr, mem->memory_size))) + !access_ok(VERIFY_WRITE, + (void __user *)(unsigned long)mem->userspace_addr, + mem->memory_size))) goto out; if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) goto out; From 27721a52d6c8e33327ec3cae9f730204be99d251 Mon Sep 17 00:00:00 2001 From: Kyungmin Park Date: Mon, 30 May 2011 10:24:47 +0900 Subject: [PATCH 008/347] gpio: Fix gpio-exynos4 build fails in mainline After the GPIO driver move, some symbols became selectable when they shouldn't be. Tighten the dependencies. Reported-by: Randy Dunlap Signed-off-by: Grant Likely --- drivers/gpio/Kconfig | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 4a7f63143455..317199796c5f 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -88,29 +88,29 @@ config GPIO_IT8761E config GPIO_EXYNOS4 bool "Samsung Exynos4 GPIO library support" - default y if CPU_EXYNOS4210 - depends on ARM + default y + depends on ARM && CPU_EXYNOS4210 help Say yes here to support Samsung Exynos4 series SoCs GPIO library config GPIO_PLAT_SAMSUNG bool "Samsung SoCs GPIO library support" - default y if SAMSUNG_GPIOLIB_4BIT - depends on ARM + default y + depends on ARM && SAMSUNG_GPIOLIB_4BIT help Say yes here to support Samsung SoCs GPIO library config GPIO_S5PC100 bool "Samsung S5PC100 GPIO library support" - default y if CPU_S5PC100 - depends on ARM + default y + depends on ARM && CPU_S5PC100 help Say yes here to support Samsung S5PC100 SoCs GPIO library config GPIO_S5PV210 bool "Samsung S5PV210/S5PC110 GPIO library support" - default y if CPU_S5PV210 - depends on ARM + default y + depends on ARM && CPU_S5PV210 help Say yes here to support Samsung S5PV210/S5PC110 SoCs GPIO library From cbf74cea070fa1f705de4712e25d9e56ae6543c7 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 30 May 2011 16:31:11 +0200 Subject: [PATCH 009/347] oprofile, x86: Add comments to IBS LVT offset initialization Adding a comment in the code as IBS LVT setup is not obvious at all ... Signed-off-by: Robert Richter --- arch/x86/kernel/apic/apic.c | 3 ++- arch/x86/oprofile/op_model_amd.c | 13 +++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index fabf01eff771..a0bf78a0918c 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -390,7 +390,8 @@ static unsigned int reserve_eilvt_offset(int offset, unsigned int new) /* * If mask=1, the LVT entry does not generate interrupts while mask=0 - * enables the vector. See also the BKDGs. + * enables the vector. See also the BKDGs. Must be called with + * preemption disabled. */ int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask) diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 9fd8a567fe1e..9cbb710dc94b 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -609,16 +609,21 @@ static int setup_ibs_ctl(int ibs_eilvt_off) return 0; } +/* + * This runs only on the current cpu. We try to find an LVT offset and + * setup the local APIC. For this we must disable preemption. On + * success we initialize all nodes with this offset. This updates then + * the offset in the IBS_CTL per-node msr. The per-core APIC setup of + * the IBS interrupt vector is called from op_amd_setup_ctrs()/op_- + * amd_cpu_shutdown() using the new offset. + */ static int force_ibs_eilvt_setup(void) { int offset; int ret; - /* - * find the next free available EILVT entry, skip offset 0, - * pin search to this cpu - */ preempt_disable(); + /* find the next free available EILVT entry, skip offset 0 */ for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) { if (get_eilvt(offset)) break; From 1adffbae22332bb558c2a29de19d9aca391869f6 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Tue, 31 May 2011 19:38:07 +0900 Subject: [PATCH 010/347] fat: Fix corrupt inode flags when remove ATTR_SYS flag We are clearly missing '~' in fat_ioctl_set_attributes(). Cc: Reported-by: Dmitry Dmitriev Signed-off-by: OGAWA Hirofumi --- fs/fat/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fat/file.c b/fs/fat/file.c index 7257752b6d5d..7018e1d8902d 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -102,7 +102,7 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr) if (attr & ATTR_SYS) inode->i_flags |= S_IMMUTABLE; else - inode->i_flags &= S_IMMUTABLE; + inode->i_flags &= ~S_IMMUTABLE; } fat_save_attrs(inode, attr); From f01114cb59d670e9b4f2c335930dd57db96e9360 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 31 May 2011 12:26:55 +0200 Subject: [PATCH 011/347] sched: Fix cross-cpu clock sync on remote wakeups Markus reported that commit 317f394160e ("sched: Move the second half of ttwu() to the remote cpu") caused some accounting funnies on his AMD Phenom II X4, such as weird 'top' results. It turns out that this is due to non-synced TSC and the queued remote wakeups stopped coupeling the two relevant cpu clocks, which leads to wakeups seeing time jumps, which in turn lead to skewed runtime stats. Add an explicit call to sched_clock_cpu() to couple the per-cpu clocks to restore the normal flow of time. Reported-and-tested-by: Markus Trippelsdorf Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1306835745.2353.3.camel@twins Signed-off-by: Ingo Molnar --- kernel/sched.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sched.c b/kernel/sched.c index cbb3a0eee58e..49cc70b152cf 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2600,6 +2600,7 @@ static void ttwu_queue(struct task_struct *p, int cpu) #if defined(CONFIG_SMP) if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) { + sched_clock_cpu(cpu); /* sync clocks x-cpu */ ttwu_queue_remote(p, cpu); return; } From f339b9dc1f03591761d5d930800db24bc0eda1e1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 31 May 2011 10:49:20 +0200 Subject: [PATCH 012/347] sched: Fix schedstat.nr_wakeups_migrate While looking over the code I found that with the ttwu rework the nr_wakeups_migrate test broke since we now switch cpus prior to calling ttwu_stat(), hence the test is always true. Cure this by passing the migration state in wake_flags. Also move the whole test under CONFIG_SMP, its hard to migrate tasks on UP :-) Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-pwwxl7gdqs5676f1d4cx6pj7@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + kernel/sched.c | 11 +++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 8da84b7bc1b8..483c1ed5bc4d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1063,6 +1063,7 @@ struct sched_domain; */ #define WF_SYNC 0x01 /* waker goes to sleep after wakup */ #define WF_FORK 0x02 /* child wakeup after fork */ +#define WF_MIGRATED 0x04 /* internal use, task got migrated */ #define ENQUEUE_WAKEUP 1 #define ENQUEUE_HEAD 2 diff --git a/kernel/sched.c b/kernel/sched.c index 49cc70b152cf..2fe98ed474da 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2447,6 +2447,10 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags) } rcu_read_unlock(); } + + if (wake_flags & WF_MIGRATED) + schedstat_inc(p, se.statistics.nr_wakeups_migrate); + #endif /* CONFIG_SMP */ schedstat_inc(rq, ttwu_count); @@ -2455,9 +2459,6 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags) if (wake_flags & WF_SYNC) schedstat_inc(p, se.statistics.nr_wakeups_sync); - if (cpu != task_cpu(p)) - schedstat_inc(p, se.statistics.nr_wakeups_migrate); - #endif /* CONFIG_SCHEDSTATS */ } @@ -2675,8 +2676,10 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) p->sched_class->task_waking(p); cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); - if (task_cpu(p) != cpu) + if (task_cpu(p) != cpu) { + wake_flags |= WF_MIGRATED; set_task_cpu(p, cpu); + } #endif /* CONFIG_SMP */ ttwu_queue(p, cpu); From 74c355fbdfedd3820046dba4f537876cea54c207 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 May 2011 16:48:06 +0200 Subject: [PATCH 013/347] perf, cgroups: Fix up for new API Ben changed the cgroup API in commit f780bdb7c1c (cgroups: add per-thread subsystem callbacks) in an incompatible way, but forgot to convert the perf cgroup bits. Avoid compile warnings and runtime splats and convert perf too ;-) Acked-by: Ben Blum Cc: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1306767651.1200.2990.camel@twins Signed-off-by: Ingo Molnar --- kernel/events/core.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index c09767f7db3e..8a15944bf9d2 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7394,26 +7394,12 @@ static int __perf_cgroup_move(void *info) return 0; } -static void perf_cgroup_move(struct task_struct *task) +static void +perf_cgroup_attach_task(struct cgroup *cgrp, struct task_struct *task) { task_function_call(task, __perf_cgroup_move, task); } -static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup *old_cgrp, struct task_struct *task, - bool threadgroup) -{ - perf_cgroup_move(task); - if (threadgroup) { - struct task_struct *c; - rcu_read_lock(); - list_for_each_entry_rcu(c, &task->thread_group, thread_group) { - perf_cgroup_move(c); - } - rcu_read_unlock(); - } -} - static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, struct cgroup *old_cgrp, struct task_struct *task) { @@ -7425,7 +7411,7 @@ static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, if (!(task->flags & PF_EXITING)) return; - perf_cgroup_move(task); + perf_cgroup_attach_task(cgrp, task); } struct cgroup_subsys perf_subsys = { @@ -7434,6 +7420,6 @@ struct cgroup_subsys perf_subsys = { .create = perf_cgroup_create, .destroy = perf_cgroup_destroy, .exit = perf_cgroup_exit, - .attach = perf_cgroup_attach, + .attach_task = perf_cgroup_attach_task, }; #endif /* CONFIG_CGROUP_PERF */ From 046d886db18260d4435c942985392b7ef872337f Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Tue, 31 May 2011 00:44:31 +0200 Subject: [PATCH 014/347] OMAP: fix compilation error Forward-declare platform_device structure in arch/arm/plat-omap/include/plat/flash.h, otherwise compilation may break with: In file included from arch/arm/mach-omap1/flash.c:15: arch/arm/plat-omap/include/plat/flash.h:14: warning: 'struct platform_device' declared inside parameter list arch/arm/plat-omap/include/plat/flash.h:14: warning: its scope is only this definition or declaration, which is probably not what you want arch/arm/mach-omap1/flash.c:16: warning: 'struct platform_device' declared inside parameter list arch/arm/mach-omap1/flash.c:17: error: conflicting types for 'omap1_set_vpp' arch/arm/plat-omap/include/plat/flash.h:14: error: previous declaration of 'omap1_set_vpp' was here Detected and corrected while building for Amstrad Delta, confirmed with omap1_defconfig. Signed-off-by: Janusz Krzysztofik Signed-off-by: Tony Lindgren --- arch/arm/plat-omap/include/plat/flash.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/plat-omap/include/plat/flash.h b/arch/arm/plat-omap/include/plat/flash.h index 3083195123ea..0d88499b79e9 100644 --- a/arch/arm/plat-omap/include/plat/flash.h +++ b/arch/arm/plat-omap/include/plat/flash.h @@ -11,6 +11,7 @@ #include +struct platform_device; extern void omap1_set_vpp(struct platform_device *pdev, int enable); #endif From f9fa1bb9d7a3c9537d6abd4ad47c58fe27862625 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 31 May 2011 20:55:44 +0800 Subject: [PATCH 015/347] ARM: OMAP2: Add missing include of linux/gpio.h I got some build error like below while executing "make omap2plus_defconfig". CC arch/arm/mach-omap2/board-2430sdp.o arch/arm/mach-omap2/board-2430sdp.c: In function 'omap_2430sdp_init': arch/arm/mach-omap2/board-2430sdp.c:247: error: 'GPIOF_OUT_INIT_LOW' undeclared (first use in this function) arch/arm/mach-omap2/board-2430sdp.c:247: error: (Each undeclared identifier is reported only once arch/arm/mach-omap2/board-2430sdp.c:247: error: for each function it appears in.) This patch fixes the build error by include linux/gpio.h instead of mach/gpio.h. Signed-off-by: Axel Lin Cc: Syed Mohammed Khasim Cc: Grazvydas Ignotas Cc: Steve Sakoman Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/board-2430sdp.c | 2 +- arch/arm/mach-omap2/board-apollon.c | 2 +- arch/arm/mach-omap2/board-omap3pandora.c | 2 +- arch/arm/mach-omap2/board-overo.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-omap2/board-2430sdp.c b/arch/arm/mach-omap2/board-2430sdp.c index d54969be0a54..5de6eac0a725 100644 --- a/arch/arm/mach-omap2/board-2430sdp.c +++ b/arch/arm/mach-omap2/board-2430sdp.c @@ -26,13 +26,13 @@ #include #include #include +#include #include #include #include #include -#include #include #include #include diff --git a/arch/arm/mach-omap2/board-apollon.c b/arch/arm/mach-omap2/board-apollon.c index f3beb8eeef77..b124bdfb4239 100644 --- a/arch/arm/mach-omap2/board-apollon.c +++ b/arch/arm/mach-omap2/board-apollon.c @@ -27,13 +27,13 @@ #include #include #include +#include #include #include #include #include -#include #include #include #include diff --git a/arch/arm/mach-omap2/board-omap3pandora.c b/arch/arm/mach-omap2/board-omap3pandora.c index 1d10736c6d3c..5ee034a54afa 100644 --- a/arch/arm/mach-omap2/board-omap3pandora.c +++ b/arch/arm/mach-omap2/board-omap3pandora.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -41,7 +42,6 @@ #include #include -#include #include #include #include diff --git a/arch/arm/mach-omap2/board-overo.c b/arch/arm/mach-omap2/board-overo.c index 1555918e3ffa..9952c20b1ee0 100644 --- a/arch/arm/mach-omap2/board-overo.c +++ b/arch/arm/mach-omap2/board-overo.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -45,7 +46,6 @@ #include #include