From 5d266692372dfb412150006df420185b666658f9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Oct 2017 22:32:36 +0100 Subject: [PATCH 1/9] drm/i915: Filter out spurious execlists context-switch interrupts Back in commit a4b2b01523a8 ("drm/i915: Don't mark an execlists context-switch when idle") we noticed the presence of late context-switch interrupts. We were able to filter those out by looking at whether the ELSP remained active, but in commit beecec901790 ("drm/i915/execlists: Preemption!") that became problematic as we now anticipate receiving a context-switch event for preemption while ELSP may be empty. To restore the spurious interrupt suppression, add a counter for the expected number of pending context-switches and skip if we do not need to handle this interrupt to make forward progress. v2: Don't forget to switch on for preempt. v3: Reduce the counter to an on/off boolean tracker. Declare the HW as active when we first submit, and idle after the final completion event (with which we confirm the HW says it is idle), and track each source of activity separately. With a finite number of sources, it should aid us in debugging which gets stuck. Fixes: beecec901790 ("drm/i915/execlists: Preemption!") Signed-off-by: Chris Wilson Cc: Michal Winiarski Cc: Tvrtko Ursulin Cc: Arkadiusz Hiler Cc: Mika Kuoppala Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171023213237.26536-3-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala (cherry picked from commit 4a118ecbe99c93cf9f9582e83a88d03f18d6cb84) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_guc_submission.c | 3 ++ drivers/gpu/drm/i915/i915_irq.c | 6 ++-- drivers/gpu/drm/i915/intel_engine_cs.c | 5 ++-- drivers/gpu/drm/i915/intel_lrc.c | 27 ++++++++++++----- drivers/gpu/drm/i915/intel_ringbuffer.h | 34 ++++++++++++++++++++-- 5 files changed, 62 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index a2e8114b739d..f84c267728fd 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -610,6 +610,7 @@ done: execlists->first = rb; if (submit) { port_assign(port, last); + execlists_set_active(execlists, EXECLISTS_ACTIVE_USER); i915_guc_submit(engine); } spin_unlock_irq(&engine->timeline->lock); @@ -633,6 +634,8 @@ static void i915_guc_irq_handler(unsigned long data) rq = port_request(&port[0]); } + if (!rq) + execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER); if (!port_isset(last_port)) i915_guc_dequeue(engine); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index b1296a55c1e4..f8205841868b 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1388,8 +1388,10 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift) bool tasklet = false; if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) { - __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - tasklet = true; + if (READ_ONCE(engine->execlists.active)) { + __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + tasklet = true; + } } if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) { diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index a47a9c6bea52..ab5bf4e2e28e 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1548,8 +1548,8 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) if (test_bit(ENGINE_IRQ_EXECLIST,
&engine->irq_posted)) return false; - /* Both ports drained, no more ELSP submission? */ - if (port_request(&engine->execlists.port[0])) + /* Waiting to drain ELSP? */ + if (READ_ONCE(engine->execlists.active)) return false; /* ELSP is empty, but there are ready requests? */ @@ -1749,6 +1749,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m) idx); } } + drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active); rcu_read_unlock(); } else if (INTEL_GEN(dev_priv) > 6) { drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 7f45dd7dc3e5..5b87d5284a84 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -575,7 +575,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * the state of the GPU is known (idle). */ inject_preempt_context(engine); - execlists->preempt = true; + execlists_set_active(execlists, + EXECLISTS_ACTIVE_PREEMPT); goto unlock; } else { /* @@ -683,8 +684,10 @@ done: unlock: spin_unlock_irq(&engine->timeline->lock); - if (submit) + if (submit) { + execlists_set_active(execlists, EXECLISTS_ACTIVE_USER); execlists_submit_ports(engine); + } } static void @@ -696,6 +699,7 @@ execlist_cancel_port_requests(struct intel_engine_execlists *execlists) while (num_ports-- && port_isset(port)) { struct drm_i915_gem_request *rq = port_request(port); + GEM_BUG_ON(!execlists->active); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_PREEMPTED); i915_gem_request_put(rq); @@ -861,15 +865,21 @@ static void intel_lrc_irq_handler(unsigned long data) unwind_incomplete_requests(engine); spin_unlock_irq(&engine->timeline->lock); - GEM_BUG_ON(!execlists->preempt); - execlists->preempt = false; + GEM_BUG_ON(!execlists_is_active(execlists, + EXECLISTS_ACTIVE_PREEMPT)); + execlists_clear_active(execlists, + EXECLISTS_ACTIVE_PREEMPT); continue; } if (status & GEN8_CTX_STATUS_PREEMPTED && - execlists->preempt) + execlists_is_active(execlists, + EXECLISTS_ACTIVE_PREEMPT)) continue; + GEM_BUG_ON(!execlists_is_active(execlists, + EXECLISTS_ACTIVE_USER)); + /* Check the context/desc id for this event matches */ GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); @@ -892,6 +902,9 @@ static void intel_lrc_irq_handler(unsigned long data) /* After the final element, the hw should be idle */ GEM_BUG_ON(port_count(port) == 0 && !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); + if (port_count(port) == 0) + execlists_clear_active(execlists, + EXECLISTS_ACTIVE_USER); } if (head != execlists->csb_head) { @@ -901,7 +914,7 @@ static void intel_lrc_irq_handler(unsigned long data) } } - if (!execlists->preempt) + if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)) execlists_dequeue(engine); intel_uncore_forcewake_put(dev_priv, execlists->fw_domains); @@ -1460,7 +1473,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift); clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); execlists->csb_head = -1; - execlists->preempt = false; + execlists->active = 0; /* After a GPU reset, we may have requests to replay */ if (!i915_modparams.enable_guc_submission && execlists->first) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 17186f067408..6a42ed618a28 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -241,9 +241,17 @@ struct intel_engine_execlists { } port[EXECLIST_MAX_PORTS]; /** - * @preempt: 
are we currently handling a preempting context switch? + * @active: is the HW active? We consider the HW as active after + * submitting any context for execution and until we have seen the + * last context completion event. After that, we do not expect any + * more events until we submit, and so can park the HW. + * + * As we have a small number of different sources from which we feed + * the HW, we track the state of each inside a single bitfield. */ - bool preempt; + unsigned int active; +#define EXECLISTS_ACTIVE_USER 0 +#define EXECLISTS_ACTIVE_PREEMPT 1 /** * @port_mask: number of execlist ports - 1 @@ -525,6 +533,27 @@ struct intel_engine_cs { u32 (*get_cmd_length_mask)(u32 cmd_header); }; +static inline void +execlists_set_active(struct intel_engine_execlists *execlists, + unsigned int bit) +{ + __set_bit(bit, (unsigned long *)&execlists->active); +} + +static inline void +execlists_clear_active(struct intel_engine_execlists *execlists, + unsigned int bit) +{ + __clear_bit(bit, (unsigned long *)&execlists->active); +} + +static inline bool +execlists_is_active(const struct intel_engine_execlists *execlists, + unsigned int bit) +{ + return test_bit(bit, (unsigned long *)&execlists->active); +} + static inline unsigned int execlists_num_ports(const struct intel_engine_execlists * const execlists) { @@ -538,6 +567,7 @@ execlists_port_complete(struct intel_engine_execlists * const execlists, const unsigned int m = execlists->port_mask; GEM_BUG_ON(port_index(port, execlists) != 0); + GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); memmove(port, port + 1, m * sizeof(struct execlist_port)); memset(port + m, 0, sizeof(struct execlist_port)); From a2487174af332f46456048f96ddd2a8c28a3839e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Oct 2017 12:55:01 +0100 Subject: [PATCH 2/9] drm/i915/execlists: Remove the priority "optimisation" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Originally we set the priority to max upon inserting the request into the execlists queue (and removing it from the scheduler lists). We could then use the prio==INT_MAX as a shortcut within execlists_schedule() to detect the end of the dependency chain. Since commit 1f181225f8ec ("drm/i915/execlists: Keep request->priority for its lifetime") this is no longer true, as we now use the request completion as an indicator that the schedule dependency chain is complete instead. (This allows us to then reschedule requests even while their contexts are in flight.) However, this makes the GEM_BUG_ON() inside execlists_schedule() racy as we may change the rq->prio at the same time. As the assertion is useful, let's keep it and remove the micro-optimisation.
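To make the race concrete, here is a rough sketch of its shape (the store comes from the lines deleted below; the assertion is only a stand-in for the real GEM_BUG_ON() in execlists_schedule(), not a quote of it):

	/* CPU A: completion/cancel path, with the micro-optimisation in place */
	rq->priotree.priority = INT_MAX;	/* plain store, no lock held */

	/* CPU B: execlists_schedule() walking the dependency chain */
	spin_lock_irq(&engine->timeline->lock);
	GEM_BUG_ON(rq->priotree.priority > prio);	/* stand-in assertion: the
							 * unlocked store above can
							 * make it fire spuriously */
	spin_unlock_irq(&engine->timeline->lock);

With the unlocked INT_MAX store removed, rq->priotree.priority is no longer rewritten behind the scheduler's back, so the assertion cannot trip on a concurrent store from the completion path.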
Fixes: 1f181225f8ec ("drm/i915/execlists: Keep request->priority for its lifetime") Signed-off-by: Chris Wilson Cc: Michał Winiarski Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171024115501.21033-1-chris@chris-wilson.co.uk Reviewed-by: Michał Winiarski (cherry picked from commit 64b80085dd3603d401fc05879f700b86a3a5c8e8) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_lrc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 5b87d5284a84..d36e25607435 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -734,7 +734,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { INIT_LIST_HEAD(&rq->priotree.link); - rq->priotree.priority = INT_MAX; dma_fence_set_error(&rq->fence, -EIO); __i915_gem_request_submit(rq); @@ -891,7 +890,6 @@ static void intel_lrc_irq_handler(unsigned long data) execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); trace_i915_gem_request_out(rq); - rq->priotree.priority = INT_MAX; i915_gem_request_put(rq); execlists_port_complete(execlists, port); From b58d4bef1070c6ca57008756d86f79a687d2bbea Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Mon, 23 Oct 2017 18:32:09 +0300 Subject: [PATCH 3/9] drm/i915: Disable lazy PPGTT page table optimization for vGPU When running under virtualization (vGPU active), we must disable the lazy PPGTT page table initialization optimization introduced by commit 14826673247e ("drm/i915: Only initialize partially filled pagetables"). We must do this because GVT-g makes undue assumptions about guest behaviour, which this optimization breaks. This results in errors like the following in the host: ERROR gvt: guest page write error -22, gfn 0x7ada8, pa 0x7ada89a8, var 0x6, len 1 The real fix is not to depend on i915 driver behaviour, but instead either rely only on the contracts that i915 has with the hardware, or add some paravirtualization. While the real fix is en route, it won't be finished in time for 4.15, so the best option is to disable the optimization for now when vGPU is active, to avoid breaking 4.15 guests in existing VM environments. Fixes: 14826673247e ("drm/i915: Only initialize partially filled pagetables") Suggested-by: Xiaolin Zhang Signed-off-by: Xiaolin Zhang [Joonas: Rewrote the commit message and added tags.]
Signed-off-by: Joonas Lahtinen Cc: Zhenyu Wang Cc: Zhi Wang Cc: Chris Wilson Cc: Matthew Auld Cc: Joonas Lahtinen Cc: Jani Nikula Cc: Rodrigo Vivi Acked-by: Chris Wilson Acked-by: Zhenyu Wang Link: https://patchwork.freedesktop.org/patch/msgid/20171023153209.10527-1-joonas.lahtinen@linux.intel.com (cherry picked from commit 22a8a4fc93b14b5a8cfc785edbdc6f7bd98bffb6) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 527a2d2d6281..5eaa6893daaa 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1341,7 +1341,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, if (IS_ERR(pt)) goto unwind; - if (count < GEN8_PTES) + if (count < GEN8_PTES || intel_vgpu_active(vm->i915)) gen8_initialize_pt(vm, pt); gen8_ppgtt_set_pde(vm, pd, pt, pde); From 90c702b88e5b5bbe2be53848aa3aba7c2b64223f Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 19 Oct 2017 17:13:41 +0200 Subject: [PATCH 4/9] drm/i915: Calculate ironlake intermediate watermarks correctly, v2. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The intermediate watermarks should be calculated against the old optimal watermarks. The currently active crtc watermarks are pure fiction, and are invalid in the case of a nonblocking modeset, a page flip, planes being enabled or disabled, or any other reason. When the crtc is disabled or during a modeset the intermediate watermarks don't need to be programmed separately, and can simply be assigned the optimal watermarks. Changes since v1: - Use intel_atomic_get_old_crtc_state. (ville) Signed-off-by: Maarten Lankhorst Reviewed-by: Ville Syrjälä Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20171019151341.4579-2-maarten.lankhorst@linux.intel.com [mlankhorst: Add cc stable and bugzilla link, since previous patch doesn't fix issue by itself] Cc: stable@vger.kernel.org #v4.8+ Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102373 (cherry picked from commit b6b178a77210055b153dbc175e4468bd3c7122df) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_pm.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c42a65a93b3a..7d2ecabc5de5 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3133,7 +3133,11 @@ static int ilk_compute_intermediate_wm(struct drm_device *dev, struct intel_crtc_state *newstate) { struct intel_pipe_wm *a = &newstate->wm.ilk.intermediate; - struct intel_pipe_wm *b = &intel_crtc->wm.active.ilk; + struct intel_atomic_state *intel_state = + to_intel_atomic_state(newstate->base.state); + const struct intel_crtc_state *oldstate = + intel_atomic_get_old_crtc_state(intel_state, intel_crtc); + const struct intel_pipe_wm *b = &oldstate->wm.ilk.optimal; int level, max_level = ilk_wm_max_level(to_i915(dev)); /* @@ -3142,6 +3146,9 @@ static int ilk_compute_intermediate_wm(struct drm_device *dev, * and after the vblank.
*/ *a = newstate->wm.ilk.optimal; + if (!newstate->base.active || drm_atomic_crtc_needs_modeset(&newstate->base)) + return 0; + a->pipe_enabled |= b->pipe_enabled; a->sprites_enabled |= b->sprites_enabled; a->sprites_scaled |= b->sprites_scaled; From 0f763ff370a538c07f8f266a787ebc1a63d5c7dc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Nov 2017 11:15:08 +0000 Subject: [PATCH 5/9] drm/i915: Lock llist_del_first() vs llist_del_all() An oversight in commit 87701b4b5593 ("drm/i915: Only free the oldest stale object before a fresh allocation") was that not only do we have to serialise concurrent users of llist_del_first(), but we also have to lock llist_del_first() vs llist_del_all(). From llist.h, * This can be summarized as follows: * * | add | del_first | del_all * add | - | - | - * del_first | | L | L * del_all | | | - * * Where, a particular row's operation can happen concurrently with a column's * operation, with "-" being no lock needed, while "L" being lock is needed. This should hopefully explain: <4>[ 89.287106] general protection fault: 0000 [#1] PREEMPT SMP <4>[ 89.287126] Modules linked in: snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic x86_pkg_temp_thermal intel_powerclamp coretemp i915 crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core r8169 mii mei_me mei snd_pcm prime_numbers i2c_hid pinctrl_geminilake pinctrl_intel <4>[ 89.287226] CPU: 2 PID: 23 Comm: ksoftirqd/2 Tainted: G U 4.14.0-rc8-CI-CI_DRM_3315+ #1 <4>[ 89.287247] Hardware name: Intel Corp. Geminilake/GLK RVP2 LP4SD (07), BIOS GELKRVPA.X64.0062.B30.1708222146 08/22/2017 <4>[ 89.287270] task: ffff88017ab34ec0 task.stack: ffffc90000128000 <4>[ 89.287290] RIP: 0010:llist_add_batch+0x4/0x20 <4>[ 89.287301] RSP: 0018:ffffc9000012bdb8 EFLAGS: 00010296 <4>[ 89.287314] RAX: ffffffff811017ad RBX: 6e468801a1560000 RCX: ef3e53fceecdeb81 <4>[ 89.287330] RDX: 6e468801a1566130 RSI: ffff880103d73d98 RDI: ffff880103d73d98 <4>[ 89.287346] RBP: ffffc9000012bdb8 R08: ffff88017ab35780 R09: 0000000000000000 <4>[ 89.287361] R10: ffffc9000012bd68 R11: 00000000abb18c3d R12: ffffffffa01369e0 <4>[ 89.287377] R13: ffff88017fd1b8f8 R14: ffff88017ab34ec0 R15: 000000000000000a <4>[ 89.287393] FS: 0000000000000000(0000) GS:ffff88017fd00000(0000) knlGS:0000000000000000 <4>[ 89.287411] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4>[ 89.287424] CR2: 00007ff0c0755018 CR3: 000000016df9b000 CR4: 00000000003406e0 <4>[ 89.287440] Call Trace: <4>[ 89.287511] __i915_gem_free_object_rcu+0x20/0x40 [i915] <4>[ 89.287527] rcu_process_callbacks+0x27a/0x730 <4>[ 89.287544] __do_softirq+0xc0/0x4ae <4>[ 89.287559] ? smpboot_thread_fn+0x2d/0x280 <4>[ 89.287571] run_ksoftirqd+0x1f/0x70 <4>[ 89.287582] smpboot_thread_fn+0x18a/0x280 <4>[ 89.287595] kthread+0x114/0x150 <4>[ 89.287605] ? sort_range+0x30/0x30 <4>[ 89.287615] ? kthread_create_on_node+0x40/0x40 <4>[ 89.287628] ret_from_fork+0x27/0x40 <4>[ 89.287641] Code: 0d 48 83 ea 01 4c 89 c1 48 83 fa ff 74 12 48 23 0c d7 74 ed 48 c1 e2 06 48 0f bd c9 48 8d 04 0a 5d c3 90 90 90 90 90 55 48 89 e5 <48> 8b 0a 48 89 0e 48 89 c8 f0 48 0f b1 3a 48 39 c1 75 ed 48 85 <1>[ 89.287774] RIP: llist_add_batch+0x4/0x20 RSP: ffffc9000012bdb8 <4>[ 89.287826] ---[ end trace e775d15174d8ae02 ]--- (Lockless lists are only easy (and lockless) when only using llist_add/llist_del_all!) 
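As a self-contained illustration of that table (hypothetical stash/free_list names, not the i915 code): producers stay lock-free, but because one consumer pops entries with llist_del_first(), a llist_del_all() consumer must take the same lock.

	struct stash {
		struct llist_head free_list;
		spinlock_t free_lock;
	};

	/* producer: llist_add() needs no lock, per the table above */
	static void stash_add(struct stash *s, struct llist_node *n)
	{
		llist_add(n, &s->free_list);
	}

	/* consumer A: pops a single entry; del_first requires the lock */
	static struct llist_node *stash_pop_one(struct stash *s)
	{
		struct llist_node *n;

		spin_lock(&s->free_lock);
		n = llist_del_first(&s->free_list);
		spin_unlock(&s->free_lock);

		return n;
	}

	/* consumer B: drains the list; as consumer A exists, del_all must
	 * be serialised with it ("L" in the del_first vs del_all cell)
	 */
	static struct llist_node *stash_drain(struct stash *s)
	{
		struct llist_node *batch;

		spin_lock(&s->free_lock);
		batch = llist_del_all(&s->free_list);
		spin_unlock(&s->free_lock);

		return batch;
	}

That is the shape of the fix below: mm.free_list already has a llist_del_first() user running under mm.free_lock, so the free worker's llist_del_all() now takes the same lock around each grab.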
Fixes: 87701b4b5593 ("drm/i915: Only free the oldest stale object before a fresh allocation") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171106111508.11941-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala (cherry picked from commit f991c492aa55fb1c6834882c5d786d5bb3b25f07) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 026cb52ece0b..94b23fcbc989 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4603,11 +4603,17 @@ static void __i915_gem_free_work(struct work_struct *work) * unbound now. */ + spin_lock(&i915->mm.free_lock); while ((freed = llist_del_all(&i915->mm.free_list))) { + spin_unlock(&i915->mm.free_lock); + __i915_gem_free_objects(i915, freed); if (need_resched()) - break; + return; + + spin_lock(&i915->mm.free_lock); } + spin_unlock(&i915->mm.free_lock); } static void __i915_gem_free_object_rcu(struct rcu_head *head) From 0676e79415a1e1a2186ba2529abad3d3329bea95 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 8 Nov 2017 09:44:00 +0000 Subject: [PATCH 6/9] drm/i915: Idle the GPU before shrinking everything The handling of contexts is peculiar. Instead of tying their vma to activity, we pin the context. This means that we cannot simply unbind the context object itself at will (which would normally cause us to wait for the vma to be idle), but must manually idle the GPU and retire requests first. A consequence of this peculiarity shows up when making a last desperate attempt to recover memory. If the memory is tied up inside active context objects, we will fail to recover any memory simply by trying to unbind the objects without first doing a wait-for-idle. A side-effect of removing the call to shrinker_lock_uninterruptible() from i915_gem_shrinker_oom() was that we removed an unlocked wait-for-idle, and so lost the "natural" shrinkage of context objects. By replacing that with a locked wait from inside i915_gem_shrink(), we not only regain the ability to recover all context objects, but do so for all i915_gem_shrink_all() callers. v2: Switching requires request allocation, which is not permitted from inside the shrinker as it only uses ordinary allocations. References: https://bugs.freedesktop.org/show_bug.cgi?id=102936 Fixes: f2123818ffad ("drm/i915: Move dev_priv->mm.[un]bound_list to its own lock") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171108094400.1386-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen (cherry picked from commit 2f6a3783833dde63f1c08982943a8b2229b97afb) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_shrinker.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index eb31f8aa5c21..3770e3323fc8 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -162,6 +162,18 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, if (!shrinker_lock(dev_priv, &unlock)) return 0; + /* + * When shrinking the active list, also consider active contexts. + * Active contexts are pinned until they are retired, and so can + * not be simply unbound to retire and unpin their pages. To shrink + * the contexts, we must wait until the gpu is idle.
+ * + * We don't care about errors here; if we cannot wait upon the GPU, + * we will free as much as we can and hope to get a second chance. + */ + if (flags & I915_SHRINK_ACTIVE) + i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED); + trace_i915_gem_shrink(dev_priv, target, flags); i915_gem_retire_requests(dev_priv); From 398c13b9632027d044a0ab41538743214284c2a2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Nov 2017 22:06:56 +0000 Subject: [PATCH 7/9] drm/i915: Prune the reservation shared fence array The shared fence array is not autopruning and may continue to grow as an object is shared between new timelines. Take the opportunity when we think the object is idle (we have to confirm that any external fence is also signaled) to decouple all the fences. We apply a similar trick after waiting on an object, see commit e54ca9774777 ("drm/i915: Remove completed fences after a wait"). v2: No longer need to handle the batch pool as a special case. v3: Need to trylock from within i915_vma_retire as this may be called from the shrinker - and we may later try to allocate underneath the reservation lock, so a deadlock is possible. References: https://bugs.freedesktop.org/show_bug.cgi?id=102936 Fixes: d07f0e59b2c7 ("drm/i915: Move GEM activity tracking into a common struct reservation_object") Fixes: 80b204bce8f2 ("drm/i915: Enable multiple timelines") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171107220656.5020-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen (cherry picked from commit 1ab22356b37ab08a391d6f007fda4c822bef9fb5) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_vma.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index fc77e8191eb5..fbfab2f33023 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -54,6 +54,13 @@ i915_vma_retire(struct i915_gem_active *active, if (--obj->active_count) return; + /* Prune the shared fence arrays iff completely idle (inc. external) */ + if (reservation_object_trylock(obj->resv)) { + if (reservation_object_test_signaled_rcu(obj->resv, true)) + reservation_object_add_excl_fence(obj->resv, NULL); + reservation_object_unlock(obj->resv); + } + /* Bump our place on the bound list to keep it roughly in LRU order * so that we don't steal from recently used but inactive objects * (unless we are forced to ofc!) From 6ac43272768ca901daac4076a66c2c4e3c7b9321 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 8 Nov 2017 15:35:55 +0200 Subject: [PATCH 8/9] drm/i915: Move init_clock_gating() back to where it was MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apparently setting up a bunch of GT registers before we've properly initialized the rest of the GT hardware leads to these settings being lost. So it looks like I broke HSW with commit b7048ea12fbb ("drm/i915: Do .init_clock_gating() earlier to avoid it clobbering watermarks") by doing init_clock_gating() too early. This should actually affect other platforms as well, but apparently not to such a great degree. What I was ultimately after in that commit was to move the ilk_init_lp_watermarks() call earlier. So let's undo the damage and move init_clock_gating() back to where it was, and call ilk_init_lp_watermarks() just before the watermark state readout. This highlights how fragile and messed up our init order really is.
I wonder why we even initialize the display before gem. The opposite order would make much more sense to me... v2: Keep WaRsPkgCStateDisplayPMReq:hsw early as it really must be done before all planes might get disabled. Cc: stable@vger.kernel.org Cc: Chris Wilson Cc: Mark Janes Cc: Maarten Lankhorst Cc: Daniel Vetter Cc: Joonas Lahtinen Cc: Oscar Mateo Cc: Mika Kuoppala Reported-by: Mark Janes Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103549 Fixes: b7048ea12fbb ("drm/i915: Do .init_clock_gating() earlier to avoid it clobbering watermarks") References: https://lists.freedesktop.org/archives/intel-gfx/2017-November/145432.html Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171108133555.14091-1-ville.syrjala@linux.intel.com Tested-by: Chris Wilson Reviewed-by: Chris Wilson (cherry picked from commit f72b84c677d61f201b869223a8d6e389c7bb7d3d) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 14 +++++++-- drivers/gpu/drm/i915/intel_pm.c | 44 ++++++++++++---------------- 2 files changed, 30 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e2ac976844d8..f4a9a182868f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3676,6 +3676,7 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) intel_pps_unlock_regs_wa(dev_priv); intel_modeset_init_hw(dev); + intel_init_clock_gating(dev_priv); spin_lock_irq(&dev_priv->irq_lock); if (dev_priv->display.hpd_irq_setup) @@ -14350,8 +14351,6 @@ void intel_modeset_init_hw(struct drm_device *dev) intel_update_cdclk(dev_priv); dev_priv->cdclk.logical = dev_priv->cdclk.actual = dev_priv->cdclk.hw; - - intel_init_clock_gating(dev_priv); } /* @@ -15063,6 +15062,15 @@ intel_modeset_setup_hw_state(struct drm_device *dev, struct intel_encoder *encoder; int i; + if (IS_HASWELL(dev_priv)) { + /* + * WaRsPkgCStateDisplayPMReq:hsw + * System hang if this isn't done before disabling all planes! + */ + I915_WRITE(CHICKEN_PAR1_1, + I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES); + } + intel_modeset_readout_hw_state(dev); /* HW state is read out, now we need to sanitize this mess. */ @@ -15160,6 +15168,8 @@ void intel_modeset_gem_init(struct drm_device *dev) intel_init_gt_powersave(dev_priv); + intel_init_clock_gating(dev_priv); + intel_setup_overlay(dev_priv); } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 7d2ecabc5de5..aa12a44e9a76 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5762,12 +5762,30 @@ void vlv_wm_sanitize(struct drm_i915_private *dev_priv) mutex_unlock(&dev_priv->wm.wm_mutex); } +/* + * FIXME should probably kill this and improve + * the real watermark readout/sanitation instead + */ +static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv) +{ + I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN); + I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN); + I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN); + + /* + * Don't touch WM1S_LP_EN here. + * Doing so could cause underruns. 
+ */ +} + void ilk_wm_get_hw_state(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); struct ilk_wm_values *hw = &dev_priv->wm.hw; struct drm_crtc *crtc; + ilk_init_lp_watermarks(dev_priv); + for_each_crtc(dev, crtc) ilk_pipe_wm_get_hw_state(crtc); @@ -8214,18 +8232,6 @@ static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv) } } -static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv) -{ - I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN); - I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN); - I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN); - - /* - * Don't touch WM1S_LP_EN here. - * Doing so could cause underruns. - */ -} - static void ilk_init_clock_gating(struct drm_i915_private *dev_priv) { uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; @@ -8259,8 +8265,6 @@ static void ilk_init_clock_gating(struct drm_i915_private *dev_priv) (I915_READ(DISP_ARB_CTL) | DISP_FBC_WM_DIS)); - ilk_init_lp_watermarks(dev_priv); - /* * Based on the document from hardware guys the following bits * should be set unconditionally in order to enable FBC. @@ -8373,8 +8377,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_GT_MODE, _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); - ilk_init_lp_watermarks(dev_priv); - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); @@ -8601,8 +8603,6 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv) I915_GTT_PAGE_SIZE_2M); enum pipe pipe; - ilk_init_lp_watermarks(dev_priv); - /* WaSwitchSolVfFArbitrationPriority:bdw */ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); @@ -8653,8 +8653,6 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv) static void hsw_init_clock_gating(struct drm_i915_private *dev_priv) { - ilk_init_lp_watermarks(dev_priv); - /* L3 caching of data atomics doesn't work -- disable it. */ I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); I915_WRITE(HSW_ROW_CHICKEN3, @@ -8698,10 +8696,6 @@ static void hsw_init_clock_gating(struct drm_i915_private *dev_priv) /* WaSwitchSolVfFArbitrationPriority:hsw */ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); - /* WaRsPkgCStateDisplayPMReq:hsw */ - I915_WRITE(CHICKEN_PAR1_1, - I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES); - lpt_init_clock_gating(dev_priv); } @@ -8709,8 +8703,6 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv) { uint32_t snpcr; - ilk_init_lp_watermarks(dev_priv); - I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE); /* WaDisableEarlyCull:ivb */ From e8c49fa96838101435b9e4884d49b30da7a4e0c6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 9 Nov 2017 08:55:40 +0000 Subject: [PATCH 9/9] drm/i915: Reorder context-close to avoid calling i915_vma_close() under RCU When we close the VMA, we unbind it from the ppgtt and tear down the page directory pointing at it. That may trigger us to return WC pages back to the system, requiring conversion back to WB which itself may sleep. That makes i915_vma_close() unsuitable for use inside the RCU read lock, which we need to hold to iterate the radixtree. The fix is quite simple, we can close all the VMA as we close the ppgtt, we only need to do that instead of closing them during destruction of the LUT. 
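To make the problem concrete, this is roughly the shape being removed (condensed from the old lut_close(); see the diff below for the exact change). The whole walk sits inside an RCU read-side critical section, yet it called i915_vma_close(), which may sleep:

	struct radix_tree_iter iter;
	void __rcu **slot;

	rcu_read_lock();
	radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
		struct i915_vma *vma = rcu_dereference_raw(*slot);
		struct drm_i915_gem_object *obj = vma->obj;

		radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);

		if (!i915_vma_is_ggtt(vma))
			i915_vma_close(vma);	/* may sleep: WC pages may need
						 * converting back to WB */

		__i915_gem_object_release_unless_active(obj);
	}
	rcu_read_unlock();

After this change the walk only drops the object reference, and the vma are instead closed as part of the ppgtt teardown, outside any RCU section.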
v2: Order between closing the LUT and the ppgtt is important; we use the vma inside the LUT as a means of retrieving the object, and so we must clear the LUT before freeing the VMA when closing the ppgtt. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103638 Fixes: 547da76b5777 ("drm/i915: Hold rcu_read_lock when iterating over the radixtree (vma idr)") Fixes: d1b48c1e7184 ("drm/i915: Replace execbuf vma ht with an idr") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20171109085540.32264-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin (cherry picked from commit 94dec87159af6f3dcc0b78d3f909aefa9e29c01a) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_context.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 5bf96a258509..e304dcbc6042 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -106,14 +106,9 @@ static void lut_close(struct i915_gem_context *ctx) radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) { struct i915_vma *vma = rcu_dereference_raw(*slot); - struct drm_i915_gem_object *obj = vma->obj; radix_tree_iter_delete(&ctx->handles_vma, &iter, slot); - - if (!i915_vma_is_ggtt(vma)) - i915_vma_close(vma); - - __i915_gem_object_release_unless_active(obj); + __i915_gem_object_release_unless_active(vma->obj); } } @@ -198,6 +193,11 @@ static void context_close(struct i915_gem_context *ctx) { i915_gem_context_set_closed(ctx); + /* + * The LUT uses the VMA as a backpointer to unref the object, + * so we need to clear the LUT before we close all the VMA (inside + * the ppgtt). + */ lut_close(ctx); if (ctx->ppgtt) i915_ppgtt_close(&ctx->ppgtt->base);