aboutsummaryrefslogtreecommitdiff
path: root/SOURCES/CVE-2024-44961.patch
diff options
context:
space:
mode:
Diffstat (limited to 'SOURCES/CVE-2024-44961.patch')
-rw-r--r-- SOURCES/CVE-2024-44961.patch 140
1 file changed, 140 insertions, 0 deletions
diff --git a/SOURCES/CVE-2024-44961.patch b/SOURCES/CVE-2024-44961.patch
new file mode 100644
index 0000000..19e2808
--- /dev/null
+++ b/SOURCES/CVE-2024-44961.patch
@@ -0,0 +1,140 @@
+From git@z Thu Jan 1 00:00:00 1970
+Subject: [PATCH 1/2] drm/amdgpu: Pass amdgpu_job directly to
+ amdgpu_ring_soft_recovery
+From: Joshua Ashton <joshua@froggi.es>
+Date: Sat, 13 Jan 2024 14:02:03 +0000
+Message-Id: <20240113140206.2383133-1-joshua@froggi.es>
+MIME-Version: 1.0
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: 8bit
+
+We will need this to change the karma in the future.
+
+Signed-off-by: Joshua Ashton <joshua@froggi.es>
+
+Cc: Friedrich Vock <friedrich.vock@gmx.de>
+Cc: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
+Cc: Christian König <christian.koenig@amd.com>
+Cc: André Almeida <andrealmeid@igalia.com>
+Cc: stable@vger.kernel.org
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 9 ++++-----
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 3 +--
+ 3 files changed, 6 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+index 78476bc75b4e..c1af7ca25912 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+@@ -52,7 +52,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
+ adev->job_hang = true;
+
+ if (amdgpu_gpu_recovery &&
+- amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
++ amdgpu_ring_soft_recovery(ring, job)) {
+ DRM_ERROR("ring %s timeout, but soft recovered\n",
+ s_job->sched->name);
+ goto exit;
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+index 45424ebf9681..25209ce54552 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+@@ -425,14 +425,13 @@ void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
+ * amdgpu_ring_soft_recovery - try to soft recover a ring lockup
+ *
+ * @ring: ring to try the recovery on
+- * @vmid: VMID we try to get going again
+- * @fence: timedout fence
++ * @job: the locked-up job
+ *
+ * Tries to get a ring proceeding again when it is stuck.
+ */
+-bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
+- struct dma_fence *fence)
++bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, struct amdgpu_job *job)
+ {
++ struct dma_fence *fence = job->base.s_fence->parent;
+ unsigned long flags;
+ ktime_t deadline;
+
+@@ -452,7 +451,7 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
+ atomic_inc(&ring->adev->gpu_reset_counter);
+ while (!dma_fence_is_signaled(fence) &&
+ ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0)
+- ring->funcs->soft_recovery(ring, vmid);
++ ring->funcs->soft_recovery(ring, job->vmid);
+
+ return dma_fence_is_signaled(fence);
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+index bbb53720a018..734df88f22d4 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+@@ -354,8 +354,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring);
+ void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t val0,
+ uint32_t reg1, uint32_t val1);
+-bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
+- struct dma_fence *fence);
++bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, struct amdgpu_job *job);
+
+ static inline void amdgpu_ring_set_preempt_cond_exec(struct amdgpu_ring *ring,
+ bool cond_exec)
+--
+2.43.0
+
+From git@z Thu Jan 1 00:00:00 1970
+Subject: [PATCH 2/2] drm/amdgpu: Mark ctx as guilty in ring_soft_recovery
+ path
+From: Joshua Ashton <joshua@froggi.es>
+Date: Sat, 13 Jan 2024 14:02:04 +0000
+Message-Id: <20240113140206.2383133-2-joshua@froggi.es>
+MIME-Version: 1.0
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: 8bit
+
+We need to bump the karma of the drm_sched job in order for the context
+that we just recovered to get correct feedback that it is guilty of
+hanging.
+
+Without this feedback, the application may keep pushing through the soft
+recoveries, continually hanging the system with jobs that time out.
+
+There is an accompanying Mesa/RADV patch here
+https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27050
+to properly handle device loss state when VRAM is not lost.
+
+With these, I was able to run Counter-Strike 2 and launch an application
+which can fault the GPU in a variety of ways, and still have Steam +
+Counter-Strike 2 + Gamescope (compositor) stay up and continue
+functioning on Steam Deck.
+
+Signed-off-by: Joshua Ashton <joshua@froggi.es>
+
+Cc: Friedrich Vock <friedrich.vock@gmx.de>
+Cc: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
+Cc: Christian König <christian.koenig@amd.com>
+Cc: André Almeida <andrealmeid@igalia.com>
+Cc: stable@vger.kernel.org
+Tested-by: Friedrich Vock <friedrich.vock@gmx.de>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+index 25209ce54552..e87cafb5b1c3 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+@@ -448,6 +448,8 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, struct amdgpu_job *job)
+ dma_fence_set_error(fence, -ENODATA);
+ spin_unlock_irqrestore(fence->lock, flags);
+
++ if (job->vm)
++ drm_sched_increase_karma(&job->base);
+ atomic_inc(&ring->adev->gpu_reset_counter);
+ while (!dma_fence_is_signaled(fence) &&
+ ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0)
+--
+2.43.0
+