mirror of
https://github.com/LibreELEC/LibreELEC.tv
synced 2025-09-24 19:46:01 +07:00
RPiOS 23.2.0-rc3 patches rebased onto 23.2.1 Signed-off-by: Matthias Reichl <hias@horus.com>
236 lines
9.4 KiB
Diff
236 lines
9.4 KiB
Diff
From f9a76b3a1e316e5ed6387819b87eaaf60f989a2b Mon Sep 17 00:00:00 2001
|
|
From: Iago Toral Quiroga <itoral@igalia.com>
|
|
Date: Tue, 26 Oct 2021 11:43:02 +0200
|
|
Subject: [PATCH 050/139] broadcom/compiler: update peripheral access
|
|
restrictions for v71
|
|
|
|
In V3D 4.x only a couple of simultaneous accesses were allowed, but
|
|
V3D 7.x is a bit more flexible, so rather than trying to check for all
|
|
the allowed combinations it is easier to check if we are one of the
|
|
disallowed ones.
|
|
|
|
Shader-db (pi5):
|
|
|
|
total instructions in shared programs: 11338883 -> 11307386 (-0.28%)
|
|
instructions in affected programs: 2727201 -> 2695704 (-1.15%)
|
|
helped: 12555
|
|
HURT: 289
|
|
Instructions are helped.
|
|
|
|
total max-temps in shared programs: 2230199 -> 2229260 (-0.04%)
|
|
max-temps in affected programs: 20508 -> 19569 (-4.58%)
|
|
helped: 608
|
|
HURT: 4
|
|
Max-temps are helped.
|
|
|
|
total sfu-stalls in shared programs: 15236 -> 15293 (0.37%)
|
|
sfu-stalls in affected programs: 148 -> 205 (38.51%)
|
|
helped: 38
|
|
HURT: 64
|
|
Inconclusive result (%-change mean confidence interval includes 0).
|
|
|
|
total inst-and-stalls in shared programs: 11354119 -> 11322679 (-0.28%)
|
|
inst-and-stalls in affected programs: 2732262 -> 2700822 (-1.15%)
|
|
helped: 12550
|
|
HURT: 304
|
|
Inst-and-stalls are helped.
|
|
|
|
total nops in shared programs: 273711 -> 274095 (0.14%)
|
|
nops in affected programs: 9626 -> 10010 (3.99%)
|
|
helped: 186
|
|
HURT: 397
|
|
Nops are HURT.
|
|
---
|
|
src/broadcom/compiler/qpu_schedule.c | 88 +++++++++++++++++++++-------
|
|
src/broadcom/compiler/qpu_validate.c | 2 +-
|
|
src/broadcom/qpu/qpu_instr.c | 16 +++--
|
|
src/broadcom/qpu/qpu_instr.h | 2 +
|
|
4 files changed, 82 insertions(+), 26 deletions(-)
|
|
|
|
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
|
|
index e8197661f89..adb501e85ce 100644
|
|
--- a/src/broadcom/compiler/qpu_schedule.c
|
|
+++ b/src/broadcom/compiler/qpu_schedule.c
|
|
@@ -790,7 +790,8 @@ enum {
|
|
V3D_PERIPHERAL_TMU_WAIT = (1 << 6),
|
|
V3D_PERIPHERAL_TMU_WRTMUC_SIG = (1 << 7),
|
|
V3D_PERIPHERAL_TSY = (1 << 8),
|
|
- V3D_PERIPHERAL_TLB = (1 << 9),
|
|
+ V3D_PERIPHERAL_TLB_READ = (1 << 9),
|
|
+ V3D_PERIPHERAL_TLB_WRITE = (1 << 10),
|
|
};
|
|
|
|
static uint32_t
|
|
@@ -815,8 +816,10 @@ qpu_peripherals(const struct v3d_device_info *devinfo,
|
|
if (v3d_qpu_uses_sfu(inst))
|
|
result |= V3D_PERIPHERAL_SFU;
|
|
|
|
- if (v3d_qpu_uses_tlb(inst))
|
|
- result |= V3D_PERIPHERAL_TLB;
|
|
+ if (v3d_qpu_reads_tlb(inst))
|
|
+ result |= V3D_PERIPHERAL_TLB_READ;
|
|
+ if (v3d_qpu_writes_tlb(inst))
|
|
+ result |= V3D_PERIPHERAL_TLB_WRITE;
|
|
|
|
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
|
|
if (inst->alu.add.op != V3D_QPU_A_NOP &&
|
|
@@ -847,32 +850,75 @@ qpu_compatible_peripheral_access(const struct v3d_device_info *devinfo,
|
|
if (devinfo->ver < 41)
|
|
return false;
|
|
|
|
- /* V3D 4.1+ allow WRTMUC signal with TMU register write (other than
|
|
- * tmuc).
|
|
+ /* V3D 4.x can't do more than one peripheral access except in a
|
|
+ * few cases:
|
|
*/
|
|
- if (a_peripherals == V3D_PERIPHERAL_TMU_WRTMUC_SIG &&
|
|
- b_peripherals == V3D_PERIPHERAL_TMU_WRITE) {
|
|
- return v3d_qpu_writes_tmu_not_tmuc(devinfo, b);
|
|
+ if (devinfo->ver <= 42) {
|
|
+ /* WRTMUC signal with TMU register write (other than tmuc). */
|
|
+ if (a_peripherals == V3D_PERIPHERAL_TMU_WRTMUC_SIG &&
|
|
+ b_peripherals == V3D_PERIPHERAL_TMU_WRITE) {
|
|
+ return v3d_qpu_writes_tmu_not_tmuc(devinfo, b);
|
|
+ }
|
|
+ if (b_peripherals == V3D_PERIPHERAL_TMU_WRTMUC_SIG &&
|
|
+ a_peripherals == V3D_PERIPHERAL_TMU_WRITE) {
|
|
+ return v3d_qpu_writes_tmu_not_tmuc(devinfo, a);
|
|
+ }
|
|
+
|
|
+ /* TMU read with VPM read/write. */
|
|
+ if (a_peripherals == V3D_PERIPHERAL_TMU_READ &&
|
|
+ (b_peripherals == V3D_PERIPHERAL_VPM_READ ||
|
|
+ b_peripherals == V3D_PERIPHERAL_VPM_WRITE)) {
|
|
+ return true;
|
|
+ }
|
|
+ if (b_peripherals == V3D_PERIPHERAL_TMU_READ &&
|
|
+ (a_peripherals == V3D_PERIPHERAL_VPM_READ ||
|
|
+ a_peripherals == V3D_PERIPHERAL_VPM_WRITE)) {
|
|
+ return true;
|
|
+ }
|
|
+
|
|
+ return false;
|
|
}
|
|
|
|
- if (a_peripherals == V3D_PERIPHERAL_TMU_WRITE &&
|
|
- b_peripherals == V3D_PERIPHERAL_TMU_WRTMUC_SIG) {
|
|
- return v3d_qpu_writes_tmu_not_tmuc(devinfo, a);
|
|
+ /* V3D 7.x can't have more than one of these restricted peripherals */
|
|
+ const uint32_t restricted = V3D_PERIPHERAL_TMU_WRITE |
|
|
+ V3D_PERIPHERAL_TMU_WRTMUC_SIG |
|
|
+ V3D_PERIPHERAL_TSY |
|
|
+ V3D_PERIPHERAL_TLB_READ |
|
|
+ V3D_PERIPHERAL_SFU |
|
|
+ V3D_PERIPHERAL_VPM_READ |
|
|
+ V3D_PERIPHERAL_VPM_WRITE;
|
|
+
|
|
+ const uint32_t a_restricted = a_peripherals & restricted;
|
|
+ const uint32_t b_restricted = b_peripherals & restricted;
|
|
+ if (a_restricted && b_restricted) {
|
|
+ /* WRTMUC signal with TMU register write (other than tmuc) is
|
|
+ * allowed though.
|
|
+ */
|
|
+ if (!((a_restricted == V3D_PERIPHERAL_TMU_WRTMUC_SIG &&
|
|
+ b_restricted == V3D_PERIPHERAL_TMU_WRITE &&
|
|
+ v3d_qpu_writes_tmu_not_tmuc(devinfo, b)) ||
|
|
+ (b_restricted == V3D_PERIPHERAL_TMU_WRTMUC_SIG &&
|
|
+ a_restricted == V3D_PERIPHERAL_TMU_WRITE &&
|
|
+ v3d_qpu_writes_tmu_not_tmuc(devinfo, a)))) {
|
|
+ return false;
|
|
+ }
|
|
}
|
|
|
|
- /* V3D 4.1+ allows TMU read with VPM read/write. */
|
|
- if (a_peripherals == V3D_PERIPHERAL_TMU_READ &&
|
|
- (b_peripherals == V3D_PERIPHERAL_VPM_READ ||
|
|
- b_peripherals == V3D_PERIPHERAL_VPM_WRITE)) {
|
|
- return true;
|
|
+ /* Only one TMU read per instruction */
|
|
+ if ((a_peripherals & V3D_PERIPHERAL_TMU_READ) &&
|
|
+ (b_peripherals & V3D_PERIPHERAL_TMU_READ)) {
|
|
+ return false;
|
|
}
|
|
- if (b_peripherals == V3D_PERIPHERAL_TMU_READ &&
|
|
- (a_peripherals == V3D_PERIPHERAL_VPM_READ ||
|
|
- a_peripherals == V3D_PERIPHERAL_VPM_WRITE)) {
|
|
- return true;
|
|
+
|
|
+ /* Only one TLB access per instruction */
|
|
+ if ((a_peripherals & (V3D_PERIPHERAL_TLB_WRITE |
|
|
+ V3D_PERIPHERAL_TLB_READ)) &&
|
|
+ (b_peripherals & (V3D_PERIPHERAL_TLB_WRITE |
|
|
+ V3D_PERIPHERAL_TLB_READ))) {
|
|
+ return false;
|
|
}
|
|
|
|
- return false;
|
|
+ return true;
|
|
}
|
|
|
|
/* Compute a bitmask of which rf registers are used between
|
|
diff --git a/src/broadcom/compiler/qpu_validate.c b/src/broadcom/compiler/qpu_validate.c
|
|
index 12788692432..fde6695d59b 100644
|
|
--- a/src/broadcom/compiler/qpu_validate.c
|
|
+++ b/src/broadcom/compiler/qpu_validate.c
|
|
@@ -227,7 +227,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
|
|
vpm_writes +
|
|
tlb_writes +
|
|
tsy_writes +
|
|
- inst->sig.ldtmu +
|
|
+ (devinfo->ver <= 42 ? inst->sig.ldtmu : 0) +
|
|
inst->sig.ldtlb +
|
|
inst->sig.ldvpm +
|
|
inst->sig.ldtlbu > 1) {
|
|
diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
|
|
index 195a0dcd232..f54ce7210fb 100644
|
|
--- a/src/broadcom/qpu/qpu_instr.c
|
|
+++ b/src/broadcom/qpu/qpu_instr.c
|
|
@@ -649,12 +649,14 @@ v3d_qpu_add_op_writes_vpm(enum v3d_qpu_add_op op)
|
|
}
|
|
|
|
bool
|
|
-v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst)
|
|
+v3d_qpu_reads_tlb(const struct v3d_qpu_instr *inst)
|
|
{
|
|
- if (inst->sig.ldtlb ||
|
|
- inst->sig.ldtlbu)
|
|
- return true;
|
|
+ return inst->sig.ldtlb || inst->sig.ldtlbu;
|
|
+}
|
|
|
|
+bool
|
|
+v3d_qpu_writes_tlb(const struct v3d_qpu_instr *inst)
|
|
+{
|
|
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
|
|
if (inst->alu.add.op != V3D_QPU_A_NOP &&
|
|
inst->alu.add.magic_write &&
|
|
@@ -672,6 +674,12 @@ v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst)
|
|
return false;
|
|
}
|
|
|
|
+bool
|
|
+v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst)
|
|
+{
|
|
+ return v3d_qpu_writes_tlb(inst) || v3d_qpu_reads_tlb(inst);
|
|
+}
|
|
+
|
|
bool
|
|
v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst)
|
|
{
|
|
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
|
|
index 4b34d17bd4c..dece45c5c54 100644
|
|
--- a/src/broadcom/qpu/qpu_instr.h
|
|
+++ b/src/broadcom/qpu/qpu_instr.h
|
|
@@ -472,6 +472,8 @@ bool v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
|
|
bool v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
|
|
bool v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
|
|
bool v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
|
|
+bool v3d_qpu_reads_tlb(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
|
|
+bool v3d_qpu_writes_tlb(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
|
|
bool v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
|
|
bool v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
|
|
bool v3d_qpu_instr_is_legacy_sfu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
|
|
--
|
|
2.39.2
|
|
|