summaryrefslogtreecommitdiff
path: root/tools/proxyclient/experiments/cpu_pstates.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/proxyclient/experiments/cpu_pstates.py')
-rwxr-xr-xtools/proxyclient/experiments/cpu_pstates.py150
1 files changed, 150 insertions, 0 deletions
diff --git a/tools/proxyclient/experiments/cpu_pstates.py b/tools/proxyclient/experiments/cpu_pstates.py
new file mode 100755
index 0000000..7403775
--- /dev/null
+++ b/tools/proxyclient/experiments/cpu_pstates.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: MIT
+import sys, pathlib, time
+sys.path.append(str(pathlib.Path(__file__).resolve().parents[1]))
+
+from m1n1.setup import *
+from m1n1 import asm
+
+LOOPS = 10000000
+freq = u.mrs(CNTFRQ_EL0)
+
+CREG = [
+ 0x210e00000,
+ 0x211e00000,
+]
+
+CLUSTER_PSTATE = 0x20020
+
+# e-core pstates
+# 600 972 1332 1704 2064
+# p-core pstates
+# 600 828 1056 1284 1500 1728 1956 2184 2388 2592 2772 2988 3096 3144 3204
+
+code = u.malloc(0x1000)
+
+util = asm.ARMAsm("""
+bench:
+ mrs x1, CNTPCT_EL0
+1:
+ sub x0, x0, #1
+ cbnz x0, 1b
+
+ mrs x2, CNTPCT_EL0
+ sub x0, x2, x1
+ ret
+""", code)
+iface.writemem(code, util.data)
+p.dc_cvau(code, len(util.data))
+p.ic_ivau(code, len(util.data))
+
+def bench_cpu(idx):
+ if idx == 0:
+ elapsed = p.call(util.bench, LOOPS) / freq
+ else:
+ elapsed = p.smp_call_sync(idx, util.bench, LOOPS) / freq
+ if elapsed == 0:
+ return 0
+ mhz = (LOOPS / elapsed) / 1000000
+ return mhz
+
+print()
+
+e_pstate = p.read64(CREG[0] + CLUSTER_PSTATE)
+p_pstate = p.read64(CREG[1] + CLUSTER_PSTATE)
+
+print(f"E-Core pstate: {e_pstate:x}")
+print(f"P-Core pstate: {p_pstate:x}")
+
+#for cluster in range(2):
+ #print(f"Initializing cluster {cluster} (early)")
+
+ #p.write64(CREG[cluster] + 0x20660, 0x1000000015)
+ #p.write64(CREG[cluster] + 0x48000, 0)
+ #p.write64(CREG[cluster] + 0x48080, 0xa000000000000000)
+
+ #p.clear64(CREG[cluster] + CLUSTER_PSTATE, 1<<22)
+
+#p.set32(PMGR + 0x48000, 1)
+#p.set32(PMGR + 0x48c00, 1)
+#p.set32(PMGR + 0x48800, 1)
+#p.set32(PMGR + 0x48400, 1)
+
+CLUSTER_DVMR = 0x206b8
+CLUSTER_LIMIT2 = 0x40240
+CLUSTER_LIMIT3 = 0x40250
+CLUSTER_LIMIT1 = 0x48400
+
+PMGR_CPUGATING = 0x1c080
+CLUSTER_CTRL = 0x440f8
+CLUSTER_PSCTRL = 0x200f8
+
+for cluster in range(2):
+ print(f"Initializing cluster {cluster}")
+ ena = (1<<63)
+ val = p.read64(CREG[cluster] + CLUSTER_DVMR)
+ if cluster == 1:
+ ena |= (1<<32) | (1<<31)
+ if (val & ena) != ena:
+ print(f"DVMR: {val:#x} -> {val|ena:#x}")
+ p.set64(CREG[cluster] + CLUSTER_DVMR, ena) # CLUSTER_DVMR
+
+ #p.set64(CREG[cluster] + CLUSTER_LIMIT1, 1<<63)
+ #p.clear64(CREG[cluster] + CLUSTER_LIMIT2, 1<<63)
+ #p.set64(CREG[cluster] + CLUSTER_LIMIT3, 1<<63)
+
+ #p.set64(CREG[cluster] + CLUSTER_PSTATE, 0)
+
+ #p.set32(PMGR + PMGR_CPUGATING + 8 * cluster, 1<<31)
+
+ #p.write64(CREG[cluster] + CLUSTER_CTRL, 1)
+
+ #p.set64(CREG[cluster] + CLUSTER_PSCTRL, 1<<40)
+
+ #pstate = p.read64(CREG[cluster] + CLUSTER_PSTATE) & 0xf
+
+p.smp_start_secondaries()
+
+print("== Initial CPU frequencies ==")
+
+for cpu in range(8):
+ print(f"CPU {cpu}: {bench_cpu(cpu):.2f} MHz")
+
+def set_pstate(cluster, pstate):
+ # This really seems to be all that's needed
+
+ p.mask64(CREG[cluster] + CLUSTER_PSTATE, 0xf00f, (1<<25) | pstate | (pstate << 12))
+
+ # Optionally, adjust MCC performance in higher p-core pstates
+ if cluster == 1:
+ if pstate > 8:
+ p0, p1 = 0x133, 0x55555340
+ else:
+ p0, p1 = 0x813057f, 0x1800180
+
+ for lane in range(8):
+ p.write32(0x200200dc4 + lane * 0x40000, p0)
+ p.write32(0x200200dbc + lane * 0x40000, p1)
+
+ # This seems to be about notifying PMP
+ #p.write32(0x23b738004 + cluster*4, pstate)
+ #p.write32(0x23bc34000, 1 << cluster)
+
+set_pstate(1, 15)
+
+e_pstate = p.read64(CREG[0] + CLUSTER_PSTATE)
+p_pstate = p.read64(CREG[1] + CLUSTER_PSTATE)
+
+print(f"E-Core pstate: {e_pstate:x}")
+print(f"P-Core pstate: {p_pstate:x}")
+
+time.sleep(0.5)
+
+print("== Final CPU frequencies ==")
+
+#elapsed = p.smp_call(7, util.bench, 80000000)
+
+for cpu in range(8):
+ print(f"CPU {cpu}: {bench_cpu(cpu):.2f} MHz")
+
+#elapsed = p.smp_wait(7)