diff options
| author | magh <magh@maghmogh.com> | 2023-03-06 18:44:55 -0600 |
|---|---|---|
| committer | magh <magh@maghmogh.com> | 2023-03-06 18:44:55 -0600 |
| commit | e80d9d8871b325a04b18f90a9ea4bb7fd148fb25 (patch) | |
| tree | 79dbdb8506b7ff1e92549188d1b94cfc0b3503ae /tools/proxyclient/experiments/cpu_pstates.py | |
Diffstat (limited to 'tools/proxyclient/experiments/cpu_pstates.py')
| -rwxr-xr-x | tools/proxyclient/experiments/cpu_pstates.py | 150 |
1 files changed, 150 insertions, 0 deletions
#!/usr/bin/env python3
# SPDX-License-Identifier: MIT
#
# Origin: tools/proxyclient/experiments/cpu_pstates.py (new file, mode 100755,
# blob 7403775). Reconstructed from a flattened unified diff.
"""Experiment: read and change CPU cluster performance states via the m1n1 proxy.

The script uploads a tiny counting loop to the target, times it on every CPU
before and after switching the second cluster (index 1, labelled "P-Core" in
the output) to pstate 15, and prints the resulting loop rates.
"""
import sys
import pathlib
import time

# Make the proxyclient package importable when run from the experiments dir.
sys.path.append(str(pathlib.Path(__file__).resolve().parents[1]))

from m1n1.setup import *
from m1n1 import asm

# Iterations of the benchmark loop executed per measurement.
LOOPS = 10000000
# Number of CPUs benchmarked (CPU 0 is the one running the proxy call directly).
NUM_CPUS = 8
# Tick rate of the counter read by the benchmark stub (CNTPCT_EL0).
freq = u.mrs(CNTFRQ_EL0)

# Per-cluster register block base addresses; index 0 is printed as "E-Core",
# index 1 as "P-Core".
CREG = [
    0x210e00000,
    0x211e00000,
]

# Register offsets relative to a CREG base.
CLUSTER_PSTATE = 0x20020
CLUSTER_DVMR = 0x206b8
CLUSTER_LIMIT2 = 0x40240
CLUSTER_LIMIT3 = 0x40250
CLUSTER_LIMIT1 = 0x48400

PMGR_CPUGATING = 0x1c080
CLUSTER_CTRL = 0x440f8
CLUSTER_PSCTRL = 0x200f8

# Width of the pstate fields written by set_pstate(): the mask 0xf00f covers
# two 4-bit fields (bits 3:0 and bits 15:12).
PSTATE_FIELD_MAX = 0xf

# e-core pstates
# 600 972 1332 1704 2064
# p-core pstates
# 600 828 1056 1284 1500 1728 1956 2184 2388 2592 2772 2988 3096 3144 3204

code = u.malloc(0x1000)

# bench(x0 = iteration count) -> x0 = elapsed CNTPCT_EL0 ticks for the loop.
util = asm.ARMAsm("""
bench:
    mrs x1, CNTPCT_EL0
1:
    sub x0, x0, #1
    cbnz x0, 1b

    mrs x2, CNTPCT_EL0
    sub x0, x2, x1
    ret
""", code)
iface.writemem(code, util.data)
# Cache maintenance over the uploaded stub (presumably so the CPUs fetch the
# freshly written instructions rather than stale cache contents).
p.dc_cvau(code, len(util.data))
p.ic_ivau(code, len(util.data))


def bench_cpu(idx):
    """Time the benchmark loop on CPU ``idx`` and return its rate.

    CPU 0 runs the stub through ``p.call``; any other index goes through
    ``p.smp_call_sync``.

    Returns:
        Millions of loop iterations per second (printed by this script as
        "MHz"), or 0 if the measured elapsed time is zero.
    """
    if idx == 0:
        elapsed = p.call(util.bench, LOOPS) / freq
    else:
        elapsed = p.smp_call_sync(idx, util.bench, LOOPS) / freq
    if elapsed == 0:
        # Avoid dividing by zero when no counter ticks were observed.
        return 0
    return (LOOPS / elapsed) / 1000000


def set_pstate(cluster, pstate):
    """Request performance state ``pstate`` for cluster index ``cluster``.

    Args:
        cluster: Index into ``CREG`` (0 or 1).
        pstate: Target state; must fit the 4-bit register fields (0..15).

    Raises:
        ValueError: If ``pstate`` does not fit in 4 bits. Without this check
            an oversized value would be OR-ed into bits outside the cleared
            mask and silently modify neighbouring fields.
    """
    if not 0 <= pstate <= PSTATE_FIELD_MAX:
        raise ValueError(
            f"pstate {pstate!r} out of range 0..{PSTATE_FIELD_MAX}")

    # This really seems to be all that's needed
    p.mask64(CREG[cluster] + CLUSTER_PSTATE, 0xf00f,
             (1 << 25) | pstate | (pstate << 12))

    # Optionally, adjust MCC performance in higher p-core pstates
    if cluster == 1:
        if pstate > 8:
            p0, p1 = 0x133, 0x55555340
        else:
            p0, p1 = 0x813057f, 0x1800180

        for lane in range(8):
            p.write32(0x200200dc4 + lane * 0x40000, p0)
            p.write32(0x200200dbc + lane * 0x40000, p1)

    # This seems to be about notifying PMP
    #p.write32(0x23b738004 + cluster*4, pstate)
    #p.write32(0x23bc34000, 1 << cluster)


def _print_cluster_pstates():
    """Read and print the raw CLUSTER_PSTATE register of both clusters."""
    e_pstate = p.read64(CREG[0] + CLUSTER_PSTATE)
    p_pstate = p.read64(CREG[1] + CLUSTER_PSTATE)

    print(f"E-Core pstate: {e_pstate:x}")
    print(f"P-Core pstate: {p_pstate:x}")


def _print_cpu_frequencies(title):
    """Print ``title`` followed by one benchmark result line per CPU."""
    print(title)

    for cpu in range(NUM_CPUS):
        print(f"CPU {cpu}: {bench_cpu(cpu):.2f} MHz")


print()

_print_cluster_pstates()

#for cluster in range(2):
    #print(f"Initializing cluster {cluster} (early)")

    #p.write64(CREG[cluster] + 0x20660, 0x1000000015)
    #p.write64(CREG[cluster] + 0x48000, 0)
    #p.write64(CREG[cluster] + 0x48080, 0xa000000000000000)

    #p.clear64(CREG[cluster] + CLUSTER_PSTATE, 1<<22)

#p.set32(PMGR + 0x48000, 1)
#p.set32(PMGR + 0x48c00, 1)
#p.set32(PMGR + 0x48800, 1)
#p.set32(PMGR + 0x48400, 1)

for cluster, base in enumerate(CREG):
    print(f"Initializing cluster {cluster}")
    ena = (1 << 63)
    val = p.read64(base + CLUSTER_DVMR)
    if cluster == 1:
        # The second cluster needs two additional enable bits.
        ena |= (1 << 32) | (1 << 31)
    if (val & ena) != ena:
        # NOTE(review): the flattened source does not show indentation; the
        # write is assumed to belong to this branch (only set missing bits).
        print(f"DVMR: {val:#x} -> {val|ena:#x}")
        p.set64(base + CLUSTER_DVMR, ena)  # CLUSTER_DVMR

    #p.set64(CREG[cluster] + CLUSTER_LIMIT1, 1<<63)
    #p.clear64(CREG[cluster] + CLUSTER_LIMIT2, 1<<63)
    #p.set64(CREG[cluster] + CLUSTER_LIMIT3, 1<<63)

    #p.set64(CREG[cluster] + CLUSTER_PSTATE, 0)

    #p.set32(PMGR + PMGR_CPUGATING + 8 * cluster, 1<<31)

    #p.write64(CREG[cluster] + CLUSTER_CTRL, 1)

    #p.set64(CREG[cluster] + CLUSTER_PSCTRL, 1<<40)

    #pstate = p.read64(CREG[cluster] + CLUSTER_PSTATE) & 0xf

p.smp_start_secondaries()

_print_cpu_frequencies("== Initial CPU frequencies ==")

set_pstate(1, 15)

_print_cluster_pstates()

# Pause before re-measuring (presumably to let the new pstate take effect).
time.sleep(0.5)

#elapsed = p.smp_call(7, util.bench, 80000000)

_print_cpu_frequencies("== Final CPU frequencies ==")

#elapsed = p.smp_wait(7)
