summary | refs | log | tree | commit | diff
path: root/tools/proxyclient/experiments/agx_tracetimings.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/proxyclient/experiments/agx_tracetimings.py')
-rw-r--r-- tools/proxyclient/experiments/agx_tracetimings.py | 314
1 files changed, 314 insertions, 0 deletions
diff --git a/tools/proxyclient/experiments/agx_tracetimings.py b/tools/proxyclient/experiments/agx_tracetimings.py
new file mode 100644
index 0000000..52ac2c9
--- /dev/null
+++ b/tools/proxyclient/experiments/agx_tracetimings.py
@@ -0,0 +1,314 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: MIT
+# Experiment: submit one frame's command buffer to two GPURenderer instances and hand a table of translated addresses to GPIOLogicAnalyzer while they run.
+import sys, pathlib, time
+sys.path.append(str(pathlib.Path(__file__).resolve().parents[1]))  # add the parent of this script's directory to the import path
+
+import atexit, sys
+# NOTE(review): u, p and iface are not defined in this file; presumably they come from the star imports below - confirm
+from m1n1.setup import *
+from m1n1.constructutils import Ver
+from m1n1.utils import *
+
+Ver.set_version(u)  # NOTE(review): placed ahead of the m1n1.agx imports, presumably on purpose - confirm before reordering
+
+from m1n1.agx import AGX
+from m1n1.agx.render import *
+
+from m1n1.gpiola import GPIOLogicAnalyzer
+
+analyzer_cpu = 1  # CPU index passed to p.mmu_init_secondary() and to GPIOLogicAnalyzer(cpu=...)
+# Calls below: enable clocks for two ADT nodes, start the secondary CPUs, init the MMU on analyzer_cpu.
+p.pmgr_adt_clocks_enable("/arm-io/gfx-asc")
+p.pmgr_adt_clocks_enable("/arm-io/sgx")
+p.smp_start_secondaries()
+p.mmu_init_secondary(analyzer_cpu)
+iface.dev.timeout = 10  # presumably seconds - TODO confirm
+
+agx = AGX(u)
+# RegMonitor is attached to the AGX object; every mon.add() range below is currently commented out.
+mon = RegMonitor(u, ascii=True, bufsize=0x8000000)
+agx.mon = mon
+
+sgx = agx.sgx_dev
+#mon.add(sgx.gpu_region_base, sgx.gpu_region_size, "contexts")
+#mon.add(sgx.gfx_shared_region_base, sgx.gfx_shared_region_size, "gfx-shared")
+#mon.add(sgx.gfx_handoff_base, sgx.gfx_handoff_size, "gfx-handoff")
+
+#mon.add(agx.initdasgx.gfx_handoff_base, sgx.gfx_handoff_size, "gfx-handoff")
+# Register p.reboot to run at interpreter exit before starting AGX.
+atexit.register(p.reboot)
+agx.start()
+# First poll of the monitor and AGX objects, taken right after agx.start().
+print("==========================================")
+print("## After init")
+print("==========================================")
+mon.poll()
+agx.poll_objects()
+# A single GPUContext is created here and shared by both renderers below.
+ctx = GPUContext(agx)
+ctx.bind(1)
+# Two renderers on the same context; they differ only in bm_slot and queue.
+renderer = GPURenderer(ctx, 64, bm_slot=0, queue=0)
+renderer2 = GPURenderer(ctx, 64, bm_slot=1, queue=1)
+# Disabled experiment: overwrite several q.info fields of the work queues and push them.
+#for q in (renderer.wq_3d, renderer.wq_ta):#, renderer2.wq_3d, renderer2.wq_ta):
+ #q.info.unk_30 = 2
+ #q.info.unk_34 = 2
+ #q.info.unk_38 = 0xffff000000000000
+ #q.info.unk_40 = 0
+ #q.info.unk_44 = 0
+ #q.info.unk_48 = 2
+ #q.info.unk_50 = 0x1
+ #q.info.push()
+# The frame is built from the first command-line argument.
+f = GPUFrame(ctx, sys.argv[1], track=False)
+#f2 = GPUFrame(renderer2.ctx, sys.argv[1], track=False)
+
+print("==========================================")
+print("## Pre submit")
+print("==========================================")
+
+mon.poll()
+agx.poll_objects()
+
+print("==========================================")
+print("## Submitting")
+print("==========================================")
+# The same f.cmdbuf is submitted twice per renderer, interleaved renderer, renderer2, renderer, renderer2.
+work = renderer.submit(f.cmdbuf)
+work2 = renderer2.submit(f.cmdbuf)
+workb = renderer.submit(f.cmdbuf)
+work2b = renderer2.submit(f.cmdbuf)
+# Print the wc_3d / wc_ta objects of the first submission on each renderer.
+print(work.wc_3d)
+print(work.wc_ta)
+print(work2.wc_3d)
+print(work2.wc_ta)
+
+print("==========================================")
+print("## Submitted")
+print("==========================================")
+# t(addr): return element [0][0] of agx.uat.iotranslate(0, addr, 4); raises Exception when that element is None (presumably "not mapped" - confirm).
+def t(addr):
+    phys = agx.uat.iotranslate(0, addr, 4)[0][0]
+    if phys is not None:
+        return phys
+    raise Exception(f"Failed to iotranslate {addr:#x}")
+# Name -> t()-translated address for every location passed to GPIOLogicAnalyzer(regs=...); a "2_" prefix marks renderer2 / work2 entries.
+regs = {  # first group: fields of agx.initdata.regionB.stats_ta and stats_3d
+ "ta_cmds": t(agx.initdata.regionB.stats_ta.addrof("total_cmds")),
+ "ta0_busy": t(agx.initdata.regionB.stats_ta.stats.queues[0].addrof("busy")),
+ "ta0_unk4": t(agx.initdata.regionB.stats_ta.stats.queues[0].addrof("unk_4")),
+ "ta0_cq": t(agx.initdata.regionB.stats_ta.stats.queues[0].addrof("cur_cmdqueue")),
+ "ta0_cnt": t(agx.initdata.regionB.stats_ta.stats.queues[0].addrof("cur_count")),
+ "ta1_busy": t(agx.initdata.regionB.stats_ta.stats.queues[1].addrof("busy")),
+ "ta1_unk4": t(agx.initdata.regionB.stats_ta.stats.queues[1].addrof("unk_4")),
+ "ta1_cq": t(agx.initdata.regionB.stats_ta.stats.queues[1].addrof("cur_cmdqueue")),
+ "ta1_cnt": t(agx.initdata.regionB.stats_ta.stats.queues[1].addrof("cur_count")),
+ "ta_ts": t(agx.initdata.regionB.stats_ta.stats.addrof("unk_timestamp")),
+ "3d_cmds": t(agx.initdata.regionB.stats_3d.addrof("total_cmds")),
+ "3d_cq": t(agx.initdata.regionB.stats_3d.stats.addrof("cur_cmdqueue")),
+ "3d_tvb_oflws_1": t(agx.initdata.regionB.stats_3d.stats.addrof("tvb_overflows_1")),
+ "3d_tvb_oflws_2": t(agx.initdata.regionB.stats_3d.stats.addrof("tvb_overflows_2")),
+ "3d_cur_stamp_id": t(agx.initdata.regionB.stats_3d.stats.addrof("cur_stamp_id")),
+ "3d_ts": t(agx.initdata.regionB.stats_3d.stats.addrof("unk_timestamp")),
+ # Offsets 0/8/16/24 from regionB.buffer_mgr_ctl; the "2_" names are the 16/24 pair.
+ "bmctl_0": t(agx.initdata.regionB.buffer_mgr_ctl._addr + 0),
+ "bmctl_8": t(agx.initdata.regionB.buffer_mgr_ctl._addr + 8),
+ "2_bmctl_0": t(agx.initdata.regionB.buffer_mgr_ctl._addr + 16),
+ "2_bmctl_8": t(agx.initdata.regionB.buffer_mgr_ctl._addr + 24),
+ # renderer: buffer_mgr.info fields and its misc sub-structure.
+ "bmmisc_0": t(renderer.buffer_mgr.info.misc.addrof("gpu_0")),
+ "bmmisc_4": t(renderer.buffer_mgr.info.misc.addrof("gpu_4")),
+ "bmmisc_8": t(renderer.buffer_mgr.info.misc.addrof("gpu_8")),
+ "bmmisc_c": t(renderer.buffer_mgr.info.misc.addrof("gpu_c")),
+ "bmi_gpuc": t(renderer.buffer_mgr.info.addrof("gpu_counter")),
+ "bmi_18": t(renderer.buffer_mgr.info.addrof("unk_18")),
+ "bmi_gpuc2": t(renderer.buffer_mgr.info.addrof("gpu_counter2")),
+ # renderer2: the same buffer_mgr.info fields.
+ "2_bmmisc_0": t(renderer2.buffer_mgr.info.misc.addrof("gpu_0")),
+ "2_bmmisc_4": t(renderer2.buffer_mgr.info.misc.addrof("gpu_4")),
+ "2_bmmisc_8": t(renderer2.buffer_mgr.info.misc.addrof("gpu_8")),
+ "2_bmmisc_c": t(renderer2.buffer_mgr.info.misc.addrof("gpu_c")),
+ "2_bmi_gpuc": t(renderer2.buffer_mgr.info.addrof("gpu_counter")),
+ "2_bmi_18": t(renderer2.buffer_mgr.info.addrof("unk_18")),
+ "2_bmi_gpuc2": t(renderer2.buffer_mgr.info.addrof("gpu_counter2")),
+ # renderer: offsets 0x0, 0x4, 0x8, 0xc from ctx.gpu_context.
+ "ctxdat_0": t(renderer.ctx.gpu_context._addr + 0),
+ "ctxdat_4": t(renderer.ctx.gpu_context._addr + 4),
+ "ctxdat_8": t(renderer.ctx.gpu_context._addr + 8),
+ "ctxdat_c": t(renderer.ctx.gpu_context._addr + 0xc),
+ # renderer2: same offsets. NOTE(review): both renderers were built from the same ctx, so these likely alias ctxdat_* - confirm
+ "2_ctxdat_0": t(renderer2.ctx.gpu_context._addr + 0),
+ "2_ctxdat_4": t(renderer2.ctx.gpu_context._addr + 4),
+ "2_ctxdat_8": t(renderer2.ctx.gpu_context._addr + 8),
+ "2_ctxdat_c": t(renderer2.ctx.gpu_context._addr + 0xc),
+ # renderer: event_control fields and its list_head links.
+ "evctl_ta": t(renderer.event_control.addrof("has_ta")),
+ "evctl_pta": t(renderer.event_control.addrof("pstamp_ta")),
+ "evctl_3d": t(renderer.event_control.addrof("has_3d")),
+ "evctl_p3d": t(renderer.event_control.addrof("pstamp_3d")),
+ "evctl_in_list": t(renderer.event_control.addrof("in_list")),
+ "evctl_prev": t(renderer.event_control.list_head.addrof("prev")),
+ "evctl_next": t(renderer.event_control.list_head.addrof("next")),
+ # renderer2: the same event_control fields.
+ "2_evctl_ta": t(renderer2.event_control.addrof("has_ta")),
+ "2_evctl_pta": t(renderer2.event_control.addrof("pstamp_ta")),
+ "2_evctl_3d": t(renderer2.event_control.addrof("has_3d")),
+ "2_evctl_p3d": t(renderer2.event_control.addrof("pstamp_3d")),
+ "2_evctl_in_list":t(renderer2.event_control.addrof("in_list")),
+ "2_evctl_prev": t(renderer2.event_control.list_head.addrof("prev")),
+ "2_evctl_next": t(renderer2.event_control.list_head.addrof("next")),
+ # renderer: job_list fields.
+ "jl_first": t(renderer.job_list.addrof("first_job")),
+ "jl_last": t(renderer.job_list.addrof("last_head")),
+ "jl_10": t(renderer.job_list.addrof("unkptr_10")),
+ # renderer2: job_list fields.
+ "2_jl_first": t(renderer2.job_list.addrof("first_job")),
+ "2_jl_last": t(renderer2.job_list.addrof("last_head")),
+ "2_jl_10": t(renderer2.job_list.addrof("unkptr_10")),
+ # renderer: wq_3d.info fields.
+ "3d_done": t(renderer.wq_3d.info.pointers.addrof("gpu_doneptr")),
+ "3d_rptr": t(renderer.wq_3d.info.pointers.addrof("gpu_rptr")),
+ "3d_rptr1": t(renderer.wq_3d.info.addrof("gpu_rptr1")),
+ "3d_rptr2": t(renderer.wq_3d.info.addrof("gpu_rptr2")),
+ "3d_rptr3": t(renderer.wq_3d.info.addrof("gpu_rptr3")),
+ "3d_busy": t(renderer.wq_3d.info.addrof("busy")),
+ "3d_blk": t(renderer.wq_3d.info.addrof("blocked_on_barrier")),
+ "3d_2c": t(renderer.wq_3d.info.addrof("unk_2c")),
+ "3d_54": t(renderer.wq_3d.info.addrof("unk_54")),
+ "3d_58": t(renderer.wq_3d.info.addrof("unk_58")),
+ # renderer2: wq_3d.info subset (no gpu_rptr1-3 or unk_58 entries).
+ "2_3d_done": t(renderer2.wq_3d.info.pointers.addrof("gpu_doneptr")),
+ "2_3d_rptr": t(renderer2.wq_3d.info.pointers.addrof("gpu_rptr")),
+ "2_3d_busy": t(renderer2.wq_3d.info.addrof("busy")),
+ "2_3d_blk": t(renderer2.wq_3d.info.addrof("blocked_on_barrier")),
+ "2_3d_2c": t(renderer2.wq_3d.info.addrof("unk_2c")),
+ "2_3d_54": t(renderer2.wq_3d.info.addrof("unk_54")),
+ # renderer: wq_ta.info fields.
+ "ta_done": t(renderer.wq_ta.info.pointers.addrof("gpu_doneptr")),
+ "ta_rptr": t(renderer.wq_ta.info.pointers.addrof("gpu_rptr")),
+ "ta_rptr1": t(renderer.wq_ta.info.addrof("gpu_rptr1")),
+ "ta_rptr2": t(renderer.wq_ta.info.addrof("gpu_rptr2")),
+ "ta_rptr3": t(renderer.wq_ta.info.addrof("gpu_rptr3")),
+ "ta_busy": t(renderer.wq_ta.info.addrof("busy")),
+ "ta_blk": t(renderer.wq_ta.info.addrof("blocked_on_barrier")),
+ "ta_2c": t(renderer.wq_ta.info.addrof("unk_2c")),
+ "ta_54": t(renderer.wq_ta.info.addrof("unk_54")),
+ "ta_58": t(renderer.wq_ta.info.addrof("unk_58")),
+ # renderer2: wq_ta.info subset (no gpu_rptr1-3 or unk_58 entries).
+ "2_ta_done": t(renderer2.wq_ta.info.pointers.addrof("gpu_doneptr")),
+ "2_ta_rptr": t(renderer2.wq_ta.info.pointers.addrof("gpu_rptr")),
+ "2_ta_busy": t(renderer2.wq_ta.info.addrof("busy")),
+ "2_ta_blk": t(renderer2.wq_ta.info.addrof("blocked_on_barrier")),
+ "2_ta_2c": t(renderer2.wq_ta.info.addrof("unk_2c")),
+ "2_ta_54": t(renderer2.wq_ta.info.addrof("unk_54")),
+ # ts1/ts2/ts3 of the first submissions (work, work2); the "*b" names are ts1 of the second submissions (workb, work2b).
+ "3d_ts1": t(work.wc_3d.ts1._addr),
+ "3d_ts1b": t(workb.wc_3d.ts1._addr),
+ "3d_ts2": t(work.wc_3d.ts2._addr),
+ "3d_ts3": t(work.wc_3d.ts3._addr),
+ "ta_ts1": t(work.wc_ta.ts1._addr),
+ "ta_ts1b": t(workb.wc_ta.ts1._addr),
+ "ta_ts2": t(work.wc_ta.ts2._addr),
+ "ta_ts3": t(work.wc_ta.ts3._addr),
+ "2_3d_ts1": t(work2.wc_3d.ts1._addr),
+ "2_3d_ts1b": t(work2b.wc_3d.ts1._addr),
+ "2_3d_ts2": t(work2.wc_3d.ts2._addr),
+ "2_3d_ts3": t(work2.wc_3d.ts3._addr),
+ "2_ta_ts1": t(work2.wc_ta.ts1._addr),
+ "2_ta_ts1b": t(work2b.wc_ta.ts1._addr),
+ "2_ta_ts2": t(work2.wc_ta.ts2._addr),
+ "2_ta_ts3": t(work2.wc_ta.ts3._addr),
+ # renderer: the four stamp objects (ta1, ta2, 3d1, 3d2).
+ "ta_stamp1": t(renderer.stamp_ta1._addr),
+ "ta_stamp2": t(renderer.stamp_ta2._addr),
+ "3d_stamp1": t(renderer.stamp_3d1._addr),
+ "3d_stamp2": t(renderer.stamp_3d2._addr),
+ # renderer2: the four stamp objects.
+ "2_ta_stamp1": t(renderer2.stamp_ta1._addr),
+ "2_ta_stamp2": t(renderer2.stamp_ta2._addr),
+ "2_3d_stamp1": t(renderer2.stamp_3d1._addr),
+ "2_3d_stamp2": t(renderer2.stamp_3d2._addr),
+}
+# Analyzer configuration: div is passed to GPIOLogicAnalyzer, ticks to la.start().
+div=4
+ticks = 24000000 // div * 3  # presumably 3 s worth of a 24 MHz clock divided by div - TODO confirm
+
+la = GPIOLogicAnalyzer(u, regs=regs, cpu=analyzer_cpu, div=div)
+
+# Last monitor/object poll before anything is run.
+print("==========================================")
+print("## Poll prior to job start")
+print("==========================================")
+
+mon.poll()
+agx.poll_objects()
+
+print("==========================================")
+print("## Run")
+print("==========================================")
+# la.start() is called before renderer.run(), so the capture is started ahead of the first renderer.
+la.start(ticks, bufsize=0x400000)
+renderer.run()
+
+print("==========================================")
+print("## After r1 start")
+print("==========================================")
+#agx.poll_objects()
+
+#time.sleep(0.1)
+#mon.poll()
+#time.sleep(0.15)
+#mon.poll()
+renderer2.run()  # no polling or delay is active between the two run() calls (all of it is commented out above)
+
+print("==========================================")
+print("## After r2 start")
+print("==========================================")
+agx.poll_objects()
+
+#mon.poll()
+print("==========================================")
+print("## Waiting")
+print("==========================================")
+# Poll until work2.ev_3d.fired, then wait on both renderers; the finally clause runs la.complete() and la.show() even if this raises.
+try:
+ # Disabled: the equivalent polling loop for the first renderer's event (work.ev_3d).
+ #while not work.ev_3d.fired:
+ #agx.asc.work()
+ ##mon.poll()
+ #agx.poll_objects()
+ #agx.poll_channels()
+ #print("==========================================")
+ ##time.sleep(0.1)
+
+ #print("==========================================")
+ #print("## Ev1 Fired")
+ #print("==========================================")
+ # Each iteration calls agx.asc.work(), polls objects and channels, and prints a separator line; there is no timeout or sleep.
+ while not work2.ev_3d.fired:
+ agx.asc.work()
+ #mon.poll()
+ agx.poll_objects()
+ agx.poll_channels()
+ print("==========================================")
+ #time.sleep(0.1)
+
+ print("==========================================")
+ print("## Ev2 Fired")
+ print("==========================================")
+
+ renderer.wait()
+ renderer2.wait()
+
+ agx.poll_objects()
+ #mon.poll()
+
+finally:
+ la.complete()
+ la.show()
+
+time.sleep(2)  # NOTE(review): the reason for this final 2 s delay is not evident from this file
+