diff options
| author | magh <magh@maghmogh.com> | 2023-03-06 18:44:55 -0600 |
|---|---|---|
| committer | magh <magh@maghmogh.com> | 2023-03-06 18:44:55 -0600 |
| commit | e80d9d8871b325a04b18f90a9ea4bb7fd148fb25 (patch) | |
| tree | 79dbdb8506b7ff1e92549188d1b94cfc0b3503ae /tools/proxyclient/experiments | |
Diffstat (limited to 'tools/proxyclient/experiments')
41 files changed, 10913 insertions, 0 deletions
diff --git a/tools/proxyclient/experiments/addrdump.py b/tools/proxyclient/experiments/addrdump.py new file mode 100755 index 0000000..ba23226 --- /dev/null +++ b/tools/proxyclient/experiments/addrdump.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +import sys, pathlib +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +from m1n1.setup import * + +blacklist = [] + +print("Dumping address space...") +of = None +if len(sys.argv) > 1: + of = open(sys.argv[1],"w") + print("Also dumping to file %s") + +for i in range(0x0000, 0x10000): + if i in blacklist: + v = "%08x: SKIPPED"%(i<<16) + else: + a = (i<<16) + 0x1000 + d = p.read32(a) + v = "%08x: %08x"%(a, d) + print(v) + if of: + of.write(v+"\n") + +if of: + of.close() diff --git a/tools/proxyclient/experiments/aes.py b/tools/proxyclient/experiments/aes.py new file mode 100644 index 0000000..77d017a --- /dev/null +++ b/tools/proxyclient/experiments/aes.py @@ -0,0 +1,144 @@ +# SPDX-License-Identifier: MIT +import sys, pathlib + +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +from m1n1.setup import * +from m1n1.shell import run_shell +from m1n1.hw.dart import DART +from m1n1.hw.aes import * + +def aes_set_custom_key( + aes, + key, + encrypt=True, + mode=AES_SET_KEY_BLOCK_MODE.CTR, + keyslot=0, + keygen=0, +): + keylen = { + 16: AES_SET_KEY_LEN.AES128, + 24: AES_SET_KEY_LEN.AES192, + 32: AES_SET_KEY_LEN.AES256, + }[len(key)] + + aes.R_CMD_FIFO = AESSetKeyCommand( + KEY_SELECT=0, + KEYLEN=keylen, + ENCRYPT=1 if encrypt else 0, + BLOCK_MODE=mode, + SLOT=keyslot, + KEYGEN=keygen, + ).value + + for i in range(0, len(key), 4): + aes.R_CMD_FIFO = struct.unpack(">I", key[i : i + 4])[0] + + +def aes_set_hw_key( + aes, + key, + keylen=AES_SET_KEY_LEN.AES128, + encrypt=True, + mode=AES_SET_KEY_BLOCK_MODE.CTR, + slot=0, + keygen=0, +): + aes.R_CMD_FIFO = AESSetKeyCommand( + KEY_SELECT=key, + KEYLEN=keylen, + ENCRYPT=1 if encrypt else 0, + BLOCK_MODE=mode, + 
SLOT=slot, + KEYGEN=keygen, + ).value + + +def aes_set_iv(aes, iv, slot=0): + assert len(iv) == 16 + aes.R_CMD_FIFO = AESSetIVCommand(SLOT=slot) + + for i in range(0, len(iv), 4): + aes.R_CMD_FIFO = struct.unpack(">I", iv[i : i + 4])[0] + + +def aes_crypt(aes, dart, data, key_slot=0, iv_slot=0): + assert len(data) % 16 == 0 + + bfr = p.memalign(0x4000, len(data)) + iova = dart.iomap(1, bfr, len(data)) + dart.iowrite(1, iova, data) + + aes.R_CMD_FIFO = AESCryptCommand(LEN=len(data), KEY_SLOT=key_slot, IV_SLOT=iv_slot) + aes.R_CMD_FIFO = 0 # actually upper bits of addr + aes.R_CMD_FIFO = iova # src + aes.R_CMD_FIFO = iova # dst + + aes.R_CMD_FIFO = AESBarrierCommand(IRQ=1).value + time.sleep(0.1) + # while aes.R_IRQ_STATUS.reg.FLAG != 1: + # pass + # aes.dump_regs() + aes.R_IRQ_STATUS = aes.R_IRQ_STATUS.val + + res = dart.ioread(1, iova, len(data)) + return res + + +def test_hw_key(key, keylen, keygen=0): + aes.R_IRQ_STATUS = aes.R_IRQ_STATUS.val + aes.R_CONTROL.set(CLEAR_FIFO=1) + aes.R_CONTROL.set(RESET=1) + aes.R_CONTROL.set(START=1) + # aes.dump_regs() + aes_set_hw_key(aes, key, keylen, slot=0, keygen=keygen) + # print(aes.R_IRQ_STATUS) + aes_set_iv(aes, b"\x00" * 16, slot=0) + chexdump(aes_crypt(aes, dart, b"\x00" * 16, key_slot=0, iv_slot=1)) + # aes.dump_regs() + aes.R_CONTROL.set(STOP=1) + + +def test_custom_key(key, keygen=0): + aes.R_IRQ_STATUS = aes.R_IRQ_STATUS.val + aes.R_CONTROL.set(CLEAR_FIFO=1) + aes.R_CONTROL.set(RESET=1) + aes.R_CONTROL.set(START=1) + # aes.dump_regs() + aes_set_custom_key(aes, key, keyslot=0, keygen=keygen) + aes_set_iv(aes, b"\x00" * 16) + aes_set_iv(aes, b"\x11" * 16, slot=1) + chexdump(aes_crypt(aes, dart, b"\x00" * 16, key_slot=0, iv_slot=0)) + # aes.dump_regs() + aes.R_CONTROL.set(STOP=1) + + +p.pmgr_adt_clocks_enable("/arm-io/aes") + +dart = DART.from_adt(u, "/arm-io/dart-sio") +dart.initialize() + +aes_base, _ = u.adt["/arm-io/aes"].get_reg(0) +aes = AESRegs(u, aes_base) +aes.dump_regs() + +dart.dump_all() + +for keygen in 
range(4): + print(f"zero key, keygen={keygen}", end="") + test_custom_key(b"\x00" * 16, keygen=keygen) + +for keygen in range(4): + print("#" * 10) + for keylen in [ + AES_SET_KEY_LEN.AES128, + AES_SET_KEY_LEN.AES192, + AES_SET_KEY_LEN.AES256, + ]: + for i in (1, 3): + print(f"key = {i}, keylen={keylen}, keygen={keygen}", end="") + test_hw_key(i, keylen, keygen=keygen) + +dart.dump_all() + +run_shell(globals(), msg="Have fun!") diff --git a/tools/proxyclient/experiments/agx_1tri.py b/tools/proxyclient/experiments/agx_1tri.py new file mode 100644 index 0000000..af06713 --- /dev/null +++ b/tools/proxyclient/experiments/agx_1tri.py @@ -0,0 +1,840 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT + +import sys, pathlib, time +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +from m1n1.setup import * +from m1n1.constructutils import Ver +from m1n1.utils import * + +Ver.set_version(u) + +from m1n1.shell import run_shell + +from m1n1.agx import AGX +from m1n1.agx.context import * + +p.pmgr_adt_clocks_enable("/arm-io/gfx-asc") +p.pmgr_adt_clocks_enable("/arm-io/sgx") +#p.pmgr_adt_clocks_enable("/arm-io/pmp") + +# [cpu0] [0xfffffe00124bf5c0] MMIO: R.4 0x204d14000 (sgx, offset 0xd14000) = 0x0 +p.read32(0x204000000 + 0xd14000) +# [cpu0] [0xfffffe00124bf9a8] MMIO: W.4 0x204d14000 (sgx, offset 0xd14000) = 0x70001 +p.write32(0x204000000 + 0xd14000, 0x70001) + +#p.read32(0x204010258) + +agx = AGX(u) + +mon = RegMonitor(u, ascii=True, bufsize=0x8000000) +agx.mon = mon + +sgx = agx.sgx_dev +mon.add(sgx.gpu_region_base, sgx.gpu_region_size, "contexts") +mon.add(sgx.gfx_shared_region_base, sgx.gfx_shared_region_size, "gfx-shared") +mon.add(sgx.gfx_handoff_base, sgx.gfx_handoff_size, "gfx-handoff") + +#addr, size = sgx.get_reg(0) +#mon.add(addr + 0x600000, size - 0x600000, "sgx") + +addr, size = u.adt["/arm-io/aic"].get_reg(0) +mon.add(addr, size, "aic") + +def unswizzle(addr, w, h, psize, dump=None, grid=False): + tw = 64 + th = 64 + ntx = (w + tw - 1) // 
64 + nty = (h + th - 1) // 64 + data = iface.readmem(addr, ntx * nty * psize * tw * th) + new_data = [] + for y in range(h): + ty = y // th + for x in range(w): + tx = x // tw + toff = tw * th * psize * (ty * ntx + tx) + j = x & (tw - 1) + i = y & (th - 1) + off = ( + ((j & 1) << 0) | ((i & 1) << 1) | + ((j & 2) << 1) | ((i & 2) << 2) | + ((j & 4) << 2) | ((i & 4) << 3) | + ((j & 8) << 3) | ((i & 8) << 4) | + ((j & 16) << 4) | ((i & 16) << 5) | + ((j & 32) << 5) | ((i & 32) << 6)) + r,g,b,a = data[toff + psize*off: toff + psize*(off+1)] + if grid: + if x % 64 == 0 or y % 64 == 0: + r,g,b,a = 255,255,255,255 + elif x % 32 == 0 or y % 32 == 0: + r,g,b,a = 128,128,128,255 + new_data.append(bytes([b, g, r, a])) + data = b"".join(new_data) + if dump: + open(dump, "wb").write(data) + iface.writemem(addr, data) + +try: + agx.start() + + ctx_id = 3 + buffer_mgr_slot = 2 + + #agx.initdata.regionA.add_to_mon(mon) + #agx.initdata.regionB.add_to_mon(mon) + #agx.initdata.regionC.add_to_mon(mon) + + #agx.initdata.regionB.unk_170.add_to_mon(mon) + #agx.initdata.regionB.unk_178.add_to_mon(mon) + #agx.initdata.regionB.unk_180.add_to_mon(mon) + #agx.initdata.regionB.unk_190.add_to_mon(mon) + #agx.initdata.regionB.unk_198.add_to_mon(mon) + ##agx.initdata.regionB.fwlog_ring2.add_to_mon(mon) + #agx.initdata.regionB.hwdata_a.add_to_mon(mon) + #agx.initdata.regionB.hwdata_b.add_to_mon(mon) + mon.poll() + + #agx.asc.work_for(0.3) + + #p.write32(agx.initdata.regionC._paddr + 0x8900, 0xffffffff) + #p.write32(agx.initdata.regionC._paddr + 0x8904, 0xffffffff) + #mon.poll() + + agx.kick_firmware() + #agx.asc.work_for(0.3) + + #mon.poll() + + ##### Initialize context and load data + + ctx = GPUContext(agx) + ctx.bind(ctx_id) + + #p.read32(0x204000000 + 0xd14000) + #p.write32(0x204000000 + 0xd14000, 0x70001) + + #base = "gpudata/1tri/" + #base = "gpudata/mesa-flag/" + base = "gpudata/bunny/" + ctx.load_blob(0x1100000000, True, base + "mem_0_0.bin") + ctx.load_blob(0x1100008000, True, base + 
"mem_8000_0.bin") + ctx.load_blob(0x1100010000, True, base + "mem_10000_0.bin") + ctx.load_blob(0x1100058000, True, base + "mem_58000_0.bin") + ctx.load_blob(0x1100060000, True, base + "mem_60000_0.bin") + #ctx.load_blob(0x1100068000, True, base + "mem_68000_0.bin") + ctx.load_blob(0x1500000000, False, base + "mem_1500000000_0.bin") + ctx.load_blob(0x1500048000, False, base + "mem_1500048000_0.bin") + ctx.load_blob(0x15000d0000, False, base + "mem_15000d0000_0.bin") + ctx.load_blob(0x1500158000, False, base + "mem_1500158000_0.bin") + ctx.load_blob(0x15001e0000, False, base + "mem_15001e0000_0.bin") + ctx.load_blob(0x15001e8000, False, base + "mem_15001e8000_0.bin") + ctx.load_blob(0x15001f0000, False, base + "mem_15001f0000_0.bin") + ctx.load_blob(0x15001f8000, False, base + "mem_15001f8000_0.bin") + #ctx.load_blob(0x1500490000, False, base + "mem_1500490000_0.bin") + #ctx.load_blob(0x1500518000, False, base + "mem_1500518000_0.bin") + #color = ctx.buf_at(0x1500200000, False, 1310720, "Color", track=False) + #depth = ctx.buf_at(0x1500348000, False, 1310720, "Depth", track=False) + color = ctx.buf_at(0x1500200000, False, 2129920, "Color", track=False) + depth = ctx.buf_at(0x1500410000, False, 2129920, "Depth", track=False) + ctx.load_blob(0x1500620000, False, base + "mem_1500620000_0.bin", track=False) + ctx.load_blob(0x1500890000, False, base + "mem_1500890000_0.bin", track=False) + + mon.poll() + + p.memset32(color._paddr, 0xdeadbeef, color._size) + p.memset32(depth._paddr, 0xdeadbeef, depth._size) + + stencil = ctx.buf_at(0x1510410000, False, 2129920, "Stencil", track=False) + + width = 800 + height = 600 + + width_a = align_up(width, 64) + height_a = align_up(height, 64) + + depth_addr = depth._addr + + ##### Initialize buffer manager + + #buffer_mgr = GPUBufferManager(agx, ctx, 26) + buffer_mgr = GPUBufferManager(agx, ctx, 8) + + ##### Initialize work queues + + wq_3d = GPU3DWorkQueue(agx, ctx) + wq_ta = GPUTAWorkQueue(agx, ctx) + + ##### TA stamps + + 
#Message 1: DAG: Non Sequential Stamp Updates seen entryIdx 0x41 roots.dag 0x1 stampIdx 0x7 stampValue 0x4100 channel 0xffffffa000163f58 channelRingCommandIndex 0x1 + + prev_stamp_value = 0x4000 + stamp_value = 0x4100 + + # start? + stamp_ta1 = agx.kshared.new(BarrierCounter, name="TA stamp 1") + stamp_ta1.value = prev_stamp_value + stamp_ta1.push() + + # complete? + stamp_ta2 = agx.kobj.new(BarrierCounter, name="TA stamp 2") + stamp_ta2.value = prev_stamp_value + stamp_ta2.push() + + ##### 3D stamps + + # start? + stamp_3d1 = agx.kshared.new(BarrierCounter, name="3D stamp 1") + stamp_3d1.value = prev_stamp_value + stamp_3d1.push() + + # complete? + stamp_3d2 = agx.kobj.new(BarrierCounter, name="3D stamp 2") + stamp_3d2.value = prev_stamp_value + stamp_3d2.push() + + ##### Some kind of feedback/status buffer, GPU managed? + + event_control = agx.kobj.new(EventControl) + event_control.event_count = agx.kobj.new(Int32ul, "Event Count") + event_control.base_stamp = 0x15 #0 + event_control.unk_c = 0 + event_control.unk_10 = 0x50 + event_control.push() + + ##### TVB allocations / Tiler config + + tile_width = 32 + tile_height = 32 + tiles_x = ((width + tile_width - 1) // tile_width) + tiles_y = ((height + tile_height - 1) // tile_height) + tiles = tiles_x * tiles_y + + tile_blocks_x = (tiles_x + 15) // 16 + tile_blocks_y = (tiles_y + 15) // 16 + tile_blocks = tile_blocks_x * tile_blocks_y + + tiling_params = TilingParameters() + tiling_params.size1 = 0x14 * tile_blocks + tiling_params.unk_4 = 0x88 + tiling_params.unk_8 = 0x202 + tiling_params.x_max = width - 1 + tiling_params.y_max = height - 1 + tiling_params.tile_count = ((tiles_y-1) << 12) | (tiles_x-1) + tiling_params.x_blocks = (12 * tile_blocks_x) | (tile_blocks_x << 12) | (tile_blocks_x << 20) + tiling_params.y_blocks = (12 * tile_blocks_y) | (tile_blocks_y << 12) | (tile_blocks_y << 20) + tiling_params.size2 = 0x10 * tile_blocks + tiling_params.size3 = 0x20 * tile_blocks + tiling_params.unk_24 = 0x100 + 
tiling_params.unk_28 = 0x8000 + + tvb_something_size = 0x800 * tile_blocks + tvb_something = ctx.uobj.new_buf(tvb_something_size, "TVB Something") + + tvb_tilemap_size = 0x800 * tile_blocks + tvb_tilemap = ctx.uobj.new_buf(tvb_tilemap_size, "TVB Tilemap") + + tvb_heapmeta_size = 0x4000 + tvb_heapmeta = ctx.uobj.new_buf(tvb_heapmeta_size, "TVB Heap Meta") + + ##### Buffer stuff? + + # buffer related? + buf_desc = agx.kobj.new(BufferThing) + buf_desc.unk_0 = 0x0 + buf_desc.unk_8 = 0x0 + buf_desc.unk_10 = 0x0 + buf_desc.unkptr_18 = ctx.uobj.buf(0x80, "BufferThing.unkptr_18") + buf_desc.unk_20 = 0x0 + buf_desc.bm_misc_addr = buffer_mgr.misc_obj._addr + buf_desc.unk_2c = 0x0 + buf_desc.unk_30 = 0x0 + buf_desc.unk_38 = 0x0 + buf_desc.push() + + uuid_3d = 0x4000a14 + uuid_ta = 0x4000a15 + encoder_id = 0x30009fb + + ##### 3D barrier command + + ev_ta = 6 + ev_3d = 7 + + barrier_cmd = agx.kobj.new(WorkCommandBarrier) + barrier_cmd.stamp = stamp_ta2 + barrier_cmd.stamp_value1 = 0x4100 + barrier_cmd.stamp_value2 = 0x4100 + barrier_cmd.event = ev_ta + barrier_cmd.uuid = uuid_3d + + + #stamp.add_to_mon(mon) + #stamp2.add_to_mon(mon) + + print(barrier_cmd) + + wq_3d.submit(barrier_cmd.push()) + + ##### 3D execution + + wc_3d = agx.kobj.new(WorkCommand3D) + wc_3d.context_id = ctx_id + wc_3d.unk_8 = 0 + wc_3d.event_control = event_control + wc_3d.buffer_mgr = buffer_mgr.info + wc_3d.buf_thing = buf_desc + wc_3d.unk_emptybuf_addr = agx.kobj.buf(0x100, "unk_emptybuf") + wc_3d.tvb_tilemap = tvb_tilemap._addr + wc_3d.unk_40 = 0x88 + wc_3d.unk_48 = 0x1 + wc_3d.tile_blocks_y = tile_blocks_y * 4 + wc_3d.tile_blocks_x = tile_blocks_x * 4 + wc_3d.unk_50 = 0x0 + wc_3d.unk_58 = 0x0 + wc_3d.uuid1 = 0x3b315cae + wc_3d.uuid2 = 0x3b6c7b92 + wc_3d.unk_68 = 0x0 + wc_3d.tile_count = tiles + + wc_3d.unk_buf = WorkCommand1_UnkBuf() + wc_3d.unk_word = BarrierCounter() + wc_3d.unk_buf2 = WorkCommand1_UnkBuf2() + wc_3d.unk_buf2.unk_0 = 0 + wc_3d.unk_buf2.unk_8 = 0 + wc_3d.unk_buf2.unk_10 = 1 + wc_3d.ts1 
= Timestamp() + wc_3d.ts2 = Timestamp() + wc_3d.ts3 = Timestamp() + wc_3d.unk_914 = 0 + wc_3d.unk_918 = 0 + wc_3d.unk_920 = 0 + wc_3d.unk_924 = 1 + + # Structures embedded in WorkCommand3D + if True: + wc_3d.struct_1 = Start3DStruct1() + wc_3d.struct_1.store_pipeline_addr = 0x14004 # CHECKED + wc_3d.struct_1.unk_8 = 0x0 + wc_3d.struct_1.unk_c = 0x0 + wc_3d.struct_1.uuid1 = wc_3d.uuid1 + wc_3d.struct_1.uuid2 = wc_3d.uuid2 + wc_3d.struct_1.unk_18 = 0x0 + wc_3d.struct_1.tile_blocks_y = tile_blocks_y * 4 + wc_3d.struct_1.tile_blocks_x = tile_blocks_x * 4 + wc_3d.struct_1.unk_24 = 0x0 + wc_3d.struct_1.tile_counts = ((tiles_y-1) << 12) | (tiles_x-1) + wc_3d.struct_1.unk_2c = 0x8 + wc_3d.struct_1.depth_clear_val1 = 1.0 # works + wc_3d.struct_1.stencil_clear_val1 = 0x0 + wc_3d.struct_1.unk_38 = 0x0 + wc_3d.struct_1.unk_3c = 0x1 + wc_3d.struct_1.unk_40_padding = bytes(0xb0) + wc_3d.struct_1.depth_bias_array = Start3DArrayAddr(0x1500158000) + wc_3d.struct_1.scissor_array = Start3DArrayAddr(0x15000d0000) + wc_3d.struct_1.unk_110 = 0x0 + wc_3d.struct_1.unk_118 = 0x0 + wc_3d.struct_1.unk_120 = [0] * 37 + wc_3d.struct_1.unk_reload_pipeline = Start3DStorePipelineBinding(0xffff8212, 0xfffffff4) + wc_3d.struct_1.unk_258 = 0 + wc_3d.struct_1.unk_260 = 0 + wc_3d.struct_1.unk_268 = 0 + wc_3d.struct_1.unk_270 = 0 + wc_3d.struct_1.reload_pipeline = Start3DClearPipelineBinding(0xffff8212, 0x13004) # CHECKED + wc_3d.struct_1.depth_flags = 0x00000 + wc_3d.struct_1.unk_290 = 0x0 + wc_3d.struct_1.depth_buffer_ptr1 = depth_addr + wc_3d.struct_1.unk_2a0 = 0x0 + wc_3d.struct_1.unk_2a8 = 0x0 + wc_3d.struct_1.depth_buffer_ptr2 = depth_addr + wc_3d.struct_1.depth_buffer_ptr3 = depth_addr + wc_3d.struct_1.unk_2c0 = 0x0 + wc_3d.struct_1.stencil_buffer_ptr1 = stencil._addr + wc_3d.struct_1.unk_2d0 = 0x0 + wc_3d.struct_1.unk_2d8 = 0x0 + wc_3d.struct_1.stencil_buffer_ptr2 = stencil._addr + wc_3d.struct_1.stencil_buffer_ptr3 = stencil._addr + wc_3d.struct_1.unk_2f0 = [0x0, 0x0, 0x0] + 
wc_3d.struct_1.aux_fb_unk0 = 0x4 + wc_3d.struct_1.unk_30c = 0x0 + wc_3d.struct_1.aux_fb = AuxFBInfo(0xc000, 0, width, height) + wc_3d.struct_1.unk_320_padding = bytes(0x10) + wc_3d.struct_1.unk_partial_store_pipeline = Start3DStorePipelineBinding(0xffff8212, 0xfffffff4) + wc_3d.struct_1.partial_store_pipeline = Start3DStorePipelineBinding(0x12, 0x14004) # CHECKED + wc_3d.struct_1.depth_clear_val2 = 1.0 + wc_3d.struct_1.stencil_clear_val2 = 0x0 + wc_3d.struct_1.context_id = ctx_id + wc_3d.struct_1.unk_376 = 0x0 + wc_3d.struct_1.unk_378 = 0x8 + wc_3d.struct_1.unk_37c = 0x0 + wc_3d.struct_1.unk_380 = 0x0 + wc_3d.struct_1.unk_388 = 0x0 + wc_3d.struct_1.depth_dimensions = 0x12b831f #0xef827f + + if True: + wc_3d.struct_2 = Start3DStruct2() + wc_3d.struct_2.unk_0 = 0xa000 + wc_3d.struct_2.clear_pipeline = Start3DClearPipelineBinding(0xffff8002, 0x12004) + wc_3d.struct_2.unk_18 = 0x88 + wc_3d.struct_2.scissor_array = 0x15000d0000 + wc_3d.struct_2.depth_bias_array = 0x1500158000 + wc_3d.struct_2.aux_fb = wc_3d.struct_1.aux_fb + wc_3d.struct_2.depth_dimensions = wc_3d.struct_1.depth_dimensions + wc_3d.struct_2.unk_48 = 0x0 + wc_3d.struct_2.depth_flags = wc_3d.struct_1.depth_flags + wc_3d.struct_2.depth_buffer_ptr1 = depth_addr + wc_3d.struct_2.depth_buffer_ptr2 = depth_addr + wc_3d.struct_2.stencil_buffer_ptr1 = stencil._addr + wc_3d.struct_2.stencil_buffer_ptr2 = stencil._addr + wc_3d.struct_2.unk_68 = [0] * 12 + wc_3d.struct_2.tvb_tilemap = tvb_tilemap._addr + wc_3d.struct_2.tvb_heapmeta_addr = tvb_heapmeta._addr + wc_3d.struct_2.unk_e8 = 0x50000000 * tile_blocks + wc_3d.struct_2.tvb_heapmeta_addr2 = tvb_heapmeta._addr + wc_3d.struct_2.unk_f8 = 0x10280 # TODO: varies 0, 0x280, 0x10000, 0x10280 + wc_3d.struct_2.aux_fb_ptr = 0x1500006000 + wc_3d.struct_2.unk_108 = [0x0, 0x0, 0x0, 0x0, 0x0, 0x0] + wc_3d.struct_2.pipeline_base = 0x1100000000 + wc_3d.struct_2.unk_140 = 0x8c60 + wc_3d.struct_2.unk_148 = 0x0 + wc_3d.struct_2.unk_150 = 0x0 + wc_3d.struct_2.unk_158 = 0x1c + 
wc_3d.struct_2.unk_160_padding = bytes(0x1e8) + + if True: + wc_3d.struct_6 = Start3DStruct6() + wc_3d.struct_6.unk_0 = 0x0 + wc_3d.struct_6.unk_8 = 0x0 + wc_3d.struct_6.unk_10 = 0x0 + wc_3d.struct_6.encoder_id = encoder_id + wc_3d.struct_6.unk_1c = 0xffffffff + wc_3d.struct_6.unknown_buffer = 0x150000e000 + wc_3d.struct_6.unk_28 = 0x0 + wc_3d.struct_6.unk_30 = 0x1 + wc_3d.struct_6.unk_34 = 0x1 + + if True: + wc_3d.struct_7 = Start3DStruct7() + wc_3d.struct_7.unk_0 = 0x0 + wc_3d.struct_7.stamp1 = stamp_3d1 + wc_3d.struct_7.stamp2 = stamp_3d2 + wc_3d.struct_7.stamp_value = stamp_value + wc_3d.struct_7.ev_3d = ev_3d + wc_3d.struct_7.unk_20 = 0x0 + wc_3d.struct_7.unk_24 = 0x0 # check + wc_3d.struct_7.uuid = uuid_3d + wc_3d.struct_7.prev_stamp_value = 0x0 + wc_3d.struct_7.unk_30 = 0x0 + + wc_3d.set_addr() # Update inner structure addresses + print("WC3D", hex(wc_3d._addr)) + print(" s1", hex(wc_3d.struct_1._addr)) + print(" s2", hex(wc_3d.struct_2._addr)) + print(" s6", hex(wc_3d.struct_6._addr)) + print(" s7", hex(wc_3d.struct_7._addr)) + + ms = GPUMicroSequence(agx) + + start_3d = Start3DCmd() + start_3d.struct1 = wc_3d.struct_1 + start_3d.struct2 = wc_3d.struct_2 + start_3d.buf_thing = buf_desc + start_3d.unkptr_1c = agx.initdata.regionB.unkptr_178 + 8 + start_3d.unkptr_24 = wc_3d.unk_word._addr + start_3d.struct6 = wc_3d.struct_6 + start_3d.struct7 = wc_3d.struct_7 + start_3d.cmdqueue_ptr = wq_3d.info._addr + start_3d.workitem_ptr = wc_3d._addr + start_3d.context_id = ctx_id + start_3d.unk_50 = 0x1 + start_3d.unk_54 = 0x0 + start_3d.unk_58 = 0x2 + start_3d.unk_5c = 0x0 + start_3d.prev_stamp_value = 0x0 + start_3d.unk_68 = 0x0 + start_3d.unk_buf_ptr = wc_3d.unk_buf._addr + start_3d.unk_buf2_ptr = wc_3d.unk_buf2._addr + start_3d.unk_7c = 0x0 + start_3d.unk_80 = 0x0 + start_3d.unk_84 = 0x0 + start_3d.uuid = uuid_3d + start_3d.attachments = [ + Attachment(color._addr, 0x2800, 0x10017), + Attachment(depth._addr, 0x4100, 0x10017), + ] + [Attachment(0, 0, 0)] * 14 + 
start_3d.num_attachments = 2 + start_3d.unk_190 = 0x0 + + ms.append(start_3d) + + ts1 = TimestampCmd() + ts1.unk_1 = 0x0 + ts1.unk_2 = 0x0 + ts1.unk_3 = 0x80 + ts1.ts0_addr = wc_3d.ts1._addr + ts1.ts1_addr = wc_3d.ts2._addr + ts1.ts2_addr = wc_3d.ts2._addr + ts1.cmdqueue_ptr = wq_3d.info._addr + ts1.unk_24 = 0x0 + ts1.uuid = uuid_3d + ts1.unk_30_padding = 0x0 + ms.append(ts1) + + ms.append(WaitForInterruptCmd(0, 1, 0)) + + ts2 = TimestampCmd() + ts2.unk_1 = 0x0 + ts2.unk_2 = 0x0 + ts2.unk_3 = 0x0 + ts2.ts0_addr = wc_3d.ts1._addr + ts2.ts1_addr = wc_3d.ts2._addr + ts2.ts2_addr = wc_3d.ts3._addr + ts2.cmdqueue_ptr = wq_3d.info._addr + ts2.unk_24 = 0x0 + ts2.uuid = uuid_3d + ts2.unk_30_padding = 0x0 + ms.append(ts2) + + finish_3d = Finalize3DCmd() + finish_3d.uuid = uuid_3d + finish_3d.unk_8 = 0 + finish_3d.stamp = stamp_3d2 + finish_3d.stamp_value = stamp_value + finish_3d.unk_18 = 0 + finish_3d.buf_thing = buf_desc + finish_3d.buffer_mgr = buffer_mgr.info + finish_3d.unk_2c = 1 + finish_3d.unkptr_34 = agx.initdata.regionB.unkptr_178 + 8 + finish_3d.struct7 = wc_3d.struct_7 + finish_3d.unkptr_44 = wc_3d.unk_word._addr + finish_3d.cmdqueue_ptr = wq_3d.info._addr + finish_3d.workitem_ptr = wc_3d._addr + finish_3d.unk_5c = ctx_id + finish_3d.unk_buf_ptr = wc_3d.unk_buf._addr + finish_3d.unk_6c = 0 + finish_3d.unk_74 = 0 + finish_3d.unk_7c = 0 + finish_3d.unk_84 = 0 + finish_3d.unk_8c = 0 + finish_3d.startcmd_offset = -0x200 + finish_3d.unk_98 = 1 + ms.append(finish_3d) + ms.finalize() + + wc_3d.microsequence_ptr = ms.obj._addr + wc_3d.microsequence_size = ms.size + + print(wc_3d) + + wc_3d.push() + ms.dump() + print(wc_3d) + wq_3d.submit(wc_3d) + + ##### TA init + + #print(ctx_info) + + wc_initbm = agx.kobj.new(WorkCommandInitBM) + wc_initbm.context_id = ctx_id + wc_initbm.unk_8 = buffer_mgr_slot + wc_initbm.unk_c = 0 + wc_initbm.unk_10 = buffer_mgr.info.block_count + wc_initbm.buffer_mgr = buffer_mgr.info + wc_initbm.stamp_value = stamp_value + wc_initbm.push() + + 
print(wc_initbm) + wq_ta.submit(wc_initbm) + + ##### TA execution + + wc_ta = agx.kobj.new(WorkCommandTA) + wc_ta.context_id = ctx_id + wc_ta.unk_8 = 0 + wc_ta.event_control = event_control + wc_ta.unk_14 = buffer_mgr_slot + wc_ta.buffer_mgr = buffer_mgr.info + wc_ta.buf_thing = buf_desc + wc_ta.unk_emptybuf_addr = wc_3d.unk_emptybuf_addr + wc_ta.unk_34 = 0x0 + + wc_ta.unk_154 = bytes(0x268) + wc_ta.unk_3e8 = bytes(0x74) + wc_ta.unk_594 = WorkCommand0_UnkBuf() + + wc_ta.ts1 = Timestamp() + wc_ta.ts2 = Timestamp() + wc_ta.ts3 = Timestamp() + wc_ta.unk_5c4 = 0 + wc_ta.unk_5c8 = 0 + wc_ta.unk_5cc = 0 + wc_ta.unk_5d0 = 0 + wc_ta.unk_5d4 = 0x27 #1 + + # Structures embedded in WorkCommandTA + if True: + + wc_ta.tiling_params = tiling_params + #wc_ta.tiling_params.unk_0 = 0x28 + #wc_ta.tiling_params.unk_4 = 0x88 + #wc_ta.tiling_params.unk_8 = 0x202 + #wc_ta.tiling_params.x_max = 639 + #wc_ta.tiling_params.y_max = 479 + #wc_ta.tiling_params.unk_10 = 0xe013 + #wc_ta.tiling_params.unk_14 = 0x20_20_18 + #wc_ta.tiling_params.unk_18 = 0x10_10_0c + #wc_ta.tiling_params.unk_1c = 0x20 + #wc_ta.tiling_params.unk_20 = 0x40 + #wc_ta.tiling_params.unk_24 = 0x100 + #wc_ta.tiling_params.unk_28 = 0x8000 + + if True: + wc_ta.struct_2 = StartTACmdStruct2() + wc_ta.struct_2.unk_0 = 0x200 + wc_ta.struct_2.unk_8 = 0x1e3ce508 # fixed + wc_ta.struct_2.unk_c = 0x1e3ce508 # fixed + wc_ta.struct_2.tvb_tilemap = tvb_tilemap._addr + wc_ta.struct_2.unkptr_18 = 0x0 + wc_ta.struct_2.unkptr_20 = tvb_something._addr + wc_ta.struct_2.tvb_heapmeta_addr = tvb_heapmeta._addr | 0x8000000000000000 + wc_ta.struct_2.iogpu_unk_54 = 0x6b0003 # fixed + wc_ta.struct_2.iogpu_unk_55 = 0x3a0012 # fixed + wc_ta.struct_2.iogpu_unk_56 = 0x1 # fixed + wc_ta.struct_2.unk_40 = 0x0 # fixed + wc_ta.struct_2.unk_48 = 0xa000 # fixed + wc_ta.struct_2.unk_50 = 0x88 # fixed + wc_ta.struct_2.tvb_heapmeta_addr2 = tvb_heapmeta._addr + wc_ta.struct_2.unk_60 = 0x0 # fixed + wc_ta.struct_2.unk_68 = 0x0 # fixed + 
wc_ta.struct_2.iogpu_deflake_1 = 0x15000052a0 + wc_ta.struct_2.iogpu_deflake_2 = 0x1500005020 + wc_ta.struct_2.unk_80 = 0x1 # fixed + wc_ta.struct_2.iogpu_deflake_3 = 0x1500005000 + wc_ta.struct_2.encoder_addr = 0x1500048000 + wc_ta.struct_2.unk_98 = [0x0, 0x0] # fixed + wc_ta.struct_2.unk_a8 = 0xa041 # fixed + wc_ta.struct_2.unk_b0 = [0x0, 0x0, 0x0, 0x0, 0x0, 0x0] # fixed + wc_ta.struct_2.pipeline_base = 0x1100000000 + wc_ta.struct_2.unk_e8 = 0x0 # fixed + wc_ta.struct_2.unk_f0 = 0x1c # fixed + wc_ta.struct_2.unk_f8 = 0x8c60 # fixed + wc_ta.struct_2.unk_100 = [0x0, 0x0, 0x0] # fixed + wc_ta.struct_2.unk_118 = 0x1c # fixed + + if True: + wc_ta.struct_3 = StartTACmdStruct3() + wc_ta.struct_3.unk_480 = [0x0, 0x0, 0x0, 0x0, 0x0, 0x0] # fixed + wc_ta.struct_3.unk_498 = 0x0 # fixed + wc_ta.struct_3.unk_4a0 = 0x0 # fixed + wc_ta.struct_3.iogpu_deflake_1 = 0x15000052a0 + wc_ta.struct_3.unk_4ac = 0x0 # fixed + wc_ta.struct_3.unk_4b0 = 0x0 # fixed + wc_ta.struct_3.unk_4b8 = 0x0 # fixed + wc_ta.struct_3.unk_4bc = 0x0 # fixed + wc_ta.struct_3.unk_4c4_padding = bytes(0x48) + wc_ta.struct_3.unk_50c = 0x0 # fixed + wc_ta.struct_3.unk_510 = 0x0 # fixed + wc_ta.struct_3.unk_518 = 0x0 # fixed + wc_ta.struct_3.unk_520 = 0x0 # fixed + wc_ta.struct_3.unk_528 = 0x0 # fixed + wc_ta.struct_3.unk_52c = 0x0 # fixed + wc_ta.struct_3.unk_530 = 0x0 # fixed + wc_ta.struct_3.encoder_id = encoder_id + wc_ta.struct_3.unk_538 = 0x0 # fixed + wc_ta.struct_3.unk_53c = 0xffffffff + wc_ta.struct_3.unknown_buffer = wc_3d.struct_6.unknown_buffer + wc_ta.struct_3.unk_548 = 0x0 # fixed + wc_ta.struct_3.unk_550 = [ + 0x0, 0x0, # fixed + 0x0, # 1 for boot stuff? + 0x0, 0x0, 0x0] # fixed + wc_ta.struct_3.stamp1 = stamp_ta1 + wc_ta.struct_3.stamp2 = stamp_ta2 + wc_ta.struct_3.stamp_value = stamp_value + wc_ta.struct_3.ev_ta = ev_ta + wc_ta.struct_3.unk_580 = 0x0 # fixed + wc_ta.struct_3.unk_584 = 0x0 # 1 for boot stuff? 
+ wc_ta.struct_3.uuid2 = uuid_ta + #wc_ta.struct_3.unk_58c = [0x0, 0x0] + wc_ta.struct_3.unk_58c = [0x1, 0x0] + + wc_ta.set_addr() # Update inner structure addresses + #print("wc_ta", wc_ta) + + ms = GPUMicroSequence(agx) + + start_ta = StartTACmd() + start_ta.tiling_params = wc_ta.tiling_params + start_ta.struct2 = wc_ta.struct_2 + start_ta.buffer_mgr = buffer_mgr.info + start_ta.buf_thing = buf_desc + start_ta.unkptr_24 = agx.initdata.regionB.unkptr_170 + 4 + start_ta.cmdqueue_ptr = wq_ta.info._addr + start_ta.context_id = ctx_id + start_ta.unk_38 = 1 + start_ta.unk_3c = 1 #0 + start_ta.unk_40 = buffer_mgr_slot + start_ta.unk_48 = 1 #0 + start_ta.unk_50 = 0 + start_ta.struct3 = wc_ta.struct_3 + + start_ta.unkptr_5c = wc_ta.unk_594._addr + start_ta.unk_64 = 0x0 # fixed + start_ta.uuid = uuid_ta + start_ta.unk_70 = 0x0 # fixed + start_ta.unk_74 = [ # fixed + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + ] + start_ta.unk_15c = 0x0 # fixed + start_ta.unk_160 = 0x0 # fixed + start_ta.unk_168 = 0x0 # fixed + start_ta.unk_16c = 0x0 # fixed + start_ta.unk_170 = 0x0 # fixed + start_ta.unk_178 = 0x0 # fixed + ms.append(start_ta) + + ts1 = TimestampCmd() + ts1.unk_1 = 0x0 + ts1.unk_2 = 0x0 + ts1.unk_3 = 0x80 + ts1.ts0_addr = wc_ta.ts1._addr + ts1.ts1_addr = wc_ta.ts2._addr + ts1.ts2_addr = wc_ta.ts2._addr + ts1.cmdqueue_ptr = wq_ta.info._addr + ts1.unk_24 = 0x0 + ts1.uuid = uuid_ta + ts1.unk_30_padding = 0x0 + ms.append(ts1) + + ms.append(WaitForInterruptCmd(1, 0, 0)) + + ts2 = TimestampCmd() + ts2.unk_1 = 0x0 + ts2.unk_2 = 0x0 + ts2.unk_3 = 0x0 + ts2.ts0_addr = wc_ta.ts1._addr + ts2.ts1_addr = wc_ta.ts2._addr + ts2.ts2_addr = wc_ta.ts3._addr + ts2.cmdqueue_ptr = wq_ta.info._addr + ts2.unk_24 = 0x0 + ts2.uuid = uuid_ta + ts2.unk_30_padding = 0x0 + ms.append(ts2) + + finish_ta = FinalizeTACmd() + finish_ta.buf_thing = buf_desc + finish_ta.buffer_mgr = buffer_mgr.info + 
finish_ta.unkptr_14 = agx.initdata.regionB.unkptr_170 + 4 + finish_ta.cmdqueue_ptr = wq_ta.info._addr + finish_ta.context_id = ctx_id + finish_ta.unk_28 = 0x0 # fixed + finish_ta.struct3 = wc_ta.struct_3 + finish_ta.unk_34 = 0x0 # fixed + finish_ta.uuid = uuid_ta + finish_ta.stamp = stamp_ta2 + finish_ta.stamp_value = stamp_value + finish_ta.unk_48 = 0x0 # fixed + finish_ta.unk_50 = 0x0 # fixed + finish_ta.unk_54 = 0x0 # fixed + finish_ta.unk_58 = 0x0 # fixed + finish_ta.unk_60 = 0x0 # fixed + finish_ta.unk_64 = 0x0 # fixed + finish_ta.unk_68 = 0x0 # fixed + finish_ta.startcmd_offset = -0x1e8 # fixed + finish_ta.unk_70 = 0x0 # fixed + ms.append(finish_ta) + + ms.finalize() + + wc_ta.unkptr_45c = tvb_something._addr + wc_ta.tvb_size = tvb_something_size + wc_ta.microsequence_ptr = ms.obj._addr + wc_ta.microsequence_size = ms.size + wc_ta.ev_3d = ev_3d + wc_ta.stamp_value = stamp_value + + wc_ta.push() + ms.dump() + + mon.poll() + + print(wc_ta) + wq_ta.submit(wc_ta) + + ##### Run queues + agx.ch.queue[2].q_3D.run(wq_3d, ev_3d) + agx.ch.queue[2].q_TA.run(wq_ta, ev_ta) + + ##### Wait for work + agx.asc.work_for(0.3) + print("3D:") + print(wq_3d.info.pull()) + print("TA:") + print(wq_ta.info.pull()) + print("Barriers:") + print(stamp_ta1.pull()) + print(stamp_ta2.pull()) + print(stamp_3d1.pull()) + print(stamp_3d2.pull()) + + event_control.pull() + print(event_control) + + print("==") + mon.poll() + print("==") + #agx.kick_firmware() + agx.asc.work_for(0.3) + p.read32(0x204000000 + 0xd14000) + # [cpu0] [0xfffffe00124bf9a8] MMIO: W.4 0x204d14000 (sgx, offset 0xd14000) = 0x70001 + p.write32(0x204000000 + 0xd14000, 0x70001) + + #agx.uat.dump(ctx_id) + + fault_code = p.read64(0x204017030) + fault_addr = fault_code >> 24 + if fault_addr & 0x8000000000: + fault_addr |= 0xffffff8000000000 + print(f"FAULT CODE: {fault_code:#x}") + base, obj = agx.find_object(fault_addr) + if obj is not None: + print(f"Faulted at : {fault_addr:#x}: {obj!s} + {fault_addr - base:#x}") + 
#agx.kick_firmware() + mon.poll() + + #print(buffer_mgr.info.pull()) + #print(buffer_mgr.counter_obj.pull()) + #print(buffer_mgr.misc_obj.pull()) + #print(buffer_mgr.block_ctl_obj.pull()) + + width = 800 + height = 600 + + unswizzle(color._paddr, width, height, 4, "fb.bin", grid=True) + unswizzle(depth._paddr, width, height, 4, "depth.bin", grid=True) + + p.fb_blit(0, 0, width, height, color._paddr, width) + + print("TVB something:") + chexdump(iface.readmem(tvb_something._paddr, tvb_something._size), stride=16, abbreviate=False) + + print("TVB list:") + chexdump(iface.readmem(tvb_tilemap._paddr, tvb_tilemap._size), stride=5, abbreviate=False) + + print("Tile params:") + print(f"X: {tiles_x} ({tile_blocks_x})") + print(f"Y: {tiles_y} ({tile_blocks_y})") + print(f"Total: {tiles} ({tile_blocks})") + + agx.stop() +except: + #agx.uat.dump(ctx_id) + p.reboot() + raise + #agx.stop() + +#time.sleep(10) +#p.reboot() diff --git a/tools/proxyclient/experiments/agx_boot.py b/tools/proxyclient/experiments/agx_boot.py new file mode 100755 index 0000000..b5d668c --- /dev/null +++ b/tools/proxyclient/experiments/agx_boot.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT + +import sys, pathlib +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +from m1n1.setup import * +from m1n1.constructutils import Ver +from m1n1.utils import * + +Ver.set_version(u) + +from m1n1.shell import run_shell + +from m1n1.fw.agx import Agx + +p.pmgr_adt_clocks_enable("/arm-io/gfx-asc") +p.pmgr_adt_clocks_enable("/arm-io/sgx") + +agx = Agx(u) +agx.verbose = 10 + +#agx.uat.dump(0) + +agx.build_initdata() + +run_shell(globals(), msg="Have fun!") diff --git a/tools/proxyclient/experiments/agx_cancel.py b/tools/proxyclient/experiments/agx_cancel.py new file mode 100644 index 0000000..af0881d --- /dev/null +++ b/tools/proxyclient/experiments/agx_cancel.py @@ -0,0 +1,246 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT + +import sys, pathlib, time 
+sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +import atexit, sys + +from m1n1.setup import * +from m1n1.constructutils import Ver +from m1n1.utils import * + +Ver.set_version(u) + +from m1n1.agx import AGX +from m1n1.agx.render import * + +from m1n1 import asm + +from m1n1.gpiola import GPIOLogicAnalyzer + +analyzer_cpu = 1 + +p.pmgr_adt_clocks_enable("/arm-io/gfx-asc") +p.pmgr_adt_clocks_enable("/arm-io/sgx") +p.smp_start_secondaries() +p.mmu_init_secondary(analyzer_cpu) +iface.dev.timeout = 42 + +agx = AGX(u) + +mon = RegMonitor(u, ascii=True, bufsize=0x8000000) +agx.mon = mon + +sgx = agx.sgx_dev + +atexit.register(p.reboot) +agx.start() + +mon.poll() +agx.poll_objects() + +ctx = GPUContext(agx) +ctx.bind(63) + +f = GPUFrame(ctx, sys.argv[1], track=False) + +r = GPURenderer(ctx, 128, bm_slot=0x10, queue=1) + +dep_stamp = agx.kobj.new(StampCounter, name="Dep stamp") +dep_stamp.value = 0x100 +dep_stamp.push() + +#r.submit(f.cmdbuf, (dep_stamp._addr, 0x200, 0x10)) +r.submit(f.cmdbuf) +r.submit(f.cmdbuf) + +def t(addr): + paddr = agx.uat.iotranslate(0, addr, 4)[0][0] + if paddr is None: + raise Exception(f"Failed to iotranslate {addr:#x}") + return paddr + +regs = { + "ta_cmds": t(agx.initdata.regionB.stats_ta.addrof("total_cmds")), + "ta_ts": t(agx.initdata.regionB.stats_ta.stats.addrof("unk_timestamp")), +} + +pend_base = agx.initdata.regionC.addrof("pending_stamps") +for i in range(5): + regs[f"st{i}_info"] = t(pend_base + i*8) + regs[f"st{i}_val"] = t(pend_base + i*8 + 4) + +for i in range(4): + regs[f"ta{i}_cq"] = t(agx.initdata.regionB.stats_ta.stats.queues[i].addrof("cur_cmdqueue")) + +regs.update({ + #"pwr_status": t(agx.initdata.regionB.hwdata_a.addrof("pwr_status")), + #"pstate": t(agx.initdata.regionB.hwdata_a.addrof("cur_pstate")), + #"temp_c": t(agx.initdata.regionB.hwdata_a.addrof("temp_c")), + #"pwr_mw": t(agx.initdata.regionB.hwdata_a.addrof("avg_power_mw")), + #"pwr_ts": t(agx.initdata.regionB.hwdata_a.addrof("update_ts")), + 
+ #"unk_10": t(agx.initdata.regionB.hwdata_a.addrof("unk_10")), + #"unk_14": t(agx.initdata.regionB.hwdata_a.addrof("unk_14")), + #"actual_pstate": t(agx.initdata.regionB.hwdata_a.addrof("actual_pstate")), + #"tgt_pstate": t(agx.initdata.regionB.hwdata_a.addrof("tgt_pstate")), + #"unk_40": t(agx.initdata.regionB.hwdata_a.addrof("unk_40")), + #"unk_44": t(agx.initdata.regionB.hwdata_a.addrof("unk_44")), + #"unk_48": t(agx.initdata.regionB.hwdata_a.addrof("unk_48")), + #"freq_mhz": t(agx.initdata.regionB.hwdata_a.addrof("freq_mhz")), + + #"unk_748.0": t(agx.initdata.regionB.hwdata_a.addrof("unk_748")), + #"unk_748.1": t(agx.initdata.regionB.hwdata_a.addrof("unk_748")+4), + #"unk_748.2": t(agx.initdata.regionB.hwdata_a.addrof("unk_748")+8), + #"unk_748.3": t(agx.initdata.regionB.hwdata_a.addrof("unk_748")+12), + #"use_percent": t(agx.initdata.regionB.hwdata_a.addrof("use_percent")), + #"unk_83c": t(agx.initdata.regionB.hwdata_a.addrof("unk_83c")), + #"freq_with_off": t(agx.initdata.regionB.hwdata_a.addrof("freq_with_off")), + #"unk_ba0": t(agx.initdata.regionB.hwdata_a.addrof("unk_ba0")), + #"unk_bb0": t(agx.initdata.regionB.hwdata_a.addrof("unk_bb0")), + #"unk_c44": t(agx.initdata.regionB.hwdata_a.addrof("unk_c44")), + #"unk_c58": t(agx.initdata.regionB.hwdata_a.addrof("unk_c58")), + + #"unk_3ca0": t(agx.initdata.regionB.hwdata_a.addrof("unk_3ca0")), + #"unk_3ca8": t(agx.initdata.regionB.hwdata_a.addrof("unk_3ca8")), + #"unk_3cb0": t(agx.initdata.regionB.hwdata_a.addrof("unk_3cb0")), + #"ts_last_idle": t(agx.initdata.regionB.hwdata_a.addrof("ts_last_idle")), + #"ts_last_poweron": t(agx.initdata.regionB.hwdata_a.addrof("ts_last_poweron")), + #"ts_last_poweroff": t(agx.initdata.regionB.hwdata_a.addrof("ts_last_poweroff")), + #"unk_3cd0": t(agx.initdata.regionB.hwdata_a.addrof("unk_3cd0")), + + "halt_count": t(agx.initdata.fw_status.addrof("halt_count")), + "halted": t(agx.initdata.fw_status.addrof("halted")), + "resume": t(agx.initdata.fw_status.addrof("resume")), + 
"unk_40": t(agx.initdata.fw_status.addrof("unk_40")), + "unk_ctr": t(agx.initdata.fw_status.addrof("unk_ctr")), + "unk_60": t(agx.initdata.fw_status.addrof("unk_60")), + "unk_70": t(agx.initdata.fw_status.addrof("unk_70")), + "c_118c0": t(agx.initdata.regionC._addr + 0x118c0), + "c_118c4": t(agx.initdata.regionC._addr + 0x118c4), + "c_118c8": t(agx.initdata.regionC._addr + 0x118c8), + "c_118cc": t(agx.initdata.regionC._addr + 0x118cc), + "c_118d0": t(agx.initdata.regionC._addr + 0x118d0), + "c_118d4": t(agx.initdata.regionC._addr + 0x118d4), + "c_118d8": t(agx.initdata.regionC._addr + 0x118d8), + "c_118dc": t(agx.initdata.regionC._addr + 0x118dc), + "3d_cmds": t(agx.initdata.regionB.stats_3d.addrof("total_cmds")), + #"3d_tvb_oflws_1": t(agx.initdata.regionB.stats_3d.stats.addrof("tvb_overflows_1")), + #"3d_tvb_oflws_2": t(agx.initdata.regionB.stats_3d.stats.addrof("tvb_overflows_2")), + "3d_cur_stamp_id": t(agx.initdata.regionB.stats_3d.stats.addrof("cur_stamp_id")), + "3d_ts": t(agx.initdata.regionB.stats_3d.stats.addrof("unk_timestamp")), + #"3d_cur_stamp_id": t(agx.initdata.regionB.stats_3d.stats.addrof("cur_stamp_id")), +}) + +for i in range(4): + regs[f"3d{i}_cq"] = t(agx.initdata.regionB.stats_3d.stats.queues[i].addrof("cur_cmdqueue")) + + +i = 0 +regs.update({ + f"r{i}_3d_done": t(r.wq_3d.info.pointers.addrof("gpu_doneptr")), + #f"r{i}_3d_rptr": t(r.wq_3d.info.pointers.addrof("gpu_rptr")), + f"r{i}_3d_busy": t(r.wq_3d.info.addrof("busy")), + #f"r{i}_3d_blk": t(r.wq_3d.info.addrof("blocked_on_barrier")), + #f"r{i}_3d_2c": t(r.wq_3d.info.addrof("unk_2c")), + #f"r{i}_3d_54": t(r.wq_3d.info.addrof("unk_54")), + + f"r{i}_ta_done": t(r.wq_ta.info.pointers.addrof("gpu_doneptr")), + #f"r{i}_ta_rptr": t(r.wq_ta.info.pointers.addrof("gpu_rptr")), + f"r{i}_ta_busy": t(r.wq_ta.info.addrof("busy")), + #f"r{i}_ta_blk": t(r.wq_ta.info.addrof("blocked_on_barrier")), + #f"r{i}_ta_2c": t(r.wq_ta.info.addrof("unk_2c")), + #f"r{i}_ta_54": t(r.wq_ta.info.addrof("unk_54")), + 
f"r{i}_ta_stamp1": t(r.stamp_ta1._addr), + f"r{i}_ta_stamp2":t(r.stamp_ta2._addr), + f"r{i}_3d_stamp1": t(r.stamp_3d1._addr), + f"r{i}_3d_stamp2":t(r.stamp_3d2._addr), + + f"r{i}_ev_cnt":t(r.event_control.event_count._addr), + f"r{i}_ev_cur":t(r.event_control.addrof("cur_count")), + f"r{i}_ev_10":t(r.event_control.addrof("unk_10")), +}) + +div=4 +ticks = 24000000 // div * 25 + +la = GPIOLogicAnalyzer(u, regs=regs, cpu=analyzer_cpu, div=div) + +print("Queues:") +print(f" TA: {r.wq_ta.info._addr:#x} (stamp {r.work[0].ev_ta.id})") +#print(r.wq_ta.info) +print(f" 3D: {r.wq_3d.info._addr:#x} (stamp {r.work[0].ev_3d.id})") +#print(r.wq_3d.info) + +print("==========================================") +print("## Run") +print("==========================================") + +la.start(ticks, bufsize=0x8000000) + +t = time.time() + +buf = agx.kobj.new_buf(0x1000, "foo") +buf.val = b"A" * 0x1000 +buf.push() +agx.uat.flush_dirty() + +try: + r.run() + + #for a in range(8): + #for b in range(8): + #agx.ch.devctrl.dc_1e(a, b) + + #agx.uat.flush_dirty() + #agx.ch.devctrl.write32(dep_stamp._addr, 0x200) + + #data = struct.pack("<QQQQQI", 0xaaaa, buf._addr, 0xbbbb, 0, 0, 0) + + + #agx.poll_objects() + #mon.poll() + #agx.ch.devctrl.send_foo(9, data) + #agx.ch.devctrl.dc_09(0xaaaa, buf._addr, 0xbbbb) + + #for i in range(0x28, 0xff): + #print(hex(i)) + #data = struct.pack("<QQQQQI", dep_stamp._addr, 0x10_00000200, buf._addr, 0x12_00000010, 0x13_00000010, 0x10) + #agx.ch.devctrl.send_foo(i, data) + #agx.asc.work() + #time.sleep(0.1) + #agx.poll_objects() + #mon.poll() + + #time.sleep(0.1) + #agx.asc.work() + + #chexdump(buf.pull().val) + + agx.kick_firmware() + + while not r.ev_3d.fired: + agx.asc.work() + agx.poll_channels() + print("==========================================") + #agx.poll_objects() + #mon.poll() + agx.kick_firmware() + if time.time() > t + 2: + raise Exception("Timeout") + r.wait() + +finally: + + dep_stamp.pull() + print(f"Stamp value: {dep_stamp.value:#x}") + + 
#agx.poll_objects() + #mon.poll() + + la.complete() + la.show() + +time.sleep(2) + diff --git a/tools/proxyclient/experiments/agx_deps.py b/tools/proxyclient/experiments/agx_deps.py new file mode 100644 index 0000000..9a7c196 --- /dev/null +++ b/tools/proxyclient/experiments/agx_deps.py @@ -0,0 +1,242 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT + +import sys, pathlib, time +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +import atexit, sys + +from m1n1.setup import * +from m1n1.constructutils import Ver +from m1n1.utils import * + +Ver.set_version(u) + +from m1n1.agx import AGX +from m1n1.agx.render import * + +from m1n1 import asm + +from m1n1.gpiola import GPIOLogicAnalyzer + +analyzer_cpu = 1 + +p.pmgr_adt_clocks_enable("/arm-io/gfx-asc") +p.pmgr_adt_clocks_enable("/arm-io/sgx") +p.smp_start_secondaries() +p.mmu_init_secondary(analyzer_cpu) +iface.dev.timeout = 42 + +agx = AGX(u) + +mon = RegMonitor(u, ascii=True, bufsize=0x8000000) +agx.mon = mon + +sgx = agx.sgx_dev +#mon.add(sgx.gpu_region_base, sgx.gpu_region_size, "contexts") +#mon.add(sgx.gfx_shared_region_base, sgx.gfx_shared_region_size, "gfx-shared") +#mon.add(sgx.gfx_handoff_base, sgx.gfx_handoff_size, "gfx-handoff") + +#mon.add(agx.initdasgx.gfx_handoff_base, sgx.gfx_handoff_size, "gfx-handoff") + +atexit.register(p.reboot) +agx.start() + +print("==========================================") +print("## After init") +print("==========================================") +mon.poll() +agx.poll_objects() + +ctx = GPUContext(agx) +ctx.bind(63) + +f = GPUFrame(ctx, sys.argv[1], track=False) + +RENDERERS = 4 + +renderers = [] + +for i in range(RENDERERS): + r = GPURenderer(ctx, 128, bm_slot=0x10 + i, queue=1) + renderers.append(r) + + for q in (r.wq_3d, r.wq_ta): + q.info.set_prio(2) + q.info.push() + +print("==========================================") +print("## Submitting") +print("==========================================") + +w = renderers[3].submit(f.cmdbuf) +w = 
renderers[0].submit(f.cmdbuf, w) +w = renderers[2].submit(f.cmdbuf, w) +w = renderers[1].submit(f.cmdbuf, w) +w = renderers[3].submit(f.cmdbuf, w) +w = renderers[3].submit(f.cmdbuf, w) +w = renderers[1].submit(f.cmdbuf, w) +w = renderers[1].submit(f.cmdbuf, w) +w = renderers[0].submit(f.cmdbuf, w) + +for i, r in enumerate(renderers): + r.submit(f.cmdbuf) + +print("==========================================") +print("## Submitted") +print("==========================================") + +def t(addr): + paddr = agx.uat.iotranslate(0, addr, 4)[0][0] + if paddr is None: + raise Exception(f"Failed to iotranslate {addr:#x}") + return paddr + +regs = { + "ta_cmds": t(agx.initdata.regionB.stats_ta.addrof("total_cmds")), + "ta_ts": t(agx.initdata.regionB.stats_ta.stats.addrof("unk_timestamp")), +} + +pend_base = agx.initdata.regionC.addrof("pending_stamps") +for i in range(5): + regs[f"st{i}_info"] = t(pend_base + i*8) + regs[f"st{i}_val"] = t(pend_base + i*8 + 4) + +for i in range(4): + regs[f"ta{i}_cq"] = t(agx.initdata.regionB.stats_ta.stats.queues[i].addrof("cur_cmdqueue")) + +regs.update({ + #"pwr_status": t(agx.initdata.regionB.hwdata_a.addrof("pwr_status")), + #"pstate": t(agx.initdata.regionB.hwdata_a.addrof("cur_pstate")), + #"temp_c": t(agx.initdata.regionB.hwdata_a.addrof("temp_c")), + #"pwr_mw": t(agx.initdata.regionB.hwdata_a.addrof("avg_power_mw")), + #"pwr_ts": t(agx.initdata.regionB.hwdata_a.addrof("update_ts")), + + #"unk_10": t(agx.initdata.regionB.hwdata_a.addrof("unk_10")), + #"unk_14": t(agx.initdata.regionB.hwdata_a.addrof("unk_14")), + #"actual_pstate": t(agx.initdata.regionB.hwdata_a.addrof("actual_pstate")), + #"tgt_pstate": t(agx.initdata.regionB.hwdata_a.addrof("tgt_pstate")), + #"unk_40": t(agx.initdata.regionB.hwdata_a.addrof("unk_40")), + #"unk_44": t(agx.initdata.regionB.hwdata_a.addrof("unk_44")), + #"unk_48": t(agx.initdata.regionB.hwdata_a.addrof("unk_48")), + #"freq_mhz": t(agx.initdata.regionB.hwdata_a.addrof("freq_mhz")), + + 
#"unk_748.0": t(agx.initdata.regionB.hwdata_a.addrof("unk_748")), + #"unk_748.1": t(agx.initdata.regionB.hwdata_a.addrof("unk_748")+4), + #"unk_748.2": t(agx.initdata.regionB.hwdata_a.addrof("unk_748")+8), + #"unk_748.3": t(agx.initdata.regionB.hwdata_a.addrof("unk_748")+12), + #"use_percent": t(agx.initdata.regionB.hwdata_a.addrof("use_percent")), + #"unk_83c": t(agx.initdata.regionB.hwdata_a.addrof("unk_83c")), + #"freq_with_off": t(agx.initdata.regionB.hwdata_a.addrof("freq_with_off")), + #"unk_ba0": t(agx.initdata.regionB.hwdata_a.addrof("unk_ba0")), + #"unk_bb0": t(agx.initdata.regionB.hwdata_a.addrof("unk_bb0")), + #"unk_c44": t(agx.initdata.regionB.hwdata_a.addrof("unk_c44")), + #"unk_c58": t(agx.initdata.regionB.hwdata_a.addrof("unk_c58")), + + #"unk_3ca0": t(agx.initdata.regionB.hwdata_a.addrof("unk_3ca0")), + #"unk_3ca8": t(agx.initdata.regionB.hwdata_a.addrof("unk_3ca8")), + #"unk_3cb0": t(agx.initdata.regionB.hwdata_a.addrof("unk_3cb0")), + #"ts_last_idle": t(agx.initdata.regionB.hwdata_a.addrof("ts_last_idle")), + #"ts_last_poweron": t(agx.initdata.regionB.hwdata_a.addrof("ts_last_poweron")), + #"ts_last_poweroff": t(agx.initdata.regionB.hwdata_a.addrof("ts_last_poweroff")), + #"unk_3cd0": t(agx.initdata.regionB.hwdata_a.addrof("unk_3cd0")), + + "halt_count": t(agx.initdata.fw_status.addrof("halt_count")), + "halted": t(agx.initdata.fw_status.addrof("halted")), + "resume": t(agx.initdata.fw_status.addrof("resume")), + "unk_40": t(agx.initdata.fw_status.addrof("unk_40")), + "unk_ctr": t(agx.initdata.fw_status.addrof("unk_ctr")), + "unk_60": t(agx.initdata.fw_status.addrof("unk_60")), + "unk_70": t(agx.initdata.fw_status.addrof("unk_70")), + "c_118c0": t(agx.initdata.regionC._addr + 0x118c0), + "c_118c4": t(agx.initdata.regionC._addr + 0x118c4), + "c_118c8": t(agx.initdata.regionC._addr + 0x118c8), + "c_118cc": t(agx.initdata.regionC._addr + 0x118cc), + "c_118d0": t(agx.initdata.regionC._addr + 0x118d0), + "c_118d4": t(agx.initdata.regionC._addr + 
0x118d4), + "c_118d8": t(agx.initdata.regionC._addr + 0x118d8), + "c_118dc": t(agx.initdata.regionC._addr + 0x118dc), + "3d_cmds": t(agx.initdata.regionB.stats_3d.addrof("total_cmds")), + #"3d_tvb_oflws_1": t(agx.initdata.regionB.stats_3d.stats.addrof("tvb_overflows_1")), + #"3d_tvb_oflws_2": t(agx.initdata.regionB.stats_3d.stats.addrof("tvb_overflows_2")), + "3d_cur_stamp_id": t(agx.initdata.regionB.stats_3d.stats.addrof("cur_stamp_id")), + "3d_ts": t(agx.initdata.regionB.stats_3d.stats.addrof("unk_timestamp")), + #"3d_cur_stamp_id": t(agx.initdata.regionB.stats_3d.stats.addrof("cur_stamp_id")), +}) + +for i in range(4): + regs[f"3d{i}_cq"] = t(agx.initdata.regionB.stats_3d.stats.queues[i].addrof("cur_cmdqueue")) + + +for i, r in enumerate(renderers): + regs.update({ + f"r{i}_3d_done": t(r.wq_3d.info.pointers.addrof("gpu_doneptr")), + #f"r{i}_3d_rptr": t(r.wq_3d.info.pointers.addrof("gpu_rptr")), + f"r{i}_3d_busy": t(r.wq_3d.info.addrof("busy")), + #f"r{i}_3d_blk": t(r.wq_3d.info.addrof("blocked_on_barrier")), + #f"r{i}_3d_2c": t(r.wq_3d.info.addrof("unk_2c")), + #f"r{i}_3d_54": t(r.wq_3d.info.addrof("unk_54")), + + f"r{i}_ta_done": t(r.wq_ta.info.pointers.addrof("gpu_doneptr")), + #f"r{i}_ta_rptr": t(r.wq_ta.info.pointers.addrof("gpu_rptr")), + f"r{i}_ta_busy": t(r.wq_ta.info.addrof("busy")), + #f"r{i}_ta_blk": t(r.wq_ta.info.addrof("blocked_on_barrier")), + #f"r{i}_ta_2c": t(r.wq_ta.info.addrof("unk_2c")), + #f"r{i}_ta_54": t(r.wq_ta.info.addrof("unk_54")), + f"r{i}_ta_stamp1": t(r.stamp_ta1._addr), + f"r{i}_ta_stamp2":t(r.stamp_ta2._addr), + f"r{i}_3d_stamp1": t(r.stamp_3d1._addr), + f"r{i}_3d_stamp2":t(r.stamp_3d2._addr), + + f"r{i}_ev_cnt":t(r.event_control.event_count._addr), + f"r{i}_ev_cur":t(r.event_control.addrof("cur_count")), + f"r{i}_ev_10":t(r.event_control.addrof("unk_10")), + }) + +div=4 +ticks = 24000000 // div * 25 + +la = GPIOLogicAnalyzer(u, regs=regs, cpu=analyzer_cpu, div=div) + +print("Queues:") +for i, r in enumerate(renderers): + print(f" 
Renderer {i}") + print(f" TA: {r.wq_ta.info._addr:#x} (stamp {r.work[0].ev_ta.id})") + #print(r.wq_ta.info) + print(f" 3D: {r.wq_3d.info._addr:#x} (stamp {r.work[0].ev_3d.id})") + #print(r.wq_3d.info) + +print("==========================================") +print("## Run") +print("==========================================") + +la.start(ticks, bufsize=0x8000000) + +t = time.time() + +try: + for r in renderers[:RENDERERS]: + r.run() + + for r in renderers[:RENDERERS]: + while not r.ev_3d.fired: + agx.asc.work() + agx.poll_channels() + print("==========================================") + #agx.poll_objects() + #mon.poll() + agx.kick_firmware() + if time.time() > t + 10: + raise Exception("Timeout") + + r.wait() + +finally: + #agx.poll_objects() + #mon.poll() + + la.complete() + la.show() + +time.sleep(2) + diff --git a/tools/proxyclient/experiments/agx_dumpstructs.py b/tools/proxyclient/experiments/agx_dumpstructs.py new file mode 100644 index 0000000..08a7327 --- /dev/null +++ b/tools/proxyclient/experiments/agx_dumpstructs.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT + +import sys, pathlib, time +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +from m1n1.constructutils import * +from m1n1.fw.agx import microsequence, initdata + +#for v in initdata.__all__: +#for v in initdata.__dict__: +def dump(module): + for v in module.__dict__: + struct = getattr(module, v) + if isinstance(struct, type) and issubclass(struct, ConstructClass) and struct is not ConstructClass: + print(struct.to_rust()) + print() + +dump(microsequence) diff --git a/tools/proxyclient/experiments/agx_parallel.py b/tools/proxyclient/experiments/agx_parallel.py new file mode 100644 index 0000000..163497e --- /dev/null +++ b/tools/proxyclient/experiments/agx_parallel.py @@ -0,0 +1,341 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT + +import sys, pathlib, time +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +import 
atexit, sys + +from m1n1.setup import * +from m1n1.constructutils import Ver +from m1n1.utils import * + +Ver.set_version(u) + +from m1n1.agx import AGX +from m1n1.agx.render import * + +from m1n1 import asm + +from m1n1.gpiola import GPIOLogicAnalyzer + +analyzer_cpu = 1 + +p.pmgr_adt_clocks_enable("/arm-io/gfx-asc") +p.pmgr_adt_clocks_enable("/arm-io/sgx") +p.smp_start_secondaries() +p.mmu_init_secondary(analyzer_cpu) +iface.dev.timeout = 42 + +## heater code +if True: + code = u.malloc(0x1000) + + util = asm.ARMAsm(""" + bench: + mrs x1, CNTPCT_EL0 + 1: + sub x0, x0, #1 + cbnz x0, 1b + + mrs x2, CNTPCT_EL0 + sub x0, x2, x1 + ret + """, code) + iface.writemem(code, util.data) + p.dc_cvau(code, len(util.data)) + p.ic_ivau(code, len(util.data)) + + LOOPS = 80000000000 + for idx in range(2, 8): + print(f"bench {idx}") + p.smp_call(idx, util.bench, LOOPS) + +agx = AGX(u) + +mon = RegMonitor(u, ascii=True, bufsize=0x8000000) +agx.mon = mon + +sgx = agx.sgx_dev +#mon.add(sgx.gpu_region_base, sgx.gpu_region_size, "contexts") +#mon.add(sgx.gfx_shared_region_base, sgx.gfx_shared_region_size, "gfx-shared") +#mon.add(sgx.gfx_handoff_base, sgx.gfx_handoff_size, "gfx-handoff") + +#mon.add(agx.initdasgx.gfx_handoff_base, sgx.gfx_handoff_size, "gfx-handoff") + +atexit.register(p.reboot) +agx.start() + +print("==========================================") +print("## After init") +print("==========================================") +mon.poll() +agx.poll_objects() + +ctx = GPUContext(agx) +ctx.bind(63) +ctx0 = GPUContext(agx) +ctx0.bind(62) + +f = GPUFrame(ctx, sys.argv[1], track=False) +f2 = GPUFrame(ctx0, sys.argv[1], track=False) + +RENDERERS = 4 +FRAMES = 8 + +renderers = [] + +fault_cmdbuf = f.cmdbuf.clone() +#fault_cmdbuf.depth_buffer = 0xdeadb000 + +for i in range(RENDERERS): + c = ctx0 if i == 0 else ctx + r = GPURenderer(c, 32, bm_slot=0x10 + i, queue=1) + renderers.append(r) + + for q in (r.wq_3d, r.wq_ta): + q.info.set_prio(2) + q.info.push() + +#for r in renderers[2:4]: 
+ #for q in (r.wq_3d, r.wq_ta): + #q.info.set_prio(3) + #q.info.push() + +#for r in renderers[4:6]: + #for q in (r.wq_3d, r.wq_ta): + #q.info.set_prio(0) + #q.info.push() + +#for r in renderers[6:8]: + #for q in (r.wq_3d, r.wq_ta): + #q.info.set_prio(1) + #q.info.push() + +print("==========================================") +print("## Submitting") +print("==========================================") + +for i, r in enumerate(renderers): + for j in range(FRAMES): + if (i, j) in ((1, 0), (2, 1), (3, 1)): + r.submit(fault_cmdbuf) + elif i == 0: + r.submit(f2.cmdbuf) + else: + r.submit(f.cmdbuf) + +print("==========================================") +print("## Submitted") +print("==========================================") + +def t(addr): + paddr = agx.uat.iotranslate(0, addr, 4)[0][0] + if paddr is None: + raise Exception(f"Failed to iotranslate {addr:#x}") + return paddr + +regs = { + "ta_cmds": t(agx.initdata.regionB.stats_ta.addrof("total_cmds")), + "ta_ts": t(agx.initdata.regionB.stats_ta.stats.addrof("unk_timestamp")), +} + +pend_base = agx.initdata.regionC.addrof("pending_stamps") +for i in range(5): + regs[f"st{i}_info"] = t(pend_base + i*8) + regs[f"st{i}_val"] = t(pend_base + i*8 + 4) + +for i in range(4): + regs[f"ta{i}_cq"] = t(agx.initdata.regionB.stats_ta.stats.queues[i].addrof("cur_cmdqueue")) + +regs.update({ + "pwr_status": t(agx.initdata.regionB.hwdata_a.addrof("pwr_status")), + "pstate": t(agx.initdata.regionB.hwdata_a.addrof("cur_pstate")), + "temp_c": t(agx.initdata.regionB.hwdata_a.addrof("temp_c")), + "pwr_mw": t(agx.initdata.regionB.hwdata_a.addrof("avg_power_mw")), + "pwr_ts": t(agx.initdata.regionB.hwdata_a.addrof("update_ts")), + + #"unk_10": t(agx.initdata.regionB.hwdata_a.addrof("unk_10")), + #"unk_14": t(agx.initdata.regionB.hwdata_a.addrof("unk_14")), + "actual_pstate": t(agx.initdata.regionB.hwdata_a.addrof("actual_pstate")), + "tgt_pstate": t(agx.initdata.regionB.hwdata_a.addrof("tgt_pstate")), + #"unk_40": 
t(agx.initdata.regionB.hwdata_a.addrof("unk_40")), + #"unk_44": t(agx.initdata.regionB.hwdata_a.addrof("unk_44")), + #"unk_48": t(agx.initdata.regionB.hwdata_a.addrof("unk_48")), + "freq_mhz": t(agx.initdata.regionB.hwdata_a.addrof("freq_mhz")), + + #"unk_748.0": t(agx.initdata.regionB.hwdata_a.addrof("unk_748")), + #"unk_748.1": t(agx.initdata.regionB.hwdata_a.addrof("unk_748")+4), + #"unk_748.2": t(agx.initdata.regionB.hwdata_a.addrof("unk_748")+8), + #"unk_748.3": t(agx.initdata.regionB.hwdata_a.addrof("unk_748")+12), + #"use_percent": t(agx.initdata.regionB.hwdata_a.addrof("use_percent")), + #"unk_83c": t(agx.initdata.regionB.hwdata_a.addrof("unk_83c")), + "freq_with_off": t(agx.initdata.regionB.hwdata_a.addrof("freq_with_off")), + #"unk_ba0": t(agx.initdata.regionB.hwdata_a.addrof("unk_ba0")), + #"unk_bb0": t(agx.initdata.regionB.hwdata_a.addrof("unk_bb0")), + #"unk_c44": t(agx.initdata.regionB.hwdata_a.addrof("unk_c44")), + #"unk_c58": t(agx.initdata.regionB.hwdata_a.addrof("unk_c58")), + + #"unk_3ca0": t(agx.initdata.regionB.hwdata_a.addrof("unk_3ca0")), + #"unk_3ca8": t(agx.initdata.regionB.hwdata_a.addrof("unk_3ca8")), + #"unk_3cb0": t(agx.initdata.regionB.hwdata_a.addrof("unk_3cb0")), + #"ts_last_idle": t(agx.initdata.regionB.hwdata_a.addrof("ts_last_idle")), + #"ts_last_poweron": t(agx.initdata.regionB.hwdata_a.addrof("ts_last_poweron")), + #"ts_last_poweroff": t(agx.initdata.regionB.hwdata_a.addrof("ts_last_poweroff")), + #"unk_3cd0": t(agx.initdata.regionB.hwdata_a.addrof("unk_3cd0")), + + "halt_count": t(agx.initdata.fw_status.addrof("halt_count")), + "halted": t(agx.initdata.fw_status.addrof("halted")), + "resume": t(agx.initdata.fw_status.addrof("resume")), + "unk_40": t(agx.initdata.fw_status.addrof("unk_40")), + "unk_ctr": t(agx.initdata.fw_status.addrof("unk_ctr")), + "unk_60": t(agx.initdata.fw_status.addrof("unk_60")), + "unk_70": t(agx.initdata.fw_status.addrof("unk_70")), + "c_118c0": t(agx.initdata.regionC._addr + 0x118c0), + "c_118c4": 
t(agx.initdata.regionC._addr + 0x118c4), + "c_118c8": t(agx.initdata.regionC._addr + 0x118c8), + "c_118cc": t(agx.initdata.regionC._addr + 0x118cc), + "c_118d0": t(agx.initdata.regionC._addr + 0x118d0), + "c_118d4": t(agx.initdata.regionC._addr + 0x118d4), + "c_118d8": t(agx.initdata.regionC._addr + 0x118d8), + "c_118dc": t(agx.initdata.regionC._addr + 0x118dc), + "3d_cmds": t(agx.initdata.regionB.stats_3d.addrof("total_cmds")), + #"3d_tvb_oflws_1": t(agx.initdata.regionB.stats_3d.stats.addrof("tvb_overflows_1")), + #"3d_tvb_oflws_2": t(agx.initdata.regionB.stats_3d.stats.addrof("tvb_overflows_2")), + "3d_cur_stamp_id": t(agx.initdata.regionB.stats_3d.stats.addrof("cur_stamp_id")), + "3d_ts": t(agx.initdata.regionB.stats_3d.stats.addrof("unk_timestamp")), + #"3d_cur_stamp_id": t(agx.initdata.regionB.stats_3d.stats.addrof("cur_stamp_id")), +}) + +for i in range(4): + regs[f"3d{i}_cq"] = t(agx.initdata.regionB.stats_3d.stats.queues[i].addrof("cur_cmdqueue")) + + +for i, r in enumerate(renderers): + regs.update({ + f"r{i}_3d_done": t(r.wq_3d.info.pointers.addrof("gpu_doneptr")), + #f"r{i}_3d_rptr": t(r.wq_3d.info.pointers.addrof("gpu_rptr")), + f"r{i}_3d_busy": t(r.wq_3d.info.addrof("busy")), + #f"r{i}_3d_blk": t(r.wq_3d.info.addrof("blocked_on_barrier")), + #f"r{i}_3d_2c": t(r.wq_3d.info.addrof("unk_2c")), + #f"r{i}_3d_54": t(r.wq_3d.info.addrof("unk_54")), + + f"r{i}_ta_done": t(r.wq_ta.info.pointers.addrof("gpu_doneptr")), + #f"r{i}_ta_rptr": t(r.wq_ta.info.pointers.addrof("gpu_rptr")), + f"r{i}_ta_busy": t(r.wq_ta.info.addrof("busy")), + #f"r{i}_ta_blk": t(r.wq_ta.info.addrof("blocked_on_barrier")), + #f"r{i}_ta_2c": t(r.wq_ta.info.addrof("unk_2c")), + #f"r{i}_ta_54": t(r.wq_ta.info.addrof("unk_54")), + f"r{i}_f{j}_ta_stamp1": t(r.stamp_ta1._addr), + f"r{i}_ta_stamp2":t(r.stamp_ta2._addr), + f"r{i}_f{j}_3d_stamp1": t(r.stamp_3d1._addr), + f"r{i}_3d_stamp2":t(r.stamp_3d2._addr), + }) + + for j in range(FRAMES): + work = r.work[j] + regs.update({ + 
f"r{i}_f{j}_3d_ts": t(work.wc_3d.ts1._addr), + f"r{i}_f{j}_ta_ts": t(work.wc_ta.ts1._addr), + }) + +div=4 +ticks = 24000000 // div * 25 + +la = GPIOLogicAnalyzer(u, regs=regs, cpu=analyzer_cpu, div=div) + + +print("==========================================") +print("## Poll prior to job start") +print("==========================================") + +#mon.poll() +#agx.poll_objects() + + +print("==========================================") +print("## After start") +print("==========================================") +#agx.poll_objects() + +#mon.poll() +print("==========================================") +print("## Waiting") +print("==========================================") + +print("Queues:") +for i, r in enumerate(renderers): + print(f" Renderer {i}") + print(f" TA: {r.wq_ta.info._addr:#x} (stamp {r.work[0].ev_ta.id})") + #print(r.wq_ta.info) + print(f" 3D: {r.wq_3d.info._addr:#x} (stamp {r.work[0].ev_3d.id})") + #print(r.wq_3d.info) + +print("==========================================") +print("## Run") +print("==========================================") + +la.start(ticks, bufsize=0x8000000) + +try: + for r in renderers[:RENDERERS]: + r.run() + + for r in renderers[:RENDERERS]: + while not r.ev_3d.fired: + agx.asc.work() + agx.poll_channels() + print("==========================================") + agx.poll_objects() + mon.poll() + + r.wait() + + #agx.poll_objects() + + #print("==========================================") + #print("## Stop ASC") + #print("==========================================") + + #agx.asc.stop() + + ##time.sleep(0.1) + + ##agx.poll_objects() + + #print("==========================================") + #print("## Start ASC") + #print("==========================================") + + #agx.asc.start() + + ##agx.poll_objects() + + #print("==========================================") + #print("## Run 2") + #print("==========================================") + + #for r in renderers[RENDERERS//2:]: + #r.run() + + #for r in 
renderers[RENDERERS//2:]: + #while not r.ev_3d.fired: + #agx.asc.work() + #agx.poll_channels() + #print("==========================================") + + #r.wait() + + #agx.poll_objects() + + #mon.poll() + +finally: + #agx.poll_objects() + #mon.poll() + + la.complete() + la.show() + +time.sleep(2) + diff --git a/tools/proxyclient/experiments/agx_renderframe.py b/tools/proxyclient/experiments/agx_renderframe.py new file mode 100644 index 0000000..82ac91c --- /dev/null +++ b/tools/proxyclient/experiments/agx_renderframe.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT + +import sys, pathlib, time +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +import atexit, sys + +from m1n1.setup import * +from m1n1.constructutils import Ver +from m1n1.utils import * + +Ver.set_version(u) + +from m1n1.agx import AGX +from m1n1.agx.render import * + +from m1n1 import asm + +p.pmgr_adt_clocks_enable("/arm-io/gfx-asc") +p.pmgr_adt_clocks_enable("/arm-io/sgx") + +agx = AGX(u) +mon = RegMonitor(u, ascii=True, bufsize=0x8000000) +agx.mon = mon + +sgx = agx.sgx_dev + +try: + agx.start() + agx.uat.dump(0) + + print("==========================================") + print("## After init") + print("==========================================") + mon.poll() + agx.poll_objects() + + ctx = GPUContext(agx) + ctx.bind(63) + + f = GPUFrame(ctx, sys.argv[1], track=False) + + r = GPURenderer(ctx, 16, bm_slot=0, queue=1) + print("==========================================") + print("## Submitting") + print("==========================================") + + w = r.submit(f.cmdbuf) + + print("==========================================") + print("## Submitted") + print("==========================================") + + print("==========================================") + print("## Run") + print("==========================================") + + r.run() + + while not r.ev_3d.fired: + agx.asc.work() + agx.poll_channels() + + agx.poll_objects() + mon.poll() + 
r.wait() + + time.sleep(3) + +finally: + #agx.poll_objects() + p.reboot() diff --git a/tools/proxyclient/experiments/agx_tlb.py b/tools/proxyclient/experiments/agx_tlb.py new file mode 100644 index 0000000..5ec86ca --- /dev/null +++ b/tools/proxyclient/experiments/agx_tlb.py @@ -0,0 +1,295 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT + +import sys, pathlib, time +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +import atexit, sys + +from m1n1.setup import * +from m1n1.constructutils import Ver +from m1n1.utils import * + +Ver.set_version(u) + +from m1n1.agx import AGX +from m1n1.agx.render import * + +from m1n1 import asm + +from m1n1.gpiola import GPIOLogicAnalyzer + +analyzer_cpu = 1 + +p.pmgr_adt_clocks_enable("/arm-io/gfx-asc") +p.pmgr_adt_clocks_enable("/arm-io/sgx") +p.smp_start_secondaries() +p.mmu_init_secondary(analyzer_cpu) +iface.dev.timeout = 42 + +agx = AGX(u) + +def initdata_hook(agx): + agx.initdata.regionC.idle_to_off_timeout_ms = 20000 + agx.initdata.regionC.push() + +agx.initdata_hook = initdata_hook + +mon = RegMonitor(u, ascii=True, bufsize=0x8000000) +agx.mon = mon + +sgx = agx.sgx_dev +#mon.add(sgx.gpu_region_base, sgx.gpu_region_size, "contexts") +#mon.add(sgx.gfx_shared_region_base, sgx.gfx_shared_region_size, "gfx-shared") +#mon.add(sgx.gfx_handoff_base, sgx.gfx_handoff_size, "gfx-handoff") + +#mon.add(agx.initdasgx.gfx_handoff_base, sgx.gfx_handoff_size, "gfx-handoff") + +atexit.register(p.reboot) +agx.start() + +print("==========================================") +print("## After init") +print("==========================================") +mon.poll() +agx.poll_objects() + +ctx = GPUContext(agx) +ctx.bind(3) + +f = GPUFrame(ctx, sys.argv[1], track=False) + +RENDERERS = 1 +FRAMES = 1 + +renderers = [] + +for i in range(RENDERERS): + r = GPURenderer(ctx, 4, bm_slot=0x10 + i, queue=1) + renderers.append(r) + + for q in (r.wq_3d, r.wq_ta): + q.info.set_prio(2) + q.info.push() + 
+print("==========================================") +print("## Submitting") +print("==========================================") + +for i, r in enumerate(renderers): + for j in range(FRAMES): + r.submit(f.cmdbuf) + +print("==========================================") +print("## Submitted") +print("==========================================") + +def t(addr): + paddr = agx.uat.iotranslate(0, addr, 4)[0][0] + if paddr is None: + raise Exception(f"Failed to iotranslate {addr:#x}") + return paddr + +regs = { + "ta_cmds": t(agx.initdata.regionB.stats_ta.addrof("total_cmds")), + "ta_ts": t(agx.initdata.regionB.stats_ta.stats.addrof("unk_timestamp")), +} + +#pend_base = agx.initdata.regionC.addrof("pending_stamps") +#for i in range(5): + #regs[f"st{i}_info"] = t(pend_base + i*8) + #regs[f"st{i}_val"] = t(pend_base + i*8 + 4) + +#for i in range(4): + #regs[f"ta{i}_cq"] = t(agx.initdata.regionB.stats_ta.stats.queues[i].addrof("cur_cmdqueue")) + +regs.update({ + "pwr_status": t(agx.initdata.regionB.hwdata_a.addrof("pwr_status")), + "pstate": t(agx.initdata.regionB.hwdata_a.addrof("cur_pstate")), + "temp_c": t(agx.initdata.regionB.hwdata_a.addrof("temp_c")), + "pwr_mw": t(agx.initdata.regionB.hwdata_a.addrof("avg_power_mw")), + "pwr_ts": t(agx.initdata.regionB.hwdata_a.addrof("update_ts")), + + "unk_10": t(agx.initdata.regionB.hwdata_a.addrof("unk_10")), + "unk_14": t(agx.initdata.regionB.hwdata_a.addrof("unk_14")), + "actual_pstate": t(agx.initdata.regionB.hwdata_a.addrof("actual_pstate")), + "tgt_pstate": t(agx.initdata.regionB.hwdata_a.addrof("tgt_pstate")), + "unk_40": t(agx.initdata.regionB.hwdata_a.addrof("unk_40")), + "unk_44": t(agx.initdata.regionB.hwdata_a.addrof("unk_44")), + "unk_48": t(agx.initdata.regionB.hwdata_a.addrof("unk_48")), + "freq_mhz": t(agx.initdata.regionB.hwdata_a.addrof("freq_mhz")), + + "unk_748.0": t(agx.initdata.regionB.hwdata_a.addrof("unk_748")), + "unk_748.1": t(agx.initdata.regionB.hwdata_a.addrof("unk_748")+4), + "unk_748.2": 
t(agx.initdata.regionB.hwdata_a.addrof("unk_748")+8), + "unk_748.3": t(agx.initdata.regionB.hwdata_a.addrof("unk_748")+12), + "use_percent": t(agx.initdata.regionB.hwdata_a.addrof("use_percent")), + "unk_83c": t(agx.initdata.regionB.hwdata_a.addrof("unk_83c")), + "freq_with_off": t(agx.initdata.regionB.hwdata_a.addrof("freq_with_off")), + "unk_ba0": t(agx.initdata.regionB.hwdata_a.addrof("unk_ba0")), + "unk_bb0": t(agx.initdata.regionB.hwdata_a.addrof("unk_bb0")), + "unk_c44": t(agx.initdata.regionB.hwdata_a.addrof("unk_c44")), + "unk_c58": t(agx.initdata.regionB.hwdata_a.addrof("unk_c58")), + + "unk_3ca0": t(agx.initdata.regionB.hwdata_a.addrof("unk_3ca0")), + "unk_3ca8": t(agx.initdata.regionB.hwdata_a.addrof("unk_3ca8")), + "unk_3cb0": t(agx.initdata.regionB.hwdata_a.addrof("unk_3cb0")), + "ts_last_idle": t(agx.initdata.regionB.hwdata_a.addrof("ts_last_idle")), + "ts_last_poweron": t(agx.initdata.regionB.hwdata_a.addrof("ts_last_poweron")), + "ts_last_poweroff": t(agx.initdata.regionB.hwdata_a.addrof("ts_last_poweroff")), + "unk_3cd0": t(agx.initdata.regionB.hwdata_a.addrof("unk_3cd0")), + + "halt_count": t(agx.initdata.fw_status.addrof("halt_count")), + "halted": t(agx.initdata.fw_status.addrof("halted")), + "resume": t(agx.initdata.fw_status.addrof("resume")), + # Named fw_unk_40 because this dict literal already has an "unk_40" key for hwdata_a; + # repeating the key would silently replace that earlier probe. + "fw_unk_40": t(agx.initdata.fw_status.addrof("unk_40")), + "unk_ctr": t(agx.initdata.fw_status.addrof("unk_ctr")), + "unk_60": t(agx.initdata.fw_status.addrof("unk_60")), + "unk_70": t(agx.initdata.fw_status.addrof("unk_70")), + "c_118c0": t(agx.initdata.regionC._addr + 0x118c0), + "c_118c4": t(agx.initdata.regionC._addr + 0x118c4), + "c_118c8": t(agx.initdata.regionC._addr + 0x118c8), + "c_118cc": t(agx.initdata.regionC._addr + 0x118cc), + "c_118d0": t(agx.initdata.regionC._addr + 0x118d0), + "c_118d4": t(agx.initdata.regionC._addr + 0x118d4), + "c_118d8": t(agx.initdata.regionC._addr + 0x118d8), + "c_118dc": t(agx.initdata.regionC._addr + 0x118dc), + "3d_cmds": 
t(agx.initdata.regionB.stats_3d.addrof("total_cmds")), + #"3d_cq": t(agx.initdata.regionB.stats_3d.stats.addrof("cur_cmdqueue")), + #"3d_tvb_oflws_1": t(agx.initdata.regionB.stats_3d.stats.addrof("tvb_overflows_1")), + #"3d_tvb_oflws_2": t(agx.initdata.regionB.stats_3d.stats.addrof("tvb_overflows_2")), + #"3d_cur_stamp_id": t(agx.initdata.regionB.stats_3d.stats.addrof("cur_stamp_id")), + "3d_ts": t(agx.initdata.regionB.stats_3d.stats.addrof("unk_timestamp")), + "hoff_lock": agx.uat.handoff.reg.LOCK_AP.addr, + "hoff_ctx": agx.uat.handoff.reg.CUR_CTX.addr, + "hoff_unk2": agx.uat.handoff.reg.UNK2.addr, + "hoff_unk3_lo": agx.uat.handoff.reg.UNK3.addr, + "hoff_unk3_hi": agx.uat.handoff.reg.UNK3.addr + 4, +}) + +for i, r in enumerate(renderers): + regs.update({ + f"r{i}_3d_done": t(r.wq_3d.info.pointers.addrof("gpu_doneptr")), + #f"r{i}_3d_rptr": t(r.wq_3d.info.pointers.addrof("gpu_rptr")), + f"r{i}_3d_busy": t(r.wq_3d.info.addrof("busy")), + #f"r{i}_3d_blk": t(r.wq_3d.info.addrof("blocked_on_barrier")), + #f"r{i}_3d_2c": t(r.wq_3d.info.addrof("unk_2c")), + #f"r{i}_3d_54": t(r.wq_3d.info.addrof("unk_54")), + + f"r{i}_ta_done": t(r.wq_ta.info.pointers.addrof("gpu_doneptr")), + #f"r{i}_ta_rptr": t(r.wq_ta.info.pointers.addrof("gpu_rptr")), + f"r{i}_ta_busy": t(r.wq_ta.info.addrof("busy")), + #f"r{i}_ta_blk": t(r.wq_ta.info.addrof("blocked_on_barrier")), + #f"r{i}_ta_2c": t(r.wq_ta.info.addrof("unk_2c")), + #f"r{i}_ta_54": t(r.wq_ta.info.addrof("unk_54")), + # Stamps are attributes of the renderer, so their names carry only the renderer index + # (no dependence on a loop variable left over from the submit loop). + f"r{i}_ta_stamp1": t(r.stamp_ta1._addr), + f"r{i}_ta_stamp2":t(r.stamp_ta2._addr), + f"r{i}_3d_stamp1": t(r.stamp_3d1._addr), + f"r{i}_3d_stamp2":t(r.stamp_3d2._addr), + }) + + #for j in range(FRAMES): + #work = r.work[j] + #regs.update({ + #f"r{i}_f{j}_3d_ts": t(work.wc_3d.ts1._addr), + #f"r{i}_f{j}_ta_ts": t(work.wc_ta.ts1._addr), + #}) + +div=4 +ticks = 24000000 // div * 25 + +la = GPIOLogicAnalyzer(u, regs=regs, cpu=analyzer_cpu, div=div) + +print("==========================================") +print("## 
Run") +print("==========================================") + +la.start(ticks, bufsize=0x8000000) + +depth_len = align_up(1920*1080*4, 0x4000) +#agx.uat.iomap_at(ctx.ctx, f.cmdbuf.depth_buffer, 0, depth_len, VALID=0) +#agx.uat.flush_dirty() + +fb = r.work[0].fb + +#agx.uat.iomap_at(ctx.ctx, fb, 0, depth_len, VALID=0) +#agx.uat.flush_dirty() + +agx.kick_firmware() + +agx.show_stats = False +count_pa = renderers[0].event_control.event_count._paddr + +print(f"count: {p.read32(count_pa)}") + +agx.uat.invalidate_cache() +agx.uat.dump(ctx.ctx) + +mon.add(0x9fff74000, 0x4000) + +try: + for r in renderers: + r.run() + + for r in renderers: + while not r.ev_ta.fired: + agx.asc.work() + agx.poll_channels() + + print("TA fired") + print(f"count: {p.read32(count_pa)}") + + for r in renderers: + while not r.ev_3d.fired: + agx.asc.work() + agx.poll_channels() + #print("==========================================") + + r.wait() + + print("3D fired") + print("Timestamps:") + print(f" 3D 1: {r.ts3d_1.pull().val}") + print(f" 3D 2: {r.ts3d_2.pull().val}") + print(f" TA 1: {r.tsta_1.pull().val}") + print(f" TA 2: {r.tsta_2.pull().val}") + print("CPU flag:", r.buffer_mgr.misc_obj.pull().cpu_flag) + + mon.poll() + + #agx.uat.iomap_at(ctx.ctx, fb, 0, depth_len, VALID=0) + #agx.uat.flush_dirty() + + print(f"fb: {fb:#x}") + + for i, r in enumerate(renderers): + for j in range(FRAMES): + r.submit(f.cmdbuf) + + r.run() + + for r in renderers: + while not r.ev_3d.fired: + agx.asc.work() + r.wait() + + print("3D fired again") + print("Timestamps:") + print(f" 3D 1: {r.ts3d_1.pull().val}") + print(f" 3D 2: {r.ts3d_2.pull().val}") + print(f" TA 1: {r.tsta_1.pull().val}") + print(f" TA 2: {r.tsta_2.pull().val}") + print("CPU flag:", r.buffer_mgr.misc_obj.pull().cpu_flag) + + time.sleep(0.5) + +finally: + agx.poll_channels() + #agx.poll_objects() + #mon.poll() + + la.complete() + la.show() + +time.sleep(2) + diff --git a/tools/proxyclient/experiments/agx_tracetimings.py 
b/tools/proxyclient/experiments/agx_tracetimings.py new file mode 100644 index 0000000..52ac2c9 --- /dev/null +++ b/tools/proxyclient/experiments/agx_tracetimings.py @@ -0,0 +1,314 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT + +import sys, pathlib, time +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +import atexit, sys + +from m1n1.setup import * +from m1n1.constructutils import Ver +from m1n1.utils import * + +Ver.set_version(u) + +from m1n1.agx import AGX +from m1n1.agx.render import * + +from m1n1.gpiola import GPIOLogicAnalyzer + +analyzer_cpu = 1 + +p.pmgr_adt_clocks_enable("/arm-io/gfx-asc") +p.pmgr_adt_clocks_enable("/arm-io/sgx") +p.smp_start_secondaries() +p.mmu_init_secondary(analyzer_cpu) +iface.dev.timeout = 10 + +agx = AGX(u) + +mon = RegMonitor(u, ascii=True, bufsize=0x8000000) +agx.mon = mon + +sgx = agx.sgx_dev +#mon.add(sgx.gpu_region_base, sgx.gpu_region_size, "contexts") +#mon.add(sgx.gfx_shared_region_base, sgx.gfx_shared_region_size, "gfx-shared") +#mon.add(sgx.gfx_handoff_base, sgx.gfx_handoff_size, "gfx-handoff") + +#mon.add(agx.initdasgx.gfx_handoff_base, sgx.gfx_handoff_size, "gfx-handoff") + +atexit.register(p.reboot) +agx.start() + +print("==========================================") +print("## After init") +print("==========================================") +mon.poll() +agx.poll_objects() + +ctx = GPUContext(agx) +ctx.bind(1) + +renderer = GPURenderer(ctx, 64, bm_slot=0, queue=0) +renderer2 = GPURenderer(ctx, 64, bm_slot=1, queue=1) + +#for q in (renderer.wq_3d, renderer.wq_ta):#, renderer2.wq_3d, renderer2.wq_ta): + #q.info.unk_30 = 2 + #q.info.unk_34 = 2 + #q.info.unk_38 = 0xffff000000000000 + #q.info.unk_40 = 0 + #q.info.unk_44 = 0 + #q.info.unk_48 = 2 + #q.info.unk_50 = 0x1 + #q.info.push() + +f = GPUFrame(ctx, sys.argv[1], track=False) +#f2 = GPUFrame(renderer2.ctx, sys.argv[1], track=False) + +print("==========================================") +print("## Pre submit") 
+print("==========================================") + +mon.poll() +agx.poll_objects() + +print("==========================================") +print("## Submitting") +print("==========================================") + +work = renderer.submit(f.cmdbuf) +work2 = renderer2.submit(f.cmdbuf) +workb = renderer.submit(f.cmdbuf) +work2b = renderer2.submit(f.cmdbuf) + +print(work.wc_3d) +print(work.wc_ta) +print(work2.wc_3d) +print(work2.wc_ta) + +print("==========================================") +print("## Submitted") +print("==========================================") + +def t(addr): + paddr = agx.uat.iotranslate(0, addr, 4)[0][0] + if paddr is None: + raise Exception(f"Failed to iotranslate {addr:#x}") + return paddr + +regs = { + "ta_cmds": t(agx.initdata.regionB.stats_ta.addrof("total_cmds")), + "ta0_busy": t(agx.initdata.regionB.stats_ta.stats.queues[0].addrof("busy")), + "ta0_unk4": t(agx.initdata.regionB.stats_ta.stats.queues[0].addrof("unk_4")), + "ta0_cq": t(agx.initdata.regionB.stats_ta.stats.queues[0].addrof("cur_cmdqueue")), + "ta0_cnt": t(agx.initdata.regionB.stats_ta.stats.queues[0].addrof("cur_count")), + "ta1_busy": t(agx.initdata.regionB.stats_ta.stats.queues[1].addrof("busy")), + "ta1_unk4": t(agx.initdata.regionB.stats_ta.stats.queues[1].addrof("unk_4")), + "ta1_cq": t(agx.initdata.regionB.stats_ta.stats.queues[1].addrof("cur_cmdqueue")), + "ta1_cnt": t(agx.initdata.regionB.stats_ta.stats.queues[1].addrof("cur_count")), + "ta_ts": t(agx.initdata.regionB.stats_ta.stats.addrof("unk_timestamp")), + "3d_cmds": t(agx.initdata.regionB.stats_3d.addrof("total_cmds")), + "3d_cq": t(agx.initdata.regionB.stats_3d.stats.addrof("cur_cmdqueue")), + "3d_tvb_oflws_1": t(agx.initdata.regionB.stats_3d.stats.addrof("tvb_overflows_1")), + "3d_tvb_oflws_2": t(agx.initdata.regionB.stats_3d.stats.addrof("tvb_overflows_2")), + "3d_cur_stamp_id": t(agx.initdata.regionB.stats_3d.stats.addrof("cur_stamp_id")), + "3d_ts": 
t(agx.initdata.regionB.stats_3d.stats.addrof("unk_timestamp")), + + "bmctl_0": t(agx.initdata.regionB.buffer_mgr_ctl._addr + 0), + "bmctl_8": t(agx.initdata.regionB.buffer_mgr_ctl._addr + 8), + "2_bmctl_0": t(agx.initdata.regionB.buffer_mgr_ctl._addr + 16), + "2_bmctl_8": t(agx.initdata.regionB.buffer_mgr_ctl._addr + 24), + + "bmmisc_0": t(renderer.buffer_mgr.info.misc.addrof("gpu_0")), + "bmmisc_4": t(renderer.buffer_mgr.info.misc.addrof("gpu_4")), + "bmmisc_8": t(renderer.buffer_mgr.info.misc.addrof("gpu_8")), + "bmmisc_c": t(renderer.buffer_mgr.info.misc.addrof("gpu_c")), + "bmi_gpuc": t(renderer.buffer_mgr.info.addrof("gpu_counter")), + "bmi_18": t(renderer.buffer_mgr.info.addrof("unk_18")), + "bmi_gpuc2": t(renderer.buffer_mgr.info.addrof("gpu_counter2")), + + "2_bmmisc_0": t(renderer2.buffer_mgr.info.misc.addrof("gpu_0")), + "2_bmmisc_4": t(renderer2.buffer_mgr.info.misc.addrof("gpu_4")), + "2_bmmisc_8": t(renderer2.buffer_mgr.info.misc.addrof("gpu_8")), + "2_bmmisc_c": t(renderer2.buffer_mgr.info.misc.addrof("gpu_c")), + "2_bmi_gpuc": t(renderer2.buffer_mgr.info.addrof("gpu_counter")), + "2_bmi_18": t(renderer2.buffer_mgr.info.addrof("unk_18")), + "2_bmi_gpuc2": t(renderer2.buffer_mgr.info.addrof("gpu_counter2")), + + "ctxdat_0": t(renderer.ctx.gpu_context._addr + 0), + "ctxdat_4": t(renderer.ctx.gpu_context._addr + 4), + "ctxdat_8": t(renderer.ctx.gpu_context._addr + 8), + "ctxdat_c": t(renderer.ctx.gpu_context._addr + 0xc), + + "2_ctxdat_0": t(renderer2.ctx.gpu_context._addr + 0), + "2_ctxdat_4": t(renderer2.ctx.gpu_context._addr + 4), + "2_ctxdat_8": t(renderer2.ctx.gpu_context._addr + 8), + "2_ctxdat_c": t(renderer2.ctx.gpu_context._addr + 0xc), + + "evctl_ta": t(renderer.event_control.addrof("has_ta")), + "evctl_pta": t(renderer.event_control.addrof("pstamp_ta")), + "evctl_3d": t(renderer.event_control.addrof("has_3d")), + "evctl_p3d": t(renderer.event_control.addrof("pstamp_3d")), + "evctl_in_list": t(renderer.event_control.addrof("in_list")), + 
"evctl_prev": t(renderer.event_control.list_head.addrof("prev")), + "evctl_next": t(renderer.event_control.list_head.addrof("next")), + + "2_evctl_ta": t(renderer2.event_control.addrof("has_ta")), + "2_evctl_pta": t(renderer2.event_control.addrof("pstamp_ta")), + "2_evctl_3d": t(renderer2.event_control.addrof("has_3d")), + "2_evctl_p3d": t(renderer2.event_control.addrof("pstamp_3d")), + "2_evctl_in_list":t(renderer2.event_control.addrof("in_list")), + "2_evctl_prev": t(renderer2.event_control.list_head.addrof("prev")), + "2_evctl_next": t(renderer2.event_control.list_head.addrof("next")), + + "jl_first": t(renderer.job_list.addrof("first_job")), + "jl_last": t(renderer.job_list.addrof("last_head")), + "jl_10": t(renderer.job_list.addrof("unkptr_10")), + + "2_jl_first": t(renderer2.job_list.addrof("first_job")), + "2_jl_last": t(renderer2.job_list.addrof("last_head")), + "2_jl_10": t(renderer2.job_list.addrof("unkptr_10")), + + "3d_done": t(renderer.wq_3d.info.pointers.addrof("gpu_doneptr")), + "3d_rptr": t(renderer.wq_3d.info.pointers.addrof("gpu_rptr")), + "3d_rptr1": t(renderer.wq_3d.info.addrof("gpu_rptr1")), + "3d_rptr2": t(renderer.wq_3d.info.addrof("gpu_rptr2")), + "3d_rptr3": t(renderer.wq_3d.info.addrof("gpu_rptr3")), + "3d_busy": t(renderer.wq_3d.info.addrof("busy")), + "3d_blk": t(renderer.wq_3d.info.addrof("blocked_on_barrier")), + "3d_2c": t(renderer.wq_3d.info.addrof("unk_2c")), + "3d_54": t(renderer.wq_3d.info.addrof("unk_54")), + "3d_58": t(renderer.wq_3d.info.addrof("unk_58")), + + "2_3d_done": t(renderer2.wq_3d.info.pointers.addrof("gpu_doneptr")), + "2_3d_rptr": t(renderer2.wq_3d.info.pointers.addrof("gpu_rptr")), + "2_3d_busy": t(renderer2.wq_3d.info.addrof("busy")), + "2_3d_blk": t(renderer2.wq_3d.info.addrof("blocked_on_barrier")), + "2_3d_2c": t(renderer2.wq_3d.info.addrof("unk_2c")), + "2_3d_54": t(renderer2.wq_3d.info.addrof("unk_54")), + + "ta_done": t(renderer.wq_ta.info.pointers.addrof("gpu_doneptr")), + "ta_rptr": 
t(renderer.wq_ta.info.pointers.addrof("gpu_rptr")), + "ta_rptr1": t(renderer.wq_ta.info.addrof("gpu_rptr1")), + "ta_rptr2": t(renderer.wq_ta.info.addrof("gpu_rptr2")), + "ta_rptr3": t(renderer.wq_ta.info.addrof("gpu_rptr3")), + "ta_busy": t(renderer.wq_ta.info.addrof("busy")), + "ta_blk": t(renderer.wq_ta.info.addrof("blocked_on_barrier")), + "ta_2c": t(renderer.wq_ta.info.addrof("unk_2c")), + "ta_54": t(renderer.wq_ta.info.addrof("unk_54")), + "ta_58": t(renderer.wq_ta.info.addrof("unk_58")), + + "2_ta_done": t(renderer2.wq_ta.info.pointers.addrof("gpu_doneptr")), + "2_ta_rptr": t(renderer2.wq_ta.info.pointers.addrof("gpu_rptr")), + "2_ta_busy": t(renderer2.wq_ta.info.addrof("busy")), + "2_ta_blk": t(renderer2.wq_ta.info.addrof("blocked_on_barrier")), + "2_ta_2c": t(renderer2.wq_ta.info.addrof("unk_2c")), + "2_ta_54": t(renderer2.wq_ta.info.addrof("unk_54")), + + "3d_ts1": t(work.wc_3d.ts1._addr), + "3d_ts1b": t(workb.wc_3d.ts1._addr), + "3d_ts2": t(work.wc_3d.ts2._addr), + "3d_ts3": t(work.wc_3d.ts3._addr), + "ta_ts1": t(work.wc_ta.ts1._addr), + "ta_ts1b": t(workb.wc_ta.ts1._addr), + "ta_ts2": t(work.wc_ta.ts2._addr), + "ta_ts3": t(work.wc_ta.ts3._addr), + "2_3d_ts1": t(work2.wc_3d.ts1._addr), + "2_3d_ts1b": t(work2b.wc_3d.ts1._addr), + "2_3d_ts2": t(work2.wc_3d.ts2._addr), + "2_3d_ts3": t(work2.wc_3d.ts3._addr), + "2_ta_ts1": t(work2.wc_ta.ts1._addr), + "2_ta_ts1b": t(work2b.wc_ta.ts1._addr), + "2_ta_ts2": t(work2.wc_ta.ts2._addr), + "2_ta_ts3": t(work2.wc_ta.ts3._addr), + + "ta_stamp1": t(renderer.stamp_ta1._addr), + "ta_stamp2": t(renderer.stamp_ta2._addr), + "3d_stamp1": t(renderer.stamp_3d1._addr), + "3d_stamp2": t(renderer.stamp_3d2._addr), + + "2_ta_stamp1": t(renderer2.stamp_ta1._addr), + "2_ta_stamp2": t(renderer2.stamp_ta2._addr), + "2_3d_stamp1": t(renderer2.stamp_3d1._addr), + "2_3d_stamp2": t(renderer2.stamp_3d2._addr), +} + +div=4 +ticks = 24000000 // div * 3 + +la = GPIOLogicAnalyzer(u, regs=regs, cpu=analyzer_cpu, div=div) + + 
+print("==========================================") +print("## Poll prior to job start") +print("==========================================") + +mon.poll() +agx.poll_objects() + +print("==========================================") +print("## Run") +print("==========================================") + +la.start(ticks, bufsize=0x400000) +renderer.run() + +print("==========================================") +print("## After r1 start") +print("==========================================") +#agx.poll_objects() + +#time.sleep(0.1) +#mon.poll() +#time.sleep(0.15) +#mon.poll() +renderer2.run() + +print("==========================================") +print("## After r2 start") +print("==========================================") +agx.poll_objects() + +#mon.poll() +print("==========================================") +print("## Waiting") +print("==========================================") + +try: + + #while not work.ev_3d.fired: + #agx.asc.work() + ##mon.poll() + #agx.poll_objects() + #agx.poll_channels() + #print("==========================================") + ##time.sleep(0.1) + + #print("==========================================") + #print("## Ev1 Fired") + #print("==========================================") + + while not work2.ev_3d.fired: + agx.asc.work() + #mon.poll() + agx.poll_objects() + agx.poll_channels() + print("==========================================") + #time.sleep(0.1) + + print("==========================================") + print("## Ev2 Fired") + print("==========================================") + + renderer.wait() + renderer2.wait() + + agx.poll_objects() + #mon.poll() + +finally: + la.complete() + la.show() + +time.sleep(2) + diff --git a/tools/proxyclient/experiments/aic_test.py b/tools/proxyclient/experiments/aic_test.py new file mode 100755 index 0000000..aad7e2c --- /dev/null +++ b/tools/proxyclient/experiments/aic_test.py @@ -0,0 +1,411 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +import sys, pathlib 
+sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +from m1n1.setup import * +from m1n1 import asm + +ULCON = 0x235200000 +UCON = 0x235200004 +UFCON = 0x235200008 +UTRSTAT = 0x235200010 + +AIC = 0x23b100000 + +AIC_RST = AIC + 0xc +AIC_CFG = AIC + 0x10 + +AIC_TB = 0x23b108000 +AIC_TGT_DST = AIC + 0x3000 +AIC_SW_GEN_SET = AIC + 0x4000 +AIC_SW_GEN_CLR = AIC + 0x4080 +AIC_MASK_SET = AIC + 0x4100 +AIC_MASK_CLR = AIC + 0x4180 +AIC_HW_STATE = AIC + 0x4200 + +AIC_INTERRUPT_ACK = AIC + 0x2004 +AIC_IPI_SET = AIC + 0x2008 +AIC_IPI_CLR = AIC + 0x200c + +AIC_IPI_MASK_SET = AIC + 0x2024 +AIC_IPI_MASK_CLR = AIC + 0x2028 + +daif = u.mrs(DAIF) +print("DAIF: %x" % daif) +daif &= ~0x3c0 +#daif |= 0x3c0 +u.msr(DAIF, daif) +print("DAIF: %x" % u.mrs(DAIF)) + +def cpoll(): + mon.poll() + print("<") + mon.poll() + print(">") + +p.write32(AIC + 0xc, 1) +p.write32(AIC + 0x10, 0xe0777971) +p.write32(AIC + 0x18, 0) +p.write32(AIC + 0x20, 0xffffffff) +p.write32(AIC + 0x24, 0xffffffff) +p.write32(AIC + 0x28, 0xffffffff) +p.write32(AIC + 0x2c, 0xffffffff) +p.write32(AIC + 0x30, 0xffffffff) +p.write32(AIC + 0x34, 0xffffffff) +p.write32(AIC + 0x38, 0xffffffff) +p.write32(AIC + 0x3c, 0xffffffff) +p.write32(AIC + 0x40, 0xffffffff) +p.write32(AIC + 0x38, 0xffffffff) +#p.write32(AIC + 0xc, 0) + +p.memset32(AIC_MASK_SET, 0xffffffff, 0x80) +p.memset32(AIC_SW_GEN_CLR, 0xffffffff, 0x80) +p.memset32(AIC_TGT_DST, 0x1, 0x1000) +#p.memset32(AIC_MASK_CLR, 0xffffffff, 0x80) + +#p.write32(AIC + 0x10, 0xe0777971) + +mon.add(AIC + 0x0000, 0x1000) +mon.add(AIC + 0x2080, 0x040) +mon.add(AIC + 0x4000, 0x200) +mon.add(AIC + 0x5000, 0x080) +mon.add(AIC + 0x5080, 0x080) +mon.add(AIC + 0x5100, 0x080) +mon.add(AIC + 0x5180, 0x080) +mon.add(AIC + 0x5200, 0x080) +mon.add(AIC + 0x5280, 0x080) +mon.add(AIC + 0x5300, 0x080) +mon.add(AIC + 0x5380, 0x080) +#mon.add(AIC + 0x3000, 0x400) +#mon.add(AIC + 0x4000, 0x400) +#mon.add(AIC + 0x8000, 0x20) +#mon.add(AIC + 0x8030, 0xd0) +#mon.add(0x235200000, 0x20) + +def 
test_ipi(): + cpoll() + + print("Set IPI") + + p.write32(AIC_IPI_SET, 1) + + cpoll() + cpoll() + + print("Read ACK reg") + + reason = p.read32(AIC_INTERRUPT_ACK) + print("reason: 0x%x" % reason) + + cpoll() + + print("Write reason") + p.write32(AIC_INTERRUPT_ACK, reason) + + cpoll() + + reason = p.read32(AIC_INTERRUPT_ACK) + print("reason: 0x%x" % reason) + + cpoll() + + print("Write ACK reg") + p.write32(AIC_INTERRUPT_ACK, reason) + cpoll() + + print("Clear IPI") + + p.write32(AIC_IPI_CLR, 1) + cpoll() + + print("Read ACK reg") + + reason = p.read32(AIC_INTERRUPT_ACK) + + print("reason: 0x%x" % reason) + + cpoll() + + print("Write IPI ACK") + + p.write32(AIC_IPI_MASK_CLR, 1) + + cpoll() + +def test_timer(): + cpoll() + + freq = u.mrs(CNTFRQ_EL0) + print("Timer freq: %d" % freq) + + #u.msr(CNTP_CTL_EL0, 0) + #u.msr(CNTP_TVAL_EL0, freq * 2) + #u.msr(CNTP_CTL_EL0, 1) + #u.msr(CNTV_CTL_EL0, 0) + #u.msr(CNTV_TVAL_EL0, freq * 2) + #u.msr(CNTV_CTL_EL0, 1) + #u.msr(CNTHV_CTL_EL2, 0) + #u.msr(CNTHV_TVAL_EL2, freq * 2) + #u.msr(CNTHV_CTL_EL2, 1) + u.msr(CNTHP_CTL_EL2, 0) + u.msr(CNTHP_TVAL_EL2, freq * 2) + u.msr(CNTHP_CTL_EL2, 1) + + iface.ttymode() + + #while True: + #p.nop() + #time.sleep(0.3) + #print(". 
%x" % u.mrs(CNTP_CTL_EL0)) + +def get_irq_state(irq): + v = p.read32(AIC_HW_STATE + 4* (irq//32)) + return bool(v & 1<<(irq%32)) + +def test_uart_irq(): + cpoll() + #p.memset32(AIC_MASK_CLR, 0xffffffff, 0x80) + + print("cleanup") + p.write32(UCON, 5) + p.write32(UFCON, 0x11) + p.write32(UTRSTAT, 0xfff) + + cpoll() + + for irq in range(600, 610): + #print("S: ", get_irq_state(irq)) + p.write32(AIC_SW_GEN_CLR + 4* (irq//32), 1<<(irq%32)) + #print("S: ", get_irq_state(irq)) + #print("a") + #print("S: ", get_irq_state(irq)) + p.write32(AIC_MASK_CLR + 4* (irq//32), 1<<(irq%32)) + #print("S: ", get_irq_state(irq)) + #print("b") + + irq = 605 + + cpoll() + print("a") + print("S: ", get_irq_state(irq)) + print("ucon: %x" %p.read32(UCON)) + + TX_IRQ_EN = 0x1000 + + RX_IRQ_ENABLE = 0x20000 + RX_IRQ_UNMASK = 0x10000 + + RX_IRQ_ENA = 0x20000 + RX_IRQ_MASK = 0x4000 # defer? + + code = u.malloc(0x1000) + + c = asm.ARMAsm(""" + ldr x1, =0x235200000 + + ldr x3, =0xc000000 +1: + subs x3, x3, #1 + bne 1b + mov x2, 'A' + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + #str w2, [x1, #0x20] + + mov x3, #0x3ff + str w3, [x1, #0x10] + #str w2, [x1, #0x20] + str w0, [x1, #4] + ldr w0, [x1, #0x10] + + ldr x3, =0xc00000 +1: + subs x3, x3, #1 + bne 1b + + #mov x3, #0x3ff + #str w3, [x1, #0x10] + #ldr w2, [x1, #4] + #mov x2, #0x205 + #str w2, [x1, #4] + #str w0, 
[x1, #4] + ##ldr w0, [x1, #0x10] + + #ldr x3, =0xc00000 +#1: + #subs x3, x3, #1 + #bne 1b + + ldr w0, [x1, #0x10] + #mov w0, w2 + ret +""", code) + iface.writemem(code, c.data) + p.dc_cvau(code, len(c.data)) + p.ic_ivau(code, len(c.data)) + + #RX_IRQ_ + + """ +UCON UTRSTAT +00200 TX FIFO thresh IRQ delivery enable +00080 0200 TX FIFO threshold IRQ unmask +20000 0100 RX IRQ unmask +10000 RX IRQ delivery enable +""" + + # edge triggered + TX_FIFO_THRESH_CROSSED_IRQ_UNMASK = 0x2000 + + TX_IRQ_UNMASK = 0x200 + TX_EVENT_ENABLE = 0x80 + RX_EVENT_ENABLE = 0x20000 + RX_IRQ_UNMASK = 0x10000 + + #flags = 0x7ffc0 + crash = 0x180000 + no_irqs = 0x21c5c0 + instant_irqs = 0x3a00 + #flags = no_irqs | 0x0000 + #flags = 0x2e5c0 + #flags = 0x2000 + + #flags = 0x30000 + #flags = 0x80 + flags = 0x7ff80 + + + val = flags | 0x005 + #print("ucon<-%x" % val) + #p.write32(UCON, val) + p.write32(UTRSTAT, 0xfff) + print("utrstat=%x" % p.read32(UTRSTAT)) + ret = p.call(code, val) + print("utrstat::%x" % ret) + print("utrstat=%x" % p.read32(UTRSTAT)) + time.sleep(0.5) + iface.dev.write(b'1') + #print(iface.dev.read(1)) + time.sleep(0.1) + print("ucon: %x" %p.read32(UCON)) + print("delay") + try: + p.udelay(500000) + except: + pass + iface.dev.write(bytes(64)) + p.nop() + print("ucon: %x" %p.read32(UCON)) + print("S: ", get_irq_state(irq)) + + #while True: + #print("S: ", get_irq_state(irq)) + #p.write32(UTRSTAT, 0xfff) + #print("utrstat=%x" % p.read32(UTRSTAT)) + #print("ucon: %x" %p.read32(UCON)) + #print(">S: ", get_irq_state(irq)) + #p.write32(UCON, flags | 0x005) + #print(">ucon: %x" %p.read32(UCON)) + #time.sleep(0.1) + + + +def test_smp_ipi(): + p.smp_start_secondaries() + + code = u.malloc(0x1000) + + c = asm.ARMAsm(""" +#define sys_reg(op0, op1, CRn, CRm, op2) s##op0##_##op1##_c##CRn##_c##CRm##_##op2 +#define SYS_CYC_OVRD sys_reg(3, 5, 15, 5, 0) + + msr DAIFClr, 7 + ldr x1, =0x000000 + msr SYS_CYC_OVRD, x1 + mrs x0, SYS_CYC_OVRD + mov x1, #0x1000000 +1: + subs x1, x1, #1 + mrs x0, 
HCR_EL2 + bne 1b + ret +""", code) + + iface.writemem(code, c.data) + p.dc_cvau(code, len(c.data)) + p.ic_ivau(code, len(c.data)) + + print("Enable IRQs on secondaries") + for i in range(1, 8): + ret = p.smp_call_sync(i, code) + print("0x%x"%ret) + + #e0477971 + #p.write32(AIC + 0x10, 0xe0777971) + #p.write32(AIC + 0x28, 0xffffffff) + + cpoll() + + print("Clear IPI") + p.write32(AIC_IPI_CLR, 0xffffffff) + p.write32(AIC_IPI_MASK_CLR, 0xffffffff) + for i in range(8): + p.write32(AIC_IPI_CLR+0x3000+i*0x80, 0xffffffff) + p.write32(AIC_IPI_MASK_CLR+0x3000+i*0x80, 0xffffffff) + + cpoll() + + print("Set IPI") + #p.write32(AIC_IPI_SET, 0x00000004) + #p.write32(AIC_IPI_SET, 0x00000000) + + cpoll() + print("Clear IPI") + p.write32(AIC_IPI_CLR, 0xffffffff) + p.write32(AIC_IPI_MASK_CLR, 0xffffffff) + for i in range(8): + p.write32(AIC_IPI_CLR+0x3000+i*0x80, 1) + p.write32(AIC_IPI_MASK_CLR+0x3000+i*0x80, 1) + +def test_smp_affinity(): + p.write32(AIC_TGT_DST, 0x6) + p.write32(AIC_TGT_DST+4, 0xfe) + p.write32(AIC_TGT_DST+8, 0xfe) + p.write32(AIC_TGT_DST+12, 0x6) + p.write32(AIC_SW_GEN_SET,0x8); + p.write32(AIC_MASK_CLR,0x8); + +#test_ipi() +#test_timer() +#test_uart_irq() +test_smp_ipi() +test_smp_affinity() diff --git a/tools/proxyclient/experiments/amcc_err_handler.py b/tools/proxyclient/experiments/amcc_err_handler.py new file mode 100644 index 0000000..935a99e --- /dev/null +++ b/tools/proxyclient/experiments/amcc_err_handler.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +import sys, pathlib +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +from m1n1.setup import * + +#for i in (0x28e580350, 0x28e580328, 0x28e580380, 0x28e580378): + #p.write32(i, 0) + +sts = p.read32(0x20002100c) +print(f"status: {sts:#x}") +p.write32(0x200021010, 0) +p.write32(0x20002100c, 0xfff) +time.sleep(0.1) +sts = p.read32(0x20002100c) +print(f"status: {sts:#x}") + +print(f"ERRLOG0: {p.read32(0x20000070c):#x}") +print(f"ERRLOG1: 
{p.read32(0x200000710):#x}") +print(f"ERRLOG2: {p.read32(0x200000714):#x}") +print(f"ERRLOG3: {p.read32(0x200000718):#x}") +print(f"ERRLOG4: {p.read32(0x20000071c):#x}") + +p.write32(0x20000070c, 0xffffffff) + +#p.fb_shutdown() +u.inst("tlbi vmalle1is") + +#p.memset32(fb, 0xffffffff, 3024 * 1964 * 4) +#p.dc_cvac(fb, 3024 * 1964 * 4) + +#p.memset32(0x100_80000000, 0xffffffff, 0x80000000) + +#p.memcpy32(fb, fb + 0x1000, 0x800) +#p.memset32(fb, 0xfffff, 3024 * 1964 * 4) +#p.memset32(fb, 0xffffffff, 3024 * 1964 * 4) +#p.memcpy32(0x100_80000000, fb + 0x1000, 0x1800) +#p.read8(fb + 0x10) +#p.write8(fb + 0x200, 0xdeadbeef) +#p.memset32(fb, 0xfffffff, 3024 * 1964 * 4) + +#p.iodev_write(IODEV.FB, "test\n", 5) diff --git a/tools/proxyclient/experiments/aop.py b/tools/proxyclient/experiments/aop.py new file mode 100755 index 0000000..0dba36f --- /dev/null +++ b/tools/proxyclient/experiments/aop.py @@ -0,0 +1,310 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +import sys, pathlib +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +import struct +import traceback +from construct import * + +from m1n1.setup import * +from m1n1.shell import run_shell +from m1n1.hw.dart import DART, DARTRegs +from m1n1.fw.asc import StandardASC, ASCDummyEndpoint +from m1n1.fw.asc.base import * +from m1n1.fw.aop import * +from m1n1.fw.aop.ipc import * +from m1n1.fw.afk.rbep import * +from m1n1.fw.afk.epic import * + +# Set up a secondary proxy channel so that we can stream +# the microphone samples +p.usb_iodev_vuart_setup(p.iodev_whoami()) +p.iodev_set_usage(IODEV.USB_VUART, USAGE.UARTPROXY) + +p.pmgr_adt_clocks_enable("/arm-io/dart-aop") + +adt_dc = u.adt["/arm-io/aop/iop-aop-nub/aop-audio/dc-2400000"] + +pdm_config = Container( + unk1=2, + clockSource=u'pll ', + pdmFrequency=2400000, + unk3_clk=24000000, + unk4_clk=24000000, + unk5_clk=24000000, + channelPolaritySelect=256, + unk7=99, + unk8=1013248, + unk9=0, + ratios=Container( + r1=15, + r2=5, + r3=2, + ), + 
filterLengths=0x542c47, + coeff_bulk=120, + coefficients=GreedyRange(Int32sl).parse(adt_dc.coefficients), + unk10=1, + micTurnOnTimeMs=20, + unk11=1, + micSettleTimeMs=50, +) + +decimator_config = Container( + latency=15, + ratios=Container( + r1=15, + r2=5, + r3=2, + ), + filterLengths=0x542c47, + coeff_bulk=120, + coefficients=GreedyRange(Int32sl).parse(adt_dc.coefficients), +) + +class AFKEP_Hello(AFKEPMessage): + TYPE = 63, 48, Constant(0x80) + UNK = 7, 0 + +class AFKEP_Hello_Ack(AFKEPMessage): + TYPE = 63, 48, Constant(0xa0) + +class EPICEndpoint(AFKRingBufEndpoint): + BUFSIZE = 0x1000 + + def __init__(self, *args, **kwargs): + self.seq = 0x0 + self.wait_reply = False + self.ready = False + super().__init__(*args, **kwargs) + + @msg_handler(0x80, AFKEP_Hello) + def Hello(self, msg): + self.rxbuf, self.rxbuf_dva = self.asc.ioalloc(self.BUFSIZE) + self.txbuf, self.txbuf_dva = self.asc.ioalloc(self.BUFSIZE) + + self.send(AFKEP_Hello_Ack()) + + def handle_hello(self, hdr, sub, fd): + if sub.type != 0xc0: + return False + + payload = fd.read() + name = payload.split(b"\0")[0].decode("ascii") + self.log(f"Hello! 
(endpoint {name})") + self.ready = True + return True + + def handle_reply(self, hdr, sub, fd): + if self.wait_reply: + self.pending_call.read_resp(fd) + self.wait_reply = False + return True + return False + + def handle_ipc(self, data): + fd = BytesIO(data) + hdr = EPICHeader.parse_stream(fd) + sub = EPICSubHeaderVer2.parse_stream(fd) + + handled = False + + if sub.category == EPICCategory.REPORT: + handled = self.handle_hello(hdr, sub, fd) + if sub.category == EPICCategory.REPLY: + handled = self.handle_reply(hdr, sub, fd) + + if not handled and getattr(self, 'VERBOSE', False): + self.log(f"< 0x{hdr.channel:x} Type {hdr.type} Ver {hdr.version} Tag {hdr.seq}") + self.log(f" Len {sub.length} Ver {sub.version} Cat {sub.category} Type {sub.type:#x} Ts {sub.timestamp:#x}") + self.log(f" Unk1 {sub.unk1:#x} Unk2 {sub.unk2:#x}") + chexdump(fd.read()) + + def indirect(self, call, chan=0x1000000d, timeout=0.1): + tx = call.ARGS.build(call.args) + self.asc.iface.writemem(self.txbuf, tx[4:]) + + cmd = self.roundtrip(IndirectCall( + txbuf=self.txbuf_dva, txlen=len(tx) - 4, + rxbuf=self.rxbuf_dva, rxlen=self.BUFSIZE, + retcode=0, + ), category=EPICCategory.COMMAND, typ=call.TYPE) + fd = BytesIO() + fd.write(struct.pack("<I", cmd.rets.retcode)) + fd.write(self.asc.iface.readmem(self.rxbuf, cmd.rets.rxlen)) + fd.seek(0) + call.read_resp(fd) + return call + + def roundtrip(self, call, chan=0x1000000d, timeout=0.3, + category=EPICCategory.NOTIFY, typ=None): + tx = call.ARGS.build(call.args) + fd = BytesIO() + fd.write(EPICHeader.build(Container( + channel=chan, + type=EPICType.NOTIFY, + version=2, + seq=self.seq, + ))) + self.seq += 1 + fd.write(EPICSubHeaderVer2.build(Container( + length=len(tx), + category=category, + type=typ or call.TYPE, + ))) + fd.write(tx) + + self.pending_call = call + self.wait_reply = True + self.send_ipc(fd.getvalue()) + + deadline = time.time() + timeout + while time.time() < deadline and self.wait_reply: + self.asc.work() + if self.wait_reply: + 
self.wait_reply = False + raise ASCTimeout("ASC reply timed out") + + return call + +class SPUAppEndpoint(EPICEndpoint): + SHORT = "SPUAppep" + +class AccelEndpoint(EPICEndpoint): + SHORT = "accelep" + +class GyroEndpoint(EPICEndpoint): + SHORT = "gyroep" + +class UNK23Endpoint(EPICEndpoint): + SHORT = "unk23ep" + +class LASEndpoint(EPICEndpoint): + SHORT = "lasep" + #VERBOSE = True # <--- uncomment to see lid angle measurements + +class WakehintEndpoint(EPICEndpoint): + SHORT = "wakehintep" + +class UNK26Endpoint(EPICEndpoint): + SHORT = "unk26ep" + +class AudioEndpoint(EPICEndpoint): + SHORT = "audioep" + + +class OSLogMessage(Register64): + TYPE = 63, 56 + +class OSLog_Init(OSLogMessage): + TYPE = 63, 56, Constant(1) + UNK = 51, 0 + DVA = 7, 0 + +class AOPOSLogEndpoint(ASCBaseEndpoint): + BASE_MESSAGE = OSLogMessage + SHORT = "oslog" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.started = False + + @msg_handler(1, OSLog_Init) + def Init(self, msg): + self.iobuffer, self.iobuffer_dva = self.asc.ioalloc(0x1_0000) + self.send(OSLog_Init(DVA=self.iobuffer_dva//0x1000)) + self.started = True + return True + + +class AOPClient(StandardASC, AOPBase): + ENDPOINTS = { + 8: AOPOSLogEndpoint, + + 0x20: SPUAppEndpoint, + 0x21: AccelEndpoint, + 0x22: GyroEndpoint, + 0x23: UNK23Endpoint, + 0x24: LASEndpoint, + 0x25: WakehintEndpoint, + 0x26: UNK26Endpoint, + 0x27: AudioEndpoint, + 0x28: EPICEndpoint, + 0x29: EPICEndpoint, + 0x2a: EPICEndpoint, + 0x2b: EPICEndpoint + } + + def __init__(self, u, adtpath, dart=None): + node = u.adt[adtpath] + self.base = node.get_reg(0)[0] + + AOPBase.__init__(self, u, node) + super().__init__(u, self.base, dart) + +p.dapf_init_all() + +dart = DART.from_adt(u, "/arm-io/dart-aop", iova_range=(0x2c000, 0x10_000_000)) +dart.initialize() +dart.regs.TCR[0].set(BYPASS_DAPF=0, BYPASS_DART=0, TRANSLATE_ENABLE=1) +dart.regs.TCR[7].set(BYPASS_DAPF=0, BYPASS_DART=0, TRANSLATE_ENABLE=1) +dart.regs.TCR[15].val = 0x20100 
+ +aop = AOPClient(u, "/arm-io/aop", dart) + +aop.update_bootargs({ + 'p0CE': 0x20000, +# 'laCn': 0x0, +# 'tPOA': 0x1, +}) + +aop.verbose = 4 + +def set_aop_audio_pstate(devid, pstate): + audep.roundtrip(SetDeviceProp( + devid=devid, + modifier=202, + data=Container( + devid=devid, + cookie=1, + target_pstate=pstate, + unk2=1, + ) + )).check_retcode() + +try: + aop.boot() + for epno in range(0x20, 0x2c): + aop.start_ep(epno) + + timeout = 10 + while (not aop.audioep.ready) and timeout: + aop.work_for(0.1) + timeout -= 1 + + if not timeout: + raise Exception("Timed out waiting on audio endpoint") + + print("Finished boot") + + audep = aop.audioep + + audep.roundtrip(AttachDevice(devid='pdm0')).check_retcode() + audep.indirect(SetDeviceProp( + devid='pdm0', modifier=200, data=pdm_config) + ).check_retcode() + audep.indirect(SetDeviceProp( + devid='pdm0', modifier=210, data=decimator_config) + ).check_retcode() + audep.roundtrip(AttachDevice(devid='hpai')).check_retcode() + audep.roundtrip(AttachDevice(devid='lpai')).check_retcode() + audep.roundtrip(SetDeviceProp( + devid='lpai', modifier=301, data=Container(unk1=7, unk2=7, unk3=1, unk4=7)) + ).check_retcode() +except KeyboardInterrupt: + pass +except Exception: + print(traceback.format_exc()) + +run_shell(locals(), poll_func=aop.work) diff --git a/tools/proxyclient/experiments/audio_capture.py b/tools/proxyclient/experiments/audio_capture.py new file mode 100755 index 0000000..d79cbd4 --- /dev/null +++ b/tools/proxyclient/experiments/audio_capture.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +import sys, pathlib +import time +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +# audio_capture.py -- capture audio on jack microphone input (on M1 macs with cs42l83) +# +# sample usage with sox: (recoding can be loud!) 
+# +# ./audio_capture.py | sox -t raw -r 48000 -c 1 -e signed-int -b 32 -L - OUTPUT_FILE + +from m1n1.setup import * +from m1n1.hw.dart import DART, DARTRegs +from m1n1.hw.i2c import I2C +from m1n1.hw.pmgr import PMGR +from m1n1.hw.nco import NCO +from m1n1.hw.admac import * +from m1n1.hw.mca import * + +p.pmgr_adt_clocks_enable("/arm-io/i2c2") +p.pmgr_adt_clocks_enable("/arm-io/admac-sio") +p.pmgr_adt_clocks_enable("/arm-io/dart-sio") +p.pmgr_adt_clocks_enable("/arm-io/mca-switch") +p.pmgr_adt_clocks_enable("/arm-io/mca3") + +# reset AUDIO_P +PS_AUDIO_P = PMGR(u).regs[0].PS4[10] +PS_AUDIO_P.set(DEV_DISABLE=1) +PS_AUDIO_P.set(RESET=1) +PS_AUDIO_P.set(RESET=0) +PS_AUDIO_P.set(DEV_DISABLE=0) + +dart_base, _ = u.adt["/arm-io/dart-sio"].get_reg(0) +dart = DART(iface, DARTRegs(u, dart_base), util=u) +dart.initialize() + +cl_no = 2 + +admac = ADMAC(u, "/arm-io/admac-sio", dart, debug=True) +dmachan = admac.chans[4*cl_no+1] +dmachan.buswidth = E_BUSWIDTH.W_32BIT +dmachan.framesize = E_FRAME.F_1_WORD + +nco = NCO(u, "/arm-io/nco") +nco[cl_no].set_rate(6000000) +nco[cl_no].enable() + +mca_switch1_base = u.adt["/arm-io/mca-switch"].get_reg(1)[0] +mca_cl_base = u.adt["/arm-io/mca-switch"].get_reg(0)[0] + 0x4000*cl_no +cl = MCACluster(u, mca_cl_base) + +regs, serdes = cl.regs, cl.rxa + +regs.SYNCGEN_STATUS.set(EN=0) +regs.SYNCGEN_MCLK_SEL.val =(1 + cl_no) +regs.SYNCGEN_HI_PERIOD.val = 0 # period minus one +regs.SYNCGEN_LO_PERIOD.val = 0x7b # period minus one + +serdes.STATUS.set(EN=0) +serdes.CONF.set( + NSLOTS=0, + SLOT_WIDTH=E_SLOT_WIDTH.W_32BIT, + BCLK_POL=1, + UNK1=1, UNK2=1, + SYNC_SEL=(1 + cl_no) +) +serdes.UNK1.val = 0x4 + +serdes.BITDELAY.val = 1 + +serdes.CHANMASK[0].val = 0xffff_ffff +serdes.CHANMASK[1].val = 0xffff_fffe + +regs.PORT_ENABLES.set(CLOCK1=1, CLOCK2=1, DATA=0) +regs.PORT_CLK_SEL.set(SEL=(cl_no + 1)) +regs.MCLK_STATUS.set(EN=1) +regs.SYNCGEN_STATUS.set(EN=1) + +cs42l_addr = 0x48 +i2c2 = I2C(u, "/arm-io/i2c2") +def cs42l_write(regaddr, val): + 
i2c2.write_reg(cs42l_addr, 0x0, [regaddr >> 8]) + i2c2.write_reg(cs42l_addr, regaddr & 0xff, [val]) + +p.write32(0x23d1f002c, 0x76a02) +p.write32(0x23d1f002c, 0x76a03) # take jack codec out of reset + +cs42l_write(0x1009, 0x0) # FS_int = MCLK/250 +cs42l_write(0x1101, 0x7a) # power on +cs42l_write(0x1103, 0x22) # power on ring sense +cs42l_write(0x1107, 0x1) # SCLK present +cs42l_write(0x1121, 0xa6) # Headset Switch Control +cs42l_write(0x1129, 0x1) # Headset Clamp Disable +cs42l_write(0x1205, 0x7c) # FSYNC period +cs42l_write(0x1207, 0x20) # ASP Clock Configuration +cs42l_write(0x1208, 0x12) # BITDELAY = 1 +cs42l_write(0x120c, 0x1) # SCLK_PREDIV = div-by-2 +cs42l_write(0x150a, 0x55) # PLL +cs42l_write(0x151b, 0x1) # PLL +cs42l_write(0x1501, 0x1) # power on PLL +cs42l_write(0x1b70, 0xc3) # HSBIAS sense +cs42l_write(0x1b71, 0xe0) # v-- headset +cs42l_write(0x1b73, 0xc0) +cs42l_write(0x1b74, 0x1f) +cs42l_write(0x1b75, 0xb6) +cs42l_write(0x1b76, 0x8f) +cs42l_write(0x1b79, 0x0) +cs42l_write(0x1b7a, 0xfc) +cs42l_write(0x1c03, 0xc0) # HSBIAS +cs42l_write(0x2506, 0xc) # ASP TX samp. rate +cs42l_write(0x2609, 0x4c) # SRC output samp. rate +cs42l_write(0x2901, 0x1) # ASP TX enable & size +cs42l_write(0x2902, 0x1) # ASP TX channel enable + +time.sleep(0.01) + +cs42l_write(0x1201, 0x1) # transition to PLL clock + +# drain garbled samples (why are they garbled? 
i am not sure) +time.sleep(0.5) + +dmachan.submit(buflen=0x4000) +dmachan.enable() + +p.write32(mca_switch1_base + 0x8000*cl_no, 0x24800) +serdes.STATUS.set(EN=1) + +while True: + while dmachan.can_submit(): + dmachan.submit(buflen=0x4000) + sys.stdout.buffer.write(dmachan.poll()) diff --git a/tools/proxyclient/experiments/chickens.py b/tools/proxyclient/experiments/chickens.py new file mode 100755 index 0000000..bc2b6ec --- /dev/null +++ b/tools/proxyclient/experiments/chickens.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +import sys, pathlib +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +from m1n1.setup import * + +sys_regs = dict([ + ("HID0", (3, 0, 15, 0, 0)), + ("HID1", (3, 0, 15, 1, 0)), + ("EHID20", (3, 0, 15, 1, 2)), + ("HID2", (3, 0, 15, 2, 0)), + ("HID3", (3, 0, 15, 3, 0)), + ("HID4", (3, 0, 15, 4, 0)), + ("EHID4", (3, 0, 15, 4, 1)), + ("HID5", (3, 0, 15, 5, 0)), + ("HID6", (3, 0, 15, 6, 0)), + ("HID7", (3, 0, 15, 7, 0)), + ("HID8", (3, 0, 15, 8, 0)), + ("HID9", (3, 0, 15, 9, 0)), + ("EHID9", (3, 0, 15, 9, 1)), + ("HID10", (3, 0, 15, 10, 0)), + ("EHID10", (3, 0, 15, 10, 1)), + ("HID11", (3, 0, 15, 11, 0)), +]) + +CYC_OVRD = (3, 5, 15, 5, 0) +CYC_CFG = (3, 5, 15, 4, 0) + +L2C_ERR_STS = (3, 3, 15, 8, 0) + +s3_6_c15_c1_0 = (3, 6, 15, 1, 0) +s3_6_c15_c1_6 = (3, 6, 15, 1, 6) + +s3_4_c15_c1_4 = (3, 4, 15, 1, 4) +s3_4_c15_c5_0 = (3, 4, 15, 5, 0) + +h13e_chickenbits = [ + ("EHID4", 0x100000000800, 0), + ("HID5", 0x2000000000000000, 0), + ("EHID10", 0x2000100000000, 0), + ("EHID20", 0x100, 0), + ("EHID9", 0, 0x20), + ("EHID20", 0x8000, 0), + ("EHID20", 0x10000, 0), + ("EHID20", 0x600000, 0), + +] + +tlbi_vmalle1 = 0xd508871f + +def h13e_init(): + mpidr = u.mrs(MPIDR_EL1) + print("mpidr = 0x%x" % mpidr) + + #print("OSLAR") + #u.msr(OSLAR_EL1, 0) + #print("s3_6_c15_c1_0") + #u.msr(s3_6_c15_c1_0, 1) + #print("tlbi_vmalle1") + #u.inst(tlbi_vmalle1) + + ## This looks like APRR stuff? 
+ #v = u.mrs(s3_6_c15_c1_6) + #print("s3_6_c15_c1_6 == 0x%x" % v) + #v = 0x2020a505f020f0f0 + #print("s3_6_c15_c1_6 <= 0x%x" % v) + #u.msr(s3_6_c15_c1_6, v) + + #u.msr(s3_6_c15_c1_0, 0) + + for reg, setb, clearb in h13e_chickenbits: + v = u.mrs(sys_regs[reg]) + print("%r == 0x%x" % (reg, v)) + v &= ~clearb + v |= setb + print("%r <= 0x%x" % (reg, v)) + u.msr(sys_regs[reg], v) + + v = u.mrs(s3_4_c15_c5_0) + print("s3_4_c15_c5_0 == 0x%x" % v) + print("s3_4_c15_c5_0 <= 0x%x" % (mpidr & 0xff)) + u.msr(s3_4_c15_c5_0, mpidr & 0xff) + + u.msr(s3_4_c15_c1_4, 0x100) + + v = u.mrs(CYC_OVRD) + print("CYC_OVRD == 0x%x" % v) + v &= ~0xf00000 + print("CYC_OVRD <= 0x%x" % v) + u.msr(CYC_OVRD, v) + + v = u.mrs(ACTLR_EL1) + print("ACTLR_EL1 == 0x%x" % v) + v |= 0x200 + print("ACTLR_EL1 <= 0x%x" % v) + u.msr(ACTLR_EL1, v) + + v = u.mrs(CYC_CFG) + print("CYC_CFG == 0x%x" % v) + v |= 0xc + print("CYC_CFG <= 0x%x" % v) + u.msr(CYC_CFG, v) + + print("L2C_ERR_STS = %x" % u.mrs(L2C_ERR_STS)) + u.msr(L2C_ERR_STS, 0) + +h13e_init() diff --git a/tools/proxyclient/experiments/cpu_pstate_latencies.py b/tools/proxyclient/experiments/cpu_pstate_latencies.py new file mode 100755 index 0000000..0acfc28 --- /dev/null +++ b/tools/proxyclient/experiments/cpu_pstate_latencies.py @@ -0,0 +1,239 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +import sys, pathlib, time +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +from m1n1.setup import * +from m1n1 import asm + +p.smp_start_secondaries() + +tfreq = u.mrs(CNTFRQ_EL0) + +TEST_CPUS = [1, 4] + +CLUSTER_PSTATE = 0x20020 +CLUSTER_STATUS = 0x20050 + +if u.adt["/chosen"].chip_id == 0x8103: + CREG = [ + 0x210e00000, + 0x211e00000, + ] + + MAX_PSTATE = [5, 15] + +elif u.adt["/chosen"].chip_id == 0x8112: + CREG = [ + 0x210e00000, + 0x211e00000, + ] + + MAX_PSTATE = [7, 17] + +code = u.malloc(0x1000) + +util = asm.ARMAsm(f""" +bench: + mrs x1, CNTPCT_EL0 +1: + sub x0, x0, #1 + cbnz x0, 1b + + mrs x2, CNTPCT_EL0 + sub x0, x2, x1 + 
ret + +signal_and_write: + sev + mrs x2, CNTPCT_EL0 + add x2, x2, #0x800 +1: + mrs x3, CNTPCT_EL0 + sub x4, x3, x2 + cbnz x4, 1b + str x1, [x0] + mov x0, x3 + ret + +timelog: + mrs x2, s3_1_c15_c0_0 /* SYS_IMP_APL_PMCR0 */ + orr x2, x2, #1 + msr s3_1_c15_c0_0, x2 + mov x2, #0xffffffffffffffff + msr s3_1_c15_c1_0, x2 + isb + wfe +1: + mrs x2, CNTPCT_EL0 + mrs x3, s3_2_c15_c0_0 + isb + stp x2, x3, [x0], #16 + mov x4, #0x40 +2: + sub x4, x4, #1 + cbnz x4, 2b + sub x1, x1, #1 + cbnz x1, 1b + + ret +""", code) +iface.writemem(code, util.data) +p.dc_cvau(code, len(util.data)) +p.ic_ivau(code, len(util.data)) + +def bench_cpu(idx, loops=10000000): + if idx == 0: + elapsed = p.call(util.bench, loops) / tfreq + else: + elapsed = p.smp_call_sync(idx, util.bench, loops) / tfreq + if elapsed == 0: + return 0 + mhz = (loops / elapsed) / 1000000 + return mhz + +def set_pstate(cluster, pstate): + p.mask64(CREG[cluster] + CLUSTER_PSTATE, 0x1f01f, (1<<25) | pstate | (pstate << 12)) + +print() + +LOG_ITERS = 10000 +logbuf = u.malloc(LOG_ITERS * 16) + +def bench_latency(cluster, cpu, from_pstate, to_pstate, verbose=False): + set_pstate(cluster, from_pstate) + bench_cpu(cpu) + + p.smp_call(cpu, util.timelog, logbuf, LOG_ITERS) + psreg = (p.read64(CREG[cluster] + CLUSTER_PSTATE) & ~0x1f001f) | (1<<25) | to_pstate | (to_pstate << 12) + tval = p.call(util.signal_and_write, CREG[cluster] + CLUSTER_PSTATE, psreg) + p.smp_wait(cpu) + + logdata = iface.readmem(logbuf, LOG_ITERS * 16) + lts, lcyc = None, None + + log = [] + for i in range(LOG_ITERS): + ts, cyc = struct.unpack("<QQ", logdata [i*16:i*16+16]) + log.append((ts, cyc)) + + off = 256 + + ts_0, cyc_0 = log[off] + ts_e, cyc_e = log[-1] + f_init = None + f_end = None + lts, lcyc = ts_0, cyc_0 + + inc = to_pstate > from_pstate + + blip = 0 + cnt = dts_sum = 0 + for i in range(off, len(log)): + ts, cyc = log[i] + dts = ts - lts + dcyc = cyc - lcyc + + cnt += 1 + dts_sum += dts + + blip = max(blip, dts) + + if f_init is None and ts > 
tval: + tidx = i + f_init = (lcyc - cyc_0) / (lts - ts_0) * tfreq / 1000000 + dts_init = dts_sum / cnt + if f_end is None and ts > (tval + ts_e) / 2: + f_end = (cyc_e - cyc) / (ts_e - ts) * tfreq / 1000000 + cnt = dts_sum = 0 + + #if lts is not None: + #print(f"{i}: {ts}: {cyc} ({ts-lts}: {cyc-lcyc})") + #else: + #print(f"{i}: {ts}: {cyc}") + lts, lcyc = ts, cyc + + dts_end = dts_sum / cnt + + window = 32 + + if verbose: + print(f"Triggered at {tval}") + + thresh = 2/ (1/f_init + 1/f_end) + + for i in range(tidx, LOG_ITERS - window - 1): + ts0, cyc0 = log[i - window] + ts1, cyc1 = log[i + window] + f = (cyc1 - cyc0) / (ts1 - ts0) * tfreq / 1000000 + if inc and (f > thresh) or ((not inc) and f < thresh): + tts = log[i][0] + tidx = i + if verbose: + print(f"Frequency transition at #{i} {tts}") + break + + if verbose: + print(f"Initial frequency: {f_init:.2f}") + print(f"Final frequency: {f_end:.2f}") + print(f"Threshold: {thresh:.2f}") + + for i in range(max(window, tidx - 10 * window), tidx + 10 * window): + ts0, cyc0 = log[i - window] + ts1, cyc1 = log[i + window] + lts, lcyc = log[i - 1] + ts, cyc = log[i] + f = (cyc1 - cyc0) / (ts1 - ts0) * tfreq / 1000000 + print(f"{i}: {ts}: {cyc} ({ts-lts}: {cyc-lcyc}): {f:.2f}") + + blip -= min(dts_init, dts_end) + + return (tts - tval) / tfreq * 1000000000, blip / tfreq * 1000000000 + +for cluster, creg in enumerate(CREG): + cpu = TEST_CPUS[cluster] + + freqs = [] + + print(f"#### Cluster {cluster} ####") + print(" P-States:") + print(" ", end="") + for pstate in range(MAX_PSTATE[cluster] + 1): + set_pstate(cluster, pstate) + freq = int(round(bench_cpu(cpu))) + freqs.append(freq) + print(f"{pstate}:{freq}MHz", end=" ") + print() + print() + + print(" To-> |", end="") + for to_pstate in range(1, MAX_PSTATE[cluster] + 1): + print(f" {freqs[to_pstate]:7d} |", end="") + print() + print(" From |", end="") + for to_pstate in range(1, MAX_PSTATE[cluster] + 1): + print(f"---------+", end="") + print() + + maxblip = 0 + + for 
from_pstate in range(1, MAX_PSTATE[cluster] + 1): + print(f" {freqs[from_pstate]:4d} |", end="") + for to_pstate in range(1, MAX_PSTATE[cluster] + 1): + if from_pstate == to_pstate: + print(f" ******* |", end="") + continue + lat, blip = bench_latency(cluster, cpu, from_pstate, to_pstate) + print(f" {lat:7.0f} |", end="") + maxblip = max(maxblip, blip) + print() + + print() + print(f"Maximum execution latency spike: {maxblip:.0f} ns") + print() + +print() + +#bench_latency(1, TEST_CPUS[1], 15, 14, True) + + diff --git a/tools/proxyclient/experiments/cpu_pstates.py b/tools/proxyclient/experiments/cpu_pstates.py new file mode 100755 index 0000000..7403775 --- /dev/null +++ b/tools/proxyclient/experiments/cpu_pstates.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +import sys, pathlib, time +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +from m1n1.setup import * +from m1n1 import asm + +LOOPS = 10000000 +freq = u.mrs(CNTFRQ_EL0) + +CREG = [ + 0x210e00000, + 0x211e00000, +] + +CLUSTER_PSTATE = 0x20020 + +# e-core pstates +# 600 972 1332 1704 2064 +# p-core pstates +# 600 828 1056 1284 1500 1728 1956 2184 2388 2592 2772 2988 3096 3144 3204 + +code = u.malloc(0x1000) + +util = asm.ARMAsm(""" +bench: + mrs x1, CNTPCT_EL0 +1: + sub x0, x0, #1 + cbnz x0, 1b + + mrs x2, CNTPCT_EL0 + sub x0, x2, x1 + ret +""", code) +iface.writemem(code, util.data) +p.dc_cvau(code, len(util.data)) +p.ic_ivau(code, len(util.data)) + +def bench_cpu(idx): + if idx == 0: + elapsed = p.call(util.bench, LOOPS) / freq + else: + elapsed = p.smp_call_sync(idx, util.bench, LOOPS) / freq + if elapsed == 0: + return 0 + mhz = (LOOPS / elapsed) / 1000000 + return mhz + +print() + +e_pstate = p.read64(CREG[0] + CLUSTER_PSTATE) +p_pstate = p.read64(CREG[1] + CLUSTER_PSTATE) + +print(f"E-Core pstate: {e_pstate:x}") +print(f"P-Core pstate: {p_pstate:x}") + +#for cluster in range(2): + #print(f"Initializing cluster {cluster} (early)") + + 
#p.write64(CREG[cluster] + 0x20660, 0x1000000015) + #p.write64(CREG[cluster] + 0x48000, 0) + #p.write64(CREG[cluster] + 0x48080, 0xa000000000000000) + + #p.clear64(CREG[cluster] + CLUSTER_PSTATE, 1<<22) + +#p.set32(PMGR + 0x48000, 1) +#p.set32(PMGR + 0x48c00, 1) +#p.set32(PMGR + 0x48800, 1) +#p.set32(PMGR + 0x48400, 1) + +CLUSTER_DVMR = 0x206b8 +CLUSTER_LIMIT2 = 0x40240 +CLUSTER_LIMIT3 = 0x40250 +CLUSTER_LIMIT1 = 0x48400 + +PMGR_CPUGATING = 0x1c080 +CLUSTER_CTRL = 0x440f8 +CLUSTER_PSCTRL = 0x200f8 + +for cluster in range(2): + print(f"Initializing cluster {cluster}") + ena = (1<<63) + val = p.read64(CREG[cluster] + CLUSTER_DVMR) + if cluster == 1: + ena |= (1<<32) | (1<<31) + if (val & ena) != ena: + print(f"DVMR: {val:#x} -> {val|ena:#x}") + p.set64(CREG[cluster] + CLUSTER_DVMR, ena) # CLUSTER_DVMR + + #p.set64(CREG[cluster] + CLUSTER_LIMIT1, 1<<63) + #p.clear64(CREG[cluster] + CLUSTER_LIMIT2, 1<<63) + #p.set64(CREG[cluster] + CLUSTER_LIMIT3, 1<<63) + + #p.set64(CREG[cluster] + CLUSTER_PSTATE, 0) + + #p.set32(PMGR + PMGR_CPUGATING + 8 * cluster, 1<<31) + + #p.write64(CREG[cluster] + CLUSTER_CTRL, 1) + + #p.set64(CREG[cluster] + CLUSTER_PSCTRL, 1<<40) + + #pstate = p.read64(CREG[cluster] + CLUSTER_PSTATE) & 0xf + +p.smp_start_secondaries() + +print("== Initial CPU frequencies ==") + +for cpu in range(8): + print(f"CPU {cpu}: {bench_cpu(cpu):.2f} MHz") + +def set_pstate(cluster, pstate): + # This really seems to be all that's needed + + p.mask64(CREG[cluster] + CLUSTER_PSTATE, 0xf00f, (1<<25) | pstate | (pstate << 12)) + + # Optionally, adjust MCC performance in higher p-core pstates + if cluster == 1: + if pstate > 8: + p0, p1 = 0x133, 0x55555340 + else: + p0, p1 = 0x813057f, 0x1800180 + + for lane in range(8): + p.write32(0x200200dc4 + lane * 0x40000, p0) + p.write32(0x200200dbc + lane * 0x40000, p1) + + # This seems to be about notifying PMP + #p.write32(0x23b738004 + cluster*4, pstate) + #p.write32(0x23bc34000, 1 << cluster) + +set_pstate(1, 15) + +e_pstate = 
p.read64(CREG[0] + CLUSTER_PSTATE) +p_pstate = p.read64(CREG[1] + CLUSTER_PSTATE) + +print(f"E-Core pstate: {e_pstate:x}") +print(f"P-Core pstate: {p_pstate:x}") + +time.sleep(0.5) + +print("== Final CPU frequencies ==") + +#elapsed = p.smp_call(7, util.bench, 80000000) + +for cpu in range(8): + print(f"CPU {cpu}: {bench_cpu(cpu):.2f} MHz") + +#elapsed = p.smp_wait(7) diff --git a/tools/proxyclient/experiments/dart_dump.py b/tools/proxyclient/experiments/dart_dump.py new file mode 100755 index 0000000..f6e0e08 --- /dev/null +++ b/tools/proxyclient/experiments/dart_dump.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +import sys, pathlib +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +import struct + +from m1n1.setup import * +from m1n1 import asm +from m1n1.hw.dart import DART + +if len(sys.argv) > 1: + dart_name = sys.argv[1] +else: + dart_name = "dart-disp0" + +dart = DART.from_adt(u, "arm-io/" + dart_name) +dart.dump_all() +dart.dart.regs.dump_regs() diff --git a/tools/proxyclient/experiments/dcp.py b/tools/proxyclient/experiments/dcp.py new file mode 100755 index 0000000..93d6167 --- /dev/null +++ b/tools/proxyclient/experiments/dcp.py @@ -0,0 +1,351 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +import sys, pathlib +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +import struct +from construct import * + +from m1n1.setup import * +from m1n1.shell import run_shell +from m1n1 import asm +from m1n1.hw.dart import DART, DARTRegs +from m1n1.fw.dcp.client import DCPClient +from m1n1.fw.dcp.manager import DCPManager +from m1n1.fw.dcp.ipc import ByRef +from m1n1.proxyutils import RegMonitor + +disp_name = "/arm-io/disp0" + +external = hasattr(u.adt[disp_name], "external") and u.adt[disp_name].external != 0 +compat = u.adt[disp_name].compatible[0].split(",")[-1] + +mon = RegMonitor(u) + +if compat == 't8103': + #mon.add(0x230000000, 0x18000) + #mon.add(0x230018000, 0x4000) + 
#mon.add(0x230068000, 0x8000) + #mon.add(0x2300b0000, 0x8000) + #mon.add(0x2300f0000, 0x4000) + #mon.add(0x230100000, 0x10000) + #mon.add(0x230170000, 0x10000) + #mon.add(0x230180000, 0x1c000) + #mon.add(0x2301a0000, 0x10000) + #mon.add(0x2301d0000, 0x4000) + #mon.add(0x230230000, 0x10000) + #mon.add(0x23038c000, 0x10000) + #mon.add(0x230800000, 0x10000) + #mon.add(0x230840000, 0xc000) + #mon.add(0x230850000, 0x2000) + ##mon.add(0x230852000, 0x5000) # big curve / gamma table + #mon.add(0x230858000, 0x18000) + #mon.add(0x230870000, 0x4000) + #mon.add(0x230880000, 0x8000) + #mon.add(0x230894000, 0x4000) + #mon.add(0x2308a8000, 0x8000) + #mon.add(0x2308b0000, 0x8000) + #mon.add(0x2308f0000, 0x4000) + ##mon.add(0x2308fc000, 0x4000) # stats / RGB color histogram + #mon.add(0x230900000, 0x10000) + #mon.add(0x230970000, 0x10000) + #mon.add(0x230980000, 0x10000) + #mon.add(0x2309a0000, 0x10000) + #mon.add(0x2309d0000, 0x4000) + #mon.add(0x230a30000, 0x20000) + #mon.add(0x230b8c000, 0x10000) + #mon.add(0x231100000, 0x8000) + #mon.add(0x231180000, 0x4000) + #mon.add(0x2311bc000, 0x10000) + #mon.add(0x231300000, 0x8000) + ##mon.add(0x23130c000, 0x4000) # - DCP dart + #mon.add(0x231310000, 0x8000) + #mon.add(0x231340000, 0x8000) + ##mon.add(0x231800000, 0x8000) # breaks DCP + ##mon.add(0x231840000, 0x8000) # breaks DCP + ##mon.add(0x231850000, 0x8000) # something DCP? 
+ ##mon.add(0x231920000, 0x8000) # breaks DCP + ##mon.add(0x231960000, 0x8000) # breaks DCP + ##mon.add(0x231970000, 0x10000) # breaks DCP + ##mon.add(0x231c00000, 0x10000) # DCP mailbox + + mon.add(0x230845840, 0x40) # error regs + +def get_color_mode(mgr): + best_id = None + best_score = -1 + for mode in mgr.dcpav_prop['ColorElements']: + if mode['IsVirtual']: + continue + if mode['Depth'] != 8: + continue + if mode['Score'] > best_score: + best_score = mode['Score'] + best_id = mode['ID'] + return best_id + +def get_timing_mode(mgr): + best_id = None + best_score = -1 + for mode in mgr.dcpav_prop['TimingElements']: + if mode['IsVirtual']: + continue + if int(mode['Score']) > best_score: + best_score = int(mode['Score']) + best_id = int(mode['ID']) + return best_id + +mon.poll() + +dart = DART.from_adt(u, "arm-io/dart-dcp") +disp_dart = DART.from_adt(u, "arm-io/dart-disp0") + +print("DCP DART:") +dart.regs.dump_regs() +print("DISP DART:") +disp_dart.regs.dump_regs() + +dcp_addr = u.adt["arm-io/dcp"].get_reg(0)[0] +dcp = DCPClient(u, dcp_addr, dart, disp_dart) +dcp.dva_offset = getattr(u.adt["/arm-io/dcp"][0], "asc_dram_mask", 0) + +dcp.start() +dcp.start_ep(0x37) +dcp.dcpep.initialize() + +mgr = DCPManager(dcp.dcpep, compat) + +mon.poll() + +mgr.start_signal() + +mon.poll() + +mgr.get_color_remap_mode(6) +mgr.enable_disable_video_power_savings(0) + +mgr.update_notify_clients_dcp([0,0,0,0,0,0,1,1,1,0,1,1,1,1]) +mgr.first_client_open() +print(f"keep on: {mgr.isKeepOnScreen()}") +print(f"main display: {mgr.is_main_display()}") +assert mgr.setPowerState(1, False, ByRef(0)) == 0 + +mon.poll() + +if external: + assert mgr.set_display_device(2) == 0 +else: + assert mgr.set_display_device(0) == 2 +assert mgr.set_parameter_dcp(14, [0], 1) == 0 + +color_mode = get_color_mode(mgr) +timing_mode = get_timing_mode(mgr) +mgr.SetDigitalOutMode(color_mode, timing_mode) +mon.poll() + +while mgr.iomfb_prop['DPTimingModeId'] != timing_mode: + print("Try re-setting mode") + 
mgr.SetDigitalOutMode(color_mode, timing_mode) + mon.poll() + +if external: + assert mgr.set_display_device(2) == 0 +else: + assert mgr.set_display_device(0) == 2 +assert mgr.set_parameter_dcp(14, [0], 1) == 0 + +t = ByRef(b"\x00" * 0xc0c) +assert mgr.get_gamma_table(t) == 2 +assert mgr.set_contrast(0) == 0 +assert mgr.setBrightnessCorrection(65536) == 0 + +if external: + assert mgr.set_display_device(2) == 0 +else: + assert mgr.set_display_device(0) == 2 +assert mgr.set_parameter_dcp(14, [0], 1) == 0 + +mon.poll() + +swapid = ByRef(0) + +def start(): + # arg: IOUserClient + ret = mgr.swap_start(swapid, { + "addr": 0xFFFFFE1667BA4A00, + "unk": 0, + "flag1": 0, + "flag2": 1 + }) + assert ret == 0 + print(f"swap ID: {swapid.val:#x}") + +start() + +mgr.set_matrix(9, [[1<<32, 0, 0], + [0, 1<<32, 0], + [0, 0, 1<<32]]) +mgr.setBrightnessCorrection(65536) +mgr.set_parameter_dcp(3, [65536], 1) +mgr.set_parameter_dcp(6, [65536], 1) + +width = mgr.display_width() +height = mgr.display_height() + +surface_id = 3 + +swap_rec = Container( + flags1 = 0x861202, + flags2 = 0x04, + swap_id = swapid.val, + surf_ids = [surface_id, 0, 0, 0], + src_rect = [[0, 0, width, height],[0,0,0,0],[0,0,0,0],[0,0,0,0]], + surf_flags = [1, 0, 0, 0], + surf_unk = [0, 0, 0, 0], + dst_rect = [[0, 0, width, height],[0,0,0,0],[0,0,0,0],[0,0,0,0]], + swap_enabled = 0x80000007, + swap_completed = 0x80000007, + bl_unk = 0x1, + bl_val = 0x58f058d0, # ~99 nits + bl_power = 0x40, +) + +surf = Container( + is_tiled = False, + unk_1 = False, + unk_2 = False, + plane_cnt = 0, + plane_cnt2 = 0, + format = "BGRA", + xfer_func = 13, + colorspace = 1, + stride = width * 4, + pix_size = 4, + pel_w = 1, + pel_h = 1, + offset = 0, + width = width, + height = height, + buf_size = width * height * 4, + surface_id = surface_id, + has_comp = True, + has_planes = True, + has_compr_info = False, + unk_1f5 = 0, + unk_1f9 = 0, +) + +compressed_surf = Container( + is_tiled = False, + unk_1 = False, + unk_2 = False, + plane_cnt 
= 2, + plane_cnt2 = 2, + format = 'b3a8', + unk_f = 0x00000000, + xfer_func = 13, + colorspace = 2, + stride = width, + pix_size = 1, + pel_w = 1, + pel_h = 1, + offset = 0, + width = width, + height = height, + buf_size = 0x00A36000, + unk_2d = 0, + unk_31 = 0, + surface_id = 5, + comp_types = [ + Container(count = 0, types =[]), + Container(count = 0, types =[]), + ], + has_comp = True, + planes = [ + Container( + width = width, + height = height, + base = 0, + offset = 0, + stride = 0x1e000, + size = 0x818000, + tile_size = 1024, + tile_w = 16, + tile_h = 16, + unk2 = 0x05, + ), + Container( + width = width, + height = height, + base = 0x818000, + offset = 0x818000, + stride = 0x7800, + size = 0x21e000, + tile_size = 256, + tile_w = 16, + tile_h = 16, + unk2 = 0x05, + ) + ], + has_planes = True, + compression_info = [ + unhex(""" + 10 00 00 00 10 00 00 00 00 80 7F 00 00 00 00 00 + 08 00 00 00 78 00 00 00 44 00 00 00 00 00 00 00 + 03 00 00 00 00 00 00 00 AA AA AA 00 04 00 00 00 + E0 01 00 AA + """), + unhex(""" + 10 00 00 00 10 00 00 00 00 60 A1 00 00 80 81 00 + 08 00 00 00 78 00 00 00 44 00 00 00 00 00 00 00 + 03 00 00 00 00 00 00 00 AA AA AA 00 01 00 00 00 + 78 00 00 AA + """), + ], + has_compr_info = True, + unk_1f5 = 0x100000, + unk_1f9 = 0x100000, +) + + +outB = ByRef(False) + +swaps = mgr.swaps + +mon.poll() + +fb_size = align_up(width * height * 4, 8 * 0x4000) +print(f"Display {width}x{height}, fb size: {fb_size}") + +buf = u.memalign(0x4000, fb_size) + +colors = [0xDD0000, 0xFE6230, 0xFEF600, 0x00BB00, 0x009BFE, 0x000083, 0x30009B] + + +for i, color in enumerate(colors): + lines = height // len(colors) + offset = i * lines * width * 4 + p.memset32(buf + offset, color, lines * width * 4) + +iova = disp_dart.iomap(0, buf, fb_size) + +surfaces = [surf, None, None, None] +#surfaces = [compressed_surf, None, None, None] +surfAddr = [iova, 0, 0, 0] + +def submit(): + swap_rec.swap_id = swapid.val + ret = mgr.swap_submit_dcp(swap_rec=swap_rec, surfaces=surfaces, 
surfAddr=surfAddr, + unkBool=False, unkFloat=0.0, unkInt=0, unkOutBool=outB) + print(f"swap returned {ret} / {outB}") + + dcp.work() + + if ret == 0: + while swaps == mgr.swaps: + dcp.work() + print("swap complete!") + +submit() + +run_shell(globals(), msg="Have fun!") diff --git a/tools/proxyclient/experiments/dcp_iboot.py b/tools/proxyclient/experiments/dcp_iboot.py new file mode 100755 index 0000000..9aed5d8 --- /dev/null +++ b/tools/proxyclient/experiments/dcp_iboot.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +import sys, pathlib +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +import struct +from construct import * + +from m1n1.setup import * +from m1n1.shell import run_shell +from m1n1 import asm +from m1n1.hw.dart import DART, DARTRegs +from m1n1.fw.dcp.iboot import DCPIBootClient, SurfaceFormat, EOTF, Transform, AddrFormat, Colorspace +from m1n1.proxyutils import RegMonitor + +print(f"Framebuffer at {u.ba.video.base:#x}") + +p.display_shutdown(DCP_SHUTDOWN_MODE.QUIESCED) + +dart = DART.from_adt(u, "arm-io/dart-dcp") +disp_dart = DART.from_adt(u, "arm-io/dart-disp0") +#disp_dart.dump_all() + +dcp_addr = u.adt["arm-io/dcp"].get_reg(0)[0] +dcp = DCPIBootClient(u, dcp_addr, dart, disp_dart) +dcp.dva_offset = getattr(u.adt["/arm-io/dcp"][0], "asc_dram_mask", 0) + +dcp.start() +dcp.start_ep(0x23) +dcp.start_ep(0x24) + +dcp.iboot.wait_for("disp0") +dcp.dptx.wait_for("dcpav0") +dcp.dptx.wait_for("dcpdp0") + +#dcp.dptx.dcpav0.setPower(False) +#dcp.dptx.dcpav0.forceHotPlugDetect() +#dcp.dptx.dcpav0.setVirtualDeviceMode(0) +#dcp.dptx.dcpav0.setPower(True) +#dcp.dptx.dcpav0.wakeDisplay() +#dcp.dptx.dcpav0.sleepDisplay() +#dcp.dptx.dcpav0.wakeDisplay() + +print("Waiting for HPD...") +while True: + hpd, ntim, ncolor = dcp.iboot.disp0.getModeCount() + if hpd: + break + +print("HPD asserted") + +print(f"Connected:{hpd} Timing modes:{ntim} Color modes:{ncolor}") +dcp.iboot.disp0.setPower(True) + +timing_modes = 
dcp.iboot.disp0.getTimingModes() +print("Timing modes:") +print(timing_modes) + +color_modes = dcp.iboot.disp0.getColorModes() +print("Color modes:") +print(color_modes) + +timing_modes.sort(key=lambda c: (c.valid, c.width <= 1920, c.fps_int <= 60, c.width, c.height, c.fps_int, c.fps_frac)) +timing_mode = timing_modes[-1] + +color_modes.sort(key=lambda c: (c.valid, c.bpp <= 32, c.bpp, -int(c.colorimetry), -int(c.encoding), -int(c.eotf))) +color_mode = color_modes[-1] + +print("Chosen timing mode:", timing_mode) +print("Chosen color mode:", color_mode) + +dcp.iboot.disp0.setMode(timing_mode, color_mode) + +w, h = timing_mode.width, timing_mode.height + +layer = Container( + planes = [ + Container( + addr = 0x013ec000, + stride = u.ba.video.stride, + addr_format = AddrFormat.PLANAR, + ), + Container(), + Container() + ], + plane_cnt = 1, + width = u.ba.video.width, + height = u.ba.video.height, + surface_fmt = SurfaceFormat.w30r, + colorspace = Colorspace.SCRGBFixed, + eotf = EOTF.GAMMA_SDR, + transform = Transform.NONE, +) + +mw = min(w, u.ba.video.width) +mh = min(h, u.ba.video.height) + +swap = dcp.iboot.disp0.swapBegin() +print(swap) +dcp.iboot.disp0.swapSetLayer(0, layer, (mw, mh, 0, 0), (mw, mh, 0, 0)) +dcp.iboot.disp0.swapEnd() +#dcp.iboot.disp0.swapWait(swap.swap_id) + +run_shell(globals(), msg="Have fun!") + +# full shutdown! 
+dcp.stop(1) +p.pmgr_reset(0, "DISP0_CPU0") diff --git a/tools/proxyclient/experiments/dcpext_iboot.py b/tools/proxyclient/experiments/dcpext_iboot.py new file mode 100644 index 0000000..ceb60bd --- /dev/null +++ b/tools/proxyclient/experiments/dcpext_iboot.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +import sys, pathlib +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +import struct +from construct import * + +from m1n1.setup import * +from m1n1.shell import run_shell +from m1n1 import asm +from m1n1.hw.dart import DART, DARTRegs +from m1n1.fw.dcp.iboot import DCPIBootClient, SurfaceFormat, EOTF, Transform, AddrFormat +from m1n1.fw.dcp.dcpav import * +from m1n1.proxyutils import RegMonitor + +dart = DART.from_adt(u, "arm-io/dart-dcpext0") +disp_dart = DART.from_adt(u, "arm-io/dart-dispext0") +#disp_dart.dump_all() + +dcp_addr = u.adt["arm-io/dcpext0"].get_reg(0)[0] +dcp = DCPIBootClient(u, dcp_addr, dart, disp_dart) +dcp.dva_offset = getattr(u.adt["/arm-io/dcpext0"][0], "asc_dram_mask", 0) + +dcp.start() +dcp.start_ep(0x20) +dcp.start_ep(0x23) +dcp.start_ep(0x24) +dcp.start_ep(0x27) +dcp.start_ep(0x2a) + +dcp.system.wait_for("system") +dcp.iboot.wait_for("disp0") +dcp.dptx.wait_for("dcpav0") +dcp.dptx.wait_for("dcpav1") +dcp.dptx.wait_for("dcpdp0") +dcp.dptx.wait_for("dcpdp1") +dcp.dpport.wait_for("port0") +dcp.dpport.wait_for("port1") + +dcp.system.wait_for("system") +dcp.system.system.setProperty("gAFKConfigLogMask", 0xffff) + +print("Connect...") +dcp.dpport.port0.open() +dcp.dpport.port0.getLocation() +dcp.dpport.port0.getLocation() +dcp.dpport.port0.getUnit() +# this triggers the power up message +dcp.dpport.port0.displayRequest() +# these seem to not work/do anything? 
+dcp.dpport.port0.connectTo(True, ATC0, DPPHY, 0) + +#dcp.dcpav.controller.setPower(False) +#dcp.dcpav.controller.forceHotPlugDetect() +#dcp.dcpav.controller.setVirtualDeviceMode(0) +#dcp.dcpav.controller.setPower(True) +#dcp.dcpav.controller.wakeDisplay() +#dcp.dcpav.controller.sleepDisplay() +#dcp.dcpav.controller.wakeDisplay() + +print("Waiting for HPD...") +while True: + hpd, ntim, ncolor = dcp.iboot.disp0.getModeCount() + if hpd: + break + +print("HPD asserted") + +print(f"Connected:{hpd} Timing modes:{ntim} Color modes:{ncolor}") +dcp.iboot.disp0.setPower(True) + +timing_modes = dcp.iboot.disp0.getTimingModes() +print("Timing modes:") +print(timing_modes) + +color_modes = dcp.iboot.disp0.getColorModes() +print("Color modes:") +print(color_modes) + +timing_modes.sort(key=lambda c: (c.valid, c.width <= 1920, c.fps_int <= 60, c.width, c.height, c.fps_int, c.fps_frac)) +timing_mode = timing_modes[-1] + +color_modes.sort(key=lambda c: (c.valid, c.bpp <= 32, c.bpp, -int(c.colorimetry), -int(c.encoding), -int(c.eotf))) +color_mode = color_modes[-1] + +print("Chosen timing mode:", timing_mode) +print("Chosen color mode:", color_mode) + +dcp.iboot.disp0.setMode(timing_mode, color_mode) + +w, h = timing_mode.width, timing_mode.height + +layer = Container( + planes = [ + Container( + addr = 0x013ec000, + stride = u.ba.video.stride, + addr_format = AddrFormat.PLANAR, + ), + Container(), + Container() + ], + plane_cnt = 1, + width = u.ba.video.width, + height = u.ba.video.height, + surface_fmt = SurfaceFormat.w30r, + colorspace = 2, + eotf = EOTF.GAMMA_SDR, + transform = Transform.NONE, +) + +mw = min(w, u.ba.video.width) +mh = min(h, u.ba.video.height) + +swap = dcp.iboot.disp0.swapBegin() +print(swap) +dcp.iboot.disp0.swapSetLayer(0, layer, (mw, mh, 0, 0), (mw, mh, 0, 0)) +dcp.iboot.disp0.swapEnd() +#dcp.iboot.disp0.swapWait(swap.swap_id) + +run_shell(globals(), msg="Have fun!") + +# full shutdown! 
+dcp.stop(1) +p.pmgr_reset(0, "DISP0_CPU0") diff --git a/tools/proxyclient/experiments/find_sprr_regs.py b/tools/proxyclient/experiments/find_sprr_regs.py new file mode 100755 index 0000000..b3261db --- /dev/null +++ b/tools/proxyclient/experiments/find_sprr_regs.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +import sys, pathlib +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +from m1n1.setup import * +from m1n1.find_regs import * +from m1n1 import asm + +p.iodev_set_usage(IODEV.FB, 0) + +if u.mrs(SPRR_CONFIG_EL1): + u.msr(GXF_CONFIG_EL12, 0) + u.msr(SPRR_CONFIG_EL12, 0) + u.msr(GXF_CONFIG_EL1, 0) + u.msr(SPRR_CONFIG_EL1, 0) + +# Set up HCR_EL2 for EL1, since we can't do it after enabling GXF +u.inst("nop", call="el1") + +all_regs = set() +for reg in [SPRR_CONFIG_EL1, GXF_CONFIG_EL1, SPRR_CONFIG_EL12, GXF_CONFIG_EL12]: + old_regs = set(find_regs(u, values=False)) + u.msr(reg, 1) + el2_items = set(find_regs(u)) + el2_vals = dict(el2_items) + new_regs = set(k for k, v in el2_items) + + all_regs = all_regs.union(new_regs) + + diff_regs = new_regs - old_regs + + print(reg) + for r in sorted(diff_regs): + print(" %s --> %lx" % (sysreg_name(r), u.mrs(r))) + +gl2_items = list(find_regs(u, regs=static_regs,call="gl2")) +gl2_vals = dict(gl2_items) +gl2_regs = set(k for k, v in gl2_items) + +print("GL2") +for reg in sorted(gl2_regs - all_regs): + print(" %s -> %lx" % (sysreg_name(reg), gl2_vals[reg])) +for reg in sorted(gl2_regs): + if reg in el2_vals and gl2_vals[reg] != el2_vals[reg]: + print(" ! 
%s %lx -> %lx" % (sysreg_name(reg), el2_vals[reg], gl2_vals[reg])) + +u.msr(GXF_CONFIG_EL12, 0) +u.msr(SPRR_CONFIG_EL12, 0) +u.msr(GXF_CONFIG_EL1, 0) +u.msr(SPRR_CONFIG_EL1, 0) + +gl1_items = list(find_regs(u, regs=static_regs, call="gl1")) +gl1_vals = dict(gl1_items) +gl1_regs = set(k for k, v in gl1_items) + +print("GL1") +for reg in sorted(gl1_regs - all_regs): + val = gl1_vals[reg] + print(" %s -> %lx" % (sysreg_name(reg), val)) + + cval = u.mrs(reg, call="gl1", silent=False) + print(" cur: 0x%lx" % (cval)) + + try: + u.msr(reg, cval, call="gl1", silent=False) + except: + print(">RO") + continue + + gl2_vals = dict(find_regs(u, regs=static_regs,call="gl2")) + u.msr(reg, cval ^ 0xffff, call="gl1", silent=True) + + for r, v in find_regs(u, regs=static_regs, call="gl2"): + if v != gl2_vals[r]: + print(" GL2 access: %s %lx -> %lx" % (sysreg_name(r), gl2_vals[r], v)) + + u.msr(reg, cval, call="gl1", silent=True) + +for reg in sorted(gl1_regs): + if reg in el2_vals and gl1_vals[reg] != el2_vals[reg]: + print(" ! 
%s %lx -> %lx" % (sysreg_name(reg), el2_vals[reg], gl1_vals[reg])) diff --git a/tools/proxyclient/experiments/fptest.py b/tools/proxyclient/experiments/fptest.py new file mode 100755 index 0000000..6c45bdc --- /dev/null +++ b/tools/proxyclient/experiments/fptest.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +import sys, pathlib +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +from m1n1.setup import * +from m1n1 import asm + +FPCR_FZ = 1 << 24 + +ACTLR_DEFAULT = 0xc00 +ACTLR_AFP = 1 << 5 + +AFPCR = (3,6,15,2,5) +AFPCR_DAZ = 1 << 0 +AFPCR_FTZ = 1 << 1 + +code_buffer = p.malloc(0x1000) +data_buffer = p.malloc(0x1000) + +code = asm.ARMAsm(""" + ldr s0, [x0, #0] + ldr s1, [x0, #4] + fmul s0, s1, s0 + str s0, [x0, #8] + + ldr s0, [x0, #12] + ldr s1, [x0, #16] + fmul s0, s1, s0 + str s0, [x0, #20] + + # to test EL0 access + # mrs x0, s3_6_c15_c2_5 + ret +""", code_buffer) + +iface.writemem(code_buffer, code.data) +p.dc_cvau(code_buffer, code.len) +p.ic_ivau(code_buffer, code.len) + +def test_denormals(): + + data = [ + 0x00400000, # a denormal + 0x40000000, # 2 + 0, + 0x00800000, # smallest non-denormal + 0x3f000000, # 0.5 + 0, + ] + + iface.writemem(data_buffer, struct.pack("<%dI" % len(data), *data)) + + p.set_exc_guard(GUARD.SKIP) + ret = p.el0_call(code_buffer, data_buffer | REGION_RW_EL0) + p.set_exc_guard(GUARD.OFF) + + v1 = p.read32(data_buffer + 8) + v2 = p.read32(data_buffer + 20) + + print(" Input:", end=" ") + if v1 == 0: + print("FLUSH ", end=" ") + elif v1 == 0x00800000: + print("NORMAL", end=" ") + else: + print("0x08x?" % v1, end=" ") + + print("Output:", end=" ") + if v2 == 0: + print("FLUSH ", end=" ") + elif v2 == 0x00400000: + print("NORMAL", end=" ") + else: + print("0x08x?" 
% v2, end=" ") + print("r = 0x%x" % ret) + + +print("Testing normal mode") +u.msr(ACTLR_EL1, ACTLR_DEFAULT) +u.msr(AFPCR, 0) + +u.msr(FPCR, 0) +print("FPCR.FZ = 0") +test_denormals() + +u.msr(FPCR, FPCR_FZ) +print("FPCR.FZ = 1") +test_denormals() + +print() +print("Testing Apple mode") +u.msr(ACTLR_EL1, ACTLR_DEFAULT | ACTLR_AFP) +u.msr(AFPCR, 0) + +u.msr(FPCR, 0) +print("FPCR.FZ = 0") +test_denormals() + +u.msr(FPCR, FPCR_FZ) +print("FPCR.FZ = 1") +test_denormals() + +u.msr(AFPCR, AFPCR_DAZ) +print("AFPCR.<FTZ, DAZ> = 0, 1") +test_denormals() + +u.msr(AFPCR, AFPCR_FTZ) +print("AFPCR.<FTZ, DAZ> = 1, 0") +test_denormals() + +u.msr(AFPCR, AFPCR_FTZ | AFPCR_DAZ) +print("AFPCR.<FTZ, DAZ> = 1, 1") +test_denormals() diff --git a/tools/proxyclient/experiments/hacr_trap_bits.py b/tools/proxyclient/experiments/hacr_trap_bits.py new file mode 100755 index 0000000..7982c33 --- /dev/null +++ b/tools/proxyclient/experiments/hacr_trap_bits.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +import sys, pathlib +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +from m1n1.setup import * +from m1n1 import asm + +code_len = 12 * 16 * 8 + 4 +data_len = 8 * 16 * 8 + +if u.mrs(SPRR_CONFIG_EL1): + u.msr(GXF_CONFIG_EL12, 0) + u.msr(SPRR_CONFIG_EL12, 0) + u.msr(GXF_CONFIG_EL1, 0) + u.msr(SPRR_CONFIG_EL1, 0) + +u.msr(HACR_EL2, 0) + +hcr = HCR(u.mrs(HCR_EL2)) +hcr.TIDCP = 0 +hcr.TGE = 0 +u.msr(HCR_EL2, hcr.value) +u.inst(0xd5033fdf) # isb + +ACTLR_DEFAULT = 0xc00 +ACTLR_AFP = 1 << 5 +u.msr(ACTLR_EL1, ACTLR_DEFAULT | ACTLR_AFP) + +code_buffer = p.malloc(code_len) +data_buffer = p.malloc(data_len) + +template = asm.ARMAsm(""" + mov x2, x0 + mrs x2, s3_0_c0_c0_0 + str x2, [x1], #8 + ret +""", code_buffer) + +mov, mrs, st, ret = struct.unpack("4I", template.data) + +data = [] + +BAD = 0xacce5515abad1dea + +AUX = [ + ACTLR_EL1, + ACTLR_EL2, + AFSR0_EL1, + AFSR0_EL2, + AFSR1_EL1, + AFSR1_EL2, + AIDR_EL1, + AIDR2_EL1, + AMAIR_EL1, + AMAIR_EL2, + 
APCTL_EL1, + APSTS_EL1, +] + +def test(): + u.msr(SPRR_CONFIG_EL1, 1) + u.msr(GXF_CONFIG_EL1, 1) + u.msr(SPRR_CONFIG_EL12, 1) + u.msr(GXF_CONFIG_EL12, 1) + + for op1 in range(1 << 3): + for CRn in (0b1011, 0b1111): + mrs0 = mrs | (op1 << 16) | (CRn << 12) + insns = [] + for CRm in range(1 << 4): + for op2 in range(1 << 3): + insns.extend((mov, mrs0 | (CRm << 8) | (op2 << 5), st)) + insns.append(ret) + iface.writemem(code_buffer, struct.pack("<385I", *insns)) + p.dc_cvau(code_buffer, code_len) + p.ic_ivau(code_buffer, code_len) + + p.set_exc_guard(GUARD.SILENT | GUARD.SKIP) + p.el1_call(code_buffer, BAD, data_buffer) + cnt = p.get_exc_count() + + data = iface.readmem(data_buffer, data_len) + d = struct.unpack("<128Q", data) + i = 0 + for CRm in range(1 << 4): + for op2 in range(1 << 3): + v = d[i] + if v != BAD: + yield (3, op1, CRn, CRm, op2) + i += 1 + for enc in AUX: + try: + v = u.mrs(enc, call="el1", silent=True) + if v != BAD: + yield enc + except: + continue + + u.msr(GXF_CONFIG_EL12, 0) + u.msr(SPRR_CONFIG_EL12, 0) + u.msr(GXF_CONFIG_EL1, 0) + u.msr(SPRR_CONFIG_EL1, 0) + +baseline = set(test()) + +for bit in range(64): + print() + print ("## HACR_EL2[%d]" % bit) + u.msr(HACR_EL2, 1<<bit) + u.inst(0xd5033fdf) # isb + + new = set(test()) + + added = new - baseline + removed = baseline - new + + if added: + print("Untraps:") + for enc in sorted(added): + print(f"{sysreg_name(enc)} ({', '.join(str(i) for i in enc)})") + + if removed: + print("Traps:") + for enc in sorted(removed): + print(f"{sysreg_name(enc)} ({', '.join(str(i) for i in enc)})") + +p.set_exc_guard(GUARD.OFF) diff --git a/tools/proxyclient/experiments/i2c.py b/tools/proxyclient/experiments/i2c.py new file mode 100755 index 0000000..4100964 --- /dev/null +++ b/tools/proxyclient/experiments/i2c.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +import sys, pathlib +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +import struct + +from m1n1.setup import 
* +from m1n1 import asm + +base = 0x235010000 + +# register defines from https://github.com/torvalds/linux/blob/master/drivers/i2c/busses/i2c-pasemi.c +# Copyright (C) 2006-2007 PA Semi, Inc +# SMBus host driver for PA Semi PWRficient +REG_MTXFIFO = 0x00 +REG_MRXFIFO = 0x04 +REG_SMSTA = 0x14 +REG_CTL = 0x1c + +MTXFIFO_READ = 0x00000400 +MTXFIFO_STOP = 0x00000200 +MTXFIFO_START = 0x00000100 +MTXFIFO_DATA_M = 0x000000ff + +MRXFIFO_EMPTY = 0x00000100 +MRXFIFO_DATA_M = 0x000000ff + +SMSTA_XEN = 0x08000000 +SMSTA_MTN = 0x00200000 + +CTL_MRR = 0x00000400 +CTL_MTR = 0x00000200 +CTL_CLK_M = 0x000000ff + +CLK_100K_DIV = 84 +CLK_400K_DIV = 21 + + +def i2c_read_reg(addr, reg, reg_size): + p.set32(base + REG_CTL, CTL_MTR | CTL_MRR) + p.write32(base + REG_SMSTA, 0xffffffff) + + p.write32(base + REG_MTXFIFO, MTXFIFO_START | (addr << 1)) + p.write32(base + REG_MTXFIFO, MTXFIFO_STOP | reg) + + while not (p.read32(base + REG_SMSTA) & SMSTA_XEN): + pass + + p.write32(base + REG_MTXFIFO, MTXFIFO_START | (addr << 1) | 1) + p.write32(base + REG_MTXFIFO, MTXFIFO_READ | MTXFIFO_STOP | reg_size + 1) + + res = [] + while len(res) < reg_size+1: + v = p.read32(base + REG_MRXFIFO) + if v & 0x100: + continue + res.append(v) + + if res[0] < reg_size: + print("only read %d instead of %d bytes" % (res[0], reg_size)) + return res[1:] + + +def i2c_write_reg(addr, reg, data): + p.set32(base + REG_CTL, CTL_MTR | CTL_MRR) + p.write32(base + REG_SMSTA, 0xffffffff) + + p.write32(base + REG_MTXFIFO, MTXFIFO_START | (addr << 1)) + p.write32(base + REG_MTXFIFO, reg) + for i in range(len(data)-1): + p.write32(base + REG_MTXFIFO, data[i]) + p.write32(base + REG_MTXFIFO, data[-1] | MTXFIFO_STOP) + + while not (p.read32(base + REG_SMSTA) & SMSTA_XEN): + pass + + +def i2c_read16(addr, reg): + data = struct.pack(">2b", *i2c_read_reg(addr, reg, 2)) + return struct.unpack(">H", data)[0] + + +def i2c_read32(addr, reg): + data = struct.pack(">4b", *i2c_read_reg(addr, reg, 4)) + return struct.unpack(">I", data)[0] + 
+ +def tps6598x_exec_cmd(addr, cmd, data_in, out_len): + if data_in: + data = [len(data_in)] + data_in + + # TPS_REG_DATA1 + i2c_write_reg(addr, 0x09, data) + + # TPS_REG_CMD1 + cmd = [4] + list(map(ord, cmd)) + i2c_write_reg(addr, 0x08, cmd) + + # TPS_REG_CMD1 + v = i2c_read32(addr, 0x08) + while v != 0: + if v == 0x21434d44: # !CMD + raise Exception("Invalid command!") + v = i2c_read32(addr, 0x08) + + if not out_len: + return + + # TPS_REG_DATA1 + return i2c_read_reg(addr, 0x09, out_len) + + +print("make sure to run pmgr_adt_clocks_enable for /arm-io/i2c0 before this script.") + +# apple-specific command to bring the power state to zero +# (or any other value specified as an argument) +tps6598x_exec_cmd(0x3f, "SSPS", [0], 0) +tps6598x_exec_cmd(0x38, "SSPS", [0], 0) + +tps6598x_exec_cmd(0x3f, "SWDF", None, 0) +tps6598x_exec_cmd(0x3f, "SWSr", None, 0) +tps6598x_exec_cmd(0x38, "SWDF", None, 0) +tps6598x_exec_cmd(0x38, "SWSr", None, 0) diff --git a/tools/proxyclient/experiments/jpeg.py b/tools/proxyclient/experiments/jpeg.py new file mode 100644 index 0000000..1e53a6b --- /dev/null +++ b/tools/proxyclient/experiments/jpeg.py @@ -0,0 +1,1222 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +import sys, pathlib +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +from m1n1.setup import * +from m1n1.hw.dart import DART, DARTRegs +from m1n1.hw.jpeg import * +from m1n1.utils import * +import argparse +import struct +import time +from enum import IntEnum +from PIL import Image, ImageDraw + + +def divroundup(val, div): + return (val + div - 1) // div + + +def yuv2rgb(y, u, v): + y -= 16 + u -= 128 + v -= 128 + + y /= 255 + u /= 255 + v /= 255 + + r = y + 1.13983 * v + g = y - 0.39465 * u - 0.58060 * v + b = y + 2.03211 * u + + r = min(255, max(0, int(r * 255))) + g = min(255, max(0, int(g * 255))) + b = min(255, max(0, int(b * 255))) + + return (r, g, b) + + +def rgb2yuv(r, g, b): + r /= 255 + g /= 255 + b /= 255 + + y = 0.299*r + 0.587*g + 
0.114*b + u = -0.14713*r - 0.28886*g + 0.436*b + v = 0.615*r - 0.51499*g - 0.10001*b + + y = y * 255 + 16 + u = u * 255 + 128 + v = v * 255 + 128 + + y = min(255, max(0, int(y))) + u = min(255, max(0, int(u))) + v = min(255, max(0, int(v))) + + return (y, u, v) + + +ap = argparse.ArgumentParser(description='JPEG block experiment') +ap.add_argument("--jpeg", dest='which_jpeg', type=str, default='jpeg0', + help='which JPEG instance (jpeg0/jpeg1)') +g = ap.add_mutually_exclusive_group(required=True) +g.add_argument("-e", "--encode", action='store_true') +g.add_argument("-d", "--decode", action='store_true') +ap.add_argument("--raw-output", type=str, required=False) +ap.add_argument("--decode-scale", type=int, required=False, default=1) +ap.add_argument("--decode-pixelfmt", type=str, required=False, default='RGBA') +ap.add_argument("--decode-rgba-alpha", type=int, required=False, default=255) +ap.add_argument("--encode-subsampling", type=str, required=False, default='444') +ap.add_argument("--encode-rst-interval", type=int, required=False) +ap.add_argument("--encode-pixelfmt", type=str, required=False, default='RGB888') +ap.add_argument("input", type=str) +ap.add_argument("output", type=str) +args = ap.parse_args() + +# print(args) + +# Perform necessary pre-parsing +if args.decode: + assert args.decode_scale in [1, 2, 4, 8] + decode_scale = args.decode_scale + # FIXME: verify behavior on non-evenly-divisible sizes + + assert args.decode_pixelfmt in [ + 'RGBA', + 'BGRA', + 'RGB565', + 'YUV422-CbYCrY', + 'YUV422-YCbYCr', + 'YUV422-planar', + 'YUV420-planar', + 'YUV444-planar', + ] + pixfmt = args.decode_pixelfmt + + with open(args.input, 'rb') as f: + jpeg_data = f.read() + + found_sof0 = False + + jpeg_work = jpeg_data + while jpeg_work: + seg_marker = struct.unpack(">H", jpeg_work[:2])[0] + print(f"Seg {seg_marker:04X}") + if seg_marker == 0xFFD8: + # SOI + jpeg_work = jpeg_work[2:] + elif seg_marker == 0xFFDA: + # SOS + break + else: + seg_len = struct.unpack(">H", 
jpeg_work[2:4])[0] + assert seg_len >= 2 + seg_data = jpeg_work[4:4 + seg_len - 2] + jpeg_work = jpeg_work[4 + seg_len - 2:] + + if seg_marker == 0xFFC0: + # SOF0 + assert not found_sof0 + found_sof0 = True + sof0 = struct.unpack(">BHHB", seg_data[:6]) + (jpeg_bpp, jpeg_H, jpeg_W, jpeg_components_cnt) = sof0 + # it is not yet verified what the requirements are for inputs + assert jpeg_bpp == 8 + assert jpeg_components_cnt == 1 or jpeg_components_cnt == 3 + if jpeg_components_cnt == 1: + jpeg_MODE = '400' + else: + jpeg_components = {} + for i in range(jpeg_components_cnt): + comp_id, comp_sampling, _ = seg_data[6+3*i:6+3*(i+1)] + jpeg_components[comp_id] = comp_sampling + assert 1 in jpeg_components + comp_Y = jpeg_components[1] + assert 2 in jpeg_components + comp_Cb = jpeg_components[2] + assert 3 in jpeg_components + comp_Cr = jpeg_components[3] + + if (comp_Y, comp_Cb, comp_Cr) == (0x11, 0x11, 0x11): + jpeg_MODE = '444' + elif (comp_Y, comp_Cb, comp_Cr) == (0x21, 0x11, 0x11): + jpeg_MODE = '422' + elif (comp_Y, comp_Cb, comp_Cr) == (0x22, 0x11, 0x11): + jpeg_MODE = '420' + elif (comp_Y, comp_Cb, comp_Cr) == (0x41, 0x11, 0x11): + jpeg_MODE = '411' + else: + # TODO: 422-vertical, others??? + # Is it possible to implement them? + print("Unsupported subsampling mode") + assert False + + assert found_sof0 + print(f"JPEG is {jpeg_W}x{jpeg_H} with subsampling {jpeg_MODE}") + + if jpeg_MODE == '444' or jpeg_MODE == '400': + macroblock_W, macroblock_H = 8, 8 + elif jpeg_MODE == '422': + macroblock_W, macroblock_H = 16, 8 + elif jpeg_MODE == '420': + macroblock_W, macroblock_H = 16, 16 + elif jpeg_MODE == '411': + macroblock_W, macroblock_H = 32, 8 + else: + assert False + + # FIXME: Exactly how much extra memory do we need to allocate? 
+ surface_W = divroundup(jpeg_W // decode_scale, macroblock_W) * macroblock_W + surface_H = divroundup(jpeg_H // decode_scale, macroblock_H) * macroblock_H + if pixfmt in ['RGBA', 'BGRA']: + BYTESPP = 4 + elif pixfmt in ['RGB565', 'YUV422-CbYCrY', 'YUV422-YCbYCr']: + BYTESPP = 2 + elif pixfmt in ['YUV422-planar', 'YUV420-planar', 'YUV444-planar']: + BYTESPP = 1 + else: + assert False + surface_stride = surface_W * BYTESPP + surface_sz = surface_stride*surface_H + + if pixfmt == 'YUV422-planar': + P1_MULW = 1 # FIXME UGLY + P1_DIVW = 1 + P1_DIVH = 1 + elif pixfmt == 'YUV420-planar': + P1_MULW = 1 + P1_DIVW = 1 + P1_DIVH = 2 + elif pixfmt == 'YUV444-planar': + P1_MULW = 2 + P1_DIVW = 1 + P1_DIVH = 1 + if pixfmt in ['YUV422-planar', 'YUV420-planar', 'YUV444-planar']: + surface_P1_W = surface_W * P1_MULW // P1_DIVW + surface_P1_H = surface_H // P1_DIVH + surface_P1_stride = surface_P1_W + surface_P1_off = surface_sz + surface_sz += surface_P1_stride*surface_P1_H + else: + surface_P1_stride = 0 + surface_P1_off = 0 + + input_mem_sz = align_up(len(jpeg_data)) + print(f"Using size {input_mem_sz:08X} for JPEG data") + + output_mem_sz = align_up(surface_sz) + print(f"Using size {output_mem_sz:08X} for output image") +else: + assert args.encode_subsampling in ['444', '422', '420', '400'] + if args.encode_subsampling == '444' or args.encode_subsampling == '400': + macroblock_W, macroblock_H = 8, 8 + elif args.encode_subsampling == '422': + macroblock_W, macroblock_H = 16, 8 + elif args.encode_subsampling == '420': + macroblock_W, macroblock_H = 16, 16 + else: + assert False + + assert args.encode_pixelfmt in [ + 'RGB888', + 'RGB101010', + 'RGB565', + 'YUV10', + 'YUV-linear', + 'YUV444-planar', + 'YUV422-planar', + 'YUV420-planar', + ] + pixfmt = args.encode_pixelfmt + + # Driver doesn't support this either + if pixfmt == 'YUV-linear' and args.encode_subsampling == '444': + print("WARNING: This combination does not appear to work!!!") + if pixfmt == 'YUV422-planar' and 
args.encode_subsampling == '444': + print("WARNING: This combination does not appear to work!!!") + if pixfmt == 'YUV420-planar' and args.encode_subsampling == '444': + print("WARNING: This combination does not appear to work!!!") + + image_data = b'' + image_data_P1 = b'' + with Image.open(args.input) as im: + im_W, im_H = im.size + + if pixfmt != 'YUV420-planar': + for y in range(im_H): + for x in range(im_W): + r, g, b = im.getpixel((x, y)) + if pixfmt == 'RGB888': + image_data += struct.pack("BBBB", r, g, b, 255) + elif pixfmt == 'RGB101010': + image_data += struct.pack("<I", (r << 2) | (g << 12) | (b << 22)) + elif pixfmt == 'RGB565': + image_data += struct.pack("<H", (r >> 3) | ((g >> 2) << 5) | ((b >> 3) << 11)) + elif pixfmt == 'YUV10': + # absolute garbage color space conversion + # for demonstration purposes only + y_, u_, v_ = rgb2yuv(r, g, b) + image_data += struct.pack("<I", (y_ << 2) | (u_ << 12) | (v_ << 22)) + elif pixfmt == 'YUV-linear': + # garbage color space conversion, garbage subsampling + # for demonstration purposes only + y_, u_, v_ = rgb2yuv(r, g, b) + if x & 1 == 0: + color = u_ + else: + color = v_ + image_data += struct.pack("BB", y_, color) + elif pixfmt == 'YUV444-planar': + # garbage color space conversion + # for demonstration purposes only + y_, u_, v_ = rgb2yuv(r, g, b) + image_data += struct.pack("B", y_) + image_data_P1 += struct.pack("BB", u_, v_) + elif pixfmt == 'YUV422-planar': + # garbage color space conversion, garbage subsampling + # for demonstration purposes only + y_, u_, v_ = rgb2yuv(r, g, b) + if x & 1 == 0: + color = u_ + else: + color = v_ + image_data += struct.pack("B", y_) + image_data_P1 += struct.pack("B", color) + else: + assert False + else: + for y in range(im_H): + for x in range(im_W): + r, g, b = im.getpixel((x, y)) + # garbage color space conversion, garbage subsampling + # for demonstration purposes only + y_, u_, v_ = rgb2yuv(r, g, b) + if x & 1 == 0: + color = u_ + else: + color = v_ + image_data += 
struct.pack("B", y_) + if y & 1 == 0: + image_data_P1 += struct.pack("B", color) + + if pixfmt in ['RGB888', 'RGB101010', 'YUV10']: + BYTESPP = 4 + BYTESPP_P1 = 0 + P1_DIVH = 1 + elif pixfmt in ['RGB565', 'YUV-linear']: + BYTESPP = 2 + BYTESPP_P1 = 0 + P1_DIVH = 1 + elif pixfmt == 'YUV444-planar': + BYTESPP = 1 + BYTESPP_P1 = 2 + P1_DIVH = 1 + elif pixfmt == 'YUV422-planar': + BYTESPP = 1 + BYTESPP_P1 = 1 + P1_DIVH = 1 + elif pixfmt == 'YUV420-planar': + BYTESPP = 1 + BYTESPP_P1 = 1 + P1_DIVH = 2 + else: + assert False + surface_stride = im_W * BYTESPP + surface_sz = surface_stride * im_H + surface_P1_off = surface_sz + print(f"Plane 1 offset at {surface_P1_off:08X}") + surface_P1_stride = im_W * BYTESPP_P1 + surface_sz += surface_P1_stride * im_H // P1_DIVH + input_mem_sz = align_up(surface_sz) + + output_mem_sz = input_mem_sz + + print(f"Using size {input_mem_sz:08X} for input image") + print(f"Using size {output_mem_sz:08X} for output data") + +# Turn on the JPEG block +p.pmgr_adt_clocks_enable(f'/arm-io/dart-{args.which_jpeg}') +p.pmgr_adt_clocks_enable(f'/arm-io/{args.which_jpeg}') + +dart = DART.from_adt(u, f'/arm-io/dart-{args.which_jpeg}') +dart.initialize() + +jpeg_base, _ = u.adt[f'/arm-io/{args.which_jpeg}'].get_reg(0) +jpeg = JPEGRegs(u, jpeg_base) + + +def reset_block(): + jpeg.MODE.val = 0x100 + jpeg.MODE.val = 0x13e + + set_default_regs() + + jpeg.MODE.val = 0x17f + for _ in range(10000): + v = jpeg.REG_0x1004.val + if v == 0: + break + print(f"reset 1 -- {v}") + if (v := jpeg.REG_0x1004.val) != 0: + print(f"reset 1 failed! -- {v}") + assert False + + jpeg.RST_INTERVAL.val = 1 + for _ in range(2500): + v = jpeg.RST_INTERVAL.val + if v == 1: + break + print(f"reset 2 -- {v}") + if (v := jpeg.RST_INTERVAL.val) != 1: + print(f"reset 2 failed! 
-- {v}") + assert False + jpeg.RST_INTERVAL.val = 0 + + jpeg.ENABLE_RST_LOGGING.val = 0 + jpeg.REG_0x1a8.val = 0 + jpeg.REG_0x1ac.val = 0 + jpeg.REG_0x1b0.val = 0 + jpeg.REG_0x1b4.val = 0 + jpeg.REG_0x1bc.val = 0 + jpeg.REG_0x1c0.val = 0 + jpeg.REG_0x1c4.val = 0 + jpeg.REG_0x1c8.val = 0 + jpeg.REG_0x1cc.val = 0 + jpeg.REG_0x1d0.val = 0 + jpeg.REG_0x1d4.val = 0 + + jpeg.MODE.val = 0x143 + + +def set_default_regs(param1=0): + jpeg.REG_0x0.val = 0 + jpeg.REG_0x0.val = 0 + jpeg.REG_0x4.val = 0 + jpeg.CODEC.val = 0 + jpeg.REG_0x2c.val = 0 + jpeg.REG_0x30.val = 0 + jpeg.REG_0x34.val = 1 + jpeg.REG_0x38.val = 1 + jpeg.CHROMA_HALVE_H_TYPE1.val = 0 + jpeg.CHROMA_HALVE_H_TYPE2.val = 0 + jpeg.CHROMA_HALVE_V_TYPE1.val = 0 + jpeg.CHROMA_HALVE_V_TYPE2.val = 0 + jpeg.CHROMA_DOUBLE_H.val = 0 + jpeg.CHROMA_QUADRUPLE_H.val = 0 + jpeg.CHROMA_DOUBLE_V.val = 0 + jpeg.PLANAR_CHROMA_HALVING.val = 0 + jpeg.PX_USE_PLANE1.val = 0 + jpeg.PX_TILES_W.val = 1 + jpeg.PX_TILES_H.val = 1 + jpeg.PX_PLANE0_WIDTH.val = 1 + jpeg.PX_PLANE0_HEIGHT.val = 1 + jpeg.PX_PLANE0_TILING_H.val = 1 + jpeg.PX_PLANE0_TILING_V.val = 1 + jpeg.PX_PLANE0_STRIDE.val = 1 + jpeg.PX_PLANE1_WIDTH.val = 1 + jpeg.PX_PLANE1_HEIGHT.val = 1 + jpeg.PX_PLANE1_TILING_H.val = 1 + jpeg.PX_PLANE1_TILING_V.val = 1 + jpeg.PX_PLANE1_STRIDE.val = 1 + jpeg.INPUT_START1.val = 0 + jpeg.INPUT_START2.val = 0 + jpeg.REG_0x94.val = 1 + jpeg.REG_0x98.val = 1 + jpeg.INPUT_END.val = 0xffffffff + jpeg.OUTPUT_START1.val = 0 + jpeg.OUTPUT_START2.val = 0 + jpeg.OUTPUT_END.val = 0xffffffff + for i in range(11): + jpeg.MATRIX_MULT[i].val = 0 + for i in range(10): + jpeg.DITHER[i].val = 0xff + jpeg.ENCODE_PIXEL_FORMAT.val = 0 + jpeg.ENCODE_COMPONENT0_POS.val = 0 + jpeg.ENCODE_COMPONENT1_POS.val = 0 + jpeg.ENCODE_COMPONENT2_POS.val = 0 + jpeg.ENCODE_COMPONENT3_POS.val = 0 + jpeg.CONVERT_COLOR_SPACE.val = 0 + jpeg.REG_0x118.val = 0 + jpeg.REG_0x11c.val = 0 + jpeg.REG_0x120.val = 0 + jpeg.TILING_ENABLE.val = 0 + jpeg.TILING_PLANE0.val = 0 + 
jpeg.TILING_PLANE1.val = 0 + jpeg.DECODE_MACROBLOCKS_W.val = 0 + jpeg.DECODE_MACROBLOCKS_H.val = 0 + jpeg.SCALE_FACTOR.val = 0 + jpeg.DECODE_PIXEL_FORMAT.val = 0 + jpeg.YUV422_ORDER.val = 0 + jpeg.RGBA_ORDER.val = 0 + jpeg.RGBA_ALPHA.val = 0 + jpeg.RIGHT_EDGE_PIXELS.val = 0 + jpeg.BOTTOM_EDGE_PIXELS.val = 0 + jpeg.RIGHT_EDGE_SAMPLES.val = 0 + jpeg.BOTTOM_EDGE_SAMPLES.val = 0 + + # this is always done on the m1 max hwrev + jpeg.REG_0x1fc.val = 0 + jpeg.REG_0x200.val = 0 + jpeg.REG_0x204.val = 0 + jpeg.REG_0x208.val = 0 + jpeg.REG_0x214.val = 0 + jpeg.REG_0x218.val = 0 + jpeg.REG_0x21c.val = 0 + jpeg.REG_0x220.val = 0 + jpeg.REG_0x224.val = 0 + jpeg.REG_0x228.val = 0 + jpeg.REG_0x22c.val = 0 + jpeg.REG_0x230.val = 0 + jpeg.REG_0x234.val = 0x1f40 + jpeg.REG_0x244.val = 0 + jpeg.REG_0x248.val = 0 + jpeg.REG_0x258.val = 0 + jpeg.REG_0x25c.val = 0 + jpeg.REG_0x23c.val = 0 + jpeg.REG_0x240.val = 0 + jpeg.REG_0x250.val = 0 + jpeg.REG_0x254.val = 0 + + jpeg.REG_0x160.val = param1 + jpeg.TIMEOUT.val = 0 + jpeg.REG_0x20.val = 0xff + + +print(f"HW revision is {jpeg.HWREV}") +reset_block() + +input_buf_phys = u.heap.memalign(0x4000, input_mem_sz) +output_buf_phys = u.heap.memalign(0x4000, output_mem_sz) +print(f"buffers (phys) {input_buf_phys:016X} {output_buf_phys:016X}") + +input_buf_iova = dart.iomap(0, input_buf_phys, input_mem_sz) +output_buf_iova = dart.iomap(0, output_buf_phys, output_mem_sz) +print(f"buffers (iova) {input_buf_iova:08X} {output_buf_iova:08X}") +# dart.dump_all() + +iface.writemem(input_buf_phys, b'\xAA' * input_mem_sz) +iface.writemem(output_buf_phys, b'\xAA' * output_mem_sz) + + +if args.decode: + iface.writemem(input_buf_phys, jpeg_data) + print("JPEG uploaded") + + jpeg.REG_0x34 = 1 + jpeg.REG_0x2c = 0 + jpeg.REG_0x38 = 0 + if jpeg_MODE == '444': + jpeg.CODEC.set(CODEC=E_CODEC._444) + elif jpeg_MODE == '400': + jpeg.CODEC.set(CODEC=E_CODEC._400) + elif jpeg_MODE == '422': + jpeg.CODEC.set(CODEC=E_CODEC._422) + elif jpeg_MODE == '420': + 
jpeg.CODEC.set(CODEC=E_CODEC._420) + elif jpeg_MODE == '411': + jpeg.CODEC.set(CODEC=E_CODEC._411) + else: + assert False + if pixfmt == 'RGBA' or pixfmt == 'BGRA': + jpeg.DECODE_PIXEL_FORMAT.set(FORMAT=E_DECODE_PIXEL_FORMAT.RGBA8888) + elif pixfmt == 'RGB565': + jpeg.DECODE_PIXEL_FORMAT.set(FORMAT=E_DECODE_PIXEL_FORMAT.RGB565) + elif pixfmt == 'YUV422-CbYCrY' or pixfmt == 'YUV422-YCbYCr': + jpeg.DECODE_PIXEL_FORMAT.set(FORMAT=E_DECODE_PIXEL_FORMAT.YUV422_linear) + elif pixfmt == 'YUV422-planar': + jpeg.DECODE_PIXEL_FORMAT.set(FORMAT=E_DECODE_PIXEL_FORMAT.YUV422_planar) + elif pixfmt == 'YUV420-planar': + jpeg.DECODE_PIXEL_FORMAT.set(FORMAT=E_DECODE_PIXEL_FORMAT.YUV420_planar) + elif pixfmt == 'YUV444-planar': + jpeg.DECODE_PIXEL_FORMAT.set(FORMAT=E_DECODE_PIXEL_FORMAT.YUV444_planar) + else: + assert False + + if pixfmt in ['YUV422-planar', 'YUV420-planar', 'YUV444-planar']: + jpeg.PX_USE_PLANE1 = 1 + jpeg.PX_PLANE1_WIDTH = jpeg_W * P1_MULW // P1_DIVW // decode_scale - 1 + jpeg.PX_PLANE1_HEIGHT = jpeg_H // P1_DIVH // decode_scale - 1 + else: + jpeg.PX_USE_PLANE1 = 0 + jpeg.PX_PLANE0_WIDTH = jpeg_W*BYTESPP // decode_scale - 1 + jpeg.PX_PLANE0_HEIGHT = jpeg_H // decode_scale - 1 + jpeg.TIMEOUT = 266000000 + + jpeg.REG_0x94 = 0x1f + jpeg.REG_0x98 = 1 + + jpeg.DECODE_MACROBLOCKS_W = divroundup(jpeg_W, macroblock_W) + jpeg.DECODE_MACROBLOCKS_H = divroundup(jpeg_H, macroblock_H) + right_edge_px = \ + jpeg_W - divroundup(jpeg_W, macroblock_W)*macroblock_W + macroblock_W + bot_edge_px = \ + jpeg_H - divroundup(jpeg_H, macroblock_H)*macroblock_H + macroblock_H + # XXX changing this does not seem to do anything. + # Does it possibly affect scaling down? 
+ jpeg.RIGHT_EDGE_PIXELS.val = right_edge_px + jpeg.BOTTOM_EDGE_PIXELS.val = bot_edge_px + jpeg.RIGHT_EDGE_SAMPLES.val = right_edge_px // (macroblock_W // 8) + jpeg.BOTTOM_EDGE_SAMPLES.val = bot_edge_px // (macroblock_H // 8) + + jpeg.PX_TILES_H = divroundup(jpeg_H, macroblock_H) + # FIXME explain this + if pixfmt in ['RGBA', 'BGRA', 'RGB565', 'YUV444-planar']: + jpeg.PX_TILES_W = divroundup(jpeg_W // decode_scale, macroblock_W) + else: + jpeg.PX_TILES_W = divroundup(jpeg_W // decode_scale, max(macroblock_W, 16)) + if pixfmt == 'RGBA' or pixfmt == 'BGRA': + if jpeg_MODE == '444' or jpeg_MODE == '400': + jpeg.PX_PLANE0_TILING_H = 4 + jpeg.PX_PLANE0_TILING_V = 8 // decode_scale + jpeg.PX_PLANE1_TILING_H = 1 + jpeg.PX_PLANE1_TILING_V = 1 + elif jpeg_MODE == '422': + jpeg.PX_PLANE0_TILING_H = 8 + jpeg.PX_PLANE0_TILING_V = 8 // decode_scale + jpeg.PX_PLANE1_TILING_H = 1 + jpeg.PX_PLANE1_TILING_V = 1 + elif jpeg_MODE == '420': + jpeg.PX_PLANE0_TILING_H = 8 + jpeg.PX_PLANE0_TILING_V = 16 // decode_scale + jpeg.PX_PLANE1_TILING_H = 0 + jpeg.PX_PLANE1_TILING_V = 0 + elif jpeg_MODE == '411': + jpeg.PX_PLANE0_TILING_H = 16 + jpeg.PX_PLANE0_TILING_V = 8 // decode_scale + jpeg.PX_PLANE1_TILING_H = 0 + jpeg.PX_PLANE1_TILING_V = 0 + else: + assert False + elif pixfmt == 'RGB565': + if jpeg_MODE == '444' or jpeg_MODE == '400': + jpeg.PX_PLANE0_TILING_H = 2 + jpeg.PX_PLANE0_TILING_V = 8 // decode_scale + jpeg.PX_PLANE1_TILING_H = 1 + jpeg.PX_PLANE1_TILING_V = 1 + elif jpeg_MODE == '422': + jpeg.PX_PLANE0_TILING_H = 4 + jpeg.PX_PLANE0_TILING_V = 8 // decode_scale + jpeg.PX_PLANE1_TILING_H = 1 + jpeg.PX_PLANE1_TILING_V = 1 + elif jpeg_MODE == '420': + jpeg.PX_PLANE0_TILING_H = 4 + jpeg.PX_PLANE0_TILING_V = 16 // decode_scale + jpeg.PX_PLANE1_TILING_H = 0 + jpeg.PX_PLANE1_TILING_V = 0 + elif jpeg_MODE == '411': + jpeg.PX_PLANE0_TILING_H = 8 + jpeg.PX_PLANE0_TILING_V = 8 // decode_scale + jpeg.PX_PLANE1_TILING_H = 0 + jpeg.PX_PLANE1_TILING_V = 0 + else: + assert False + elif pixfmt == 
'YUV422-CbYCrY' or pixfmt == 'YUV422-YCbYCr': + if jpeg_MODE == '444' or jpeg_MODE == '400': + jpeg.PX_PLANE0_TILING_H = 4 + jpeg.PX_PLANE0_TILING_V = 8 // decode_scale + jpeg.PX_PLANE1_TILING_H = 1 + jpeg.PX_PLANE1_TILING_V = 1 + elif jpeg_MODE == '422': + jpeg.PX_PLANE0_TILING_H = 4 + jpeg.PX_PLANE0_TILING_V = 8 // decode_scale + jpeg.PX_PLANE1_TILING_H = 1 + jpeg.PX_PLANE1_TILING_V = 1 + elif jpeg_MODE == '420': + jpeg.PX_PLANE0_TILING_H = 4 + jpeg.PX_PLANE0_TILING_V = 16 // decode_scale + jpeg.PX_PLANE1_TILING_H = 0 + jpeg.PX_PLANE1_TILING_V = 0 + elif jpeg_MODE == '411': + jpeg.PX_PLANE0_TILING_H = 8 + jpeg.PX_PLANE0_TILING_V = 8 // decode_scale + jpeg.PX_PLANE1_TILING_H = 0 + jpeg.PX_PLANE1_TILING_V = 0 + else: + assert False + elif pixfmt == 'YUV422-planar': + if jpeg_MODE == '444' or jpeg_MODE == '400': + jpeg.PX_PLANE0_TILING_H = 2 + jpeg.PX_PLANE0_TILING_V = 8 // decode_scale + jpeg.PX_PLANE1_TILING_H = 2 + jpeg.PX_PLANE1_TILING_V = 8 // decode_scale + elif jpeg_MODE == '422': + jpeg.PX_PLANE0_TILING_H = 2 + jpeg.PX_PLANE0_TILING_V = 8 // decode_scale + jpeg.PX_PLANE1_TILING_H = 2 + jpeg.PX_PLANE1_TILING_V = 8 // decode_scale + elif jpeg_MODE == '420': + jpeg.PX_PLANE0_TILING_H = 2 + jpeg.PX_PLANE0_TILING_V = 16 // decode_scale + jpeg.PX_PLANE1_TILING_H = 2 + jpeg.PX_PLANE1_TILING_V = 16 // decode_scale + elif jpeg_MODE == '411': + jpeg.PX_PLANE0_TILING_H = 4 + jpeg.PX_PLANE0_TILING_V = 8 // decode_scale + jpeg.PX_PLANE1_TILING_H = 4 + jpeg.PX_PLANE1_TILING_V = 8 // decode_scale + else: + assert False + elif pixfmt == 'YUV420-planar': + if jpeg_MODE == '444' or jpeg_MODE == '400': + jpeg.PX_PLANE0_TILING_H = 2 + jpeg.PX_PLANE0_TILING_V = 8 // decode_scale + jpeg.PX_PLANE1_TILING_H = 2 + jpeg.PX_PLANE1_TILING_V = 4 // decode_scale + elif jpeg_MODE == '422': + jpeg.PX_PLANE0_TILING_H = 2 + jpeg.PX_PLANE0_TILING_V = 8 // decode_scale + jpeg.PX_PLANE1_TILING_H = 2 + jpeg.PX_PLANE1_TILING_V = 4 // decode_scale + elif jpeg_MODE == '420': + 
jpeg.PX_PLANE0_TILING_H = 2 + jpeg.PX_PLANE0_TILING_V = 16 // decode_scale + jpeg.PX_PLANE1_TILING_H = 2 + jpeg.PX_PLANE1_TILING_V = 8 // decode_scale + elif jpeg_MODE == '411': + jpeg.PX_PLANE0_TILING_H = 4 + jpeg.PX_PLANE0_TILING_V = 8 // decode_scale + jpeg.PX_PLANE1_TILING_H = 4 + jpeg.PX_PLANE1_TILING_V = 4 // decode_scale + else: + assert False + elif pixfmt == 'YUV444-planar': + if jpeg_MODE == '444' or jpeg_MODE == '400': + jpeg.PX_PLANE0_TILING_H = 1 + jpeg.PX_PLANE0_TILING_V = 8 // decode_scale + jpeg.PX_PLANE1_TILING_H = 2 + jpeg.PX_PLANE1_TILING_V = 8 // decode_scale + elif jpeg_MODE == '422': + # The driver doesn't use this, but guessing seems to be fine? + jpeg.PX_PLANE0_TILING_H = 2 + jpeg.PX_PLANE0_TILING_V = 8 // decode_scale + jpeg.PX_PLANE1_TILING_H = 4 + jpeg.PX_PLANE1_TILING_V = 8 // decode_scale + elif jpeg_MODE == '420': + # The driver doesn't use this, but guessing seems to be fine? + jpeg.PX_PLANE0_TILING_H = 2 + jpeg.PX_PLANE0_TILING_V = 16 // decode_scale + jpeg.PX_PLANE1_TILING_H = 4 + jpeg.PX_PLANE1_TILING_V = 16 // decode_scale + elif jpeg_MODE == '411': + # The driver doesn't use this, but guessing seems to be fine? 
+ jpeg.PX_PLANE0_TILING_H = 4 + jpeg.PX_PLANE0_TILING_V = 8 // decode_scale + jpeg.PX_PLANE1_TILING_H = 8 + jpeg.PX_PLANE1_TILING_V = 8 // decode_scale + else: + assert False + else: + assert False + + if pixfmt in ['RGBA', 'BGRA', 'RGB565', 'YUV444-planar']: + if jpeg_MODE in ['422', '420']: + jpeg.CHROMA_DOUBLE_H = 1 + + if jpeg_MODE == '411': + jpeg.CHROMA_QUADRUPLE_H = 1 + + if jpeg_MODE == '420': + jpeg.CHROMA_DOUBLE_V = 1 + elif pixfmt in ["YUV422-CbYCrY", "YUV422-YCbYCr", "YUV422-planar"]: + if jpeg_MODE == '444': + jpeg.CHROMA_HALVE_H_TYPE1 = 1 + + if jpeg_MODE == '411': + jpeg.CHROMA_DOUBLE_H = 1 + + if jpeg_MODE == '420': + jpeg.CHROMA_DOUBLE_V = 1 + elif pixfmt in ["YUV420-planar"]: + if jpeg_MODE == '444': + jpeg.CHROMA_HALVE_H_TYPE1 = 1 + + if jpeg_MODE in ['444', '422', '411']: + jpeg.CHROMA_HALVE_V_TYPE1 = 1 + + if jpeg_MODE == '411': + jpeg.CHROMA_DOUBLE_H = 1 + else: + assert False + + jpeg.MATRIX_MULT[0].val = 0x100 + jpeg.MATRIX_MULT[1].val = 0x0 + jpeg.MATRIX_MULT[2].val = 0x167 + jpeg.MATRIX_MULT[3].val = 0x100 + jpeg.MATRIX_MULT[4].val = 0xffffffa8 + jpeg.MATRIX_MULT[5].val = 0xffffff49 + jpeg.MATRIX_MULT[6].val = 0x100 + jpeg.MATRIX_MULT[7].val = 0x1c6 + jpeg.MATRIX_MULT[8].val = 0x0 + jpeg.MATRIX_MULT[9].val = 0x0 + jpeg.MATRIX_MULT[10].val = 0xffffff80 + + jpeg.RGBA_ALPHA = args.decode_rgba_alpha + jpeg.RGBA_ORDER = pixfmt == "RGBA" + jpeg.YUV422_ORDER = pixfmt == "YUV422-YCbYCr" + + if decode_scale == 1: + jpeg.SCALE_FACTOR.set(SCALE=E_SCALE.DIV1) + elif decode_scale == 2: + jpeg.SCALE_FACTOR.set(SCALE=E_SCALE.DIV2) + elif decode_scale == 4: + jpeg.SCALE_FACTOR.set(SCALE=E_SCALE.DIV4) + elif decode_scale == 8: + jpeg.SCALE_FACTOR.set(SCALE=E_SCALE.DIV8) + else: + assert False + + jpeg.INPUT_START1 = input_buf_iova + jpeg.INPUT_START2 = 0xdeadbeef + jpeg.INPUT_END = input_buf_iova + input_mem_sz + jpeg.OUTPUT_START1 = output_buf_iova + jpeg.OUTPUT_START2 = output_buf_iova + surface_P1_off + jpeg.OUTPUT_END = output_buf_iova + output_mem_sz 
+ jpeg.PX_PLANE0_STRIDE = surface_stride + jpeg.PX_PLANE1_STRIDE = surface_P1_stride + + jpeg.REG_0x1ac = 0x0 + jpeg.REG_0x1b0 = 0x0 + jpeg.REG_0x1b4 = 0x0 + jpeg.REG_0x1bc = 0x0 + jpeg.REG_0x1c0 = 0x0 + jpeg.REG_0x1c4 = 0x0 + + jpeg.REG_0x118 = 0x0 + jpeg.REG_0x11c = 0x1 + + jpeg.MODE = 0x177 + jpeg.REG_0x1028 = 0x400 + + jpeg.JPEG_IO_FLAGS = 0x3f + jpeg.REG_0x0 = 0x1 + jpeg.REG_0x1004 = 0x1 + + # FIXME: we don't actually know when it's done + time.sleep(1) + + print(jpeg.STATUS.reg) + print(jpeg.PERFCOUNTER.reg) + + output_data = iface.readmem(output_buf_phys, output_mem_sz) + if args.raw_output is not None: + with open(args.raw_output, 'wb') as f: + f.write(output_data) + + # Just for demonstration purposes, wrangle everything back into RGB + with Image.new( + mode='RGBA', + size=(jpeg_W // decode_scale, jpeg_H // decode_scale)) as im: + if pixfmt in ["RGBA", "BGRA", "RGB565"]: + for y in range(jpeg_H // decode_scale): + for x in range(jpeg_W // decode_scale): + block = output_data[ + y*surface_stride + x*BYTESPP: + y*surface_stride + (x+1)*BYTESPP] + + if pixfmt == "RGBA": + r, g, b, a = block + elif pixfmt == "BGRA": + b, g, r, a = block + elif pixfmt == "RGB565": + rgb = struct.unpack("<H", block)[0] + b = (rgb & 0b11111) << 3 + g = ((rgb >> 5) & 0b111111) << 2 + r = ((rgb >> 11) & 0b11111) << 3 + a = 255 + else: + assert False + im.putpixel((x, y), (r, g, b, a)) + elif pixfmt in ["YUV422-CbYCrY", "YUV422-YCbYCr"]: + for y in range(jpeg_H // decode_scale): + for x in range(0, jpeg_W // decode_scale, 2): + block = output_data[ + y*surface_stride + x*BYTESPP: + y*surface_stride + (x+2)*BYTESPP] + + if pixfmt == "YUV422-CbYCrY": + cb, y0, cr, y1 = block + elif pixfmt == "YUV422-YCbYCr": + y0, cb, y1, cr = block + + r0, g0, b0 = yuv2rgb(y0, cb, cr) + r1, g1, b1 = yuv2rgb(y1, cb, cr) + + im.putpixel((x, y), (r0, g0, b0, 255)) + # XXX this really needs some fixing + if x+1 < jpeg_W // decode_scale: + im.putpixel((x+1, y), (r1, g1, b1, 255)) + elif pixfmt == 
"YUV422-planar": + for y in range(jpeg_H // decode_scale): + for x in range(jpeg_W // decode_scale): + y_ = output_data[y*surface_stride + x] + cb = output_data[surface_P1_off + y*surface_P1_stride + (x&~1)] # parenthesized: '+' binds tighter than '&' + cr = output_data[surface_P1_off + y*surface_P1_stride + (x&~1)+1] + + r, g, b = yuv2rgb(y_, cb, cr) + + im.putpixel((x, y), (r, g, b, 255)) + elif pixfmt == "YUV420-planar": + for y in range(jpeg_H // decode_scale): + for x in range(jpeg_W // decode_scale): + y_ = output_data[y*surface_stride + x] + cb = output_data[surface_P1_off + (y//2)*surface_P1_stride + (x&~1)] # parenthesized: '+' binds tighter than '&' + cr = output_data[surface_P1_off + (y//2)*surface_P1_stride + (x&~1)+1] + + r, g, b = yuv2rgb(y_, cb, cr) + + im.putpixel((x, y), (r, g, b, 255)) + elif pixfmt == "YUV444-planar": + for y in range(jpeg_H // decode_scale): + for x in range(jpeg_W // decode_scale): + y_ = output_data[y*surface_stride + x] + cb = output_data[surface_P1_off + y*surface_P1_stride + x*2] + cr = output_data[surface_P1_off + y*surface_P1_stride + x*2+1] + + r, g, b = yuv2rgb(y_, cb, cr) + + im.putpixel((x, y), (r, g, b, 255)) + else: + assert False + im.save(args.output) + +if args.encode: + iface.writemem(input_buf_phys, image_data) + iface.writemem(input_buf_phys + surface_P1_off, image_data_P1) + print("Pixel data uploaded") + + jpeg.MODE = 0x17f + jpeg.REG_0x38 = 0x1 # if not set nothing happens + jpeg.REG_0x2c = 0x1 # if not set only header is output + jpeg.REG_0x34 = 0x0 # if set output is a JPEG but weird with no footer + + if args.encode_subsampling == '444': + jpeg.CODEC.set(CODEC=E_CODEC._444) + elif args.encode_subsampling == '422': + jpeg.CODEC.set(CODEC=E_CODEC._422) + elif args.encode_subsampling == '420': + jpeg.CODEC.set(CODEC=E_CODEC._420) + elif args.encode_subsampling == '400': + jpeg.CODEC.set(CODEC=E_CODEC._400) + else: + assert False + + if BYTESPP_P1 != 0: + jpeg.PX_USE_PLANE1 = 1 + jpeg.PX_PLANE1_WIDTH = im_W*BYTESPP_P1 - 1 + jpeg.PX_PLANE1_HEIGHT = im_H // P1_DIVH - 1 + else: + jpeg.PX_USE_PLANE1 =
0 + jpeg.PX_PLANE1_WIDTH = 0xffffffff + jpeg.PX_PLANE1_HEIGHT = 0xffffffff + jpeg.PX_PLANE0_WIDTH = im_W*BYTESPP - 1 + jpeg.PX_PLANE0_HEIGHT = im_H - 1 + jpeg.TIMEOUT = 266000000 + + jpeg.PX_TILES_W = divroundup(im_W, macroblock_W) + jpeg.PX_TILES_H = divroundup(im_H, macroblock_H) + if pixfmt in ['RGB888', 'RGB101010', 'YUV10']: + if args.encode_subsampling == '444' or args.encode_subsampling == '400': + jpeg.PX_PLANE0_TILING_H = 4 + jpeg.PX_PLANE0_TILING_V = 8 + jpeg.PX_PLANE1_TILING_H = 1 + jpeg.PX_PLANE1_TILING_V = 1 + elif args.encode_subsampling == '422': + jpeg.PX_PLANE0_TILING_H = 8 + jpeg.PX_PLANE0_TILING_V = 8 + jpeg.PX_PLANE1_TILING_H = 1 + jpeg.PX_PLANE1_TILING_V = 1 + elif args.encode_subsampling == '420': + jpeg.PX_PLANE0_TILING_H = 8 + jpeg.PX_PLANE0_TILING_V = 16 + jpeg.PX_PLANE1_TILING_H = 0 + jpeg.PX_PLANE1_TILING_V = 0 + else: + assert False + elif pixfmt == 'RGB565': + if args.encode_subsampling == '444' or args.encode_subsampling == '400': + jpeg.PX_PLANE0_TILING_H = 2 + jpeg.PX_PLANE0_TILING_V = 8 + jpeg.PX_PLANE1_TILING_H = 1 + jpeg.PX_PLANE1_TILING_V = 1 + elif args.encode_subsampling == '422': + jpeg.PX_PLANE0_TILING_H = 4 + jpeg.PX_PLANE0_TILING_V = 8 + jpeg.PX_PLANE1_TILING_H = 1 + jpeg.PX_PLANE1_TILING_V = 1 + elif args.encode_subsampling == '420': + jpeg.PX_PLANE0_TILING_H = 4 + jpeg.PX_PLANE0_TILING_V = 16 + jpeg.PX_PLANE1_TILING_H = 0 + jpeg.PX_PLANE1_TILING_V = 0 + else: + assert False + elif pixfmt == 'YUV-linear': + if args.encode_subsampling == '444' or args.encode_subsampling == '400': + jpeg.PX_PLANE0_TILING_H = 2 + jpeg.PX_PLANE0_TILING_V = 8 + jpeg.PX_PLANE1_TILING_H = 1 + jpeg.PX_PLANE1_TILING_V = 1 + elif args.encode_subsampling == '422': + jpeg.PX_PLANE0_TILING_H = 4 + jpeg.PX_PLANE0_TILING_V = 8 + jpeg.PX_PLANE1_TILING_H = 1 + jpeg.PX_PLANE1_TILING_V = 1 + elif args.encode_subsampling == '420': + jpeg.PX_PLANE0_TILING_H = 4 + jpeg.PX_PLANE0_TILING_V = 16 + jpeg.PX_PLANE1_TILING_H = 0 + jpeg.PX_PLANE1_TILING_V = 0 + else: + 
assert False + elif pixfmt == 'YUV444-planar': + if args.encode_subsampling == '444' or args.encode_subsampling == '400': + jpeg.PX_PLANE0_TILING_H = 1 + jpeg.PX_PLANE0_TILING_V = 8 + jpeg.PX_PLANE1_TILING_H = 2 + jpeg.PX_PLANE1_TILING_V = 8 + elif args.encode_subsampling == '422': + jpeg.PX_PLANE0_TILING_H = 2 + jpeg.PX_PLANE0_TILING_V = 8 + jpeg.PX_PLANE1_TILING_H = 4 + jpeg.PX_PLANE1_TILING_V = 8 + elif args.encode_subsampling == '420': + jpeg.PX_PLANE0_TILING_H = 2 + jpeg.PX_PLANE0_TILING_V = 16 + jpeg.PX_PLANE1_TILING_H = 4 + jpeg.PX_PLANE1_TILING_V = 16 + else: + assert False + elif pixfmt == 'YUV422-planar': + if args.encode_subsampling == '444' or args.encode_subsampling == '400': + jpeg.PX_PLANE0_TILING_H = 1 + jpeg.PX_PLANE0_TILING_V = 8 + jpeg.PX_PLANE1_TILING_H = 1 + jpeg.PX_PLANE1_TILING_V = 8 + elif args.encode_subsampling == '422': + jpeg.PX_PLANE0_TILING_H = 2 + jpeg.PX_PLANE0_TILING_V = 8 + jpeg.PX_PLANE1_TILING_H = 2 + jpeg.PX_PLANE1_TILING_V = 8 + elif args.encode_subsampling == '420': + jpeg.PX_PLANE0_TILING_H = 2 + jpeg.PX_PLANE0_TILING_V = 16 + jpeg.PX_PLANE1_TILING_H = 2 + jpeg.PX_PLANE1_TILING_V = 16 + else: + assert False + elif pixfmt == 'YUV420-planar': + if args.encode_subsampling == '444' or args.encode_subsampling == '400': + jpeg.PX_PLANE0_TILING_H = 1 + jpeg.PX_PLANE0_TILING_V = 8 + jpeg.PX_PLANE1_TILING_H = 1 + jpeg.PX_PLANE1_TILING_V = 4 + elif args.encode_subsampling == '422': + jpeg.PX_PLANE0_TILING_H = 2 + jpeg.PX_PLANE0_TILING_V = 8 + jpeg.PX_PLANE1_TILING_H = 2 + jpeg.PX_PLANE1_TILING_V = 4 + elif args.encode_subsampling == '420': + jpeg.PX_PLANE0_TILING_H = 2 + jpeg.PX_PLANE0_TILING_V = 16 + jpeg.PX_PLANE1_TILING_H = 2 + jpeg.PX_PLANE1_TILING_V = 8 + else: + assert False + else: + assert False + jpeg.PX_PLANE0_STRIDE = surface_stride + jpeg.PX_PLANE1_STRIDE = surface_P1_stride + + if pixfmt in ['RGB888', 'RGB101010', 'RGB565', 'YUV10', 'YUV444-planar']: + if args.encode_subsampling in ['422', '420']: + 
jpeg.CHROMA_HALVE_H_TYPE1 = 1 + if args.encode_subsampling == '420': + jpeg.CHROMA_HALVE_V_TYPE1 = 1 + elif pixfmt in ['YUV-linear', 'YUV422-planar']: + if args.encode_subsampling == '420': + jpeg.CHROMA_HALVE_V_TYPE1 = 1 + elif pixfmt == 'YUV420-planar': + if args.encode_subsampling in ['422', '444']: + jpeg.CHROMA_DOUBLE_V = 1 + else: + assert False + + # none of this seems to affect anything???? + jpeg.REG_0x94 = 0xc # c/2 for 444; 8/2 for 422; 3/1 for 411; b/2 for 400 + jpeg.REG_0x98 = 0x2 + jpeg.REG_0x20c = im_W + jpeg.REG_0x210 = im_H + + if pixfmt in ['RGB888', 'RGB101010', 'RGB565']: + jpeg.CONVERT_COLOR_SPACE = 1 + jpeg.MATRIX_MULT[0].val = 0x4d + jpeg.MATRIX_MULT[1].val = 0x96 + jpeg.MATRIX_MULT[2].val = 0x1d + jpeg.MATRIX_MULT[3].val = 0xffffffd5 + jpeg.MATRIX_MULT[4].val = 0xffffffab + jpeg.MATRIX_MULT[5].val = 0x80 + jpeg.MATRIX_MULT[6].val = 0x80 + jpeg.MATRIX_MULT[7].val = 0xffffff95 + jpeg.MATRIX_MULT[8].val = 0xffffffeb + jpeg.MATRIX_MULT[9].val = 0x0 + jpeg.MATRIX_MULT[10].val = 0x80 + + if pixfmt == 'RGB888': + jpeg.ENCODE_PIXEL_FORMAT.set(FORMAT=E_ENCODE_PIXEL_FORMAT.RGB888) + elif pixfmt == 'RGB101010': + jpeg.ENCODE_PIXEL_FORMAT.set(FORMAT=E_ENCODE_PIXEL_FORMAT.RGB101010) + elif pixfmt == 'RGB565': + jpeg.ENCODE_PIXEL_FORMAT.set(FORMAT=E_ENCODE_PIXEL_FORMAT.RGB565) + elif pixfmt == 'YUV10': + jpeg.ENCODE_PIXEL_FORMAT.set(FORMAT=E_ENCODE_PIXEL_FORMAT.YUV10_linear) + elif pixfmt == 'YUV-linear': + jpeg.ENCODE_PIXEL_FORMAT.set(FORMAT=E_ENCODE_PIXEL_FORMAT.YUV_linear) + elif pixfmt in ['YUV444-planar', 'YUV422-planar', 'YUV420-planar']: + jpeg.ENCODE_PIXEL_FORMAT.set(FORMAT=E_ENCODE_PIXEL_FORMAT.YUV_planar) + else: + assert False + if pixfmt == 'YUV-linear': + jpeg.ENCODE_COMPONENT0_POS = 0 + jpeg.ENCODE_COMPONENT1_POS = 1 + jpeg.ENCODE_COMPONENT2_POS = 3 + jpeg.ENCODE_COMPONENT3_POS = 2 + elif pixfmt in ['YUV422-planar', 'YUV420-planar', 'YUV444-planar']: + jpeg.ENCODE_COMPONENT0_POS = 0 + jpeg.ENCODE_COMPONENT1_POS = 0 + 
jpeg.ENCODE_COMPONENT2_POS = 1 + jpeg.ENCODE_COMPONENT3_POS = 3 + else: + jpeg.ENCODE_COMPONENT0_POS = 0 + jpeg.ENCODE_COMPONENT1_POS = 1 + jpeg.ENCODE_COMPONENT2_POS = 2 + jpeg.ENCODE_COMPONENT3_POS = 3 + + jpeg.INPUT_START1 = input_buf_iova + jpeg.INPUT_START2 = input_buf_iova + surface_P1_off + jpeg.INPUT_END = input_buf_iova + input_mem_sz + 7 # NOTE +7 + jpeg.OUTPUT_START1 = output_buf_iova + jpeg.OUTPUT_START2 = 0xdeadbeef + jpeg.OUTPUT_END = output_buf_iova + output_mem_sz + + jpeg.REG_0x118 = 0x1 + jpeg.REG_0x11c = 0x0 + + jpeg.ENABLE_RST_LOGGING = args.encode_rst_interval is not None + + jpeg.MODE = 0x16f + if args.encode_subsampling == '444': + jpeg_subsampling = E_JPEG_IO_FLAGS_SUBSAMPLING._444 + elif args.encode_subsampling == '422': + jpeg_subsampling = E_JPEG_IO_FLAGS_SUBSAMPLING._422 + elif args.encode_subsampling == '420': + jpeg_subsampling = E_JPEG_IO_FLAGS_SUBSAMPLING._420 + elif args.encode_subsampling == '400': + jpeg_subsampling = E_JPEG_IO_FLAGS_SUBSAMPLING._400 + else: + assert False + jpeg.JPEG_IO_FLAGS.set( + OUTPUT_8BYTE_CHUNKS_CORRECTLY=1, + OUTPUT_MACROBLOCKS_UNFLIPPED_H=1, + SUBSAMPLING_MODE=jpeg_subsampling + ) + jpeg.JPEG_WIDTH = im_W + jpeg.JPEG_HEIGHT = im_H + if args.encode_rst_interval is not None: + jpeg.RST_INTERVAL = args.encode_rst_interval + else: + jpeg.RST_INTERVAL = 0 + jpeg.JPEG_OUTPUT_FLAGS = 0 + + jpeg.QTBL[0].val = 0xa06e64a0 + jpeg.QTBL[1].val = 0xf0ffffff + jpeg.QTBL[2].val = 0x78788cbe + jpeg.QTBL[3].val = 0xffffffff + jpeg.QTBL[4].val = 0x8c82a0f0 + jpeg.QTBL[5].val = 0xffffffff + jpeg.QTBL[6].val = 0x8caadcff + jpeg.QTBL[7].val = 0xffffffff + jpeg.QTBL[8].val = 0xb4dcffff + jpeg.QTBL[9].val = 0xffffffff + jpeg.QTBL[10].val = 0xf0ffffff + jpeg.QTBL[11].val = 0xffffffff + jpeg.QTBL[12].val = 0xffffffff + jpeg.QTBL[13].val = 0xffffffff + jpeg.QTBL[14].val = 0xffffffff + jpeg.QTBL[15].val = 0xffffffff + + jpeg.QTBL[16].val = 0xaab4f0ff + jpeg.QTBL[17].val = 0xffffffff + jpeg.QTBL[18].val = 0xb4d2ffff + 
jpeg.QTBL[19].val = 0xffffffff + jpeg.QTBL[20].val = 0xf0ffffff + jpeg.QTBL[21].val = 0xffffffff + jpeg.QTBL[22].val = 0xffffffff + jpeg.QTBL[23].val = 0xffffffff + jpeg.QTBL[24].val = 0xffffffff + jpeg.QTBL[25].val = 0xffffffff + jpeg.QTBL[26].val = 0xffffffff + jpeg.QTBL[27].val = 0xffffffff + jpeg.QTBL[28].val = 0xffffffff + jpeg.QTBL[29].val = 0xffffffff + jpeg.QTBL[30].val = 0xffffffff + jpeg.QTBL[31].val = 0xffffffff + + jpeg.QTBL[32].val = 0x01010201 + jpeg.QTBL[33].val = 0x01020202 + jpeg.QTBL[34].val = 0x02030202 + jpeg.QTBL[35].val = 0x03030604 + jpeg.QTBL[36].val = 0x03030303 + jpeg.QTBL[37].val = 0x07050804 + jpeg.QTBL[38].val = 0x0608080a + jpeg.QTBL[39].val = 0x0908070b + jpeg.QTBL[40].val = 0x080a0e0d + jpeg.QTBL[41].val = 0x0b0a0a0c + jpeg.QTBL[42].val = 0x0a08080b + jpeg.QTBL[43].val = 0x100c0c0d + jpeg.QTBL[44].val = 0x0f0f0f0f + jpeg.QTBL[45].val = 0x090b1011 + jpeg.QTBL[46].val = 0x0f0e110d + jpeg.QTBL[47].val = 0x0e0e0e01 + + jpeg.QTBL[48].val = 0x04040405 + jpeg.QTBL[49].val = 0x04050905 + jpeg.QTBL[50].val = 0x05090f0a + jpeg.QTBL[51].val = 0x080a0f1a + jpeg.QTBL[52].val = 0x13090913 + jpeg.QTBL[53].val = 0x1a1a1a1a + jpeg.QTBL[54].val = 0x0d1a1a1a + jpeg.QTBL[55].val = 0x1a1a1a1a + jpeg.QTBL[56].val = 0x1a1a1a1a + jpeg.QTBL[57].val = 0x1a1a1a1a + jpeg.QTBL[58].val = 0x1a1a1a1a + jpeg.QTBL[59].val = 0x1a1a1a1a + jpeg.QTBL[60].val = 0x1a1a1a1a + jpeg.QTBL[61].val = 0x1a1a1a1a + jpeg.QTBL[62].val = 0x1a1a1a1a + jpeg.QTBL[63].val = 0x1a1a1a1a + + jpeg.HUFFMAN_TABLE.val = 0x3c + jpeg.QTBL_SEL.val = 0xff + jpeg.REG_0x0.val = 0x1 + jpeg.REG_0x1004.val = 0x1 + + # FIXME: we don't actually know when it's done + time.sleep(1) + + print(jpeg.STATUS.reg) + print(jpeg.PERFCOUNTER.reg) + jpeg_out_sz = jpeg.COMPRESSED_BYTES.val + print(f"JPEG output is {jpeg_out_sz} bytes") + + rst_log_n = jpeg.RST_LOG_ENTRIES.val + for i in range(rst_log_n): + print(f"RST log[{i}] = 0x{jpeg.RSTLOG[i].val:X}") + + output_data = iface.readmem(output_buf_phys, output_mem_sz) 
+ if args.raw_output is not None: + with open(args.raw_output, 'wb') as f: + f.write(output_data) + with open(args.output, 'wb') as f: + f.write(output_data[:jpeg_out_sz]) diff --git a/tools/proxyclient/experiments/jpeg_doc.md b/tools/proxyclient/experiments/jpeg_doc.md new file mode 100644 index 0000000..afb32f8 --- /dev/null +++ b/tools/proxyclient/experiments/jpeg_doc.md @@ -0,0 +1,1019 @@ +# Apple Silicon JPEG encoder/decoder reverse engineering notes + +## General + +### REG_0x0 (+0x0000) + +This register is not fully understood yet. It is set to 1 to kick off an operation. + +This register appears to be 1 bit wide. It is readable/writable. + +The driver resets this register to 0. + + +### REG_0x4 (+0x0004) + +This register is not understood yet. + +This register appears to be 1 bit wide. It is readable/writable. + +The driver resets this register to 0. + + +### MODE (+0x0008) + +This register controls the mode of operation of the hardware. The details of this register are not understood yet. + +This register appears to be 10 bit wide. It is readable/writable. + +At minimum bits 0x043 need to be set or else reading from some registers above 0x1000 will fault. + +This register is set to multiple different values throughout the reset process. + + +### REG_0xc (+0x000C) + +This register is not understood yet. + +This register is at least 10 bit wide. It appears to be read-only. The power-up state appears to be 0x200. + +The driver reads this register and stores the value after an interrupt occurs. + + +### REG_0x10 (+0x0010) +### REG_0x14 (+0x0014) + +No access to this register has been observed. + +This register is at least 11 bit wide. It appears to be read-only. The power-up state appears to be 0x400. + + +### REG_0x18 (+0x0018) + +No access to this register has been observed. + +This register is at least 8 bit wide. It appears to be read-only. The power-up state appears to be 0x55. + + +### (+0x001C) + +No access to this register has been observed. 
+ +It appears to be read-only. The power-up state appears to be 0. + + +### REG_0x20 (+0x0020) + +This register is not understood yet. + +This register appears to be 11 bits wide. It is readable/writable. + +The driver resets this register to 0xff, and it is written with a 0 after an interrupt occurs. + + +### STATUS (+0x0024) + +- bit0: Operation is completed ??? +- bit1: Timeout occurred +- bit2: Read buffer overflow +- bit3: Write buffer overflow +- bit4: Codec buffer overflow +- bit5: Some kind of error, happens if macroblock settings are messed up +- bit6: AXI error +- bit7: The driver checks for this after an interrupt, but the meaning is not understood + + +### CODEC (+0x0028) + +This register controls how the JPEG data is processed with respect to the subsampling mode. It affects both encode and decode. + +- 0 = 4:4:4 +- 1 = 4:2:2 +- 2 = 4:1:1 +- 3 = 4:2:0 +- 4 = 4:0:0 + +### REG_0x2c (+0x002C) + +This register is not fully understood yet. + +This register appears to be 1 bit wide. It is readable/writable. + +The driver sets this register to 0 when decoding and 1 when encoding. If it is not set to 1 when encoding, only headers will be output. The interrupt handler makes a decision based on this register. + + +### REG_0x30 (+0x0030) + +This register is not understood yet. + +This register appears to be 1 bit wide. It is readable/writable. + +The driver resets this register to 0. + + +### REG_0x34 (+0x0034) + +This register is not fully understood yet. + +This register appears to be 1 bit wide. It is readable/writable. + +The driver sets this register to 1 when decoding and 0 when encoding. If it is not set to 0 when encoding, the output will be corrupted in some way. + + +### REG_0x38 (+0x0038) + +This register is not fully understood yet. + +This register appears to be 1 bit wide. It is readable/writable. + +The driver sets this register to 0 when decoding and 1 when encoding. If it is not set to 1 when encoding, nothing will be output.
If it is set to 1 when decoding, the output will be a weird tiled format. + + + +## Chroma control + +### CHROMA_HALVE_H_TYPE1 (+0x003c) +### CHROMA_HALVE_H_TYPE2 (+0x0040) + +Setting these registers to 1 causes chroma to be subsampled horizontally. + +The second register produces a different result from the first register. It is speculated that this is related to chroma siting, but this has not been verified yet. If both the second and the first register are set, the second appears to win. + + +### CHROMA_HALVE_V_TYPE1 (+0x0044) +### CHROMA_HALVE_V_TYPE2 (+0x0048) + +Setting these registers to 1 causes chroma to be subsampled vertically. + +The second register produces a different result from the first register. It is speculated that this is related to chroma siting, but this has not been verified yet. If both the second and the first register are set, the second appears to win. + + +### CHROMA_DOUBLE_H (+0x004c) + +Setting this register to 1 causes chroma to be doubled/interpolated horizontally. + + +### CHROMA_QUADRUPLE_H (+0x0050) + +Setting this register to 1 causes chroma to be quadrupled/interpolated horizontally. If both this and the previous register are set, double appears to win. + + +### CHROMA_DOUBLE_V (+0x0054) + +Setting this register to 1 causes chroma to be doubled/interpolated vertically. + + +## Pixel data control + +### PX_USE_PLANE1 (+0x0058) + +Setting this register to 1 enables use of the second pixel plane. + + +### PX_TILES_W (+0x005c) + +This register specifies the width of the image in tiles/MCUs/macroblocks, where the macroblock size depends on the chroma subsampling mode, i.e. divroundup by 8 for 4:4:4, by 16 for 4:2:2 and 4:2:0, by 32 for 4:1:1 (FIXME verify this again). + +This register is 16 bits wide. + + +### PX_TILES_H (+0x0060) + +This register specifies the height of the image in tiles/MCUs/macroblocks, where the macroblock size depends on the chroma subsampling mode, i.e.
divroundup by 16 for 4:2:0 or else by 8 (FIXME verify this again). + +This register is 16 bits wide. + + +### PX_PLANE0_WIDTH (+0x0064) + +This register specifies the width of the image data in plane 0, in bytes, minus 1. When decoding, it is important to set this correctly for the edge to be processed properly. + +This register is 20 bits wide, even though the driver will sometimes write 0xffffffff. + + +### PX_PLANE0_HEIGHT (+0x0068) + +This register specifies the height of the image data in plane 0, in rows, minus 1. When decoding, it might be important to set this correctly for the edge to be processed properly. + +This register is 16 bits wide, even though the driver will sometimes write 0xffffffff. + + +### PX_PLANE0_TILING_H (+0x006c) + +This register somehow controls how pixel data matches up with subsampled chroma data, but the details are not understood yet. Valid range 0-31. + + +### PX_PLANE0_TILING_V (+0x0070) + +This register somehow controls how pixel data matches up with subsampled chroma data, but the details are not understood yet. Valid range 0-31. + + +### PX_PLANE0_STRIDE (+0x0074) + +This is the row stride of plane 0 in bytes. + +This register is 24 bits wide. + + +### PX_PLANE1_WIDTH (+0x0078) +### PX_PLANE1_HEIGHT (+0x007c) +### PX_PLANE1_TILING_H (+0x0080) +### PX_PLANE1_TILING_V (+0x0084) +### PX_PLANE1_STRIDE (+0x0088) + +These registers function similarly to the plane 0 registers. + + +## Input/output pointers + +### INPUT_START1 (+0x008c) + +Input pointer 1 IOVA. + + +### INPUT_START2 (+0x0090) + +Input pointer 2 IOVA. + + +### REG_0x94 (+0x0094) + +This register is not understood yet. + +The driver sets this register to a fixed value of 0x1f when decoding and to a value that depends on the chroma subsampling mode when encoding (0xc for 4:4:4, 0x8 for 4:2:2, 0x3 for 4:2:0, 0xb for 4:0:0), but changing it does not seem to do anything. + +This register is 6 bits wide. + + +### REG_0x98 (+0x0098) + +This register is not understood yet. 
+ +The driver sets this register to a fixed value of 1 when decoding and to a value that depends on the chroma subsampling mode when encoding (2 for 4:4:4/4:2:2/4:0:0, 1 for 4:2:0), but changing it does not seem to do anything. + +This register is 6 bits wide. + + +### INPUT_END (+0x009c) + +End of input data IOVA. + +For reasons that are not understood, this is ORed with 7 when encoding. + + +### OUTPUT_START1 (+0x00a0) + +Output pointer 1 IOVA. + + +### OUTPUT_START2 (+0x00a4) + +Output pointer 2 IOVA. + + +### OUTPUT_END (+0x00a8) + +End of output data IOVA. + + +## MATRIX_MULT (+0x00ac-0x00d7) (11 entries) + +Color space conversion matrix. + +The full details of the shifting/offset/final two values are not understood yet. + + +## DITHER (+0x00d8-0x00ff) (10 entries) + +Dithering when decoding to RGB565. + +The full details of this are not understood yet. + + +## Encoding pixel format + +### ENCODE_PIXEL_FORMAT (+0x0100) + +- 0 = RGB101010 +- 1 = YUV10 4:4:4 linear +- 2 = RGB888 +- 3 = RGB565 +- 4 = YUV planar (partially tested, details not fully understood) +- 5 = YUV8 4:2:2 linear +- 6-9 = do something, may not be useful, maybe invalid, not used by driver + + +### ENCODE_COMPONENT0_POS (+0x0104) +### ENCODE_COMPONENT1_POS (+0x0108) +### ENCODE_COMPONENT2_POS (+0x010c) +### ENCODE_COMPONENT3_POS (+0x0110) + +These registers control the positions of each component in the parsed pixel data. They are used to allow e.g. flipping between RGBA and BGRA. + + +### CONVERT_COLOR_SPACE (+0x0114) + +Setting this register to 1 enables color space conversion when encoding. + + +## Unknown + +### REG_0x118 (+0x0118) + +This register is not understood yet. + +This register appears to be 1 bit wide. It is readable/writable. + +This register is set to 0 when decoding and 1 when encoding. + + +### REG_0x11c (+0x011c) + +This register is not understood yet. + +This register appears to be 1 bit wide. It is readable/writable.
+ +This register is set to 1 when decoding and 0 when encoding. + + +### REG_0x120 (+0x0120) + +This register is not understood yet. + +This register appears to be 1 bit wide. It is readable/writable. + +The driver resets this register to 0. + + +### TILING_ENABLE (+0x0124) + +This register enables the functionality of the following two registers. + +The driver sets this register to 1 when decoding if the surface "is tiled." + + +### TILING_PLANE0 (+0x0128) + +This register is not fully understood yet. A value greater than 8 causes plane 0 to be reformatted (tiled?), but the details are not understood yet. + +This register appears to be 5 bits wide. It is readable/writable. + + +### TILING_PLANE1 (+0x012c) + +This register is not fully understood yet. A value greater than 8 causes plane 1 to be reformatted (tiled?), but the details are not understood yet. + +This register appears to be 5 bits wide. It is readable/writable. + + +## Decoding image size + +### DECODE_MACROBLOCKS_W (+0x0130) + +Sets the width of the decoded image in macroblocks, where the macroblock size depends on the chroma subsampling mode, i.e. divroundup by 8 for 4:4:4, by 16 for 4:2:2 and 4:2:0, by 32 for 4:1:1. + +This register is 16 bits wide. + + +### DECODE_MACROBLOCKS_H (+0x0134) + +Sets the height of the decoded image in macroblocks, where the macroblock size depends on the chroma subsampling mode, i.e. divroundup by 16 for 4:2:0 or else by 8. + +This register is 16 bits wide. + + +### RIGHT_EDGE_PIXELS (+0x0138) + +The driver sets this to the number of pixels that are valid in the rightmost macroblocks, but changing it does not seem to do anything. + +This register is 5 bits wide. + + +### BOTTOM_EDGE_PIXELS (+0x013c) + +The driver sets this to the number of pixels that are valid in the bottommost macroblocks, but changing it does not seem to do anything. + +This register is 4 bits wide.
+ + +### RIGHT_EDGE_SAMPLES (+0x0140) + +The driver sets this to the number of chroma samples that are valid in the rightmost macroblocks, but changing it does not seem to do anything. + +This register is 3 bits wide. + + +### BOTTOM_EDGE_SAMPLES (+0x0144) + +The driver sets this to the number of chroma samples that are valid in the bottommost macroblocks, but changing it does not seem to do anything. + +This register is 3 bits wide. + + +### SCALE_FACTOR (+0x0148) + +- 0 = /1 +- 1 = /2 +- 2 = /4 +- 3 = /8 + +This appears to be ignored when encoding. + + +## Decoding pixel format + +### DECODE_PIXEL_FORMAT (+0x014c) + +- 0 = YUV 444 (2P) +- 1 = YUV 422 (2P) +- 2 = YUV 420 (2P) +- 3 = YUV 422 (1P) +- 4 = driver mentions YUV10 444 (1P) but it does not appear to work (driver also says it doesn't work) +- 5 = RGB888 +- 6 = RGB565 +- 7 = driver mentions RGB101010 but it does not appear to work (driver also says it doesn't work) + + +### YUV422_ORDER (+0x0150) + +- 0 = Cb Y'0 Cr Y'1 +- 1 = Y'0 Cb Y'1 Cr + + +### RGBA_ORDER (+0x0154) + +- 0 = BGRA +- 1 = RGBA + + +### RGBA_ALPHA (+0x0158) + +This value is filled in to alpha bytes when decoding into RGB888 + + +## Unknown/status + +### PLANAR_CHROMA_HALVING (+0x015c) + +This register is not fully understood yet. Setting it seems to halve the chroma vertically when outputting into planar modes. This is different from the CHROMA_HALVE_V_* registers because it halves the final image, not each macroblock. + +The driver seems to use this in some cases when scaling down an image by 8, but the details of this are not understood yet. + +The driver resets this register to 0. + + +### REG_0x160 (+0x0160) + +This register is not understood yet. + +This register appears to be 1 bit wide. It is readable/writable. + +The driver resets this register to a configurable value that happens to be 0. + + +### REG_0x164 (+0x0164) + +This register is not understood yet. + +It appears to be read-only. The power-up state appears to be 0. 
+ +The driver reads this register and stores the value after an interrupt occurs. + + +### (+0x0168) + +No access to this register has been observed. + +It appears to be read-only. The power-up state appears to be 0. + + +### REG_0x16c (+0x016c) + +This register is not understood yet. + +It appears to be read-only. The power-up state appears to be 0. + +The driver reads this register and stores the value after an interrupt occurs. + + +### REG_0x170 (+0x0170) + +This register is not understood yet. + +It appears to be read-only. The power-up state appears to be 0. + +The driver reads this register and stores the value after an interrupt occurs. + + +### (+0x0174) + +No access to this register has been observed. + +It appears to be read-only. The power-up state appears to be 0. + + +### PERFCOUNTER (+0x0178) + +This register appears to be a performance counter. It is not yet understood what is being measured. + +It appears to be read-only. + +The driver reads this register and accumulates it after an interrupt occurs. + + +### (+0x017c) + +No access to this register has been observed. + +It appears to be read-only. The power-up state appears to be 0. + + +### (+0x0180) + +No access to this register has been observed. + +It appears to be read-only. The power-up state appears to be 0. + + +### TIMEOUT (+0x0184) + +This register configures the timeout. It is not yet understood what units this is in. + +This register is 32 bits wide. + + +### HWREV (+0x0188) + +This register contains the hardware revision. On the M1 Max, it is 0xd1013. + + +### REG_0x18c (+0x018c) + +No access to this register has been observed. + +This register appears to be 2 bits wide. It is readable/writable. + + +### REG_0x190 (+0x0190) + +No access to this register has been observed. + +This register appears to be 2 bits wide. It is readable/writable. + + +### REG_0x194 (+0x0194) + +No access to this register has been observed. + +This register appears to be 4 bits wide. It is readable/writable. 
+ + +### REG_0x198 (+0x0198) + +No access to this register has been observed. + +This register appears to be 4 bits wide. It is readable/writable. + + +### REG_0x19c (+0x019c) + +This register is not understood yet. + +This register appears to be 1 bit wide. It is readable/writable. + +The driver under some conditions writes a 1 here. + + +## RST logging / unknown + +### ENABLE_RST_LOGGING (+0x01a0) + +If this register is set to 1, some data about RST blocks will be logged when encoding. + + +### RST_LOG_ENTRIES (+0x01a4) + +This register will contain the number of RST log entries. + + +### REG_0x1a8 (+0x01a8) +### REG_0x1ac (+0x01ac) +### REG_0x1b0 (+0x01b0) + +This register is not understood yet. + +This register appears to be 1 bit wide. It is readable/writable. + + +### REG_0x1b4 (+0x01b4) +### REG_0x1b8 (+0x01b8) +### REG_0x1bc (+0x01bc) + +This register is not understood yet. + +This register appears to be 32 bits wide. It is readable/writable. + + +### REG_0x1c0 (+0x01c0) +### REG_0x1c4 (+0x01c4) + +This register is not understood yet. + +This register appears to be 16 bits wide. It is readable/writable. + + +### REG_0x1c8 (+0x01c8) + +This register is not understood yet. + +This register appears to be 1 bit wide. It is readable/writable. + + +### REG_0x1cc (+0x01cc) +### REG_0x1d0 (+0x01d0) +### REG_0x1d4 (+0x01d4) +### REG_0x1d8 (+0x01d8) + +This register is not understood yet. + +This register appears to be 32 bits wide. It is readable/writable. + + +### REG_0x1dc (+0x01dc) + +This register is not understood yet. + +This register appears to be 1 bit wide. It is readable/writable. + + +### REG_0x1e0 (+0x01e0) + +This register is not understood yet. + +This register appears to be 14 bits wide. It is readable/writable. + + +### REG_0x1e4 (+0x01e4) + +This register is not understood yet. + +This register appears to be 13 bits wide. It is readable/writable. + + +### REG_0x1e8 (+0x01e8) + +This register is not understood yet.
+ +This register appears to be 9 bits wide. It is readable/writable. + + +### REG_0x1ec (+0x01ec) + +This register is not understood yet. + +This register appears to be 1 bit wide. It is readable/writable. + + +### REG_0x1f0 (+0x01f0) + +This register is not understood yet. + +This register appears to be 14 bits wide. It is readable/writable. + + +### REG_0x1f4 (+0x01f4) + +This register is not understood yet. + +This register appears to be 13 bits wide. It is readable/writable. + + +### REG_0x1f8 (+0x01f8) + +This register is not understood yet. + +This register appears to be 9 bits wide. It is readable/writable. + + +## Compressed pixel format / Compressed DMA / unknown + +### REG_0x1fc (+0x01fc) +### REG_0x200 (+0x0200) + +No access to this register has been observed. + +This register appears to be 2 bits wide. It is readable/writable. + + +### REG_0x204 (+0x0204) +### REG_0x208 (+0x0208) + +This register is not understood yet. + +This register appears to be 32 bits wide. It is readable/writable. + + +### REG_0x20c (+0x020c) +### REG_0x210 (+0x0210) +### REG_0x214 (+0x0214) +### REG_0x218 (+0x0218) + +This register is not understood yet. + +This register appears to be 17 bits wide. It is readable/writable. + + +### REG_0x21c (+0x021c) +### REG_0x220 (+0x0220) + +This register is not understood yet. + +This register appears to be 16 bits wide. It is readable/writable. + + +### REG_0x224 (+0x0224) +### REG_0x228 (+0x0228) + +This register is not understood yet. + +This register appears to be 17 bits wide. It is readable/writable. + + +### REG_0x22c (+0x022c) +### REG_0x230 (+0x0230) +### REG_0x234 (+0x0234) + +This register is not understood yet. + +This register appears to be 16 bits wide. It is readable/writable. + + +### REG_0x238 (+0x0238) + +This register is not understood yet. + +This register appears to be 1 bit wide. It is readable/writable. + + +### REG_0x23c (+0x023c) +### REG_0x240 (+0x0240) + +This register is not understood yet.
+ +This register appears to be 4 bits wide. It is readable/writable. + + +### REG_0x244 (+0x0244) +### REG_0x248 (+0x0248) + +This register is not understood yet. + +This register appears to be 8 bits wide. It is readable/writable. + + +### REG_0x24c (+0x024c) + +This register is not understood yet. + +This register appears to be 1 bit wide. It is readable/writable. + + +### REG_0x250 (+0x0250) +### REG_0x254 (+0x0254) + +This register is not understood yet. + +This register appears to be 4 bits wide. It is readable/writable. + + +### REG_0x258 (+0x0258) +### REG_0x25c (+0x025c) + +This register is not understood yet. + +This register appears to be 8 bits wide. It is readable/writable. + + +## REG_0x260 (+0x0260) +## REG_0x264 (+0x0264) + +No access to this register has been observed. + +This register appears to be 1 bit wide. It is readable/writable. + + +## REG_0x268 (+0x0268) +## REG_0x26c (+0x026c) + +No access to this register has been observed. + +This register appears to be 7 bits wide. It is readable/writable. + + +## (+0x0270-0x027f) + +No access to this register has been observed. + +It appears to be read-only. The power-up state appears to be 0. + + +## REG_0x280 (+0x0280) + +No access to this register has been observed. + +This register appears to be 1 bit wide. It is readable/writable. + + +## (+0x0284-0x0fff) + +No access to this register has been observed. + +It appears to be read-only. The power-up state appears to be 0. + + +## JPEG I/O related + +### JPEG_IO_FLAGS (+0x1000) + +- bit0-2 control subsampling mode output into the JPEG when encoding + - 0 = 4:4:4 + - 1 = 4:2:2 + - 2 = 4:2:0 + - 3 = monochrome + - 4 = 4 components ??? seems to work with 422 with 444 tiling params ????? + - 6 = indicate 4:1:1 in file, but setting CODEC = 2 doesn't actually work (broken) +- bit3 needs to be set when decoding. It must be unset when encoding. This is not fully understood yet +- bit4 causes macroblocks to _not_ be flipped horizontally.
It affects both encoding and decoding. +- bit5 causes chunks of 8 bytes to _not_ be reversed. It affects both encoding and decoding. + + +### REG_0x1004 (+0x1004) + +This register is not fully understood yet. It is set to 1 to kick off an operation. + +Writing to this register while MODE is set incorrectly can trigger an exception. + + +### REG_0x1008 (+0x1008) + +No access to this register has been observed. + +This register is at least 1 bit wide. It appears to be read-only. The power-up state appears to be 1. + + +### QTBL_SEL (+0x100c) + +This register selects the quantization table in use for each component. + +- bit0-1 = component 0 +- bit2-3 = component 1 +- bit4-5 = component 2 +- bit6-7 = component 3? + + +### HUFFMAN_TABLE (+0x1010) + +This register controls Huffman tables used. The details of this register are not fully understood yet. + +This register is 8 bits wide. + + +### RST_INTERVAL (+0x1014) + +This register controls the interval at which RST markers will be generated when encoding. + +This register is 16 bits wide. + + +### JPEG_HEIGHT (+0x1018) + +This register specifies the height of the JPEG when encoding. It appears to only affect the header. + +This register is 16 bits wide. + + +### JPEG_WIDTH (+0x101c) + +This register specifies the width of the JPEG when encoding. + +This register is 16 bits wide. + + +### COMPRESSED_BYTES (+0x1020) + +This register will contain the final size of the JPEG when encoding + + +### JPEG_OUTPUT_FLAGS (+0x1024) + +- bit0 doesn't seem to do anything +- bit1 = output only SOS/EOI, no SOI/DQT/SOF0/DHT +- bit2 = output SOF0 after DHT instead of before it +- bit3 doesn't seem to do anything +- bit4 not sure exactly what this does, but it makes compression worse + + +### REG_0x1028 (+0x1028) + +This register is not understood yet. + +The driver sets this register to 0x400 when decoding. + +This register appears to be 32 bits wide, but writing 0xffffffff results in 0x8000071f. 
+ + +### REG_0x102c (+0x102c) + +This register is not understood yet. + +The driver reads this register and does something with it after an interrupt occurs. + + +### BITSTREAM_CORRUPTION (+0x1030) + +This register is not understood yet. It supposedly contains information about bitstream corruption. + + +## (+0x1034-0x107f) + +No access to this register has been observed. + +It appears to be read-only. The power-up state appears to be 0. + + +## REG_0x1080 (+0x1080) + +No access to this register has been observed. + +This register appears to be 5 bits wide. It is readable/writable. + + +## REG_0x1084 (+0x1084) + +No access to this register has been observed. + +This register appears to be 7 bits wide. It is readable/writable. + + +## (+0x1088) + +No access to this register has been observed. + +It appears to be read-only. The power-up state appears to be 0. + + +## REG_0x108c (+0x108c) + +No access to this register has been observed. + +This register appears to be 32 bits wide. It is readable/writable. + + +## REG_0x1090 (+0x1090) + +No access to this register has been observed. + +This register appears to be 8 bits wide. It is readable/writable. + + +## (+0x1094-0x10df) + +No access to this register has been observed. + +It appears to be read-only. The power-up state appears to be 0. + + +## SHIKINO_VERSION_MAGIC0 (+0x10e0) +## SHIKINO_VERSION_MAGIC1 (+0x10e4) +## SHIKINO_VERSION_MAGIC2 (+0x10e8) +## SHIKINO_VERSION_MAGIC3 (+0x10ec) +## SHIKINO_VERSION_MAGIC4 (+0x10f0) + +Contains ASCII text 'SHIKINO KJN-7GI 0001' + + +## (+0x10f4-0x10ff) + +No access to this register has been observed. + +It appears to be read-only. The power-up state appears to be 0. + + +## QTBL (+0x1100-0x11ff) + +Quantization tables. The exact layout is not understood yet (zigzag or not?) + + +## (+0x1200-0x1fff) + +No access to this register has been observed. + + +## RSTLOG (+0x2000-0x2fff) + +RST log. The details of this are not understood yet.
+ + +## (+0x3000-0x3fff) + +No access to this register has been observed. diff --git a/tools/proxyclient/experiments/memdump.py b/tools/proxyclient/experiments/memdump.py new file mode 100755 index 0000000..882b79f --- /dev/null +++ b/tools/proxyclient/experiments/memdump.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +import sys, pathlib +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +from m1n1.setup import * + +p = 0x800000000 +limit = u.base +block = 0x40000 + +while p < limit: + f = "mem/0x%x.bin" % p + if os.path.exists(f): + p += block + continue + + print("dumping 0x%x..." % p) + + data = iface.readmem(p, block) + open(f, "wb").write(data) + p += block diff --git a/tools/proxyclient/experiments/mmio_sweep.py b/tools/proxyclient/experiments/mmio_sweep.py new file mode 100755 index 0000000..938ff29 --- /dev/null +++ b/tools/proxyclient/experiments/mmio_sweep.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +import sys, pathlib +import time +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +from m1n1.setup import * +from m1n1.loadobjs import * +import argparse +import numpy as np + +argparser = argparse.ArgumentParser() +argparser.add_argument("-d", "--domain", type=str, + help='Look for MMIO range associated with a particular' + ' power domain') +argparser.add_argument("-p", "--print", action='store_true', + help='Print power domain list') +args = argparser.parse_args() + +if args.print: + for dev in u.adt["/arm-io/pmgr"].devices: + print(dev.name) + sys.exit(0) + +granule = 0x4000 + +lp = LinkedProgram(u) +lp.load_inline_c(f''' + #define GRANULE {granule} + ''' + ''' + #include "exception.h" + #include "utils.h" + #include "soc.h" + + bool is_t6000(void) + { + return chip_id == T6000; + } + + void sweep(u64 from, u64 to, u64 target) + { + u32 *mask = (u32 *) target; + exc_guard = GUARD_MARK | GUARD_SILENT; + + int bitp = 0; + for (u64 p = from; p < to; p += 
GRANULE) { + sysop("dsb sy"); + sysop("isb"); + bool hit = read32(p) != 0xabad1dea; + + if (hit) + *mask |= (1 << bitp); + else + *mask &= ~(1 << bitp); + + if (++bitp >= 32) { + bitp = 0; + mask++; + } + } + + sysop("dsb sy"); + sysop("isb"); + } + ''') + +def do_sweep(maskrange): + masklen = (maskrange.stop - maskrange.start) // granule // 32 * 4 + 4 + mask_base = u.heap.malloc(masklen) + lp.sweep(maskrange.start, maskrange.stop, mask_base) + mask = iface.readmem(mask_base, masklen) + u.heap.free(mask_base) + return np.frombuffer(mask, dtype=np.uint8) + +def describe_mask(mask, maskrange): + ''' + Describe mask in terms of hot from-to ranges + ''' + ranges = [] + prev_hit = False + mask = np.concatenate((mask, [0])) + for i in range(len(mask)*8): + hit = mask[i//8] & (1<<(i%8)) != 0 + if hit and not prev_hit: + start = maskrange.start + i*granule + if not hit and prev_hit: + end = maskrange.start + i*granule + ranges.append((start, end)) + prev_hit = hit + return ranges + +if lp.is_t6000(): + maskrange = range(0x2_9000_0000, 0x4_0000_0000) +else: + maskrange = range(0x2_2000_0000, 0x3_0000_0000) + +pd_did_enable = set() +pmgr = u.adt["/arm-io/pmgr"] +ps_dev_by_id = {dev.id: dev for dev in pmgr.devices} +ps_deps = dict() +ps_addrs = dict() + +for dev in pmgr.devices: + ps = pmgr.ps_regs[dev.psreg] + addr = pmgr.get_reg(ps.reg)[0] + ps.offset + dev.psidx * 8 + + if lp.is_t6000() and dev.name.startswith("AOP_"): + addr = 0x292284000 + (dev.id - 403) * 8 + + ps_addrs[dev.name] = addr + ps_deps[dev.name] = [ + ps_dev_by_id[idx].name for idx + in dev.parents if idx in ps_dev_by_id + ] + +if lp.is_t6000(): + # on t6000, guess the AOP PD hierarchy (undocumented + # in ADT) by analogy with t8103 + ps_deps["AOP_GPIO"] += ["AOP_FILTER"] + ps_deps["AOP_BASE"] += ["AOP_FILTER"] + ps_deps["AOP_FR"] += ["AOP_FILTER"] + ps_deps["AOP_SPMI0"] += ["AOP_FR"] + ps_deps["AOP_SPMI1"] += ["AOP_FR"] + ps_deps["AOP_LEAP_CLK"] += ["AOP_FILTER"] + ps_deps["AOP_SHIM"] += ["AOP_BASE"] + 
ps_deps["AOP_UART0"] += ["AOP_SHIM"] + ps_deps["AOP_UART1"] += ["AOP_SHIM"] + ps_deps["AOP_UART2"] += ["AOP_SHIM"] + ps_deps["AOP_SCM"] += ["AOP_BASE", "AOP_FR"] + ps_deps["AOP_CPU"] += ["AOP_BASE"] + ps_deps["AOP_I2CM0"] += ["AOP_FR"] + ps_deps["AOP_I2CM1"] += ["AOP_FR"] + ps_deps["AOP_MCA0"] += ["AOP_FR", "AOP_SHIM"] + ps_deps["AOP_MCA1"] += ["AOP_FR", "AOP_SHIM"] + ps_deps["AOP_SPI0"] += ["AOP_FR"] + ps_deps["AOP_LEAP"] += ["AOP_LEAP_CLK"] + ps_deps["AOP_AUDIO_SHIM"] += ["AOP_LEAP_CLK"] + ps_deps["AOP_AUDIO_ADMA0"] += ["AOP_FR"] + ps_deps["AOP_PDMC_LPD"] += ["AOP_SHIM"] + ps_deps["AOP_SRAM"] += ["AOP_SCM", "AOP_CPU"] + +def ps_pstate(name): + return p.read32(ps_addrs[name]) & 0x0f + +def ps_enabled(name): + return p.read32(ps_addrs[name]) & 0x0f == 0x0f + +def ps_set_pstate(name, desired): + p.mask32(ps_addrs[name], 0xf, desired) + time.sleep(0.001) + actual = p.read32(ps_addrs[name]) + if actual & 0xf0 != desired << 4: + print("WARNING: %s stuck at pstate 0x%x (desired 0x%x)" \ + % (name, actual >> 4, desired)) + +def ps_enable(name): + print("Enabling %s..." % name) + ps_set_pstate(name, 0xf) + +def ps_disable(name): + p.mask32(ps_addrs[name], 0xf, 0x0) + +if args.domain: + ps_disable(args.domain) + + to_enable = set([args.domain]) + for dev in reversed(pmgr.devices): + if dev.name not in to_enable \ + or ps_enabled(dev.name): + continue + + for dep in ps_deps[dev.name]: + to_enable.add(dep) + + save = dict() + for dev in pmgr.devices: + if dev.name in to_enable: + save[dev.name] = ps_pstate(dev.name) + if dev.name != args.domain: + ps_enable(dev.name) + + premask = do_sweep(maskrange) + ps_enable(args.domain) + postmask = do_sweep(maskrange) + + print("Reverting...") + + for dev in reversed(pmgr.devices): + if dev.name in to_enable and dev.name: + ps_set_pstate(dev.name, save[dev.name]) + + hitmask = premask ^ postmask + if np.count_nonzero(hitmask & premask): + print("Que? 
Some ranges disappeared?") +else: + # no --domain flag, do a plain sweep + hitmask = do_sweep(maskrange) + +al = u.adt.build_addr_lookup() +for start, stop in describe_mask(hitmask, maskrange): + # bit ugly but it makes addrlookup do all the heavy lifting for us + al.add(range(start, stop), "hit") + +print("Hits:") +for zone, value in al.items(): + if ((zone.start - 1) // granule + 1) * granule >= zone.stop: + continue + if not any([v[0] == "hit" for v in value]): + continue + + labels = set([v[0] for v in value if v[0] != "hit"]) + print(f"\t{zone.start:9x} - {zone.stop:9x} | {' '.join(labels)}") diff --git a/tools/proxyclient/experiments/mtp.py b/tools/proxyclient/experiments/mtp.py new file mode 100644 index 0000000..56a72b1 --- /dev/null +++ b/tools/proxyclient/experiments/mtp.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +import sys, pathlib, fnmatch +import time +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +import struct +from m1n1.setup import * +from m1n1.fw.asc import StandardASC +from m1n1.hw.dart8110 import DART8110 +from m1n1.hw.dockchannel import DockChannel +from m1n1.fw.smc import SMCClient, SMCError +from m1n1.shell import run_shell +from m1n1.fw.mtp import * + +from construct import * + +mon = RegMonitor(u) + +#mon.add(0x23b700000, 0x10000) +mon.add(0x23d28c000, 0x4000) +mon.poll() +mon.poll() +mon.poll() +mon.add(0x24e400000, 0x4000) +mon.add(0x24e808000, 0x14000) + +smc_addr = u.adt["arm-io/smc"].get_reg(0)[0] +smc = SMCClient(u, smc_addr) +smc.start() +smc.start_ep(0x20) +smc.verbose = 0 + +p.dapf_init_all() + +dart = DART8110.from_adt(u, "/arm-io/dart-mtp", iova_range=(0x8000, 0x100000)) + +dart.regs.TCR[1].set(BYPASS_DAPF=0, BYPASS_DART=0, TRANSLATE_ENABLE=1) + +irq_base = u.adt["/arm-io/dockchannel-mtp"].get_reg(1)[0] +fifo_base = u.adt["/arm-io/dockchannel-mtp"].get_reg(2)[0] +dc = DockChannel(u, irq_base, fifo_base, 1) + +node = u.adt["/arm-io/dockchannel-mtp/mtp-transport"] + +while 
dc.rx_count: + dc.read(dc.rx_count) + +mtp_addr = u.adt["/arm-io/mtp"].get_reg(0)[0] +mtp = StandardASC(u, mtp_addr, dart, stream=1) +mtp.allow_phys = True +print("pre start") +mon.poll() +mtp.start() +print("started") +mon.poll() +print("ok") + +def poll(): + mtp.work() + mp.work_pending() + +# 0x40: device reset +# 0x42: + +# 0 -> mbox? +# 2 -> dapf? +# 3 -> dart? +# 3 -> dockchannel? +# 5 -> mbox? +def reset(i): + reg = 0x23d28c000 + i*8 + p.set32(reg, 1<<10) + p.set32(reg, 1<<31) + p.clear32(reg, 1<<31) + p.clear32(reg, 1<<10) + +try: + + mp = MTPProtocol(u, node, mtp, dc, smc) + + mp.wait_init("keyboard") + #mp.wait_init("multi_touch") + mp.wait_init("stm") + + mtp.stop() + mtp.start() + mon.poll() + + for i in range(256): + mp.stm.get_report(i) + mtp.work() + mp.work_pending() + + #for i in range(256): + #if i in (0x40, 0x42): + #continue + #m = UnkDeviceControlMsg() + #m.command = i + #for args in (b"", b"\x00", b"\x01", b"\x02", + #b"\x01\x00", b"\x01\x01", b"\x01\x02", + #b"\x00\x01", b"\x00\x02", b"\x00\x00", + #b"\x00\x00\x00", + #b"\x00\x00\x00\x00", + #b"\x00\x00\x00\x00\x00", + #b"\x00\x00\x00\x00\x00\x00", + #b"\x00\x00\x00\x00\x00\x00\x00", + #b"\x00\x00\x00\x00\x00\x00\x00\x00",): + #m.args = args + #print(f"{m.command:#x} {m.args.hex()}") + #mp.comm.device_control(m) + + #mon.poll() + #mtp.stop() + #mon.poll() + #mtp.start() + + #mon.poll() + #mtp.stop(1) + ##reset(1) + ##p.dapf_init_all() + + #mtp.boot() + + run_shell(locals(), poll_func=poll) + +finally: + #mtp.stop() + p.reboot() diff --git a/tools/proxyclient/experiments/ohmmeter.py b/tools/proxyclient/experiments/ohmmeter.py new file mode 100755 index 0000000..9ab24dd --- /dev/null +++ b/tools/proxyclient/experiments/ohmmeter.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +import sys, pathlib +import time +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +from m1n1.setup import * +from m1n1.hw.i2c import I2C, I2CRegMapDev +from 
m1n1.hw.codecs.cs42l84 import * + +class CS42L84(I2CRegMapDev): + REGMAP = CS42L84Regs + ADDRESSING = (0, 2) + +def read_devid(): + pass + +def sense_Z(): + r = l84.regs + + r.HS_CLAMP_DISABLE.set(HS_CLAMP_DISABLE=1) + r.DAC_CTRL2.set(PULLDOWN_R=E_PULLDOWN_R.R_1K1OHMS) + r.DCID_CTRL1.set(Z_RANGE=E_DCID_Z_RANGE.UNK2) + r.DCID_CTRL2.set(GND_SEL=E_DCID_GND_SEL.HS3) + r.MSM_BLOCK_EN3.set(DCID_EN=1) + r.DCID_CTRL3.set(START=0) + r.DCID_CTRL3.set(START=1) + + while not r.DCID_STATUS.reg.DONE: + pass + + reading = r.DCID_STATUS.reg.OVERALL + offset_trim = r.DCID_TRIM_OFFSET.val - 128 + slope_trim = r.DCID_TRIM_SLOPE.val - 128 + pulldown_trim = r.DCID_PULLDOWN_TRIM.val - 128 + + Y_overall = ((reading + 0.5) * 0.01086 - 1.0 / (1 - slope_trim * 0.001375)) \ + / (614.0 + offset_trim * 0.125) + Y_pulldown = 1.0 / (1100 - pulldown_trim*2) + + if Y_overall > Y_pulldown: + Z_headphones = 1.0 / (Y_overall - Y_pulldown) + else: + Z_headphones = float('inf') + + r.MSM_BLOCK_EN3.set(DCID_EN=0) + r.DAC_CTRL2.set(PULLDOWN_R=E_PULLDOWN_R.NONE) + + return Z_headphones + +def init_ring_tip_sense(): + l84.regs.MIC_DET_CTRL4.set(LATCH_TO_VP=1) + l84.regs.TIP_SENSE_CTRL2.set(CTRL=E_TIP_SENSE_CTRL.SHORT_DET) + + l84.regs.RING_SENSE_CTRL.set(INV=1, UNK1=1, + RISETIME=E_DEBOUNCE_TIME.T_125MS, FALLTIME=E_DEBOUNCE_TIME.T_125MS) + l84.regs.TIP_SENSE_CTRL.set(INV=1, + RISETIME=E_DEBOUNCE_TIME.T_500MS, FALLTIME=E_DEBOUNCE_TIME.T_125MS) + l84.regs.MSM_BLOCK_EN3.set(TR_SENSE_EN=1) + +def wait_for_plug(): + while not l84.regs.TR_SENSE_STATUS.reg.TIP_PLUG: + time.sleep(0.001) + +def wait_for_unplug(): + while l84.regs.TR_SENSE_STATUS.reg.TIP_UNPLUG: + time.sleep(0.001) + + +p.pmgr_adt_clocks_enable("/arm-io/i2c2") +i2c2 = I2C(u, "/arm-io/i2c2") + +p.write32(0x2921f0010, 0x76a02) # invoke reset +p.write32(0x2921f0010, 0x76a03) # out of reset + +l84 = CS42L84(i2c2, 0x4b) + +init_ring_tip_sense() + +while True: + print("Waiting for plug... 
", end=""); sys.stdout.flush() + wait_for_plug() + + print("measuring... ", end=""); sys.stdout.flush() + print(f"{sense_Z():.1f} ohms... ", end=""); sys.stdout.flush() + + wait_for_unplug() + print("yanked") diff --git a/tools/proxyclient/experiments/pcie_enable_devices.py b/tools/proxyclient/experiments/pcie_enable_devices.py new file mode 100755 index 0000000..6c1efcb --- /dev/null +++ b/tools/proxyclient/experiments/pcie_enable_devices.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +import sys, pathlib +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +from m1n1.setup import * +from m1n1.fw.smc import SMCClient + +smc_addr = u.adt["arm-io/smc"].get_reg(0)[0] +smc = SMCClient(u, smc_addr, None) + +smc.start() +smc.start_ep(0x20) + +smc.smcep.write32("gP0d", 0x800001) +smc.smcep.write32("gP1a", 1) + +smc.stop() diff --git a/tools/proxyclient/experiments/prores.py b/tools/proxyclient/experiments/prores.py new file mode 100644 index 0000000..0b4308f --- /dev/null +++ b/tools/proxyclient/experiments/prores.py @@ -0,0 +1,695 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +import sys, pathlib +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +from m1n1.setup import * +from m1n1.hw.dart8110 import DART8110, DART8110Regs +from m1n1.hw.prores import * +from m1n1.utils import * +import time + + +def divroundup(val, div): + return (val + div - 1) // div + + +def bswp16(x): + return (x >> 8) | ((x & 0xFF) << 8) + +# ffmpeg -y -i prores-encode-large.png -c:v rawvideo -pix_fmt nv24 prores-encode-large.yuv +im_W = 1920 +im_H = 1080 +with open('prores-encode-large.yuv', 'rb') as f: + im_data = f.read() +assert len(im_data) == (im_W*im_H) * 3 +image_data_luma = im_data[:im_W*im_H] +image_data_chroma = im_data[im_W*im_H:] + +p.pmgr_adt_clocks_enable(f'/arm-io/dart-apr0') +p.pmgr_adt_clocks_enable(f'/arm-io/apr0') + +dart = DART8110.from_adt(u, f'/arm-io/dart-apr0') +dart.initialize() + +apr_base, _ = 
u.adt[f'/arm-io/apr0'].get_reg(0) +apr = ProResRegs(u, apr_base) + +print(f"Register 0 (ID?) {apr.REG_0x0}") + +# TUNABLES +apr.MODE = 0x0 +apr.REG_0x118 = apr.REG_0x118.val & ~0x07FF07FF | 0x00000600 +apr.REG_0x148 = apr.REG_0x148.val & ~0x00000001 | 0x00000001 +apr.REG_0x160 = apr.REG_0x160.val & ~0x800F3FFF | 0x800A04FF +apr.REG_0x164 = apr.REG_0x164.val & ~0x07FF07FF | 0x07800080 +apr.REG_0x170 = apr.REG_0x170.val & ~0x800F3FFF | 0x800404FF +apr.REG_0x174 = apr.REG_0x174.val & ~0x07FF07FF | 0x06000080 +apr.REG_0x180 = apr.REG_0x180.val & ~0x800F3FFF | 0x800504FF +apr.REG_0x184 = apr.REG_0x184.val & ~0x07FF07FF | 0x06800080 +apr.REG_0x190 = apr.REG_0x190.val & ~0x800F3FFF | 0x800004FF +apr.REG_0x194 = apr.REG_0x194.val & ~0x000000FF | 0x00000040 +apr.REG_0x1a0 = apr.REG_0x1a0.val & ~0x800F3FFF | 0x800104FF +apr.REG_0x1a4 = apr.REG_0x1a4.val & ~0x000000FF | 0x00000080 +apr.REG_0x1b0 = apr.REG_0x1b0.val & ~0x800F3FFF | 0x800204FF +apr.REG_0x1b4 = apr.REG_0x1b4.val & ~0x000000FF | 0x00000040 +apr.REG_0x1c0 = apr.REG_0x1c0.val & ~0x800F3FFF | 0x800304FF +apr.REG_0x1c4 = apr.REG_0x1c4.val & ~0x000000FF | 0x00000040 +apr.REG_0x1d0 = apr.REG_0x1d0.val & ~0xBC00FF86 | 0xA4000786 +apr.REG_0x1d4 = apr.REG_0x1d4.val & ~0x000000FF | 0x00000020 +apr.REG_0x1d8 = apr.REG_0x1d8.val & ~0x000000FF | 0x000000FF +apr.REG_0x1dc = apr.REG_0x1dc.val & ~0x00FFFFFF | 0x00928170 +apr.REG_0x270 = apr.REG_0x270.val & ~0x800F3FFF | 0x800B08FF +apr.REG_0x274 = apr.REG_0x274.val & ~0x07FF07FF | 0x07000080 +apr.REG_0x280 = apr.REG_0x280.val & ~0xFFFFFFC0 | 0x00180000 +apr.REG_0x290 = apr.REG_0x290.val & ~0x800F3FFF | 0x800004FF +apr.REG_0x294 = apr.REG_0x294.val & ~0x000000FF | 0x00000080 +apr.REG_0x2a0 = apr.REG_0x2a0.val & ~0x800F3FFF | 0x800104FF +apr.REG_0x2a4 = apr.REG_0x2a4.val & ~0x000000FF | 0x00000080 +apr.REG_0x2b0 = apr.REG_0x2b0.val & ~0x800F3FFF | 0x800204FF +apr.REG_0x2b4 = apr.REG_0x2b4.val & ~0x000000FF | 0x00000040 +apr.REG_0x2c0 = apr.REG_0x2c0.val & ~0x800F3FFF | 0x800304FF 
+apr.REG_0x2c4 = apr.REG_0x2c4.val & ~0x000000FF | 0x00000040 +apr.REG_0x2d0 = apr.REG_0x2d0.val & ~0x802FF04C | 0x80070040 +apr.REG_0x2d4 = apr.REG_0x2d4.val & ~0x00000001 | 0x00000000 +apr.REG_0x2d8 = apr.REG_0x2d8.val & ~0xFFFF0003 | 0x00FF0003 +apr.REG_0x2e0 = apr.REG_0x2e0.val & ~0x07FF07FF | 0x06000040 +apr.REG_0x2f8 = apr.REG_0x2f8.val & ~0x802FF04C | 0x80081040 +apr.REG_0x2fc = apr.REG_0x2fc.val & ~0x00000001 | 0x00000000 +apr.REG_0x300 = apr.REG_0x300.val & ~0xFFFF0003 | 0x00FF0003 +apr.REG_0x308 = apr.REG_0x308.val & ~0x07FF07FF | 0x06400040 +apr.REG_0x320 = apr.REG_0x320.val & ~0x802FF04C | 0x80092040 +apr.REG_0x324 = apr.REG_0x324.val & ~0x00000001 | 0x00000000 +apr.REG_0x328 = apr.REG_0x328.val & ~0xFFFF0003 | 0x00FF0003 +apr.REG_0x330 = apr.REG_0x330.val & ~0x07FF07FF | 0x06800040 +apr.REG_0x350 = apr.REG_0x350.val & ~0x800F3FFF | 0x800B08FF +apr.REG_0x354 = apr.REG_0x354.val & ~0x07FF07FF | 0x076000A0 +apr.REG_0x360 = apr.REG_0x360.val & ~0xFFFFFFC0 | 0x00180000 +apr.REG_0x370 = apr.REG_0x370.val & ~0x800F3FFF | 0x800604FF +apr.REG_0x374 = apr.REG_0x374.val & ~0x07FF07FF | 0x06C000A0 +print("Applied tunables") + +# XXX test wrap around behavior +DESC_RING_SZ = 0x4000 +desc_ring_phys = u.heap.memalign(0x4000, DESC_RING_SZ) +desc_ring_iova = dart.iomap(0, desc_ring_phys, DESC_RING_SZ) +print(f"Descriptor ring @ phys {desc_ring_phys:016X} iova {desc_ring_iova:016X}") + +apr.DR_HEAD = 0 +apr.DR_TAIL = 0 +apr.DR_SIZE = DESC_RING_SZ +apr.DR_ADDR_LO = desc_ring_iova & 0xFFFFFFFF +apr.DR_ADDR_HI = desc_ring_iova >> 32 + +apr.MODE = 0xd # FIXME: dunno what this means + +# MATRICES +apr.QUANT_LUMA_EHQ[0].val = 0x802802 +apr.QUANT_CHROMA_EHQ[0].val = 0x804804 +apr.QUANT_LUMA_EHQ[1].val = 0x802802 +apr.QUANT_CHROMA_EHQ[1].val = 0x804804 +apr.QUANT_LUMA_EHQ[2].val = 0x802802 +apr.QUANT_CHROMA_EHQ[2].val = 0x804804 +apr.QUANT_LUMA_EHQ[3].val = 0x802802 +apr.QUANT_CHROMA_EHQ[3].val = 0x804804 +apr.QUANT_LUMA_EHQ[4].val = 0x802802 +apr.QUANT_CHROMA_EHQ[4].val = 
0x804804 +apr.QUANT_LUMA_EHQ[5].val = 0x802802 +apr.QUANT_CHROMA_EHQ[5].val = 0x804804 +apr.QUANT_LUMA_EHQ[6].val = 0x802802 +apr.QUANT_CHROMA_EHQ[6].val = 0x804804 +apr.QUANT_LUMA_EHQ[7].val = 0x802802 +apr.QUANT_CHROMA_EHQ[7].val = 0x804804 +apr.QUANT_LUMA_EHQ[8].val = 0x802802 +apr.QUANT_CHROMA_EHQ[8].val = 0x804804 +apr.QUANT_LUMA_EHQ[9].val = 0x802802 +apr.QUANT_CHROMA_EHQ[9].val = 0x804804 +apr.QUANT_LUMA_EHQ[10].val = 0x802802 +apr.QUANT_CHROMA_EHQ[10].val = 0x804804 +apr.QUANT_LUMA_EHQ[11].val = 0x802802 +apr.QUANT_CHROMA_EHQ[11].val = 0x804804 +apr.QUANT_LUMA_EHQ[12].val = 0x802802 +apr.QUANT_CHROMA_EHQ[12].val = 0x804804 +apr.QUANT_LUMA_EHQ[13].val = 0x802802 +apr.QUANT_CHROMA_EHQ[13].val = 0x804804 +apr.QUANT_LUMA_EHQ[14].val = 0x802802 +apr.QUANT_CHROMA_EHQ[14].val = 0x804804 +apr.QUANT_LUMA_EHQ[15].val = 0x803802 +apr.QUANT_CHROMA_EHQ[15].val = 0x805804 +apr.QUANT_LUMA_EHQ[16].val = 0x802802 +apr.QUANT_CHROMA_EHQ[16].val = 0x804804 +apr.QUANT_LUMA_EHQ[17].val = 0x802802 +apr.QUANT_CHROMA_EHQ[17].val = 0x804804 +apr.QUANT_LUMA_EHQ[18].val = 0x802802 +apr.QUANT_CHROMA_EHQ[18].val = 0x804804 +apr.QUANT_LUMA_EHQ[19].val = 0x803803 +apr.QUANT_CHROMA_EHQ[19].val = 0x805805 +apr.QUANT_LUMA_EHQ[20].val = 0x802802 +apr.QUANT_CHROMA_EHQ[20].val = 0x804804 +apr.QUANT_LUMA_EHQ[21].val = 0x802802 +apr.QUANT_CHROMA_EHQ[21].val = 0x804804 +apr.QUANT_LUMA_EHQ[22].val = 0x803802 +apr.QUANT_CHROMA_EHQ[22].val = 0x805804 +apr.QUANT_LUMA_EHQ[23].val = 0x803803 +apr.QUANT_CHROMA_EHQ[23].val = 0x806805 +apr.QUANT_LUMA_EHQ[24].val = 0x802802 +apr.QUANT_CHROMA_EHQ[24].val = 0x804804 +apr.QUANT_LUMA_EHQ[25].val = 0x802802 +apr.QUANT_CHROMA_EHQ[25].val = 0x804804 +apr.QUANT_LUMA_EHQ[26].val = 0x803803 +apr.QUANT_CHROMA_EHQ[26].val = 0x805805 +apr.QUANT_LUMA_EHQ[27].val = 0x804803 +apr.QUANT_CHROMA_EHQ[27].val = 0x807806 +apr.QUANT_LUMA_EHQ[28].val = 0x802802 +apr.QUANT_CHROMA_EHQ[28].val = 0x804804 +apr.QUANT_LUMA_EHQ[29].val = 0x802802 +apr.QUANT_CHROMA_EHQ[29].val = 0x804804 
+apr.QUANT_LUMA_EHQ[30].val = 0x803803 +apr.QUANT_CHROMA_EHQ[30].val = 0x806805 +apr.QUANT_LUMA_EHQ[31].val = 0x804804 +apr.QUANT_CHROMA_EHQ[31].val = 0x807807 + +apr.QUANT_LUMA_HQ[0].val = 0x804804 +apr.QUANT_CHROMA_HQ[0].val = 0x804804 +apr.QUANT_LUMA_HQ[1].val = 0x804804 +apr.QUANT_CHROMA_HQ[1].val = 0x804804 +apr.QUANT_LUMA_HQ[2].val = 0x804804 +apr.QUANT_CHROMA_HQ[2].val = 0x804804 +apr.QUANT_LUMA_HQ[3].val = 0x804804 +apr.QUANT_CHROMA_HQ[3].val = 0x804804 +apr.QUANT_LUMA_HQ[4].val = 0x804804 +apr.QUANT_CHROMA_HQ[4].val = 0x804804 +apr.QUANT_LUMA_HQ[5].val = 0x804804 +apr.QUANT_CHROMA_HQ[5].val = 0x804804 +apr.QUANT_LUMA_HQ[6].val = 0x804804 +apr.QUANT_CHROMA_HQ[6].val = 0x804804 +apr.QUANT_LUMA_HQ[7].val = 0x804804 +apr.QUANT_CHROMA_HQ[7].val = 0x804804 +apr.QUANT_LUMA_HQ[8].val = 0x804804 +apr.QUANT_CHROMA_HQ[8].val = 0x804804 +apr.QUANT_LUMA_HQ[9].val = 0x804804 +apr.QUANT_CHROMA_HQ[9].val = 0x804804 +apr.QUANT_LUMA_HQ[10].val = 0x804804 +apr.QUANT_CHROMA_HQ[10].val = 0x804804 +apr.QUANT_LUMA_HQ[11].val = 0x804804 +apr.QUANT_CHROMA_HQ[11].val = 0x804804 +apr.QUANT_LUMA_HQ[12].val = 0x804804 +apr.QUANT_CHROMA_HQ[12].val = 0x804804 +apr.QUANT_LUMA_HQ[13].val = 0x804804 +apr.QUANT_CHROMA_HQ[13].val = 0x804804 +apr.QUANT_LUMA_HQ[14].val = 0x804804 +apr.QUANT_CHROMA_HQ[14].val = 0x804804 +apr.QUANT_LUMA_HQ[15].val = 0x805804 +apr.QUANT_CHROMA_HQ[15].val = 0x805804 +apr.QUANT_LUMA_HQ[16].val = 0x804804 +apr.QUANT_CHROMA_HQ[16].val = 0x804804 +apr.QUANT_LUMA_HQ[17].val = 0x804804 +apr.QUANT_CHROMA_HQ[17].val = 0x804804 +apr.QUANT_LUMA_HQ[18].val = 0x804804 +apr.QUANT_CHROMA_HQ[18].val = 0x804804 +apr.QUANT_LUMA_HQ[19].val = 0x805805 +apr.QUANT_CHROMA_HQ[19].val = 0x805805 +apr.QUANT_LUMA_HQ[20].val = 0x804804 +apr.QUANT_CHROMA_HQ[20].val = 0x804804 +apr.QUANT_LUMA_HQ[21].val = 0x804804 +apr.QUANT_CHROMA_HQ[21].val = 0x804804 +apr.QUANT_LUMA_HQ[22].val = 0x805804 +apr.QUANT_CHROMA_HQ[22].val = 0x805804 +apr.QUANT_LUMA_HQ[23].val = 0x806805 
+apr.QUANT_CHROMA_HQ[23].val = 0x806805 +apr.QUANT_LUMA_HQ[24].val = 0x804804 +apr.QUANT_CHROMA_HQ[24].val = 0x804804 +apr.QUANT_LUMA_HQ[25].val = 0x804804 +apr.QUANT_CHROMA_HQ[25].val = 0x804804 +apr.QUANT_LUMA_HQ[26].val = 0x805805 +apr.QUANT_CHROMA_HQ[26].val = 0x805805 +apr.QUANT_LUMA_HQ[27].val = 0x807806 +apr.QUANT_CHROMA_HQ[27].val = 0x807806 +apr.QUANT_LUMA_HQ[28].val = 0x804804 +apr.QUANT_CHROMA_HQ[28].val = 0x804804 +apr.QUANT_LUMA_HQ[29].val = 0x804804 +apr.QUANT_CHROMA_HQ[29].val = 0x804804 +apr.QUANT_LUMA_HQ[30].val = 0x806805 +apr.QUANT_CHROMA_HQ[30].val = 0x806805 +apr.QUANT_LUMA_HQ[31].val = 0x807807 +apr.QUANT_CHROMA_HQ[31].val = 0x807807 + +apr.QUANT_LUMA_NQ[0].val = 0x804804 +apr.QUANT_CHROMA_NQ[0].val = 0x804804 +apr.QUANT_LUMA_NQ[1].val = 0x805805 +apr.QUANT_CHROMA_NQ[1].val = 0x805805 +apr.QUANT_LUMA_NQ[2].val = 0x807806 +apr.QUANT_CHROMA_NQ[2].val = 0x807806 +apr.QUANT_LUMA_NQ[3].val = 0x809807 +apr.QUANT_CHROMA_NQ[3].val = 0x809807 +apr.QUANT_LUMA_NQ[4].val = 0x804804 +apr.QUANT_CHROMA_NQ[4].val = 0x804804 +apr.QUANT_LUMA_NQ[5].val = 0x806805 +apr.QUANT_CHROMA_NQ[5].val = 0x806805 +apr.QUANT_LUMA_NQ[6].val = 0x807807 +apr.QUANT_CHROMA_NQ[6].val = 0x807807 +apr.QUANT_LUMA_NQ[7].val = 0x809809 +apr.QUANT_CHROMA_NQ[7].val = 0x809809 +apr.QUANT_LUMA_NQ[8].val = 0x805805 +apr.QUANT_CHROMA_NQ[8].val = 0x805805 +apr.QUANT_LUMA_NQ[9].val = 0x807806 +apr.QUANT_CHROMA_NQ[9].val = 0x807806 +apr.QUANT_LUMA_NQ[10].val = 0x809807 +apr.QUANT_CHROMA_NQ[10].val = 0x809807 +apr.QUANT_LUMA_NQ[11].val = 0x80a809 +apr.QUANT_CHROMA_NQ[11].val = 0x80a809 +apr.QUANT_LUMA_NQ[12].val = 0x805805 +apr.QUANT_CHROMA_NQ[12].val = 0x805805 +apr.QUANT_LUMA_NQ[13].val = 0x807806 +apr.QUANT_CHROMA_NQ[13].val = 0x807806 +apr.QUANT_LUMA_NQ[14].val = 0x809807 +apr.QUANT_CHROMA_NQ[14].val = 0x809807 +apr.QUANT_LUMA_NQ[15].val = 0x80a809 +apr.QUANT_CHROMA_NQ[15].val = 0x80a809 +apr.QUANT_LUMA_NQ[16].val = 0x806805 +apr.QUANT_CHROMA_NQ[16].val = 0x806805 +apr.QUANT_LUMA_NQ[17].val 
= 0x807807 +apr.QUANT_CHROMA_NQ[17].val = 0x807807 +apr.QUANT_LUMA_NQ[18].val = 0x809808 +apr.QUANT_CHROMA_NQ[18].val = 0x809808 +apr.QUANT_LUMA_NQ[19].val = 0x80c80a +apr.QUANT_CHROMA_NQ[19].val = 0x80c80a +apr.QUANT_LUMA_NQ[20].val = 0x807806 +apr.QUANT_CHROMA_NQ[20].val = 0x807806 +apr.QUANT_LUMA_NQ[21].val = 0x808807 +apr.QUANT_CHROMA_NQ[21].val = 0x808807 +apr.QUANT_LUMA_NQ[22].val = 0x80a809 +apr.QUANT_CHROMA_NQ[22].val = 0x80a809 +apr.QUANT_LUMA_NQ[23].val = 0x80f80c +apr.QUANT_CHROMA_NQ[23].val = 0x80f80c +apr.QUANT_LUMA_NQ[24].val = 0x807806 +apr.QUANT_CHROMA_NQ[24].val = 0x807806 +apr.QUANT_LUMA_NQ[25].val = 0x809807 +apr.QUANT_CHROMA_NQ[25].val = 0x809807 +apr.QUANT_LUMA_NQ[26].val = 0x80b80a +apr.QUANT_CHROMA_NQ[26].val = 0x80b80a +apr.QUANT_LUMA_NQ[27].val = 0x81180e +apr.QUANT_CHROMA_NQ[27].val = 0x81180e +apr.QUANT_LUMA_NQ[28].val = 0x807807 +apr.QUANT_CHROMA_NQ[28].val = 0x807807 +apr.QUANT_LUMA_NQ[29].val = 0x80a809 +apr.QUANT_CHROMA_NQ[29].val = 0x80a809 +apr.QUANT_LUMA_NQ[30].val = 0x80e80b +apr.QUANT_CHROMA_NQ[30].val = 0x80e80b +apr.QUANT_LUMA_NQ[31].val = 0x815811 +apr.QUANT_CHROMA_NQ[31].val = 0x815811 + +apr.QUANT_LUMA_LT[0].val = 0x805804 +apr.QUANT_CHROMA_LT[0].val = 0x805804 +apr.QUANT_LUMA_LT[1].val = 0x807806 +apr.QUANT_CHROMA_LT[1].val = 0x807806 +apr.QUANT_LUMA_LT[2].val = 0x80b809 +apr.QUANT_CHROMA_LT[2].val = 0x80b809 +apr.QUANT_LUMA_LT[3].val = 0x80f80d +apr.QUANT_CHROMA_LT[3].val = 0x80f80d +apr.QUANT_LUMA_LT[4].val = 0x805805 +apr.QUANT_CHROMA_LT[4].val = 0x805805 +apr.QUANT_LUMA_LT[5].val = 0x808807 +apr.QUANT_CHROMA_LT[5].val = 0x808807 +apr.QUANT_LUMA_LT[6].val = 0x80d80b +apr.QUANT_CHROMA_LT[6].val = 0x80d80b +apr.QUANT_LUMA_LT[7].val = 0x81180f +apr.QUANT_CHROMA_LT[7].val = 0x81180f +apr.QUANT_LUMA_LT[8].val = 0x807806 +apr.QUANT_CHROMA_LT[8].val = 0x807806 +apr.QUANT_LUMA_LT[9].val = 0x80b809 +apr.QUANT_CHROMA_LT[9].val = 0x80b809 +apr.QUANT_LUMA_LT[10].val = 0x80f80d +apr.QUANT_CHROMA_LT[10].val = 0x80f80d 
+apr.QUANT_LUMA_LT[11].val = 0x81180f +apr.QUANT_CHROMA_LT[11].val = 0x81180f +apr.QUANT_LUMA_LT[12].val = 0x807807 +apr.QUANT_CHROMA_LT[12].val = 0x807807 +apr.QUANT_LUMA_LT[13].val = 0x80b809 +apr.QUANT_CHROMA_LT[13].val = 0x80b809 +apr.QUANT_LUMA_LT[14].val = 0x80f80d +apr.QUANT_CHROMA_LT[14].val = 0x80f80d +apr.QUANT_LUMA_LT[15].val = 0x813811 +apr.QUANT_CHROMA_LT[15].val = 0x813811 +apr.QUANT_LUMA_LT[16].val = 0x809807 +apr.QUANT_CHROMA_LT[16].val = 0x809807 +apr.QUANT_LUMA_LT[17].val = 0x80d80b +apr.QUANT_CHROMA_LT[17].val = 0x80d80b +apr.QUANT_LUMA_LT[18].val = 0x81080e +apr.QUANT_CHROMA_LT[18].val = 0x81080e +apr.QUANT_LUMA_LT[19].val = 0x817813 +apr.QUANT_CHROMA_LT[19].val = 0x817813 +apr.QUANT_LUMA_LT[20].val = 0x80b809 +apr.QUANT_CHROMA_LT[20].val = 0x80b809 +apr.QUANT_LUMA_LT[21].val = 0x80e80d +apr.QUANT_CHROMA_LT[21].val = 0x80e80d +apr.QUANT_LUMA_LT[22].val = 0x813810 +apr.QUANT_CHROMA_LT[22].val = 0x813810 +apr.QUANT_LUMA_LT[23].val = 0x81d817 +apr.QUANT_CHROMA_LT[23].val = 0x81d817 +apr.QUANT_LUMA_LT[24].val = 0x80b809 +apr.QUANT_CHROMA_LT[24].val = 0x80b809 +apr.QUANT_LUMA_LT[25].val = 0x80f80d +apr.QUANT_CHROMA_LT[25].val = 0x80f80d +apr.QUANT_LUMA_LT[26].val = 0x815811 +apr.QUANT_CHROMA_LT[26].val = 0x815811 +apr.QUANT_LUMA_LT[27].val = 0x82381c +apr.QUANT_CHROMA_LT[27].val = 0x82381c +apr.QUANT_LUMA_LT[28].val = 0x80d80b +apr.QUANT_CHROMA_LT[28].val = 0x80d80b +apr.QUANT_LUMA_LT[29].val = 0x811810 +apr.QUANT_CHROMA_LT[29].val = 0x811810 +apr.QUANT_LUMA_LT[30].val = 0x81c815 +apr.QUANT_CHROMA_LT[30].val = 0x81c815 +apr.QUANT_LUMA_LT[31].val = 0x829823 +apr.QUANT_CHROMA_LT[31].val = 0x829823 + +apr.QUANT_LUMA_PROXY[0].val = 0x807804 +apr.QUANT_CHROMA_PROXY[0].val = 0x807804 +apr.QUANT_LUMA_PROXY[1].val = 0x80b809 +apr.QUANT_CHROMA_PROXY[1].val = 0x80b809 +apr.QUANT_LUMA_PROXY[2].val = 0x80e80d +apr.QUANT_CHROMA_PROXY[2].val = 0x80e80d +apr.QUANT_LUMA_PROXY[3].val = 0xfff83f +apr.QUANT_CHROMA_PROXY[3].val = 0xfff83f +apr.QUANT_LUMA_PROXY[4].val = 
0x807807 +apr.QUANT_CHROMA_PROXY[4].val = 0x807807 +apr.QUANT_LUMA_PROXY[5].val = 0x80c80b +apr.QUANT_CHROMA_PROXY[5].val = 0x80c80b +apr.QUANT_LUMA_PROXY[6].val = 0x83f80e +apr.QUANT_CHROMA_PROXY[6].val = 0x83f80e +apr.QUANT_LUMA_PROXY[7].val = 0xffffff +apr.QUANT_CHROMA_PROXY[7].val = 0xffffff +apr.QUANT_LUMA_PROXY[8].val = 0x80b809 +apr.QUANT_CHROMA_PROXY[8].val = 0x80b809 +apr.QUANT_LUMA_PROXY[9].val = 0x80e80d +apr.QUANT_CHROMA_PROXY[9].val = 0x80e80d +apr.QUANT_LUMA_PROXY[10].val = 0xfff83f +apr.QUANT_CHROMA_PROXY[10].val = 0xfff83f +apr.QUANT_LUMA_PROXY[11].val = 0xffffff +apr.QUANT_CHROMA_PROXY[11].val = 0xffffff +apr.QUANT_LUMA_PROXY[12].val = 0x80b80b +apr.QUANT_CHROMA_PROXY[12].val = 0x80b80b +apr.QUANT_LUMA_PROXY[13].val = 0x80e80d +apr.QUANT_CHROMA_PROXY[13].val = 0x80e80d +apr.QUANT_LUMA_PROXY[14].val = 0xffffff +apr.QUANT_CHROMA_PROXY[14].val = 0xffffff +apr.QUANT_LUMA_PROXY[15].val = 0xffffff +apr.QUANT_CHROMA_PROXY[15].val = 0xffffff +apr.QUANT_LUMA_PROXY[16].val = 0x80d80b +apr.QUANT_CHROMA_PROXY[16].val = 0x80d80b +apr.QUANT_LUMA_PROXY[17].val = 0xfff80e +apr.QUANT_CHROMA_PROXY[17].val = 0xfff80e +apr.QUANT_LUMA_PROXY[18].val = 0xffffff +apr.QUANT_CHROMA_PROXY[18].val = 0xffffff +apr.QUANT_LUMA_PROXY[19].val = 0xffffff +apr.QUANT_CHROMA_PROXY[19].val = 0xffffff +apr.QUANT_LUMA_PROXY[20].val = 0x80e80d +apr.QUANT_CHROMA_PROXY[20].val = 0x80e80d +apr.QUANT_LUMA_PROXY[21].val = 0xffffff +apr.QUANT_CHROMA_PROXY[21].val = 0xffffff +apr.QUANT_LUMA_PROXY[22].val = 0xffffff +apr.QUANT_CHROMA_PROXY[22].val = 0xffffff +apr.QUANT_LUMA_PROXY[23].val = 0xffffff +apr.QUANT_CHROMA_PROXY[23].val = 0xffffff +apr.QUANT_LUMA_PROXY[24].val = 0xfff80d +apr.QUANT_CHROMA_PROXY[24].val = 0xfff80d +apr.QUANT_LUMA_PROXY[25].val = 0xffffff +apr.QUANT_CHROMA_PROXY[25].val = 0xffffff +apr.QUANT_LUMA_PROXY[26].val = 0xffffff +apr.QUANT_CHROMA_PROXY[26].val = 0xffffff +apr.QUANT_LUMA_PROXY[27].val = 0xffffff +apr.QUANT_CHROMA_PROXY[27].val = 0xffffff 
+apr.QUANT_LUMA_PROXY[28].val = 0xffffff +apr.QUANT_CHROMA_PROXY[28].val = 0xffffff +apr.QUANT_LUMA_PROXY[29].val = 0xffffff +apr.QUANT_CHROMA_PROXY[29].val = 0xffffff +apr.QUANT_LUMA_PROXY[30].val = 0xffffff +apr.QUANT_CHROMA_PROXY[30].val = 0xffffff +apr.QUANT_LUMA_PROXY[31].val = 0xffffff +apr.QUANT_CHROMA_PROXY[31].val = 0xffffff + +apr.DC_QUANT_SCALE[0].val = 0x401 +apr.DC_QUANT_SCALE[1].val = 0x803 +apr.DC_QUANT_SCALE[2].val = 0xc05 +apr.DC_QUANT_SCALE[3].val = 0x1007 +apr.DC_QUANT_SCALE[4].val = 0x1409 +apr.DC_QUANT_SCALE[5].val = 0x180b +apr.DC_QUANT_SCALE[6].val = 0x1c0d +apr.DC_QUANT_SCALE[7].val = 0x200f +apr.DC_QUANT_SCALE[8].val = 0x2411 +apr.DC_QUANT_SCALE[9].val = 0x2813 +apr.DC_QUANT_SCALE[10].val = 0x2c15 +apr.DC_QUANT_SCALE[11].val = 0x3017 +apr.DC_QUANT_SCALE[12].val = 0x3419 +apr.DC_QUANT_SCALE[13].val = 0x381b +apr.DC_QUANT_SCALE[14].val = 0x3c1d +apr.DC_QUANT_SCALE[15].val = 0x401f +apr.DC_QUANT_SCALE[16].val = 0x4421 +apr.DC_QUANT_SCALE[17].val = 0x4823 +apr.DC_QUANT_SCALE[18].val = 0x4c25 +apr.DC_QUANT_SCALE[19].val = 0x5027 +apr.DC_QUANT_SCALE[20].val = 0x5429 +apr.DC_QUANT_SCALE[21].val = 0x582b +apr.DC_QUANT_SCALE[22].val = 0x5c2d +apr.DC_QUANT_SCALE[23].val = 0x602f +apr.DC_QUANT_SCALE[24].val = 0x6431 +apr.DC_QUANT_SCALE[25].val = 0x6833 +apr.DC_QUANT_SCALE[26].val = 0x6c35 +apr.DC_QUANT_SCALE[27].val = 0x7037 +apr.DC_QUANT_SCALE[28].val = 0x7439 +apr.DC_QUANT_SCALE[29].val = 0x783b +apr.DC_QUANT_SCALE[30].val = 0x7c3d +apr.DC_QUANT_SCALE[31].val = 0x803f +apr.DC_QUANT_SCALE[32].val = 0x8441 +apr.DC_QUANT_SCALE[33].val = 0x8843 +apr.DC_QUANT_SCALE[34].val = 0x8c45 +apr.DC_QUANT_SCALE[35].val = 0x9047 +apr.DC_QUANT_SCALE[36].val = 0x9449 +apr.DC_QUANT_SCALE[37].val = 0x984b +apr.DC_QUANT_SCALE[38].val = 0x9c4d +apr.DC_QUANT_SCALE[39].val = 0xa04f +apr.DC_QUANT_SCALE[40].val = 0xa451 +apr.DC_QUANT_SCALE[41].val = 0xa853 +apr.DC_QUANT_SCALE[42].val = 0xac55 +apr.DC_QUANT_SCALE[43].val = 0xb057 +apr.DC_QUANT_SCALE[44].val = 0xb459 
+apr.DC_QUANT_SCALE[45].val = 0xb85b +apr.DC_QUANT_SCALE[46].val = 0xbc5d +apr.DC_QUANT_SCALE[47].val = 0xc05f +apr.DC_QUANT_SCALE[48].val = 0xc461 +apr.DC_QUANT_SCALE[49].val = 0xc863 +apr.DC_QUANT_SCALE[50].val = 0xcc65 +apr.DC_QUANT_SCALE[51].val = 0xd067 +apr.DC_QUANT_SCALE[52].val = 0xd469 +apr.DC_QUANT_SCALE[53].val = 0xd86b +apr.DC_QUANT_SCALE[54].val = 0xdc6d +apr.DC_QUANT_SCALE[55].val = 0xe06f +apr.DC_QUANT_SCALE[56].val = 0xe471 +apr.DC_QUANT_SCALE[57].val = 0xe873 +apr.DC_QUANT_SCALE[58].val = 0xec75 +apr.DC_QUANT_SCALE[59].val = 0xf077 +apr.DC_QUANT_SCALE[60].val = 0xf479 +apr.DC_QUANT_SCALE[61].val = 0xf87b +apr.DC_QUANT_SCALE[62].val = 0xfc7d +apr.DC_QUANT_SCALE[63].val = 0x1007f +apr.DC_QUANT_SCALE[64].val = 0x11084 +apr.DC_QUANT_SCALE[65].val = 0x1208c +apr.DC_QUANT_SCALE[66].val = 0x13094 +apr.DC_QUANT_SCALE[67].val = 0x1409c +apr.DC_QUANT_SCALE[68].val = 0x150a4 +apr.DC_QUANT_SCALE[69].val = 0x160ac +apr.DC_QUANT_SCALE[70].val = 0x170b4 +apr.DC_QUANT_SCALE[71].val = 0x180bc +apr.DC_QUANT_SCALE[72].val = 0x190c4 +apr.DC_QUANT_SCALE[73].val = 0x1a0cc +apr.DC_QUANT_SCALE[74].val = 0x1b0d4 +apr.DC_QUANT_SCALE[75].val = 0x1c0dc +apr.DC_QUANT_SCALE[76].val = 0x1d0e4 +apr.DC_QUANT_SCALE[77].val = 0x1e0ec +apr.DC_QUANT_SCALE[78].val = 0x1f0f4 +apr.DC_QUANT_SCALE[79].val = 0x200fc +apr.DC_QUANT_SCALE[80].val = 0x21104 +apr.DC_QUANT_SCALE[81].val = 0x2210c +apr.DC_QUANT_SCALE[82].val = 0x23114 +apr.DC_QUANT_SCALE[83].val = 0x2411c +apr.DC_QUANT_SCALE[84].val = 0x25124 +apr.DC_QUANT_SCALE[85].val = 0x2612c +apr.DC_QUANT_SCALE[86].val = 0x27134 +apr.DC_QUANT_SCALE[87].val = 0x2813c +apr.DC_QUANT_SCALE[88].val = 0x29144 +apr.DC_QUANT_SCALE[89].val = 0x2a14c +apr.DC_QUANT_SCALE[90].val = 0x2b154 +apr.DC_QUANT_SCALE[91].val = 0x2c15c +apr.DC_QUANT_SCALE[92].val = 0x2d164 +apr.DC_QUANT_SCALE[93].val = 0x2e16c +apr.DC_QUANT_SCALE[94].val = 0x2f174 +apr.DC_QUANT_SCALE[95].val = 0x3017c +apr.DC_QUANT_SCALE[96].val = 0x31184 +apr.DC_QUANT_SCALE[97].val = 0x3218c 
+apr.DC_QUANT_SCALE[98].val = 0x33194 +apr.DC_QUANT_SCALE[99].val = 0x3419c +apr.DC_QUANT_SCALE[100].val = 0x351a4 +apr.DC_QUANT_SCALE[101].val = 0x361ac +apr.DC_QUANT_SCALE[102].val = 0x371b4 +apr.DC_QUANT_SCALE[103].val = 0x381bc +apr.DC_QUANT_SCALE[104].val = 0x391c4 +apr.DC_QUANT_SCALE[105].val = 0x3a1cc +apr.DC_QUANT_SCALE[106].val = 0x3b1d4 +apr.DC_QUANT_SCALE[107].val = 0x3c1dc +apr.DC_QUANT_SCALE[108].val = 0x3d1e4 +apr.DC_QUANT_SCALE[109].val = 0x3e1ec +apr.DC_QUANT_SCALE[110].val = 0x3f1f4 +apr.DC_QUANT_SCALE[111].val = 0x1fc +print("Set matrices") + +# dunno how this gets calculated +OUT_SZ = 0x1000000 +out_buf_phys = u.heap.memalign(0x4000, OUT_SZ) +iface.writemem(out_buf_phys, b'\xAA' * OUT_SZ) +out_buf_iova = dart.iomap(0, out_buf_phys, OUT_SZ) +print(f"Output buffer @ phys {out_buf_phys:016X} iova {out_buf_iova:016X}") + +IN_SZ_LUMA = align_up(im_W*im_H) +in_buf_luma_phys = u.heap.memalign(0x4000, IN_SZ_LUMA) +iface.writemem(in_buf_luma_phys, image_data_luma + b'\xaa' * (IN_SZ_LUMA - len(image_data_luma))) +in_buf_luma_iova = dart.iomap(0, in_buf_luma_phys, IN_SZ_LUMA) +print(f"Input buffer luma @ phys {in_buf_luma_phys:016X} iova {in_buf_luma_iova:016X}") +IN_SZ_CHROMA = align_up(im_W*2*im_H) +in_buf_chroma_phys = u.heap.memalign(0x4000, IN_SZ_CHROMA) +iface.writemem(in_buf_chroma_phys, image_data_chroma + b'\xaa' * (IN_SZ_CHROMA - len(image_data_chroma))) +in_buf_chroma_iova = dart.iomap(0, in_buf_chroma_phys, IN_SZ_CHROMA) +print(f"Input buffer chroma @ phys {in_buf_chroma_phys:016X} iova {in_buf_chroma_iova:016X}") +dart.dump_all() + +desc = EncodeNotRawDescriptor( + flags=0x373c, + flags2=0, + output_iova=out_buf_iova, + max_out_sz=OUT_SZ, + offset_x=0, + offset_y=0, + # this is the important set + pix_surface_w_2_=im_W, + pix_surface_h_2_=im_H, + # changing this doesn't seem to break anything + pix_surface_w=im_W, + pix_surface_h=im_H, + # XXX how does the div work exactly? 
it's different in "tiled" mode + luma_stride=divroundup(im_W, 64), + chroma_stride=divroundup(im_W*2, 64), + alpha_stride=divroundup(im_W, 64), + unk_pad_0x26_=b'\x00\x00', + + luma_iova=in_buf_luma_iova, + pix_plane0_tileheader_thing_=0, + chroma_iova=in_buf_chroma_iova, + pix_plane1_tileheader_thing_=0, + alpha_iova=in_buf_luma_iova, + pix_plane2_tileheader_thing_=0, + + # changing this does add extra 0 bytes + frame_header_sz=bswp16(0x94), + unk_pad_0x5a_=b'\x00', + bitstream_version=0, + encoder_identifier=0xcafeface, + # cannot change arbitrily, will break + pix_surface_w_byteswap_=bswp16(im_W), + pix_surface_h_byteswap_=bswp16(im_H), + # seemingly can change arbitrarily + chroma_format_interlace_mode=0xc0, + aspect_ratio_frame_rate=0, + color_primaries=2, + transfer_characteristic=2, + matrix_coefficients=1, + alpha_channel_type=1, + # tables will still be output even if bits not set here + frame_hdr_reserved14=b'\x00\x03', + unk_pad_0x6c_=b'\x00' * 128, + deprecated_number_of_slices=0, + # this one affects the encoding not just the header + log2_desired_slice_size_in_mb=0x30, + quantization_index=0x2, + + # this impacts the quality somehow, not quite understood + # might be a target bitrate + unk_0xf0_=0xffff, + unk_0xf2_=0xffff, + + # none of this stuff is understood, and it never seems to change + unk_0xf4_=0x8000402015100c0c, + unk_0xfc_=0x2c8080, + unk_0x100_0_=0x880080, + unk_0x100_1_=0x4e00c5, + unk_0x100_2_=0x9000d0, + unk_0x100_3_=0x200122, + unk_0x110_0_=0x400200, # looks like a quant table, but ??? not used ??? 
+ unk_0x110_1_=0x400200, + unk_0x110_2_=0x400200, + unk_0x110_3_=0x400200, + unk_0x110_4_=0x400200, + unk_0x110_5_=0x400200, + unk_0x110_6_=0x400200, + unk_0x110_7_=0x400200, + unk_0x110_8_=0x400200, + unk_0x110_9_=0x400200, + unk_0x110_10_=0x400200, + unk_0x110_11_=0x400200, + unk_0x110_12_=0x400200, + unk_0x110_13_=0x400200, + unk_0x110_14_=0x400200, + unk_0x110_15_=0x400200, + + quant_table_sel=0x23, + unk_pad_0x154_=b'\x00' * 44, +) +desc_bytes = struct.pack(ENCODE_NOT_RAW_STRUCT, *desc) +chexdump(desc_bytes) + +iface.writemem(desc_ring_phys, desc_bytes) + +# let's go +apr.DR_HEAD = len(desc_bytes) + +start_time = time.time() +while apr.IRQ_STATUS.val == 0: + if time.time() - start_time > 5: + print("TIMED OUT!!!") + break + +print(f"Done, IRQ status is {apr.IRQ_STATUS}") +print(f"ST0 = {apr.ST0}") +print(f"ST1 = {apr.ST1}") +print(f"REG_0x1c = {apr.REG_0x1c}") +print(f"REG_0x3c = {apr.REG_0x3c}") +print(f"REG_0x44 = {apr.REG_0x44}") + +print(f"DR_HEAD = {apr.DR_HEAD}") +print(f"DR_TAIL = {apr.DR_TAIL}") + +print(f"unk REG_0x38 = {apr.REG_0x38}") +print(f"unk REG_0x40 = {apr.REG_0x40}") +print(f"unk REG_0x48 = {apr.REG_0x48}") +print(f"unk REG_0x50 = {apr.REG_0x50}") +print(f"unk REG_0x54 = {apr.REG_0x54}") + +apr.IRQ_STATUS = apr.IRQ_STATUS.val + +dr_memory_new = iface.readmem(desc_ring_phys, DESC_RING_SZ) +chexdump(dr_memory_new) + +out_buf_new = iface.readmem(out_buf_phys, OUT_SZ) +with open('prores.bin', 'wb') as f: + f.write(out_buf_new) + +outlen = struct.unpack(">I", out_buf_new[:4])[0] +if outlen <= len(out_buf_new): + with open('prores.mov', 'wb') as f: + f.write(b'\x00\x00\x00\x14\x66\x74\x79\x70\x71\x74\x20\x20\x00\x00\x02\x00\x71\x74\x20\x20\x00\x00\x00\x08\x77\x69\x64\x65') + f.write(struct.pack(">I", outlen + 8)) + f.write(b'mdat') + f.write(out_buf_new[:outlen]) + 
f.write(b'\x00\x00\x03\x1e\x6d\x6f\x6f\x76\x00\x00\x00\x6c\x6d\x76\x68\x64\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\xe8\x00\x00\x00\x11\x00\x01\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x02\x89\x74\x72\x61\x6b\x00\x00\x00\x5c\x74\x6b\x68\x64\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x11\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x07\x80\x00\x00\x04\x38\x00\x00\x00\x00\x00\x24\x65\x64\x74\x73\x00\x00\x00\x1c\x65\x6c\x73\x74\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x11\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x02\x01\x6d\x64\x69\x61\x00\x00\x00\x20\x6d\x64\x68\x64\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3c\x00\x00\x00\x01\x00\x7f\xff\x00\x00\x00\x00\x00\x2d\x68\x64\x6c\x72\x00\x00\x00\x00\x6d\x68\x6c\x72\x76\x69\x64\x65\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0c\x56\x69\x64\x65\x6f\x48\x61\x6e\x64\x6c\x65\x72\x00\x00\x01\xac\x6d\x69\x6e\x66\x00\x00\x00\x14\x76\x6d\x68\x64\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x2c\x68\x64\x6c\x72\x00\x00\x00\x00\x64\x68\x6c\x72\x75\x72\x6c\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x44\x61\x74\x61\x48\x61\x6e\x64\x6c\x65\x72\x00\x00\x00\x24\x64\x69\x6e\x66\x00\x00\x00\x1c\x64\x72\x65\x66\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x0c\x75\x72\x6c\x20\x00\x00\x00\x01\x00\x00\x01\x40\x73\x74\x62\x6c\x00\x00\x00\xdc\x73\x74\x73\x64\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\xcc\x61\x70\x63\x6e\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x46\x46\x4d\x50\x00\x00\x02\x00\x
00\x00\x02\x00\x07\x80\x04\x38\x00\x48\x00\x00\x00\x48\x00\x00\x00\x00\x00\x00\x00\x01\x1f\x4c\x61\x76\x63\x35\x39\x2e\x32\x35\x2e\x31\x30\x30\x20\x70\x72\x6f\x72\x65\x73\x5f\x76\x69\x64\x65\x6f\x74\x6f\x6f\x6c\x62\x00\x18\xff\xff\x00\x00\x00\x6c\x67\x6c\x62\x6c\x00\x00\x00\x64\x61\x70\x63\x6e\x00\x00\x00\x00\x00\x00\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\xff\x07\x80\x04\x38\x00\x48\x00\x00\x00\x48\x00\x00\x00\x00\x00\x00\x00\x01\x10\x41\x70\x70\x6c\x65\x20\x50\x72\x6f\x52\x65\x73\x20\x34\x32\x32\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x18\xff\xff\x00\x00\x00\x0a\x66\x69\x65\x6c\x01\x00\x00\x00\x00\x00\x00\x00\x00\x0a\x66\x69\x65\x6c\x01\x00\x00\x00\x00\x18\x73\x74\x74\x73\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x01\x00\x00\x00\x00\x1c\x73\x74\x73\x63\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x14\x73\x74\x73\x7a\x00\x00\x00\x00') + f.write(struct.pack(">I", outlen)) + f.write(b'\x00\x00\x00\x01\x00\x00\x00\x14\x73\x74\x63\x6f\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x24\x00\x00\x00\x21\x75\x64\x74\x61\x00\x00\x00\x19\xa9\x73\x77\x72\x00\x0d\x55\xc4\x4c\x61\x76\x66\x35\x39\x2e\x32\x30\x2e\x31\x30\x31') +# ffmpeg -i prores.mov prores-dec%d.png diff --git a/tools/proxyclient/experiments/scaler.py b/tools/proxyclient/experiments/scaler.py new file mode 100644 index 0000000..2a319ed --- /dev/null +++ b/tools/proxyclient/experiments/scaler.py @@ -0,0 +1,1527 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +import sys, pathlib +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +from m1n1.setup import * +from m1n1.hw.dart import DART +from m1n1.hw.scaler import * +from m1n1.utils import * +import struct +import time +from PIL import Image, ImageDraw + +SCALER_ADT = '/arm-io/scaler0' +DART_ADT = '/arm-io/dart-scaler0' + +p.pmgr_adt_clocks_enable(DART_ADT) +p.pmgr_adt_clocks_enable(SCALER_ADT) + +dart = DART.from_adt(u, 
DART_ADT) +dart.initialize() + +scaler_base, _ = u.adt[SCALER_ADT].get_reg(0) +apiodma_base, _ = u.adt[SCALER_ADT].get_reg(1) +dpe_ctrl_base, _ = u.adt[SCALER_ADT].get_reg(2) + +scaler = ScalerMainRegs(u, scaler_base) + +def dpe_start(): + p.write32(dpe_ctrl_base + 0x400, 0x1) + p.write32(dpe_ctrl_base + 0x404, 0x1) + p.write32(dpe_ctrl_base + 0x438, 0xf) + p.write32(dpe_ctrl_base + 0x43c, 0x5) + p.write32(dpe_ctrl_base + 0x408, 0x1) + p.write32(dpe_ctrl_base + 0x440, 0x5) + p.write32(dpe_ctrl_base + 0x444, 0x4) + p.write32(dpe_ctrl_base + 0x40c, 0x1) + p.write32(dpe_ctrl_base + 0x448, 0x5) + p.write32(dpe_ctrl_base + 0x44c, 0x5) + p.write32(dpe_ctrl_base + 0x410, 0x1) + p.write32(dpe_ctrl_base + 0x450, 0x7) + p.write32(dpe_ctrl_base + 0x454, 0x7) + p.write32(dpe_ctrl_base + 0x414, 0x1) + p.write32(dpe_ctrl_base + 0x458, 0xd) + p.write32(dpe_ctrl_base + 0x45c, 0xc) + p.write32(dpe_ctrl_base + 0x418, 0x1) + p.write32(dpe_ctrl_base + 0x460, 0x13) + p.write32(dpe_ctrl_base + 0x464, 0x12) + p.write32(dpe_ctrl_base + 0x41c, 0x1) + p.write32(dpe_ctrl_base + 0x468, 0x9) + p.write32(dpe_ctrl_base + 0x46c, 0xa) + p.write32(dpe_ctrl_base + 0x420, 0x1) + p.write32(dpe_ctrl_base + 0x470, 0x33) + p.write32(dpe_ctrl_base + 0x474, 0x2c) + p.write32(dpe_ctrl_base + 0x424, 0x1) + p.write32(dpe_ctrl_base + 0x478, 0x15) + p.write32(dpe_ctrl_base + 0x47c, 0x15) + p.write32(dpe_ctrl_base + 0x428, 0x1) + p.write32(dpe_ctrl_base + 0x480, 0xe) + p.write32(dpe_ctrl_base + 0x484, 0x5) + p.write32(dpe_ctrl_base + 0x42c, 0x1) + p.write32(dpe_ctrl_base + 0x488, 0x27) + p.write32(dpe_ctrl_base + 0x48c, 0x15) + p.write32(dpe_ctrl_base + 0x430, 0x1) + p.write32(dpe_ctrl_base + 0x490, 0x15) + p.write32(dpe_ctrl_base + 0x494, 0xe) + p.write32(dpe_ctrl_base + 0x434, 0x1) + p.write32(dpe_ctrl_base + 0x498, 0x0) + p.write32(dpe_ctrl_base + 0x49c, 0x0) + p.write32(dpe_ctrl_base + 0x4, 0x1000) + p.write32(dpe_ctrl_base + 0x0, 0x101) + +def dpe_stop(): + p.write32(dpe_ctrl_base + 0x0, 0x103) + while 
p.read32(dpe_ctrl_base + 0x0) & 0xC != 4: + ... + p.write32(dpe_ctrl_base + 0x0, p.read32(dpe_ctrl_base + 0x0) & 0xfffffffc) + +print(f"Hardware version {scaler.HW_VERSION.val:08X}") + +scaler.RESET = 1 +scaler.RESET = 0 + +print(f"Hardware version after reset {scaler.HW_VERSION.val:08X}") + +if len(sys.argv) < 3: + print(f"Usage: {sys.argv[0]} input.png output.png") + sys.exit(-1) + +input_image_fn = sys.argv[1] +output_image_fn = sys.argv[2] + +in_data = b'' +with Image.open(input_image_fn) as im: + in_W, in_H = im.size + in_BYTESPP = 4 + in_STRIDE = in_W * in_BYTESPP + in_SZ = in_W * in_H * in_BYTESPP + + for y in range(in_H): + for x in range(in_W): + r, g, b = im.getpixel((x, y)) + in_data += struct.pack("BBBB", r, g, b, 255) + +out_W = in_W * 5 +out_H = in_H * 3 +out_BYTESPP = 4 +out_STRIDE = out_W * out_BYTESPP +out_SZ = out_W * out_H * out_BYTESPP * 2 # HACK: double size for testing purposes + +for i in range(in_W * in_H): + in_data += struct.pack("<I", i & 0xFFFFFFFF) +# chexdump(in_data) + +out_buf_phys = u.heap.memalign(0x4000, out_SZ) +iface.writemem(out_buf_phys, b'\xAA' * out_SZ) +out_buf_iova = dart.iomap(0, out_buf_phys, out_SZ) +print(f"Output buffer @ phys {out_buf_phys:016X} iova {out_buf_iova:016X}") + +in_buf_phys = u.heap.memalign(0x4000, in_SZ) +iface.writemem(in_buf_phys, in_data) +in_buf_iova = dart.iomap(0, in_buf_phys, in_SZ) +print(f"Input buffer @ phys {in_buf_phys:016X} iova {in_buf_iova:016X}") +dart.dump_all() + + + +dpe_start() + +# reset CM +p.write32(scaler_base + 0x3800, 0x0) + +# RDMA control +p.write32(scaler_base + 0x180, 0x1) +p.write32(scaler_base + 0x184, 0x1e) +p.write32(scaler_base + 0x188, 0x0) +p.write32(scaler_base + 0x18c, 0x0) +p.write32(scaler_base + 0x190, 0x0) + +# transform config (flip/rotate) +scaler.FLIP_ROTATE.set() + +# cache hints +scaler.CACHE_HINTS_THING0[0].val = 0x7d311 +scaler.CACHE_HINTS_THING0[1].val = 0x7d311 +scaler.CACHE_HINTS_THING0[2].val = 0x7d311 +scaler.CACHE_HINTS_THING0[3].val = 0x7d311 
# Remaining cache-hint registers. Entry [3] of THING2 and THING3 is left
# unwritten; the disabled writes are kept below for reference.
for hint_idx in range(3):
    scaler.CACHE_HINTS_THING2[hint_idx].val = 0xbd311
# scaler.CACHE_HINTS_THING2[3].val = 0xbd311
for hint_idx in range(4):
    scaler.CACHE_HINTS_THING1[hint_idx].val = 0x707
for hint_idx in range(3):
    scaler.CACHE_HINTS_THING3[hint_idx].val = 0xc0bd307
# scaler.CACHE_HINTS_THING3[3].val = 0xc0bd307

# tunables
for tunable_idx in range(4):
    scaler.TUNABLES_THING0[tunable_idx].val = 0x20
for tunable_idx in range(3):
    scaler.TUNABLES_THING1[tunable_idx].val = 0x4000720
# scaler.TUNABLES_THING1[3].val = 0x4000720

# dest base addresses (only plane 0 is used; 64-bit IOVA split into halves)
scaler.DST_PLANE1_LO = 0
scaler.DST_PLANE1_HI = 0
scaler.DST_PLANE0_LO = out_buf_iova & 0xFFFFFFFF
scaler.DST_PLANE0_HI = out_buf_iova >> 32
scaler.DST_PLANE2_LO = 0
scaler.DST_PLANE2_HI = 0

# src base addresses (only plane 0 is used; 64-bit IOVA split into halves)
scaler.SRC_PLANE1_LO = 0
scaler.SRC_PLANE1_HI = 0
scaler.SRC_PLANE0_LO = in_buf_iova & 0xFFFFFFFF
scaler.SRC_PLANE0_HI = in_buf_iova >> 32
scaler.SRC_PLANE2_LO = 0
scaler.SRC_PLANE2_HI = 0

# dest stride
scaler.DST_PLANE1_STRIDE = 0
scaler.DST_PLANE0_STRIDE = out_STRIDE
scaler.DST_PLANE2_STRIDE = 0

# src stride
scaler.SRC_PLANE1_STRIDE = 0
scaler.SRC_PLANE0_STRIDE = in_STRIDE
scaler.SRC_PLANE2_STRIDE = 0

# dest offset
scaler.DST_PLANE1_OFFSET = 0
scaler.DST_PLANE0_OFFSET = 0
scaler.DST_PLANE2_OFFSET = 0

# src offset
scaler.SRC_PLANE1_OFFSET = 0
scaler.SRC_PLANE0_OFFSET = 0
scaler.SRC_PLANE2_OFFSET = 0

# dest sizes
scaler.DST_W = out_W
scaler.DST_H = out_H

scaler.DST_SIZE_THING3 = 0
scaler.DST_SIZE_THING6 = 0
scaler.DST_SIZE_THING2 = 0
scaler.DST_SIZE_THING5 = 0
scaler.DST_SIZE_THING4 = 0
scaler.DST_SIZE_THING7 = 0

# src sizes
scaler.SRC_W = in_W
scaler.SRC_H = in_H

scaler.SRC_SIZE_THING3 = 0
scaler.SRC_SIZE_THING6 = 0
scaler.SRC_SIZE_THING2 = 0
scaler.SRC_SIZE_THING5 = 0
scaler.SRC_SIZE_THING4 = 0
scaler.SRC_SIZE_THING7 = 0

# swizzling
scaler.SRC_SWIZZLE = 0x03020100
scaler.DST_SWIZZLE = 0x03020100

# WDMA control
for reg_off, reg_val in (
    (0x280, 0x1),
    (0x284, 0x81e),
    (0x288, 0x800),
    (0x28c, 0x800),
):
    p.write32(scaler_base + reg_off, reg_val)

# pixel averaging
scaler.PIXEL_AVERAGING = 0

# ASE enhancement
p.write32(scaler_base + 0x16800, 0x0)

# ASE 3x1 transform
for reg_off, reg_val in (
    (0x16080, 0x0),
    (0x16084, 0xb710367),
    (0x16088, 0x128),
):
    p.write32(scaler_base + reg_off, reg_val)

# ASE interpolation
p.write32(scaler_base + 0x16600, 0x15)

# ASE angle detect
for reg_off, reg_val in (
    (0x16504, 0x2000500),
    (0x16508, 0x3200),
    (0x16534, 0x8),
    (0x1651c, 0x851400),
    (0x16568, 0x250500),
    (0x16588, 0x496513),
):
    p.write32(scaler_base + reg_off, reg_val)

# ASE config
p.write32(scaler_base + 0x16000, 0x0)

# chroma upsampling
p.write32(scaler_base + 0x800, 0xc)

# chroma downsampling
p.write32(scaler_base + 0x900, 0x0)

# DDA init V???
scaler.SCALE_H_DDA_THING0 = 0
scaler.SCALE_H_DDA_THING2 = 0
scaler.SCALE_V_DDA_THING1 = 0

# vertical scaling: source/destination height ratio scaled by 0x400000
scaler.SCALE_V_RATIO_0 = int(in_H / out_H * 0x400000)
scaler.SCALE_V_RATIO_4 = 0 # XXX what does this do?
scaler.SCALE_V_RATIO_1 = 0 # XXX what does this do?
scaler.SCALE_V_RATIO_2 = 0 # XXX what does this set do?
scaler.SCALE_V_RATIO_3 = 0 # XXX what does this set do?
scaler.SCALE_V_RATIO_5 = 0 # XXX what does this set do?
scaler.SCALE_V_FLAGS.set(EN=1)

# XXX this is a random filter grabbed from a random trace
#
# Vertical filter coefficients, one 16-bit value per SCALE_FILTER_V_BLOCK0
# entry (288 entries, indices 0..287). In the captured values every BLOCK0
# word holds its coefficient duplicated in both half-words, and every BLOCK1
# word packs two neighbouring coefficients: entry 2n in bits 15:0 and entry
# 2n + 1 in bits 31:16. The register words are therefore derived from this
# table instead of being spelled out one assignment at a time.
#
# The previous hand-unrolled list assigned BLOCK0[218] twice and never wrote
# BLOCK0[217] (the value 0xff6bff6b, which matches BLOCK1[108], landed on the
# wrong index). Generating the indices writes every entry exactly once.
_V_FILTER_COEFFS = (
    0x0000, 0x0005, 0x000b, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020,
    0x0023, 0x0026, 0x0029, 0x002c, 0x002e, 0x0030, 0x0032, 0x0033,
    0xff87, 0xff90, 0xff99, 0xffa1, 0xffaa, 0xffb2, 0xffba, 0xffc2,
    0xffca, 0xffd2, 0xffd9, 0xffe0, 0xffe7, 0xffee, 0xfff4, 0xfffa,
    0xff06, 0xff0c, 0xff13, 0xff1a, 0xff21, 0xff28, 0xff30, 0xff38,
    0xff41, 0xff49, 0xff52, 0xff5b, 0xff63, 0xff6c, 0xff75, 0xff7e,
    0xff02, 0xfefc, 0xfef7, 0xfef3, 0xfeef, 0xfeed, 0xfeec, 0xfeeb,
    0xfeeb, 0xfeec, 0xfeee, 0xfef1, 0xfef4, 0xfef8, 0xfefc, 0xff01,
    0x0000, 0xffe7, 0xffcf, 0xffb9, 0xffa4, 0xff90, 0xff7d, 0xff6b,
    0xff5b, 0xff4c, 0xff3e, 0xff31, 0xff26, 0xff1b, 0xff12, 0xff0a,
    0x0221, 0x01f9, 0x01d0, 0x01a9, 0x0182, 0x015d, 0x0138, 0x0114,
    0x00f1, 0x00cf, 0x00ae, 0x008e, 0x006f, 0x0052, 0x0035, 0x001a,
    0x04e4, 0x04b8, 0x048b, 0x045f, 0x0432, 0x0405, 0x03d8, 0x03ab,
    0x037e, 0x0351, 0x0324, 0x02f8, 0x02cc, 0x02a1, 0x0276, 0x024b,
    0x073b, 0x071e, 0x0700, 0x06e1, 0x06c0, 0x069e, 0x067a, 0x0656,
    0x0630, 0x0609, 0x05e2, 0x05b9, 0x0590, 0x0566, 0x053b, 0x0510,
    0x082c, 0x082b, 0x0828, 0x0820, 0x081b, 0x0813, 0x0808, 0x07fc,
    0x07ed, 0x07dd, 0x07cb, 0x07b6, 0x07a2, 0x078a, 0x0771, 0x0757,
    0x073d, 0x0757, 0x0771, 0x078a, 0x07a2, 0x07b6, 0x07cb, 0x07dd,
    0x07ed, 0x07fc, 0x0808, 0x0813, 0x081b, 0x0820, 0x0828, 0x082b,
    0x04e4, 0x0510, 0x053b, 0x0566, 0x0590, 0x05b9, 0x05e2, 0x0609,
    0x0630, 0x0656, 0x067a, 0x069e, 0x06c0, 0x06e1, 0x0700, 0x071e,
    0x0221, 0x024b, 0x0276, 0x02a1, 0x02cc, 0x02f8, 0x0324, 0x0351,
    0x037e, 0x03ab, 0x03d8, 0x0405, 0x0432, 0x045f, 0x048b, 0x04b8,
    0x0000, 0x001a, 0x0035, 0x0052, 0x006f, 0x008e, 0x00ae, 0x00cf,
    0x00f1, 0x0114, 0x0138, 0x015d, 0x0182, 0x01a9, 0x01d0, 0x01f9,
    0xff02, 0xff0a, 0xff12, 0xff1b, 0xff26, 0xff31, 0xff3e, 0xff4c,
    0xff5b, 0xff6b, 0xff7d, 0xff90, 0xffa4, 0xffb9, 0xffcf, 0xffe7,
    0xff06, 0xff01, 0xfefc, 0xfef8, 0xfef4, 0xfef1, 0xfeee, 0xfeec,
    0xfeeb, 0xfeeb, 0xfeec, 0xfeed, 0xfeef, 0xfef3, 0xfef7, 0xfefc,
    0xff87, 0xff7e, 0xff75, 0xff6c, 0xff63, 0xff5b, 0xff52, 0xff49,
    0xff41, 0xff38, 0xff30, 0xff28, 0xff21, 0xff1a, 0xff13, 0xff0c,
    0x0000, 0xfffa, 0xfff4, 0xffee, 0xffe7, 0xffe0, 0xffd9, 0xffd2,
    0xffca, 0xffc2, 0xffba, 0xffb2, 0xffaa, 0xffa1, 0xff99, 0xff90,
    0x0034, 0x0033, 0x0032, 0x0030, 0x002e, 0x002c, 0x0029, 0x0026,
    0x0023, 0x0020, 0x001c, 0x0018, 0x0014, 0x0010, 0x000b, 0x0005,
)

# Keep the register write order of the original sequence:
# BLOCK0[2n], BLOCK0[2n + 1], then BLOCK1[n].
# Underscore-prefixed names avoid clobbering script-level variables.
for _v_pair in range(len(_V_FILTER_COEFFS) // 2):
    _v_even = _V_FILTER_COEFFS[2 * _v_pair]
    _v_odd = _V_FILTER_COEFFS[2 * _v_pair + 1]
    scaler.SCALE_FILTER_V_BLOCK0[2 * _v_pair].val = (_v_even << 16) | _v_even
    scaler.SCALE_FILTER_V_BLOCK0[2 * _v_pair + 1].val = (_v_odd << 16) | _v_odd
    scaler.SCALE_FILTER_V_BLOCK1[_v_pair].val = (_v_odd << 16) | _v_even

# DDA init H
scaler.SCALE_H_DDA_THING0 = 0
scaler.SCALE_H_DDA_THING2 = 0
scaler.SCALE_H_DDA_THING1 = 0

# horizontal scaling
# RATIO_0 is in_W / out_W and RATIO_2 the inverse, both scaled by 0x400000.
scaler.SCALE_H_RATIO_0 = int(in_W / out_W * 0x400000)
scaler.SCALE_H_RATIO_4 = 0 # XXX what does this do?
scaler.SCALE_H_RATIO_1 = 0 # XXX what does this do?
scaler.SCALE_H_RATIO_2 = int(out_W / in_W * 0x400000) # XXX what does this set do? zeroing this one out doesn't work
scaler.SCALE_H_RATIO_3 = 0 # XXX what does this set do?
scaler.SCALE_H_RATIO_5 = 0 # XXX what does this set do?
scaler.SCALE_H_FLAGS.set(EN=1)

# Horizontal filter table: the leading entries are all written as zero.
# The table continues in the statements that follow this block.
scaler.SCALE_FILTER_H_BLOCK0[0].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[1].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[0].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[2].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[3].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[1].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[4].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[5].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[2].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[6].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[7].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[3].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[8].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[9].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[4].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[10].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[11].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[5].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[12].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[13].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[6].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[14].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[15].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[7].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[16].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[17].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[8].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[18].val = 0x0
# Horizontal filter table, continued (SCALE_FILTER_H_BLOCK0[19..236] and
# SCALE_FILTER_H_BLOCK1[9..117]); the table goes on past this run.
# Write order is BLOCK0[2n], BLOCK0[2n + 1], then BLOCK1[n]. In the values
# below each BLOCK1[n] word carries the low half-word of BLOCK0[2n] in bits
# 15:0 and that of BLOCK0[2n + 1] in bits 31:16.
#
# BLOCK0 entries up to 95 (BLOCK1 up to 47) are written as zero.
scaler.SCALE_FILTER_H_BLOCK0[19].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[9].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[20].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[21].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[10].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[22].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[23].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[11].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[24].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[25].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[12].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[26].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[27].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[13].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[28].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[29].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[14].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[30].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[31].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[15].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[32].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[33].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[16].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[34].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[35].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[17].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[36].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[37].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[18].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[38].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[39].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[19].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[40].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[41].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[20].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[42].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[43].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[21].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[44].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[45].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[22].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[46].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[47].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[23].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[48].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[49].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[24].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[50].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[51].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[25].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[52].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[53].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[26].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[54].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[55].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[27].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[56].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[57].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[28].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[58].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[59].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[29].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[60].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[61].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[30].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[62].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[63].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[31].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[64].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[65].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[32].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[66].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[67].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[33].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[68].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[69].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[34].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[70].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[71].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[35].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[72].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[73].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[36].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[74].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[75].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[37].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[76].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[77].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[38].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[78].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[79].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[39].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[80].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[81].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[40].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[82].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[83].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[41].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[84].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[85].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[42].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[86].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[87].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[43].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[88].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[89].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[44].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[90].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[91].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[45].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[92].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[93].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[46].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[94].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[95].val = 0x0
scaler.SCALE_FILTER_H_BLOCK1[47].val = 0x0
# From BLOCK0[96] / BLOCK1[48] onward the values are non-zero; they repeat the
# sequence the vertical table (SCALE_FILTER_V_BLOCK0/1) uses from index 0.
scaler.SCALE_FILTER_H_BLOCK0[96].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[97].val = 0x50005
scaler.SCALE_FILTER_H_BLOCK1[48].val = 0x50000
scaler.SCALE_FILTER_H_BLOCK0[98].val = 0xb000b
scaler.SCALE_FILTER_H_BLOCK0[99].val = 0x100010
scaler.SCALE_FILTER_H_BLOCK1[49].val = 0x10000b
scaler.SCALE_FILTER_H_BLOCK0[100].val = 0x140014
scaler.SCALE_FILTER_H_BLOCK0[101].val = 0x180018
scaler.SCALE_FILTER_H_BLOCK1[50].val = 0x180014
scaler.SCALE_FILTER_H_BLOCK0[102].val = 0x1c001c
scaler.SCALE_FILTER_H_BLOCK0[103].val = 0x200020
scaler.SCALE_FILTER_H_BLOCK1[51].val = 0x20001c
scaler.SCALE_FILTER_H_BLOCK0[104].val = 0x230023
scaler.SCALE_FILTER_H_BLOCK0[105].val = 0x260026
scaler.SCALE_FILTER_H_BLOCK1[52].val = 0x260023
scaler.SCALE_FILTER_H_BLOCK0[106].val = 0x290029
scaler.SCALE_FILTER_H_BLOCK0[107].val = 0x2c002c
scaler.SCALE_FILTER_H_BLOCK1[53].val = 0x2c0029
scaler.SCALE_FILTER_H_BLOCK0[108].val = 0x2e002e
scaler.SCALE_FILTER_H_BLOCK0[109].val = 0x300030
scaler.SCALE_FILTER_H_BLOCK1[54].val = 0x30002e
scaler.SCALE_FILTER_H_BLOCK0[110].val = 0x320032
scaler.SCALE_FILTER_H_BLOCK0[111].val = 0x330033
scaler.SCALE_FILTER_H_BLOCK1[55].val = 0x330032
scaler.SCALE_FILTER_H_BLOCK0[112].val = 0xff87ff87
scaler.SCALE_FILTER_H_BLOCK0[113].val = 0xff90ff90
scaler.SCALE_FILTER_H_BLOCK1[56].val = 0xff90ff87
scaler.SCALE_FILTER_H_BLOCK0[114].val = 0xff99ff99
scaler.SCALE_FILTER_H_BLOCK0[115].val = 0xffa1ffa1
scaler.SCALE_FILTER_H_BLOCK1[57].val = 0xffa1ff99
scaler.SCALE_FILTER_H_BLOCK0[116].val = 0xffaaffaa
scaler.SCALE_FILTER_H_BLOCK0[117].val = 0xffb2ffb2
scaler.SCALE_FILTER_H_BLOCK1[58].val = 0xffb2ffaa
scaler.SCALE_FILTER_H_BLOCK0[118].val = 0xffbaffba
scaler.SCALE_FILTER_H_BLOCK0[119].val = 0xffc2ffc2
scaler.SCALE_FILTER_H_BLOCK1[59].val = 0xffc2ffba
scaler.SCALE_FILTER_H_BLOCK0[120].val = 0xffcaffca
scaler.SCALE_FILTER_H_BLOCK0[121].val = 0xffd2ffd2
scaler.SCALE_FILTER_H_BLOCK1[60].val = 0xffd2ffca
scaler.SCALE_FILTER_H_BLOCK0[122].val = 0xffd9ffd9
scaler.SCALE_FILTER_H_BLOCK0[123].val = 0xffe0ffe0
scaler.SCALE_FILTER_H_BLOCK1[61].val = 0xffe0ffd9
scaler.SCALE_FILTER_H_BLOCK0[124].val = 0xffe7ffe7
scaler.SCALE_FILTER_H_BLOCK0[125].val = 0xffeeffee
scaler.SCALE_FILTER_H_BLOCK1[62].val = 0xffeeffe7
scaler.SCALE_FILTER_H_BLOCK0[126].val = 0xfff4fff4
scaler.SCALE_FILTER_H_BLOCK0[127].val = 0xfffafffa
scaler.SCALE_FILTER_H_BLOCK1[63].val = 0xfffafff4
scaler.SCALE_FILTER_H_BLOCK0[128].val = 0xff06ff06
scaler.SCALE_FILTER_H_BLOCK0[129].val = 0xff0cff0c
scaler.SCALE_FILTER_H_BLOCK1[64].val = 0xff0cff06
scaler.SCALE_FILTER_H_BLOCK0[130].val = 0xff13ff13
scaler.SCALE_FILTER_H_BLOCK0[131].val = 0xff1aff1a
scaler.SCALE_FILTER_H_BLOCK1[65].val = 0xff1aff13
scaler.SCALE_FILTER_H_BLOCK0[132].val = 0xff21ff21
scaler.SCALE_FILTER_H_BLOCK0[133].val = 0xff28ff28
scaler.SCALE_FILTER_H_BLOCK1[66].val = 0xff28ff21
scaler.SCALE_FILTER_H_BLOCK0[134].val = 0xff30ff30
scaler.SCALE_FILTER_H_BLOCK0[135].val = 0xff38ff38
scaler.SCALE_FILTER_H_BLOCK1[67].val = 0xff38ff30
scaler.SCALE_FILTER_H_BLOCK0[136].val = 0xff41ff41
scaler.SCALE_FILTER_H_BLOCK0[137].val = 0xff49ff49
scaler.SCALE_FILTER_H_BLOCK1[68].val = 0xff49ff41
scaler.SCALE_FILTER_H_BLOCK0[138].val = 0xff52ff52
scaler.SCALE_FILTER_H_BLOCK0[139].val = 0xff5bff5b
scaler.SCALE_FILTER_H_BLOCK1[69].val = 0xff5bff52
scaler.SCALE_FILTER_H_BLOCK0[140].val = 0xff63ff63
scaler.SCALE_FILTER_H_BLOCK0[141].val = 0xff6cff6c
scaler.SCALE_FILTER_H_BLOCK1[70].val = 0xff6cff63
scaler.SCALE_FILTER_H_BLOCK0[142].val = 0xff75ff75
scaler.SCALE_FILTER_H_BLOCK0[143].val = 0xff7eff7e
scaler.SCALE_FILTER_H_BLOCK1[71].val = 0xff7eff75
scaler.SCALE_FILTER_H_BLOCK0[144].val = 0xff02ff02
scaler.SCALE_FILTER_H_BLOCK0[145].val = 0xfefcfefc
scaler.SCALE_FILTER_H_BLOCK1[72].val = 0xfefcff02
scaler.SCALE_FILTER_H_BLOCK0[146].val = 0xfef7fef7
scaler.SCALE_FILTER_H_BLOCK0[147].val = 0xfef3fef3
scaler.SCALE_FILTER_H_BLOCK1[73].val = 0xfef3fef7
scaler.SCALE_FILTER_H_BLOCK0[148].val = 0xfeeffeef
scaler.SCALE_FILTER_H_BLOCK0[149].val = 0xfeedfeed
scaler.SCALE_FILTER_H_BLOCK1[74].val = 0xfeedfeef
scaler.SCALE_FILTER_H_BLOCK0[150].val = 0xfeecfeec
scaler.SCALE_FILTER_H_BLOCK0[151].val = 0xfeebfeeb
scaler.SCALE_FILTER_H_BLOCK1[75].val = 0xfeebfeec
scaler.SCALE_FILTER_H_BLOCK0[152].val = 0xfeebfeeb
scaler.SCALE_FILTER_H_BLOCK0[153].val = 0xfeecfeec
scaler.SCALE_FILTER_H_BLOCK1[76].val = 0xfeecfeeb
scaler.SCALE_FILTER_H_BLOCK0[154].val = 0xfeeefeee
scaler.SCALE_FILTER_H_BLOCK0[155].val = 0xfef1fef1
scaler.SCALE_FILTER_H_BLOCK1[77].val = 0xfef1feee
scaler.SCALE_FILTER_H_BLOCK0[156].val = 0xfef4fef4
scaler.SCALE_FILTER_H_BLOCK0[157].val = 0xfef8fef8
scaler.SCALE_FILTER_H_BLOCK1[78].val = 0xfef8fef4
scaler.SCALE_FILTER_H_BLOCK0[158].val = 0xfefcfefc
scaler.SCALE_FILTER_H_BLOCK0[159].val = 0xff01ff01
scaler.SCALE_FILTER_H_BLOCK1[79].val = 0xff01fefc
scaler.SCALE_FILTER_H_BLOCK0[160].val = 0x0
scaler.SCALE_FILTER_H_BLOCK0[161].val = 0xffe7ffe7
scaler.SCALE_FILTER_H_BLOCK1[80].val = 0xffe70000
scaler.SCALE_FILTER_H_BLOCK0[162].val = 0xffcfffcf
scaler.SCALE_FILTER_H_BLOCK0[163].val = 0xffb9ffb9
scaler.SCALE_FILTER_H_BLOCK1[81].val = 0xffb9ffcf
scaler.SCALE_FILTER_H_BLOCK0[164].val = 0xffa4ffa4
scaler.SCALE_FILTER_H_BLOCK0[165].val = 0xff90ff90
scaler.SCALE_FILTER_H_BLOCK1[82].val = 0xff90ffa4
scaler.SCALE_FILTER_H_BLOCK0[166].val = 0xff7dff7d
scaler.SCALE_FILTER_H_BLOCK0[167].val = 0xff6bff6b
scaler.SCALE_FILTER_H_BLOCK1[83].val = 0xff6bff7d
scaler.SCALE_FILTER_H_BLOCK0[168].val = 0xff5bff5b
scaler.SCALE_FILTER_H_BLOCK0[169].val = 0xff4cff4c
scaler.SCALE_FILTER_H_BLOCK1[84].val = 0xff4cff5b
scaler.SCALE_FILTER_H_BLOCK0[170].val = 0xff3eff3e
scaler.SCALE_FILTER_H_BLOCK0[171].val = 0xff31ff31
scaler.SCALE_FILTER_H_BLOCK1[85].val = 0xff31ff3e
scaler.SCALE_FILTER_H_BLOCK0[172].val = 0xff26ff26
scaler.SCALE_FILTER_H_BLOCK0[173].val = 0xff1bff1b
scaler.SCALE_FILTER_H_BLOCK1[86].val = 0xff1bff26
scaler.SCALE_FILTER_H_BLOCK0[174].val = 0xff12ff12
scaler.SCALE_FILTER_H_BLOCK0[175].val = 0xff0aff0a
scaler.SCALE_FILTER_H_BLOCK1[87].val = 0xff0aff12
scaler.SCALE_FILTER_H_BLOCK0[176].val = 0x2210221
scaler.SCALE_FILTER_H_BLOCK0[177].val = 0x1f901f9
scaler.SCALE_FILTER_H_BLOCK1[88].val = 0x1f90221
scaler.SCALE_FILTER_H_BLOCK0[178].val = 0x1d001d0
scaler.SCALE_FILTER_H_BLOCK0[179].val = 0x1a901a9
scaler.SCALE_FILTER_H_BLOCK1[89].val = 0x1a901d0
scaler.SCALE_FILTER_H_BLOCK0[180].val = 0x1820182
scaler.SCALE_FILTER_H_BLOCK0[181].val = 0x15d015d
scaler.SCALE_FILTER_H_BLOCK1[90].val = 0x15d0182
scaler.SCALE_FILTER_H_BLOCK0[182].val = 0x1380138
scaler.SCALE_FILTER_H_BLOCK0[183].val = 0x1140114
scaler.SCALE_FILTER_H_BLOCK1[91].val = 0x1140138
scaler.SCALE_FILTER_H_BLOCK0[184].val = 0xf100f1
scaler.SCALE_FILTER_H_BLOCK0[185].val = 0xcf00cf
scaler.SCALE_FILTER_H_BLOCK1[92].val = 0xcf00f1
scaler.SCALE_FILTER_H_BLOCK0[186].val = 0xae00ae
scaler.SCALE_FILTER_H_BLOCK0[187].val = 0x8e008e
scaler.SCALE_FILTER_H_BLOCK1[93].val = 0x8e00ae
scaler.SCALE_FILTER_H_BLOCK0[188].val = 0x6f006f
scaler.SCALE_FILTER_H_BLOCK0[189].val = 0x520052
scaler.SCALE_FILTER_H_BLOCK1[94].val = 0x52006f
scaler.SCALE_FILTER_H_BLOCK0[190].val = 0x350035
scaler.SCALE_FILTER_H_BLOCK0[191].val = 0x1a001a
scaler.SCALE_FILTER_H_BLOCK1[95].val = 0x1a0035
scaler.SCALE_FILTER_H_BLOCK0[192].val = 0x4e404e4
scaler.SCALE_FILTER_H_BLOCK0[193].val = 0x4b804b8
scaler.SCALE_FILTER_H_BLOCK1[96].val = 0x4b804e4
scaler.SCALE_FILTER_H_BLOCK0[194].val = 0x48b048b
scaler.SCALE_FILTER_H_BLOCK0[195].val = 0x45f045f
scaler.SCALE_FILTER_H_BLOCK1[97].val = 0x45f048b
scaler.SCALE_FILTER_H_BLOCK0[196].val = 0x4320432
scaler.SCALE_FILTER_H_BLOCK0[197].val = 0x4050405
scaler.SCALE_FILTER_H_BLOCK1[98].val = 0x4050432
scaler.SCALE_FILTER_H_BLOCK0[198].val = 0x3d803d8
scaler.SCALE_FILTER_H_BLOCK0[199].val = 0x3ab03ab
scaler.SCALE_FILTER_H_BLOCK1[99].val = 0x3ab03d8
scaler.SCALE_FILTER_H_BLOCK0[200].val = 0x37e037e
scaler.SCALE_FILTER_H_BLOCK0[201].val = 0x3510351
scaler.SCALE_FILTER_H_BLOCK1[100].val = 0x351037e
scaler.SCALE_FILTER_H_BLOCK0[202].val = 0x3240324
scaler.SCALE_FILTER_H_BLOCK0[203].val = 0x2f802f8
scaler.SCALE_FILTER_H_BLOCK1[101].val = 0x2f80324
scaler.SCALE_FILTER_H_BLOCK0[204].val = 0x2cc02cc
scaler.SCALE_FILTER_H_BLOCK0[205].val = 0x2a102a1
scaler.SCALE_FILTER_H_BLOCK1[102].val = 0x2a102cc
scaler.SCALE_FILTER_H_BLOCK0[206].val = 0x2760276
scaler.SCALE_FILTER_H_BLOCK0[207].val = 0x24b024b
scaler.SCALE_FILTER_H_BLOCK1[103].val = 0x24b0276
scaler.SCALE_FILTER_H_BLOCK0[208].val = 0x73b073b
scaler.SCALE_FILTER_H_BLOCK0[209].val = 0x71e071e
scaler.SCALE_FILTER_H_BLOCK1[104].val = 0x71e073b
scaler.SCALE_FILTER_H_BLOCK0[210].val = 0x7000700
scaler.SCALE_FILTER_H_BLOCK0[211].val = 0x6e106e1
scaler.SCALE_FILTER_H_BLOCK1[105].val = 0x6e10700
scaler.SCALE_FILTER_H_BLOCK0[212].val = 0x6c006c0
scaler.SCALE_FILTER_H_BLOCK0[213].val = 0x69e069e
scaler.SCALE_FILTER_H_BLOCK1[106].val = 0x69e06c0
scaler.SCALE_FILTER_H_BLOCK0[214].val = 0x67a067a
scaler.SCALE_FILTER_H_BLOCK0[215].val = 0x6560656
scaler.SCALE_FILTER_H_BLOCK1[107].val = 0x656067a
scaler.SCALE_FILTER_H_BLOCK0[216].val = 0x6300630
scaler.SCALE_FILTER_H_BLOCK0[217].val = 0x6090609
scaler.SCALE_FILTER_H_BLOCK1[108].val = 0x6090630
scaler.SCALE_FILTER_H_BLOCK0[218].val = 0x5e205e2
scaler.SCALE_FILTER_H_BLOCK0[219].val = 0x5b905b9
scaler.SCALE_FILTER_H_BLOCK1[109].val = 0x5b905e2
scaler.SCALE_FILTER_H_BLOCK0[220].val = 0x5900590
scaler.SCALE_FILTER_H_BLOCK0[221].val = 0x5660566
scaler.SCALE_FILTER_H_BLOCK1[110].val = 0x5660590
scaler.SCALE_FILTER_H_BLOCK0[222].val = 0x53b053b
scaler.SCALE_FILTER_H_BLOCK0[223].val = 0x5100510
scaler.SCALE_FILTER_H_BLOCK1[111].val = 0x510053b
scaler.SCALE_FILTER_H_BLOCK0[224].val = 0x82c082c
scaler.SCALE_FILTER_H_BLOCK0[225].val = 0x82b082b
scaler.SCALE_FILTER_H_BLOCK1[112].val = 0x82b082c
scaler.SCALE_FILTER_H_BLOCK0[226].val = 0x8280828
scaler.SCALE_FILTER_H_BLOCK0[227].val = 0x8200820
scaler.SCALE_FILTER_H_BLOCK1[113].val = 0x8200828
scaler.SCALE_FILTER_H_BLOCK0[228].val = 0x81b081b
scaler.SCALE_FILTER_H_BLOCK0[229].val = 0x8130813
scaler.SCALE_FILTER_H_BLOCK1[114].val = 0x813081b
scaler.SCALE_FILTER_H_BLOCK0[230].val = 0x8080808
scaler.SCALE_FILTER_H_BLOCK0[231].val = 0x7fc07fc
scaler.SCALE_FILTER_H_BLOCK1[115].val = 0x7fc0808
scaler.SCALE_FILTER_H_BLOCK0[232].val = 0x7ed07ed
scaler.SCALE_FILTER_H_BLOCK0[233].val = 0x7dd07dd
scaler.SCALE_FILTER_H_BLOCK1[116].val = 0x7dd07ed
scaler.SCALE_FILTER_H_BLOCK0[234].val = 0x7cb07cb
scaler.SCALE_FILTER_H_BLOCK0[235].val = 0x7b607b6
scaler.SCALE_FILTER_H_BLOCK1[117].val = 0x7b607cb
scaler.SCALE_FILTER_H_BLOCK0[236].val = 0x7a207a2
+scaler.SCALE_FILTER_H_BLOCK0[237].val = 0x78a078a +scaler.SCALE_FILTER_H_BLOCK1[118].val = 0x78a07a2 +scaler.SCALE_FILTER_H_BLOCK0[238].val = 0x7710771 +scaler.SCALE_FILTER_H_BLOCK0[239].val = 0x7570757 +scaler.SCALE_FILTER_H_BLOCK1[119].val = 0x7570771 +scaler.SCALE_FILTER_H_BLOCK0[240].val = 0x73d073d +scaler.SCALE_FILTER_H_BLOCK0[241].val = 0x7570757 +scaler.SCALE_FILTER_H_BLOCK1[120].val = 0x757073d +scaler.SCALE_FILTER_H_BLOCK0[242].val = 0x7710771 +scaler.SCALE_FILTER_H_BLOCK0[243].val = 0x78a078a +scaler.SCALE_FILTER_H_BLOCK1[121].val = 0x78a0771 +scaler.SCALE_FILTER_H_BLOCK0[244].val = 0x7a207a2 +scaler.SCALE_FILTER_H_BLOCK0[245].val = 0x7b607b6 +scaler.SCALE_FILTER_H_BLOCK1[122].val = 0x7b607a2 +scaler.SCALE_FILTER_H_BLOCK0[246].val = 0x7cb07cb +scaler.SCALE_FILTER_H_BLOCK0[247].val = 0x7dd07dd +scaler.SCALE_FILTER_H_BLOCK1[123].val = 0x7dd07cb +scaler.SCALE_FILTER_H_BLOCK0[248].val = 0x7ed07ed +scaler.SCALE_FILTER_H_BLOCK0[249].val = 0x7fc07fc +scaler.SCALE_FILTER_H_BLOCK1[124].val = 0x7fc07ed +scaler.SCALE_FILTER_H_BLOCK0[250].val = 0x8080808 +scaler.SCALE_FILTER_H_BLOCK0[251].val = 0x8130813 +scaler.SCALE_FILTER_H_BLOCK1[125].val = 0x8130808 +scaler.SCALE_FILTER_H_BLOCK0[252].val = 0x81b081b +scaler.SCALE_FILTER_H_BLOCK0[253].val = 0x8200820 +scaler.SCALE_FILTER_H_BLOCK1[126].val = 0x820081b +scaler.SCALE_FILTER_H_BLOCK0[254].val = 0x8280828 +scaler.SCALE_FILTER_H_BLOCK0[255].val = 0x82b082b +scaler.SCALE_FILTER_H_BLOCK1[127].val = 0x82b0828 +scaler.SCALE_FILTER_H_BLOCK0[256].val = 0x4e404e4 +scaler.SCALE_FILTER_H_BLOCK0[257].val = 0x5100510 +scaler.SCALE_FILTER_H_BLOCK1[128].val = 0x51004e4 +scaler.SCALE_FILTER_H_BLOCK0[258].val = 0x53b053b +scaler.SCALE_FILTER_H_BLOCK0[259].val = 0x5660566 +scaler.SCALE_FILTER_H_BLOCK1[129].val = 0x566053b +scaler.SCALE_FILTER_H_BLOCK0[260].val = 0x5900590 +scaler.SCALE_FILTER_H_BLOCK0[261].val = 0x5b905b9 +scaler.SCALE_FILTER_H_BLOCK1[130].val = 0x5b90590 +scaler.SCALE_FILTER_H_BLOCK0[262].val = 0x5e205e2 
+scaler.SCALE_FILTER_H_BLOCK0[263].val = 0x6090609 +scaler.SCALE_FILTER_H_BLOCK1[131].val = 0x60905e2 +scaler.SCALE_FILTER_H_BLOCK0[264].val = 0x6300630 +scaler.SCALE_FILTER_H_BLOCK0[265].val = 0x6560656 +scaler.SCALE_FILTER_H_BLOCK1[132].val = 0x6560630 +scaler.SCALE_FILTER_H_BLOCK0[266].val = 0x67a067a +scaler.SCALE_FILTER_H_BLOCK0[267].val = 0x69e069e +scaler.SCALE_FILTER_H_BLOCK1[133].val = 0x69e067a +scaler.SCALE_FILTER_H_BLOCK0[268].val = 0x6c006c0 +scaler.SCALE_FILTER_H_BLOCK0[269].val = 0x6e106e1 +scaler.SCALE_FILTER_H_BLOCK1[134].val = 0x6e106c0 +scaler.SCALE_FILTER_H_BLOCK0[270].val = 0x7000700 +scaler.SCALE_FILTER_H_BLOCK0[271].val = 0x71e071e +scaler.SCALE_FILTER_H_BLOCK1[135].val = 0x71e0700 +scaler.SCALE_FILTER_H_BLOCK0[272].val = 0x2210221 +scaler.SCALE_FILTER_H_BLOCK0[273].val = 0x24b024b +scaler.SCALE_FILTER_H_BLOCK1[136].val = 0x24b0221 +scaler.SCALE_FILTER_H_BLOCK0[274].val = 0x2760276 +scaler.SCALE_FILTER_H_BLOCK0[275].val = 0x2a102a1 +scaler.SCALE_FILTER_H_BLOCK1[137].val = 0x2a10276 +scaler.SCALE_FILTER_H_BLOCK0[276].val = 0x2cc02cc +scaler.SCALE_FILTER_H_BLOCK0[277].val = 0x2f802f8 +scaler.SCALE_FILTER_H_BLOCK1[138].val = 0x2f802cc +scaler.SCALE_FILTER_H_BLOCK0[278].val = 0x3240324 +scaler.SCALE_FILTER_H_BLOCK0[279].val = 0x3510351 +scaler.SCALE_FILTER_H_BLOCK1[139].val = 0x3510324 +scaler.SCALE_FILTER_H_BLOCK0[280].val = 0x37e037e +scaler.SCALE_FILTER_H_BLOCK0[281].val = 0x3ab03ab +scaler.SCALE_FILTER_H_BLOCK1[140].val = 0x3ab037e +scaler.SCALE_FILTER_H_BLOCK0[282].val = 0x3d803d8 +scaler.SCALE_FILTER_H_BLOCK0[283].val = 0x4050405 +scaler.SCALE_FILTER_H_BLOCK1[141].val = 0x40503d8 +scaler.SCALE_FILTER_H_BLOCK0[284].val = 0x4320432 +scaler.SCALE_FILTER_H_BLOCK0[285].val = 0x45f045f +scaler.SCALE_FILTER_H_BLOCK1[142].val = 0x45f0432 +scaler.SCALE_FILTER_H_BLOCK0[286].val = 0x48b048b +scaler.SCALE_FILTER_H_BLOCK0[287].val = 0x4b804b8 +scaler.SCALE_FILTER_H_BLOCK1[143].val = 0x4b8048b +scaler.SCALE_FILTER_H_BLOCK0[288].val = 0x0 
+scaler.SCALE_FILTER_H_BLOCK0[289].val = 0x1a001a +scaler.SCALE_FILTER_H_BLOCK1[144].val = 0x1a0000 +scaler.SCALE_FILTER_H_BLOCK0[290].val = 0x350035 +scaler.SCALE_FILTER_H_BLOCK0[291].val = 0x520052 +scaler.SCALE_FILTER_H_BLOCK1[145].val = 0x520035 +scaler.SCALE_FILTER_H_BLOCK0[292].val = 0x6f006f +scaler.SCALE_FILTER_H_BLOCK0[293].val = 0x8e008e +scaler.SCALE_FILTER_H_BLOCK1[146].val = 0x8e006f +scaler.SCALE_FILTER_H_BLOCK0[294].val = 0xae00ae +scaler.SCALE_FILTER_H_BLOCK0[295].val = 0xcf00cf +scaler.SCALE_FILTER_H_BLOCK1[147].val = 0xcf00ae +scaler.SCALE_FILTER_H_BLOCK0[296].val = 0xf100f1 +scaler.SCALE_FILTER_H_BLOCK0[297].val = 0x1140114 +scaler.SCALE_FILTER_H_BLOCK1[148].val = 0x11400f1 +scaler.SCALE_FILTER_H_BLOCK0[298].val = 0x1380138 +scaler.SCALE_FILTER_H_BLOCK0[299].val = 0x15d015d +scaler.SCALE_FILTER_H_BLOCK1[149].val = 0x15d0138 +scaler.SCALE_FILTER_H_BLOCK0[300].val = 0x1820182 +scaler.SCALE_FILTER_H_BLOCK0[301].val = 0x1a901a9 +scaler.SCALE_FILTER_H_BLOCK1[150].val = 0x1a90182 +scaler.SCALE_FILTER_H_BLOCK0[302].val = 0x1d001d0 +scaler.SCALE_FILTER_H_BLOCK0[303].val = 0x1f901f9 +scaler.SCALE_FILTER_H_BLOCK1[151].val = 0x1f901d0 +scaler.SCALE_FILTER_H_BLOCK0[304].val = 0xff02ff02 +scaler.SCALE_FILTER_H_BLOCK0[305].val = 0xff0aff0a +scaler.SCALE_FILTER_H_BLOCK1[152].val = 0xff0aff02 +scaler.SCALE_FILTER_H_BLOCK0[306].val = 0xff12ff12 +scaler.SCALE_FILTER_H_BLOCK0[307].val = 0xff1bff1b +scaler.SCALE_FILTER_H_BLOCK1[153].val = 0xff1bff12 +scaler.SCALE_FILTER_H_BLOCK0[308].val = 0xff26ff26 +scaler.SCALE_FILTER_H_BLOCK0[309].val = 0xff31ff31 +scaler.SCALE_FILTER_H_BLOCK1[154].val = 0xff31ff26 +scaler.SCALE_FILTER_H_BLOCK0[310].val = 0xff3eff3e +scaler.SCALE_FILTER_H_BLOCK0[311].val = 0xff4cff4c +scaler.SCALE_FILTER_H_BLOCK1[155].val = 0xff4cff3e +scaler.SCALE_FILTER_H_BLOCK0[312].val = 0xff5bff5b +scaler.SCALE_FILTER_H_BLOCK0[313].val = 0xff6bff6b +scaler.SCALE_FILTER_H_BLOCK1[156].val = 0xff6bff5b +scaler.SCALE_FILTER_H_BLOCK0[314].val = 0xff7dff7d 
+scaler.SCALE_FILTER_H_BLOCK0[315].val = 0xff90ff90 +scaler.SCALE_FILTER_H_BLOCK1[157].val = 0xff90ff7d +scaler.SCALE_FILTER_H_BLOCK0[316].val = 0xffa4ffa4 +scaler.SCALE_FILTER_H_BLOCK0[317].val = 0xffb9ffb9 +scaler.SCALE_FILTER_H_BLOCK1[158].val = 0xffb9ffa4 +scaler.SCALE_FILTER_H_BLOCK0[318].val = 0xffcfffcf +scaler.SCALE_FILTER_H_BLOCK0[319].val = 0xffe7ffe7 +scaler.SCALE_FILTER_H_BLOCK1[159].val = 0xffe7ffcf +scaler.SCALE_FILTER_H_BLOCK0[320].val = 0xff06ff06 +scaler.SCALE_FILTER_H_BLOCK0[321].val = 0xff01ff01 +scaler.SCALE_FILTER_H_BLOCK1[160].val = 0xff01ff06 +scaler.SCALE_FILTER_H_BLOCK0[322].val = 0xfefcfefc +scaler.SCALE_FILTER_H_BLOCK0[323].val = 0xfef8fef8 +scaler.SCALE_FILTER_H_BLOCK1[161].val = 0xfef8fefc +scaler.SCALE_FILTER_H_BLOCK0[324].val = 0xfef4fef4 +scaler.SCALE_FILTER_H_BLOCK0[325].val = 0xfef1fef1 +scaler.SCALE_FILTER_H_BLOCK1[162].val = 0xfef1fef4 +scaler.SCALE_FILTER_H_BLOCK0[326].val = 0xfeeefeee +scaler.SCALE_FILTER_H_BLOCK0[327].val = 0xfeecfeec +scaler.SCALE_FILTER_H_BLOCK1[163].val = 0xfeecfeee +scaler.SCALE_FILTER_H_BLOCK0[328].val = 0xfeebfeeb +scaler.SCALE_FILTER_H_BLOCK0[329].val = 0xfeebfeeb +scaler.SCALE_FILTER_H_BLOCK1[164].val = 0xfeebfeeb +scaler.SCALE_FILTER_H_BLOCK0[330].val = 0xfeecfeec +scaler.SCALE_FILTER_H_BLOCK0[331].val = 0xfeedfeed +scaler.SCALE_FILTER_H_BLOCK1[165].val = 0xfeedfeec +scaler.SCALE_FILTER_H_BLOCK0[332].val = 0xfeeffeef +scaler.SCALE_FILTER_H_BLOCK0[333].val = 0xfef3fef3 +scaler.SCALE_FILTER_H_BLOCK1[166].val = 0xfef3feef +scaler.SCALE_FILTER_H_BLOCK0[334].val = 0xfef7fef7 +scaler.SCALE_FILTER_H_BLOCK0[335].val = 0xfefcfefc +scaler.SCALE_FILTER_H_BLOCK1[167].val = 0xfefcfef7 +scaler.SCALE_FILTER_H_BLOCK0[336].val = 0xff87ff87 +scaler.SCALE_FILTER_H_BLOCK0[337].val = 0xff7eff7e +scaler.SCALE_FILTER_H_BLOCK1[168].val = 0xff7eff87 +scaler.SCALE_FILTER_H_BLOCK0[338].val = 0xff75ff75 +scaler.SCALE_FILTER_H_BLOCK0[339].val = 0xff6cff6c +scaler.SCALE_FILTER_H_BLOCK1[169].val = 0xff6cff75 
+scaler.SCALE_FILTER_H_BLOCK0[340].val = 0xff63ff63 +scaler.SCALE_FILTER_H_BLOCK0[341].val = 0xff5bff5b +scaler.SCALE_FILTER_H_BLOCK1[170].val = 0xff5bff63 +scaler.SCALE_FILTER_H_BLOCK0[342].val = 0xff52ff52 +scaler.SCALE_FILTER_H_BLOCK0[343].val = 0xff49ff49 +scaler.SCALE_FILTER_H_BLOCK1[171].val = 0xff49ff52 +scaler.SCALE_FILTER_H_BLOCK0[344].val = 0xff41ff41 +scaler.SCALE_FILTER_H_BLOCK0[345].val = 0xff38ff38 +scaler.SCALE_FILTER_H_BLOCK1[172].val = 0xff38ff41 +scaler.SCALE_FILTER_H_BLOCK0[346].val = 0xff30ff30 +scaler.SCALE_FILTER_H_BLOCK0[347].val = 0xff28ff28 +scaler.SCALE_FILTER_H_BLOCK1[173].val = 0xff28ff30 +scaler.SCALE_FILTER_H_BLOCK0[348].val = 0xff21ff21 +scaler.SCALE_FILTER_H_BLOCK0[349].val = 0xff1aff1a +scaler.SCALE_FILTER_H_BLOCK1[174].val = 0xff1aff21 +scaler.SCALE_FILTER_H_BLOCK0[350].val = 0xff13ff13 +scaler.SCALE_FILTER_H_BLOCK0[351].val = 0xff0cff0c +scaler.SCALE_FILTER_H_BLOCK1[175].val = 0xff0cff13 +scaler.SCALE_FILTER_H_BLOCK0[352].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[353].val = 0xfffafffa +scaler.SCALE_FILTER_H_BLOCK1[176].val = 0xfffa0000 +scaler.SCALE_FILTER_H_BLOCK0[354].val = 0xfff4fff4 +scaler.SCALE_FILTER_H_BLOCK0[355].val = 0xffeeffee +scaler.SCALE_FILTER_H_BLOCK1[177].val = 0xffeefff4 +scaler.SCALE_FILTER_H_BLOCK0[356].val = 0xffe7ffe7 +scaler.SCALE_FILTER_H_BLOCK0[357].val = 0xffe0ffe0 +scaler.SCALE_FILTER_H_BLOCK1[178].val = 0xffe0ffe7 +scaler.SCALE_FILTER_H_BLOCK0[358].val = 0xffd9ffd9 +scaler.SCALE_FILTER_H_BLOCK0[359].val = 0xffd2ffd2 +scaler.SCALE_FILTER_H_BLOCK1[179].val = 0xffd2ffd9 +scaler.SCALE_FILTER_H_BLOCK0[360].val = 0xffcaffca +scaler.SCALE_FILTER_H_BLOCK0[361].val = 0xffc2ffc2 +scaler.SCALE_FILTER_H_BLOCK1[180].val = 0xffc2ffca +scaler.SCALE_FILTER_H_BLOCK0[362].val = 0xffbaffba +scaler.SCALE_FILTER_H_BLOCK0[363].val = 0xffb2ffb2 +scaler.SCALE_FILTER_H_BLOCK1[181].val = 0xffb2ffba +scaler.SCALE_FILTER_H_BLOCK0[364].val = 0xffaaffaa +scaler.SCALE_FILTER_H_BLOCK0[365].val = 0xffa1ffa1 
+scaler.SCALE_FILTER_H_BLOCK1[182].val = 0xffa1ffaa +scaler.SCALE_FILTER_H_BLOCK0[366].val = 0xff99ff99 +scaler.SCALE_FILTER_H_BLOCK0[367].val = 0xff90ff90 +scaler.SCALE_FILTER_H_BLOCK1[183].val = 0xff90ff99 +scaler.SCALE_FILTER_H_BLOCK0[368].val = 0x340034 +scaler.SCALE_FILTER_H_BLOCK0[369].val = 0x330033 +scaler.SCALE_FILTER_H_BLOCK1[184].val = 0x330034 +scaler.SCALE_FILTER_H_BLOCK0[370].val = 0x320032 +scaler.SCALE_FILTER_H_BLOCK0[371].val = 0x300030 +scaler.SCALE_FILTER_H_BLOCK1[185].val = 0x300032 +scaler.SCALE_FILTER_H_BLOCK0[372].val = 0x2e002e +scaler.SCALE_FILTER_H_BLOCK0[373].val = 0x2c002c +scaler.SCALE_FILTER_H_BLOCK1[186].val = 0x2c002e +scaler.SCALE_FILTER_H_BLOCK0[374].val = 0x290029 +scaler.SCALE_FILTER_H_BLOCK0[375].val = 0x260026 +scaler.SCALE_FILTER_H_BLOCK1[187].val = 0x260029 +scaler.SCALE_FILTER_H_BLOCK0[376].val = 0x230023 +scaler.SCALE_FILTER_H_BLOCK0[377].val = 0x200020 +scaler.SCALE_FILTER_H_BLOCK1[188].val = 0x200023 +scaler.SCALE_FILTER_H_BLOCK0[378].val = 0x1c001c +scaler.SCALE_FILTER_H_BLOCK0[379].val = 0x180018 +scaler.SCALE_FILTER_H_BLOCK1[189].val = 0x18001c +scaler.SCALE_FILTER_H_BLOCK0[380].val = 0x140014 +scaler.SCALE_FILTER_H_BLOCK0[381].val = 0x100010 +scaler.SCALE_FILTER_H_BLOCK1[190].val = 0x100014 +scaler.SCALE_FILTER_H_BLOCK0[382].val = 0xb000b +scaler.SCALE_FILTER_H_BLOCK0[383].val = 0x50005 +scaler.SCALE_FILTER_H_BLOCK1[191].val = 0x5000b +scaler.SCALE_FILTER_H_BLOCK0[384].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[385].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[192].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[386].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[387].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[193].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[388].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[389].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[194].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[390].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[391].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[195].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[392].val = 0x0 
+scaler.SCALE_FILTER_H_BLOCK0[393].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[196].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[394].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[395].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[197].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[396].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[397].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[198].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[398].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[399].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[199].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[400].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[401].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[200].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[402].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[403].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[201].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[404].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[405].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[202].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[406].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[407].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[203].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[408].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[409].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[204].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[410].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[411].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[205].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[412].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[413].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[206].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[414].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[415].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[207].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[416].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[417].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[208].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[418].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[419].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[209].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[420].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[421].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[210].val = 0x0 
+scaler.SCALE_FILTER_H_BLOCK0[422].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[423].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[211].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[424].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[425].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[212].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[426].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[427].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[213].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[428].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[429].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[214].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[430].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[431].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[215].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[432].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[433].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[216].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[434].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[435].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[217].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[436].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[437].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[218].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[438].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[439].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[219].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[440].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[441].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[220].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[442].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[443].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[221].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[444].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[445].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[222].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[446].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[447].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[223].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[448].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[449].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[224].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[450].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[451].val = 0x0 
+scaler.SCALE_FILTER_H_BLOCK1[225].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[452].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[453].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[226].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[454].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[455].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[227].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[456].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[457].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[228].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[458].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[459].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[229].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[460].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[461].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[230].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[462].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[463].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[231].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[464].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[465].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[232].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[466].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[467].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[233].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[468].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[469].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[234].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[470].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[471].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[235].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[472].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[473].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[236].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[474].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[475].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[237].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[476].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[477].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[238].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[478].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK0[479].val = 0x0 +scaler.SCALE_FILTER_H_BLOCK1[239].val = 0x0 + +# pseudo linear scaling +scaler.PSEUDO_LINEAR_SCALING = 0 + 
+# reshape +p.write32(scaler_base + 0xe8, 0x0) + +# reset CM 3x +p.write32(scaler_base + 0x3800, 0x0) +p.write32(scaler_base + 0x3800, 0x0) +p.write32(scaler_base + 0x3800, 0x0) + +# enable prescaler +p.write32(scaler_base + 0x824, 0xc) + +# alpha override +p.write32(scaler_base + 0x8c, 0xffff) + +# dither +p.write32(scaler_base + 0xa00, 0x0) + +# commit convert +p.write32(scaler_base + 0x13808, 0x0) +p.write32(scaler_base + 0x1380c, 0x0) +p.write32(scaler_base + 0x13800, 0x0) +p.write32(scaler_base + 0x13804, 0x0) + +# convert map +p.write32(scaler_base + 0x13810, 0x8) +p.write32(scaler_base + 0x13814, 0x8) +p.write32(scaler_base + 0x13818, 0x8) +p.write32(scaler_base + 0x1381c, 0x8) +p.write32(scaler_base + 0x13804, 0x0) +p.write32(scaler_base + 0x13c04, 0x0) +p.write32(scaler_base + 0x13c10, 0x8) +p.write32(scaler_base + 0x13c14, 0x8) +p.write32(scaler_base + 0x13c18, 0x8) +p.write32(scaler_base + 0x13c1c, 0x8) + +# commit revert +p.write32(scaler_base + 0x13c00, 0x1) + +# (don't) program histogram +p.write32(scaler_base + 0x3000, 0x0) +p.write32(scaler_base + 0x124, 0x0) + +# tag transform registers +p.write32(scaler_base + 0x110, 0x1) + +# start +p.write32(scaler_base + 0x98, 0xfffffffe) +scaler.START = 1 + +start_time = time.time() +while scaler.MSR_GLBL_IRQSTS.reg.DONE == 0: + if time.time() - start_time > 5: + print("TIMED OUT!!!") + break + +print(f"IRQ status is now {scaler.MSR_GLBL_IRQSTS}") +print(f"Debug status is now {scaler.MSR_CTRL_DBGSTS}") + +out_buf_new = iface.readmem(out_buf_phys, out_SZ) +chexdump(out_buf_new) + +with Image.new(mode='RGBA', size=(out_W, out_H)) as im: + for y in range(out_H): + for x in range(out_W): + block = out_buf_new[ + y*out_STRIDE + x*out_BYTESPP: + y*out_STRIDE + (x+1)*out_BYTESPP] + + r, g, b, a = block + im.putpixel((x, y), (r, g, b, a)) + + im.save(output_image_fn) diff --git a/tools/proxyclient/experiments/smc.py b/tools/proxyclient/experiments/smc.py new file mode 100755 index 0000000..5e4a383 --- /dev/null +++ 
b/tools/proxyclient/experiments/smc.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +import sys, pathlib +import time +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +import struct +from m1n1.setup import * +from m1n1.fw.smc import SMCClient, SMCError + +smc_addr = u.adt["arm-io/smc"].get_reg(0)[0] +smc = SMCClient(u, smc_addr) +smc.start() +smc.start_ep(0x20) + +smc.verbose = 0 + +smcep = smc.epmap[0x20] + +def gpio_key(pin): + assert(pin < (1 << 16)) + + fourcc = 'gP' + ('00'+(hex(pin)[2:]))[-2:] + return fourcc + +## Enable wifi/bluetooth +#RFKILL_PIN = 13 +#smcep.write(gpio_key(RFKILL_PIN), struct.pack('<I', 0x800000 | 0x0)) +#smcep.write(gpio_key(RFKILL_PIN), struct.pack('<I', 0x800000 | 0x1)) + +count = smcep.read32b("#KEY") +print(f"Key count: {count}") + +for i in range(count): + k = smcep.get_key_by_index(i) + length, type, flags = smcep.get_key_info(k) + if flags & 0x80: + try: + val = smcep.read_type(k, length, type) + print(f"#{i}: {k} = ({type}, {flags:#x}) {val}") + except SMCError as e: + print(f"#{i}: {k} = ({type}, {flags:#x}) <error {e}>") + else: + print(f"#{i}: {k} = ({type}, {flags:#x}) <not available>") + + +smc.stop() diff --git a/tools/proxyclient/experiments/smc_watcher.py b/tools/proxyclient/experiments/smc_watcher.py new file mode 100755 index 0000000..9d07339 --- /dev/null +++ b/tools/proxyclient/experiments/smc_watcher.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +import sys, pathlib, fnmatch, signal +import time +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +import struct +from m1n1.setup import * +from m1n1.shell import run_shell +from m1n1.fw.smc import SMCClient, SMCError + +smc_addr = u.adt["arm-io/smc"].get_reg(0)[0] +smc = SMCClient(u, smc_addr) +smc.start() +smc.start_ep(0x20) + +smc.verbose = 0 + +smcep = smc.epmap[0x20] + +count = smcep.read32b("#KEY") +print(f"Key count: {count}") + +print("Scanning keys...") + +pats = 
sys.argv[1:] + +vals = {} + +fmts = { + "D?CR": "#x", + "AC-I": "#x", + "D?FC": "#x", + "D?VM": lambda v: (v>>8) | ((v&0xff)<<8), + "D?VX": lambda v: (v>>8) | ((v&0xff)<<8), + "B0RM": lambda v: (v>>8) | ((v&0xff)<<8), + ##"BAAC": lambda v: ((v&0xff00)>>8) | ((v&0xff)<<8), +} + +smcep.write8("NTAP", 1) + +for i in range(count): + k = smcep.get_key_by_index(i) + if not any(fnmatch.fnmatchcase(k, i) for i in pats): + continue + if any(fnmatch.fnmatchcase('-' + k, i) for i in pats): + continue + length, type, flags = smcep.get_key_info(k) + if type in ("ch8*", "{jst"): + continue + if flags & 0x80: + try: + val = smcep.read_type(k, length, type) + fmt = None + for fk, fv in fmts.items(): + if fnmatch.fnmatchcase(k, fk): + fmt = fv + if fmt is None: + fmt = lambda a: ("%.02f" % a) if isinstance(a, float) else a + elif isinstance(fmt, str): + def ff(fmt): + return lambda a: f"{a:{fmt}}" + fmt = ff(fmt) + vals[k] = val, length, type, fmt + print(f"#{i}: {k} = ({type}, {flags:#x}) {fmt(val)}") + except SMCError as e: + print(f"#{i}: {k} = ({type}, {flags:#x}) <error {e}>") + else: + print(f"#{i}: {k} = ({type}, {flags:#x}) <not available>") + +slots = {} + +def poll(): + global cnt + reprint = cnt % 10 == 0 + changed = set() + for k, (oval, length, type, fmt) in vals.items(): + val = smcep.read_type(k, length, type) + if val != oval: + if k not in slots: + reprint = True + slots[k] = fmt(val) + changed.add(k) + vals[k] = val, length, type, fmt + if reprint: + print("\x1b[1;4m", end="") + for k, v in slots.items(): + wd = len(f"{v:>8}") + print(f"{k:>{wd}s}", end=" ") + print("\x1b[m") + for k, v in slots.items(): + if k in changed: + print("\x1b[32m", end="") + print(f"{v:>8}\x1b[m", end=" ") + print() + cnt += 1 + time.sleep(1) + +def handle_sigint(signal=None, stack=None): + global doshell + doshell = True + +signal.signal(signal.SIGINT, handle_sigint) + +doshell = False +try: + cnt = 0 + while True: + poll() + if doshell: + run_shell(globals(), msg="Interrupted") + 
doshell = False +finally: + smc.stop() + diff --git a/tools/proxyclient/experiments/speaker_amp.py b/tools/proxyclient/experiments/speaker_amp.py new file mode 100755 index 0000000..7a592c3 --- /dev/null +++ b/tools/proxyclient/experiments/speaker_amp.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +import sys, pathlib +import time +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +# speaker_amp.py -- play audio through the embedded speaker on Mac mini +# +# sample usage with sox: +# +# sox INPUT_FILE -t raw -r 48000 -c 1 -e signed-int -b 32 -L - gain -63 | python3 ./speaker_amp.py +# +# (expects mono, 24-bit signed samples padded to 32 bits on the msb side) + +import argparse +from m1n1.setup import * +from m1n1.hw.dart import DART, DARTRegs +from m1n1.hw.i2c import I2C +from m1n1.hw.pmgr import PMGR +from m1n1.hw.nco import NCO +from m1n1.hw.admac import * +from m1n1.hw.mca import * + +argparser = argparse.ArgumentParser() +argparser.add_argument("-f", "--file", "--input", "--samples", + type=str, default=None, + help='input filename to take samples from ' \ + '(default: standard input)') +argparser.add_argument("-b", "--bufsize", type=int, default=1024*32, + help='size of buffers to keep submitting to DMA') +args = argparser.parse_args() + +inputf = open(args.file, "rb") if args.file is not None \ + else sys.stdin.buffer + +p.pmgr_adt_clocks_enable("/arm-io/i2c1") +p.pmgr_adt_clocks_enable("/arm-io/admac-sio") +p.pmgr_adt_clocks_enable("/arm-io/dart-sio") +p.pmgr_adt_clocks_enable("/arm-io/mca-switch") + +# reset AUDIO_P +PS_AUDIO_P = PMGR(u).regs[0].PS4[5] +PS_AUDIO_P.set(DEV_DISABLE=1) +PS_AUDIO_P.set(RESET=1) +PS_AUDIO_P.set(RESET=0) +PS_AUDIO_P.set(DEV_DISABLE=0) + +i2c1 = I2C(u, "/arm-io/i2c1") + +dart_base, _ = u.adt["/arm-io/dart-sio"].get_reg(0) # stream index 2 +dart = DART(iface, DARTRegs(u, dart_base), util=u) +dart.initialize() + +cl_no = 0 + +admac = ADMAC(u, "/arm-io/admac-sio", dart, debug=True) 
+tx_chan = admac.chans[4*cl_no] + +tx_chan.disable() +tx_chan.reset() +tx_chan.read_reports() # read stale reports +tx_chan.buswidth = E_BUSWIDTH.W_32BIT +tx_chan.framesize = E_FRAME.F_1_WORD + +nco = NCO(u, "/arm-io/nco") +nco[cl_no].set_rate(48000 * 256) +nco[cl_no].enable() + +mca_switch1_base = u.adt["/arm-io/mca-switch"].get_reg(1)[0] +mca_cl_base = u.adt["/arm-io/mca-switch"].get_reg(0)[0] + 0x4000*cl_no +cl = MCACluster(u, mca_cl_base) + +regs, serdes = cl.regs, cl.txa + +regs.SYNCGEN_STATUS.set(RST=1, EN=0) +regs.SYNCGEN_STATUS.set(RST=0) +regs.SYNCGEN_MCLK_SEL.val =(1 + cl_no) +regs.SYNCGEN_HI_PERIOD.val = 0 +regs.SYNCGEN_LO_PERIOD.val = 0xfe # full period minus two + +serdes.STATUS.set(EN=0) +serdes.CONF.set( + NSLOTS=0, + SLOT_WIDTH=E_SLOT_WIDTH.W_32BIT, + BCLK_POL=1, + UNK1=1, UNK2=1, + IDLE_UNDRIVEN=1, + SYNC_SEL=(1 + cl_no) +) +serdes.BITDELAY.val = 0 + +serdes.CHANMASK[0].val = 0xffff_fffe +serdes.CHANMASK[1].val = 0xffff_fffe + +regs.PORT_ENABLES.set(CLOCK1=1, CLOCK2=1, DATA=1) +regs.PORT_CLK_SEL.set(SEL=(cl_no + 1)) +regs.PORT_DATA_SEL.val = cl_no + 1 +regs.MCLK_STATUS.set(EN=1) +regs.SYNCGEN_STATUS.set(EN=1) + +p.write32(mca_switch1_base + 0x8000*cl_no, 0x102048) + +# toggle the GPIO line driving the speaker-amp IC reset +p.write32(0x23c1002d4, 0x76a02) # invoke reset +p.write32(0x23c1002d4, 0x76a03) # take out of reset + +tx_chan.submit(inputf.read(args.bufsize)) +tx_chan.enable() +while tx_chan.can_submit(): + tx_chan.submit(inputf.read(args.bufsize)) + +serdes.STATUS.set(EN=1) + +# by ADT and leaked schematic, i2c1 contains TAS5770L, +# which is not a public part. but there's e.g. TAS2770 +# with similar registers +# +# https://www.ti.com/product/TAS2770 +# +# if the speaker-amp IC loses clock on the serial sample input, +# it automatically switches to software shutdown. 
+# + +i2c1.write_reg(0x31, 0x08, [0x40]) +i2c1.write_reg(0x31, 0x0a, [0x06, 0x00, 0x1a]) +i2c1.write_reg(0x31, 0x1b, [0x01, 0x82, 0x06]) +i2c1.write_reg(0x31, 0x16, [0x50, 0x04]) +i2c1.write_reg(0x31, 0x0d, [0x00]) +#i2c1.write_reg(0x31, 0x03, [0x14]) + +# amplifier gain, presumably this is the lowest setting +i2c1.write_reg(0x31, 0x03, [0x0]) + +# take the IC out of software shutdown +i2c1.write_reg(0x31, 0x02, [0x0c]) + +while (buf := inputf.read(args.bufsize)): + while not tx_chan.can_submit(): + tx_chan.poll() + tx_chan.submit(buf) + +# mute +i2c1.write_reg(0x31, 0x02, [0x0d]) + +# software shutdown +i2c1.write_reg(0x31, 0x02, [0x0e]) + +tx_chan.disable() diff --git a/tools/proxyclient/experiments/spi.py b/tools/proxyclient/experiments/spi.py new file mode 100644 index 0000000..82d9528 --- /dev/null +++ b/tools/proxyclient/experiments/spi.py @@ -0,0 +1,182 @@ +import sys, pathlib +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +from m1n1.setup import * +from m1n1 import asm +from m1n1.shell import run_shell +from m1n1.gpiola import GPIOLogicAnalyzer +from m1n1.hw.spi import * + +p.smp_start_secondaries() + +p.set32(0x28e580208, 1<<31) +p.clear32(0x28e580208, 1<<31) + +spi = u.adt["arm-io/spi3"].get_reg(0)[0] +regs = SPIRegs(u, spi) + +mon.add(spi, 0x10) +mon.add(spi + 0x30, 0x10) +mon.add(spi + 0x40, 0x400) + +aic = u.adt["arm-io/aic"].get_reg(0)[0] +mon.add(aic + 0x6800 + (1109 // 32) * 4, 4) + +gpio = u.adt["arm-io/gpio0"].get_reg(0)[0] + +mon.add(gpio, 0x1c8) +mon.add(gpio+0x1e0, 0x300) + +mon.poll() + +m = GPIOLogicAnalyzer(u, "arm-io/gpio0", + pins={"miso": 0x34, "mosi": 0x35, "clk": 0x36, "cs": 0x37}, + #pins={"miso": 0xa, "mosi": 0xb, "clk": 0x20, "cs": 0x21}, + #pins={"clk": 46, "mosi": 47, "miso": 48, "cs": 49}, + div=1, on_pin_change=False) + +#p.write32(spi + 0x100, 0xffffffff) + +regs.CTRL.val = 0xc +regs.PIN.val = 0x2 +regs.CONFIG.val = 0x20 | (1<<15) | 6 +regs.CONFIG.val = 0x20 | (1<<15) | 4 +regs.CONFIG.val = 0x20 | (1<<15) | 
2 +regs.CONFIG.val = 0x20 | (3<<15) | 0 + +def try_all_bits(): + for i in range(0, 0x200, 4): + v = p.read32(spi + i) + for j in range(32): + p.write32(spi + i, v ^ (1<<j)) + print(f"{i:4x}:{v:8x}:{j:2d} FIFO level:", regs.FIFO_LEVEL.reg.LEVEL_TX) + mon.poll() + p.write32(spi + i, v) + + +m.regs = { + "CTRL": (spi + 0x00, R_CTRL), + "STATUS": (spi + 0x08, R_STATUS), + "RXCNT": (spi + 0x34), + "TXCNT": (spi + 0x4c), + "FIFO_STAT": (spi + 0x10c, R_FIFO_STAT), + "ISTATUS1": (spi + 0x134, R_ISTATUS1), + "ISTATUS2": (spi + 0x13c, R_ISTATUS2), + "XFSTATUS": (spi + 0x1c0), + "SHIFTCONFIG": (spi + 0x150), + "PINCONFIG": (spi + 0x154), + "PIN": (spi + 0xc), + "3c": (spi + 0x3c), + "DIVSTATUS": (spi + 0x1e0, R_DIVSTATUS) +} + +m.regs = {} + +m.start(300000, bufsize=0x80000) + + +regs.STATUS.val = 0xffffffff +regs.ISTATUS1.val = 0xffffffff +regs.ISTATUS2.val = 0xffffffff + +regs.CLKDIV.val = 0xfff +regs.INTER_DLY.val = 0x1000 + +regs.SHIFTCONFIG.val = 0x20fcf7 + +regs.PIN.val = 0x2 +print("pinconfig", hex(regs.PINCONFIG.val)) +regs.PINCONFIG.val = 0x100 +#regs.PINCONFIG.val = 0x2-7 +print("pinconfig", hex(regs.PINCONFIG.val)) +print("shiftconfig", hex(regs.SHIFTCONFIG.val)) + +#regs.PIN.val = 0x0 +#regs.PIN.val = 0x2 +# auto_cs OR pin_cs + +#p.write32(spi + 0x150, 0x80c07) +#p.write32(spi + 0x150, 0x88c07) +print(hex(p.read32(spi + 0x150))) + +#p.write32(spi + 0x160, 0) +p.write32(spi + 0x160, 0xfff0020) +p.write32(spi + 0x168, 0xffffb20) +#p.write32(spi + 0x164, 0x06000210) +#p.write32(spi + 0x180, 0x02000000) +#p.write32(spi + 0x18c, 0x500) +#regs.INTER_DLY2 = 0x20000001 + +p.write32(spi + 0x200, 0x0010) + +p.write32(spi + 0x3c, 0xffffffff) + +regs.PINCONFIG.val = 0x002 +regs.PINCONFIG.val = 0x200 + + +#p.write32(0x28e0380bc, 0x80100000) +#p.write32(0x28e0380c4, 0x80100000) + +data = b"Asahi Linux" + +for i in range(2): + for j in data: + regs.TXDATA.val = j + regs.RXCNT.val = len(data) + regs.TXCNT.val = len(data) + + regs.STATUS.val = 0xffffffff + regs.ISTATUS1.val = 
0xffffffff + regs.ISTATUS2.val = 0xffffffff + + regs.PIN.val = 0x0 + regs.CTRL.val = 0x1 + #regs.TXDATA.val = 0xff + #regs.TXDATA.val = 0xff + + i = 0 + while regs.TXCNT.val != 0: + print(f"{regs.TXCNT.val:#x} {regs.FIFO_STAT.reg} {regs.STATUS.val:#x} {regs.ISTATUS2.val:#x} {p.read32(spi + 0x134):#x}") + regs.STATUS.val = 0xffffffff + regs.ISTATUS1.val = 0xffffffff + regs.ISTATUS2.val = 0xffffffff + #regs.CTRL.val = 0x0 + #time.sleep(0.1) + #regs.CTRL.val = 0x1[ + print(hex(i)) + #p.write32(spi + i, 0xffffffff) + #p.write32(spi + i, 0) + i += 4 + if i > 0x100: + break + time.sleep(0.001) + print(f"{regs.RXCNT.val:#x} {regs.FIFO_STAT.reg} {regs.STATUS.val:#x} {regs.ISTATUS2.val:#x}") + regs.STATUS.val = 0xffffffff + regs.ISTATUS1.val = 0xffffffff + regs.ISTATUS2.val = 0xffffffff + + mon.poll() + + while regs.FIFO_STAT.reg.LEVEL_RX: + print("RX", hex(regs.RXDATA.val)) + + regs.CTRL.val = 0 + +m.complete() +m.show() + +def poll(count=1000): + lval = None + for i in range(count): + pins = 0x35, 0x36, 0x37 + vals = [p.read32(gpio + 4 * pin) & 1 for pin in pins] + if vals != lval: + print(f"{i:6d}: {vals}") + lval = vals + +mon.poll() + +#run_shell(globals(), msg="Have fun!") + + diff --git a/tools/proxyclient/experiments/sprr_test_permissions.py b/tools/proxyclient/experiments/sprr_test_permissions.py new file mode 100755 index 0000000..9dff141 --- /dev/null +++ b/tools/proxyclient/experiments/sprr_test_permissions.py @@ -0,0 +1,366 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +import sys, pathlib +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +from contextlib import contextmanager + +from m1n1.setup import * +from m1n1.find_regs import * +from m1n1 import asm + +p.smp_start_secondaries() + +class ARMPageTable: + PAGESIZE = 0x4000 + + def __init__(self, memalign, free): + self.memalign = memalign + self.free = free + + self.l0 = self.memalign(self.PAGESIZE, self.PAGESIZE) + self.l1 = [self.memalign(self.PAGESIZE, self.PAGESIZE), 
self.memalign( + self.PAGESIZE, self.PAGESIZE)] + self.l2 = {} + + p.write64(self.l0, self.make_table_pte(self.l1[0])) + p.write64(self.l0+8, self.make_table_pte(self.l1[1])) + + def make_table_pte(self, addr): + # table mapping, access bit set + return addr | 0b11 | (1 << 10) + + def map_page(self, vaddr, paddr, access_bits): + ap = (access_bits & 0b1100) >> 2 + pxn = (access_bits & 0b0010) >> 1 + uxn = (access_bits & 0b0001) + + # block mapping, access bit set + pte = paddr | 0b01 | (1 << 10) + + # move access bits in place + pte |= ap << 6 + pte |= pxn << 54 + pte |= uxn << 53 + + l0_idx = (vaddr >> (25+11+11)) & 1 + l1_idx = (vaddr >> (25+11)) & 0x7ff + l2_idx = (vaddr >> 25) & 0x7ff + + tbl = self.l2.get((l0_idx, l1_idx), None) + if not tbl: + tbl = self.memalign(self.PAGESIZE, self.PAGESIZE) + self.l2[(l0_idx, l1_idx)] = tbl + p.write64(self.l1[l0_idx] + 8*l1_idx, self.make_table_pte(tbl)) + + p.write64(tbl + 8*l2_idx, pte) + + def map(self, vaddr, paddr, sz, access_bits): + assert sz % 0x2000000 == 0 + assert vaddr % 0x2000000 == 0 + assert paddr % 0x2000000 == 0 + assert access_bits <= 0b1111 + + while sz > 0: + self.map_page(vaddr, paddr, access_bits) + sz -= 0x2000000 + vaddr += 0x2000000 + paddr += 0x2000000 + + +def build_and_write_code(heap, code): + page = heap.memalign(0x4000, 0x4000) + compiled = asm.ARMAsm(code, page).data + iface.writemem(page, compiled) + p.dc_cvau(page, len(compiled)) + p.ic_ivau(page, len(compiled)) + return page + + +def setup_exception_vectors(heap, gxf=False): + if gxf: + elr = "S3_6_C15_C10_6" + eret = ".long 0x201400" + indicator = 0xf2 + else: + elr = "ELR_EL1" + eret = "eret" + indicator = 0xf0 + + return build_and_write_code(heap, """ + .rept 16 + b 1f + .align 7 + .endr + + 1: + // store that we failed + mov x10, 0x{indicator:x} + + // move PC two instruction further and clear 0xf0 0000 0000 to + // make sure we end up in the r-x mapping either way and don't + // repeat the instruction that just faulted + // we skip 
the second instruction since that one is used to + // indicate success + ldr x11, =0x0fffffffff + mrs x12, {elr} + add x12, x12, #8 + and x12, x12, x11 + msr {elr}, x12 + + isb + {eret} + """.format(eret=eret, elr=elr, indicator=indicator)) + + +print("Setting up..") +pagetable = ARMPageTable(u.memalign, u.free) +pagetable.map(0x800000000, 0x800000000, 0xc00000000, 0) +pagetable.map(0xf800000000, 0x800000000, 0xc00000000, 1) + +el2_vectors = setup_exception_vectors(u.heap, gxf=False) +gl2_vectors = setup_exception_vectors(u.heap, gxf=True) + +probe_page = build_and_write_code(u.heap, "mov x10, 0x80\nret\nret\nret\n") +probe_page_vaddr = probe_page | 0xf000000000 + +code_page = build_and_write_code(u.heap, """ + #define SPRR_PERM_EL0 S3_6_C15_C1_5 + #define SPRR_PERM_EL1 S3_6_C15_C1_6 + #define SPRR_PERM_EL2 S3_6_C15_C1_7 + + #define GXF_CONFIG_EL2 s3_6_c15_c1_2 + #define GXF_ENTER_EL2 s3_6_c15_c8_1 + #define GXF_ABORT_EL2 s3_6_c15_c8_2 + + #define MPIDR_GL2 S3_6_C15_C10_1 + #define VBAR_GL2 S3_6_C15_C10_2 + #define SPSR_GL2 S3_6_C15_C10_3 + #define ELR_GL2 S3_6_C15_C10_6 + #define FAR_GL2 S3_6_C15_C10_7 + + #define genter .long 0x00201420 + #define gexit .long 0x201400 + + // just store everything since i'm too lazy to think about + // register assignments + str x30, [sp, #-16]! + stp x28, x29, [sp, #-16]! + stp x26, x27, [sp, #-16]! + stp x24, x25, [sp, #-16]! + stp x22, x23, [sp, #-16]! + stp x20, x21, [sp, #-16]! + stp x18, x19, [sp, #-16]! + stp x16, x17, [sp, #-16]! + stp x14, x15, [sp, #-16]! + stp x12, x13, [sp, #-16]! + stp x10, x11, [sp, #-16]! + stp x8, x9, [sp, #-16]! + stp x6, x7, [sp, #-16]! + stp x4, x5, [sp, #-16]! + stp x2, x3, [sp, #-16]! + stp x0, x1, [sp, #-16]! 
+ + mov x20, x0 // store SPRR value for later + mov x21, 0 // clear result + + // setup exception vectors + ldr x0, =0x{vectors:x} + msr VBAR_EL2, x0 + isb + + // prepare MMU + ldr x0, =0x0400ff + msr MAIR_EL1, x0 + ldr x0, =0x27510b510 + msr TCR_EL1, x0 + ldr x0, =0x{ttbr:x} + msr TTBR0_EL2, x0 + + // enable SPPR + mov x0, 1 + msr s3_6_c15_c1_0, x0 + msr s3_6_c15_c1_3, xzr + isb + + // clear all SPPR registers + // (note that reads from/writes to EL1 will be redirected to EL2 anyway) + ldr x0, =0 + msr SPRR_PERM_EL0, x0 + msr SPRR_PERM_EL1, x0 + msr SPRR_PERM_EL2, x0 + msr s3_6_c15_c1_3, x0 + + // setup SPPR_EL2 + msr SPRR_PERM_EL2, x20 + isb + dsb ishst + tlbi vmalle1is + dsb ish + isb + + + msr s3_6_c15_c1_3, xzr + isb + + // enable MMU + ldr x1, =0x1005 + mrs x0, SCTLR_EL1 + mov x3, x0 + orr x0, x0, x1 + msr SCTLR_EL1, x0 + isb + + // configure and enable GXF + mov x0, 1 + msr GXF_CONFIG_EL2, x0 + isb + ldr x0, =gxf_entry + msr GXF_ENTER_EL2, x0 + ldr x0, =gxf_abort + msr GXF_ABORT_EL2, x0 + isb + + // test GXF access + genter + + // test execute access + ldr x1, =0x{probe_page:x} + mov x10, 0 + blr x1 + lsl x21, x21, #8 + orr x21, x21, x10 + + // test read access + ldr x1, =0x{probe_page:x} + mov x10, 0 + ldr x1, [x1] + mov x10, 0x80 + lsl x21, x21, #8 + orr x21, x21, x10 + + // test write access + ldr x1, =0x{probe_page:x} + add x1, x1, 0x20 + mov x10, 0 + str x1, [x1] + mov x10, 0x80 + lsl x21, x21, #8 + orr x21, x21, x10 + + // disable MMU again + dsb ish + isb + msr SCTLR_EL1, x3 + isb + + mov x0, 0 + msr GXF_CONFIG_EL2, x0 + msr s3_6_c15_c1_0, x0 + + mov x0, x21 + + // restore everything except for x0 + add sp, sp, #8 + ldr x1, [sp], #8 + ldp x2, x3, [sp], #16 + ldp x4, x5, [sp], #16 + ldp x6, x7, [sp], #16 + ldp x8, x9, [sp], #16 + ldp x10, x11, [sp], #16 + ldp x12, x13, [sp], #16 + ldp x14, x15, [sp], #16 + ldp x16, x17, [sp], #16 + ldp x18, x19, [sp], #16 + ldp x20, x21, [sp], #16 + ldp x22, x23, [sp], #16 + ldp x24, x25, [sp], #16 + ldp x26, x27, 
[sp], #16 + ldp x28, x29, [sp], #16 + ldr x30, [sp], #16 + + ret + + + gxf_entry: + // setup GL exception vectors + ldr x0, =0x{gxf_vectors:x} + msr VBAR_GL2, x0 + isb + + // we might double fault -> store state here + mrs x14, S3_6_C15_C10_3 + mrs x15, S3_6_C15_C10_4 + mrs x16, S3_6_C15_C10_5 + mrs x17, ELR_GL2 + mrs x18, FAR_GL2 + + // test execute access + ldr x1, =0x{probe_page:x} + mov x10, 0 + blr x1 + lsl x21, x21, #8 + orr x21, x21, x10 + + // test read access + ldr x1, =0x{probe_page:x} + mov x10, 0 + ldr x1, [x1] + mov x10, 0x80 + lsl x21, x21, #8 + orr x21, x21, x10 + + // test write access + ldr x1, =0x{probe_page:x} + add x1, x1, #0x20 + mov x10, 0 + str x1, [x1] + mov x10, 0x80 + lsl x21, x21, #8 + orr x21, x21, x10 + + // restore state in case we faulted in here + msr S3_6_C15_C10_3, x14 + msr S3_6_C15_C10_4, x15 + msr S3_6_C15_C10_5, x16 + msr ELR_GL2, x17 + msr FAR_GL2, x18 + + isb + gexit + + gxf_abort: + // store that we failed + mov x10, 0xf1 + + // move PC two instruction further and clear 0xf0 0000 0000 to + // make sure we end up in the r-x mapping either way and don't + // repeat the instruction that just faulted + // we skip the second instruction since that one is used to + // indicate success + ldr x11, =0x0fffffffff + mrs x12, ELR_GL2 + add x12, x12, #8 + and x12, x12, x11 + msr ELR_GL2, x12 + + isb + gexit + """.format(ttbr=pagetable.l0, vectors=el2_vectors, probe_page=probe_page_vaddr, gxf_vectors=gl2_vectors)) + +print("Running code now...") +for i in range(0x10): + sprr_val = 0x5 | ((i & 0xf) << 4) + ret = p.smp_call_sync(1, code_page, sprr_val) + + glret = ret >> 24 + glx = 'x' if (glret >> 16) & 0xff == 0x80 else '-' + glr = 'r' if (glret >> 8) & 0xff == 0x80 else '-' + glw = 'w' if glret & 0xff == 0x80 else '-' + + x = 'x' if (ret >> 16) & 0xff == 0x80 else '-' + r = 'r' if (ret >> 8) & 0xff == 0x80 else '-' + w = 'w' if ret & 0xff == 0x80 else '-' + + print("SPRR: {0:04b} result: {1:x} GL: {2}{3}{4} EL: {5}{6}{7}".format( + i, 
ret, glr, glw, glx, r, w, x)) diff --git a/tools/proxyclient/experiments/timer_test.py b/tools/proxyclient/experiments/timer_test.py new file mode 100755 index 0000000..b3422c9 --- /dev/null +++ b/tools/proxyclient/experiments/timer_test.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +import sys, pathlib +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) + +from m1n1.setup import * + +HV_VTMR_CTL = (3, 5, 15, 1, 3) +HV_VTMR_CTL_VMASK = (1 << 0) +HV_VTMR_CTL_PMASK = (1 << 1) + +HV_VTMR_LIST = (3, 5, 15, 1, 2) + +TGE = (1<<27) + +u.msr(CNTHCTL_EL2, 3 << 10) # EL1PTEN | EL1PCTEN + +def run_test(ctl, tval): + u.inst(0xd5033fdf) # isb + + u.msr(ctl, 0) + u.msr(tval, int(freq * 0.8)) + u.msr(ctl, 1) + + for i in range(6): + p.nop() + time.sleep(0.2) + #u.inst(0xd5033fdf, call=p.el1_call) + print(" . (ISR_EL1=%d) CTL=%x VTMR_LIST=%x" % (u.mrs(ISR_EL1), u.mrs(ctl), u.mrs(HV_VTMR_LIST))) + + u.msr(ctl, 0) + +def test_hv_timers(): + u.msr(DAIF, 0x3c0) + print("Testing HV timers...") + print(" TGE = 1") + + u.msr(HCR_EL2, u.mrs(HCR_EL2) | TGE | (1 << 3) | (1 << 4)) + + print(" P:") + run_test(CNTP_CTL_EL0, CNTP_TVAL_EL0) + print(" V:") + run_test(CNTV_CTL_EL0, CNTV_TVAL_EL0) + +def test_guest_timers(): + u.msr(DAIF, 0) + print("Testing guest timers...") + + print(" TGE = 1, vGIC mode=0, timers unmasked") + u.msr(HCR_EL2, (u.mrs(HCR_EL2) | TGE) | (1 << 3) | (1 << 4)) + u.msr(HACR_EL2, 0) + u.msr(HV_VTMR_CTL, 3) + + print(" P:") + #run_test(CNTP_CTL_EL02, CNTP_TVAL_EL02) + print(" V:") + #run_test(CNTV_CTL_EL02, CNTV_TVAL_EL02) + + print(" TGE = 1, vGIC mode=0, timers masked") + u.msr(HV_VTMR_CTL, 0) + + print(" P:") + run_test(CNTP_CTL_EL02, CNTP_TVAL_EL02) + print(" V:") + run_test(CNTV_CTL_EL02, CNTV_TVAL_EL02) + + print(" TGE = 0, vGIC mode=0, timers unmasked") + u.msr(HCR_EL2, (u.mrs(HCR_EL2) & ~TGE) | (1 << 3) | (1 << 4)) + u.msr(HACR_EL2, 0) + u.msr(HV_VTMR_CTL, 3) + + print(" P:") + run_test(CNTP_CTL_EL02, CNTP_TVAL_EL02) + 
print(" V:") + run_test(CNTV_CTL_EL02, CNTV_TVAL_EL02) + + print(" TGE = 0, vGIC mode=0, timers masked") + u.msr(HV_VTMR_CTL, 0) + + print(" P:") + run_test(CNTP_CTL_EL02, CNTP_TVAL_EL02) + print(" V:") + run_test(CNTV_CTL_EL02, CNTV_TVAL_EL02) + + print(" TGE = 0, vGIC mode=1, timers unmasked") + u.msr(HCR_EL2, (u.mrs(HCR_EL2) & ~TGE) | (1 << 3) | (1 << 4)) + u.msr(HACR_EL2, 1<<20) + u.msr(HV_VTMR_CTL, 3) + + print(" P:") + run_test(CNTP_CTL_EL02, CNTP_TVAL_EL02) + print(" V:") + run_test(CNTV_CTL_EL02, CNTV_TVAL_EL02) + + print(" TGE = 0, vGIC mode=1, timers masked") + u.msr(HV_VTMR_CTL, 0) + + print(" P:") + run_test(CNTP_CTL_EL02, CNTP_TVAL_EL02) + print(" V:") + run_test(CNTV_CTL_EL02, CNTV_TVAL_EL02) + + return + +freq = u.mrs(CNTFRQ_EL0) +print("Timer freq: %d" % freq) + +test_hv_timers() +test_guest_timers() |
