diff options
Diffstat (limited to 'tools/proxyclient/m1n1')
103 files changed, 28566 insertions, 0 deletions
diff --git a/tools/proxyclient/m1n1/__init__.py b/tools/proxyclient/m1n1/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/tools/proxyclient/m1n1/__init__.py diff --git a/tools/proxyclient/m1n1/adt.py b/tools/proxyclient/m1n1/adt.py new file mode 100644 index 0000000..8cc7ebe --- /dev/null +++ b/tools/proxyclient/m1n1/adt.py @@ -0,0 +1,713 @@ +# SPDX-License-Identifier: MIT +import itertools, fnmatch, sys +from construct import * +import sys + +from .utils import AddrLookup, FourCC, SafeGreedyRange + +__all__ = ["load_adt"] + +ADTPropertyStruct = Struct( + "name" / PaddedString(32, "ascii"), + "size" / Int32ul, + "value" / Bytes(this.size & 0x7fffffff) +) + +ADTNodeStruct = Struct( + "property_count" / Int32ul, + "child_count" / Int32ul, + "properties" / Array(this.property_count, Aligned(4, ADTPropertyStruct)), + "children" / Array(this.child_count, LazyBound(lambda: ADTNodeStruct)) +) + +ADTStringList = SafeGreedyRange(CString("ascii")) + +ADT2Tuple = Array(2, Hex(Int64ul)) +ADT3Tuple = Array(3, Hex(Int64ul)) + +Function = Struct( + "phandle" / Int32ul, + "name" / FourCC, + "args" / SafeGreedyRange(Int32ul), +) + +STD_PROPERTIES = { + "cpu-impl-reg": ADT2Tuple, + "name": CString("ascii"), + "compatible": ADTStringList, + "model": CString("ascii"), + "#size-cells": Int32ul, + "#address-cells": Int32ul, + "clock-ids": SafeGreedyRange(Int32ul), + "clock-gates": SafeGreedyRange(Int32ul), + "power-gates": SafeGreedyRange(Int32ul), +} + +PMAPIORanges = SafeGreedyRange(Struct( + "addr" / Hex(Int64ul), + "size" / Hex(Int64ul), + "flags" / Hex(Int32ul), + "name" / FourCC, +)) + +PMGRPSRegs = SafeGreedyRange(Struct( + "reg" / Int32ul, + "offset" / Hex(Int32ul), + "mask" / Hex(Int32ul), +)) + +PMGRPerfRegs = SafeGreedyRange(Struct( + "reg" / Int32ul, + "offset" / Hex(Int32ul), + "size" / Hex(Int32ul), + "unk" / Int32ul, +)) + +PMGRPWRGateRegs = SafeGreedyRange(Struct( + "reg" / Int32ul, + "offset" / Hex(Int32ul), + "mask" / Hex(Int32ul), + "unk" / 
Hex(Int32ul), +)) + +PMGRDeviceFlags = BitStruct( + "b7" / Flag, + "b6" / Flag, + "perf" / Flag, + "no_ps" / Flag, + "critical" / Flag, + "b2" / Flag, + "notify_pmp" / Flag, + "on" / Flag, +) + +PMGRDevices = SafeGreedyRange(Struct( + "flags" / PMGRDeviceFlags, + "unk1_0" / Int8ul, + "unk1_1" / Int8ul, + "unk1_2" / Int8ul, + "parents" / Array(2, Int16ul), + "perf_idx" / Int8ul, + "perf_block" / Int8ul, + "psidx" / Int8ul, + "psreg" / Int8ul, + "unk2_0" / Int16ul, + "pd" / Int8ul, + "ps_cfg16" / Int8ul, + Const(0, Int32ul), + Const(0, Int32ul), + "unk2_3" / Int16ul, + "id" / Int16ul, + "unk3" / Int32ul, + "name" / PaddedString(16, "ascii") +)) + +PMGRClocks = SafeGreedyRange(Struct( + "perf_idx" / Int8ul, + "perf_block" / Int8ul, + "unk" / Int8ul, + "id" / Int8ul, + Const(0, Int32ul), + "name" / PaddedString(16, "ascii"), +)) + +PMGRPowerDomains = SafeGreedyRange(Struct( + "unk" / Const(0, Int8ul), + "perf_idx" / Int8ul, + "perf_block" / Int8ul, + "id" / Int8ul, + Const(0, Int32ul), + "name" / PaddedString(16, "ascii"), +)) + +PMGRDeviceBridges = SafeGreedyRange(Struct( + "idx" / Int32ub, + "subdevs" / HexDump(Bytes(0x48)), +)) + +PMGREvents = SafeGreedyRange(Struct( + "unk1" / Int8ul, + "unk2" / Int8ul, + "unk3" / Int8ul, + "id" / Int8ul, + "perf2_idx" / Int8ul, + "perf2_block" / Int8ul, + "perf_idx" / Int8ul, + "perf_block" / Int8ul, + "name" / PaddedString(16, "ascii"), +)) + +GPUPerfState = Struct( + "freq" / Int32ul, + "volt" / Int32ul, +) + +SpeakerConfig = Struct( + "rx_slot" / Int8ul, + "amp_gain" / Int8ul, + "vsense_slot" / Int8ul, + "isense_slot" / Int8ul, +) + +DCBlockerConfig = Struct( + "dc_blk0" / Hex(Int8ul), + "dc_blk1" / Hex(Int8ul), + "pad" / Hex(Int16ul), +) + +Coef = ExprAdapter(Int32ul, + lambda x, ctx: (x - ((x & 0x1000000) << 1)) / 65536, + lambda x, ctx: int(round(x * 65536)) & 0x1ffffff) + +MTRPolynomFuseAGX = GreedyRange(Struct( + "id" / Int32ul, + "data" / Prefixed(Int32ul, GreedyRange(Coef)), +)) + +DEV_PROPERTIES = { + "pmgr": { + "*": { 
+ "clusters": SafeGreedyRange(Int32ul), + "devices": PMGRDevices, + "ps-regs": PMGRPSRegs, + "perf-regs": PMGRPerfRegs, + "pwrgate-regs": PMGRPWRGateRegs, + "power-domains": PMGRPowerDomains, + "clocks": PMGRClocks, + "device-bridges": PMGRDeviceBridges, + "voltage-states*": SafeGreedyRange(Int32ul), + "events": PMGREvents, + "mtr-polynom-fuse-agx": MTRPolynomFuseAGX, + } + }, + "clpc": { + "*": { + "events": SafeGreedyRange(Int32ul), + "devices": SafeGreedyRange(Int32ul), + } + }, + "soc-tuner": { + "*": { + "device-set-*": SafeGreedyRange(Int32ul), + "mcc-configs": SafeGreedyRange(Int32ul), + } + }, + "mcc": { + "*": { + "dramcfg-data": SafeGreedyRange(Int32ul), + "config-data": SafeGreedyRange(Int32ul), + } + }, + "*pmu*": { + "*": { + "info-*name*": CString("ascii"), + "info-*": SafeGreedyRange(Hex(Int32ul)), + }, + }, + "stockholm-spmi": { + "*": { + "required-functions": ADTStringList, + }, + }, + "sgx": { + "*": { + "perf-states*": SafeGreedyRange(GPUPerfState), + "*-kp": Float32l, + "*-ki": Float32l, + "*-ki-*": Float32l, + "*-gain*": Float32l, + "*-scale*": Float32l, + } + }, + "arm-io": { + "*": { + "clock-frequencies": SafeGreedyRange(Int32ul), + "clock-frequencies-regs": SafeGreedyRange(Hex(Int64ul)), + "clock-frequencies-nclk": SafeGreedyRange(Int32ul), + }, + }, + "defaults": { + "*": { + "pmap-io-ranges": PMAPIORanges, + } + }, + "audio-*": { + "*": { + "speaker-config": SafeGreedyRange(SpeakerConfig), + "amp-dcblocker-config": DCBlockerConfig, + }, + }, + "*aop-audio*": { + "*": { + "clockSource": FourCC, + "identifier": FourCC, + }, + }, + "*alc?/audio-leap-mic*": { + "*": { + "audio-stream-formatter": FourCC, + } + } +} + +def parse_prop(node, path, node_name, name, v, is_template=False): + t = None + + if is_template: + t = CString("ascii") + + dev_props = None + for k, pt in DEV_PROPERTIES.items(): + if fnmatch.fnmatch(path, k): + dev_props = pt + break + + if not dev_props: + for k, pt in DEV_PROPERTIES.items(): + if fnmatch.fnmatch(node_name, 
k): + dev_props = pt + break + + possible_match = False + if dev_props: + for compat_match, cprops in dev_props.items(): + for k, pt in cprops.items(): + if fnmatch.fnmatch(name, k): + possible_match = True + break + + if possible_match: + try: + compat = node.compatible[0] + except AttributeError: + compat = "" + + for compat_match, cprops in dev_props.items(): + if fnmatch.fnmatch(compat, compat_match): + for k, pt in cprops.items(): + if fnmatch.fnmatch(name, k): + t = pt + break + else: + continue + break + + if v == b'' or v is None: + return None, None + + if name.startswith("function-"): + if len(v) == 4: + t = FourCC + else: + t = Function + + if name == "reg" and path != "/device-tree/memory": + n = node._parent + while n is not None and n._parent is not None: + if "ranges" not in n._properties: + break + n = n._parent + else: + rs = node._reg_struct + if len(v) % rs.sizeof() == 0: + t = SafeGreedyRange(rs) + + elif name == "ranges": + try: + ac, sc = node.address_cells, node.size_cells + except AttributeError: + return None, v + pac, _ = node._parent.address_cells, node._parent.size_cells + at = Hex(Int64ul) if ac == 2 else Array(ac, Hex(Int32ul)) + pat = Hex(Int64ul) if pac == 2 else Array(pac, Hex(Int32ul)) + st = Hex(Int64ul) if sc == 2 else Array(sc, Hex(Int32ul)) + t = SafeGreedyRange(Struct("bus_addr" / at, "parent_addr" / pat, "size" / st)) + + elif name == "interrupts": + # parse "interrupts" as Array of Int32ul, wrong for nodes whose + # "interrupt-parent" has "interrupt-cells" = 2 + # parsing this correctly would require a second pass + t = Array(len(v) // 4, Int32ul) + + if t is not None: + v = Sequence(t, Terminated).parse(v)[0] + return t, v + + if name in STD_PROPERTIES: + t = STD_PROPERTIES[name] + elif v and v[-1] == 0 and all(0x20 <= i <= 0x7e for i in v[:-1]): + t = CString("ascii") + elif len(v) == 4: + t = Int32ul + elif len(v) == 8: + t = Int64ul + elif len(v) == 16 and all(v[i] == 0 for i in (6, 7, 14, 15)): + t = ADT2Tuple + + if t 
is not None: + try: + v = Sequence(t, Terminated).parse(v)[0] + except: + print("Failed to parse:", path, name, v.hex()) + raise + + return t, v + +def build_prop(path, name, v, t=None): + if v is None: + return b'' + if t is not None: + return t.build(v) + + if isinstance(v, bytes): + return v + + if name in STD_PROPERTIES: + t = STD_PROPERTIES[name] + elif isinstance(v, str): + t = CString("ascii") + elif isinstance(v, int): + if v > 0xffffffff: + t = Int64ul + else: + t = Int32ul + elif isinstance(v, float): + t = Float32l + elif isinstance(v, tuple) and all(isinstance(i, int) for i in v): + t = Array(len(v), Int32ul) + + return t.build(v) + +class ADTNode: + def __init__(self, val=None, path="/", parent=None): + self._children = [] + self._properties = {} + self._types = {} + self._parent_path = path + self._parent = parent + + if val is not None: + for p in val.properties: + if p.name == "name": + _name = p.value.decode("ascii").rstrip("\0") + break + else: + raise ValueError(f"Node in {path} has no name!") + + path = self._parent_path + _name + + for p in val.properties: + is_template = bool(p.size & 0x80000000) + try: + t, v = parse_prop(self, path, _name, p.name, p.value, is_template) + self._types[p.name] = t, is_template + self._properties[p.name] = v + except Exception as e: + print(f"Exception parsing {path}.{p.name} value {p.value.hex()}:", file=sys.stderr) + raise + + # Second pass + for k, (t, is_template) in self._types.items(): + if t is None: + t, v = parse_prop(self, path, _name, k, self._properties[k], is_template) + self._types[k] = t, is_template + self._properties[k] = v + + for c in val.children: + node = ADTNode(c, f"{self._path}/", parent=self) + self._children.append(node) + + @property + def _path(self): + return self._parent_path + self.name + + def __getitem__(self, item): + if isinstance(item, str): + while item.startswith("/"): + item = item[1:] + if "/" in item: + a, b = item.split("/", 1) + return self[a][b] + for i in 
self._children: + if i.name == item: + return i + raise KeyError(f"Child node '{item}' not found") + return self._children[item] + + def __setitem__(self, item, value): + if isinstance(item, str): + while item.startswith("/"): + item = item[1:] + if "/" in item: + a, b = item.split("/", 1) + self[a][b] = value + return + for i, c in enumerate(self._children): + if c.name == item: + self._children[i] = value + break + else: + self._children.append(value) + else: + self._children[item] = value + + def __delitem__(self, item): + if isinstance(item, str): + while item.startswith("/"): + item = item[1:] + if "/" in item: + a, b = item.split("/", 1) + del self[a][b] + return + for i, c in enumerate(self._children): + if c.name == item: + del self._children[i] + return + raise KeyError(f"Child node '{item}' not found") + + del self._children[item] + + def __contains__(self, item): + if isinstance(item, str): + while item.startswith("/"): + item = item[1:] + if "/" in item: + a, b = item.split("/", 1) + return b in self[a] + for c in self._children: + if c.name == item: + return True + return False + + return item in self._children + + def __getattr__(self, attr): + attr = attr.replace("_", "-") + attr = attr.replace("--", "_") + if attr in self._properties: + return self._properties[attr] + raise AttributeError(attr) + + def __setattr__(self, attr, value): + if attr[0] == "_": + self.__dict__[attr] = value + return + attr = attr.replace("_", "-") + attr = attr.replace("--", "_") + self._properties[attr] = value + + def __delattr__(self, attr): + if attr[0] == "_": + del self.__dict__[attr] + return + del self._properties[attr] + + def getprop(self, name, default=None): + return self._properties.get(name, default) + + @property + def address_cells(self): + try: + return self._properties["#address-cells"] + except KeyError: + raise AttributeError("#address-cells") + + @property + def size_cells(self): + try: + return self._properties["#size-cells"] + except KeyError: + 
raise AttributeError("#size-cells") + + @property + def interrupt_cells(self): + try: + return self._properties["#interrupt-cells"] + except KeyError: + raise AttributeError("#interrupt-cells") + + def _fmt_prop(self, k, v): + t, is_template = self._types.get(k, (None, False)) + if is_template: + return f"<< {v} >>" + elif isinstance(v, ListContainer): + return f"[{', '.join(self._fmt_prop(k, i) for i in v)}]" + elif isinstance(v, bytes): + if all(i == 0 for i in v): + return f"zeroes({len(v):#x})" + else: + return v.hex() + elif k.startswith("function-"): + if isinstance(v, str): + return f"{v}()" + elif v is None: + return f"None" + else: + args = [] + for arg in v.args: + b = arg.to_bytes(4, "big") + is_ascii = all(0x20 <= c <= 0x7e for c in b) + args.append(f"{arg:#x}" if not is_ascii else f"'{b.decode('ascii')}'") + return f"{v.phandle}:{v.name}({', '.join(args)})" + name.startswith("function-") + else: + return str(v) + + def __str__(self, t=""): + return "\n".join([ + t + f"{self.name} {{", + *(t + f" {k} = {self._fmt_prop(k, v)}" for k, v in self._properties.items() if k != "name"), + "", + *(i.__str__(t + " ") for i in self._children), + t + "}" + ]) + + def __repr__(self): + return f"<ADTNode {self.name}>" + + def __iter__(self): + return iter(self._children) + + @property + def _reg_struct(self): + ac, sc = self._parent.address_cells, self._parent.size_cells + return Struct( + "addr" / Hex(Int64ul) if ac == 2 else Array(ac, Hex(Int32ul)), + "size" / Hex(Int64ul) if sc == 2 else Array(sc, Hex(Int32ul)) + ) + + def get_reg(self, idx): + reg = self.reg[idx] + addr = reg.addr + size = reg.size + + return self._parent.translate(addr), size + + def translate(self, addr): + node = self + while node is not None: + if "ranges" not in node._properties: + break + for r in node.ranges: + ba = r.bus_addr + # PCIe special case, because Apple really broke + # the spec here with their little endian antics + if isinstance(ba, list) and len(ba) == 3: + ba = (ba[0] << 64) 
| (ba[2] << 32) | ba[1] + if ba <= addr < (ba + r.size): + addr = addr - ba + r.parent_addr + break + node = node._parent + + return addr + + def to_bus_addr(self, addr): + node = self._parent + + descend = [] + while node is not None: + if "ranges" not in node._properties: + break + descend.append(node) + node = node._parent + + for node in reversed(descend): + for r in node.ranges: + if r.parent_addr <= addr < (r.parent_addr + r.size): + addr = addr - r.parent_addr + r.bus_addr + break + return addr + + def tostruct(self): + properties = [] + for k,v in itertools.chain(self._properties.items()): + t, is_template = self._types.get(k, (None, False)) + value = build_prop(self._path, k, v, t=t) + properties.append({ + "name": k, + "size": len(value) | (0x80000000 if is_template else 0), + "value": value + }) + + data = { + "property_count": len(self._properties), + "child_count": len(self._children), + "properties": properties, + "children": [c.tostruct() for c in self._children] + } + return data + + def build(self): + return ADTNodeStruct.build(self.tostruct()) + + def walk_tree(self): + yield self + for child in self: + yield from child + + def build_addr_lookup(self): + lookup = AddrLookup() + for node in self.walk_tree(): + reg = getattr(node, 'reg', None) + if not isinstance(reg, list): + continue + + for index in range(len(reg)): + try: + addr, size = node.get_reg(index) + except AttributeError: + continue + if size == 0: + continue + lookup.add(range(addr, addr + size), node.name + f"[{index}]") + + return lookup + + def create_node(self, name): + while name.startswith("/"): + name = name[1:] + if "/" in name: + a, b = name.split("/", 1) + return self[a].create_node(b) + + node = ADTNode(path=self._path + "/", parent=self) + node.name = name + node._types["reg"] = (SafeGreedyRange(node._reg_struct), False) + self[name] = node + return node + +def load_adt(data): + return ADTNode(ADTNodeStruct.parse(data)) + +if __name__ == "__main__": + import sys, argparse, 
pathlib + + parser = argparse.ArgumentParser(description='ADT test for m1n1') + parser.add_argument('input', type=pathlib.Path) + parser.add_argument('output', nargs='?', type=pathlib.Path) + parser.add_argument('-r', '--retrieve', help='retrieve and store the adt from m1n1', action='store_true') + parser.add_argument('-a', '--dump-addr', help='dump address lookup table', action='store_true') + args = parser.parse_args() + + if args.retrieve: + if args.input.exists(): + print('Error "{}" exists!'.format(args.input)) + sys.exit() + + from .setup import * + adt_data = u.get_adt() + args.input.write_bytes(adt_data) + else: + adt_data = args.input.read_bytes() + + adt = load_adt(adt_data) + print(adt) + new_data = adt.build() + if args.output is not None: + args.output.write_bytes(new_data) + assert new_data == adt_data[:len(new_data)] + assert adt_data[len(new_data):] == bytes(len(adt_data) - len(new_data)) + + if args.dump_addr: + print("Address lookup table:") + print(adt.build_addr_lookup()) diff --git a/tools/proxyclient/m1n1/agx/__init__.py b/tools/proxyclient/m1n1/agx/__init__.py new file mode 100644 index 0000000..26368ce --- /dev/null +++ b/tools/proxyclient/m1n1/agx/__init__.py @@ -0,0 +1,343 @@ +# SPDX-License-Identifier: MIT +import bisect, time + +from .object import GPUObject, GPUAllocator +from .initdata import build_initdata +from .channels import * +from .event import GPUEventManager +from ..proxy import IODEV +from ..malloc import Heap +from ..hw.uat import UAT, MemoryAttr +from ..hw.agx import * +from ..fw.agx import AGXASC +from ..fw.agx.channels import ChannelInfoSet, ChannelInfo + +class AGXChannels: + pass + +class AGXQueue: + pass + +class AGX: + PAGE_SIZE = 0x4000 + MAX_EVENTS = 128 + + def __init__(self, u): + self.start_time = time.time() + self.u = u + self.p = u.proxy + + self.iface = u.iface + self.show_stats = False + + self.asc_dev = u.adt["/arm-io/gfx-asc"] + self.sgx_dev = u.adt["/arm-io/sgx"] + self.sgx = SGXRegs(u, 
self.sgx_dev.get_reg(0)[0]) + + self.log("Initializing allocations") + + self.aic_base = u.adt["/arm-io/aic"].get_reg(0)[0] + + self.all_objects = {} + self.tracked_objects = {} + + # Memory areas + self.fw_va_base = self.sgx_dev.rtkit_private_vm_region_base + self.fw_va_size = self.sgx_dev.rtkit_private_vm_region_size + self.kern_va_base = self.fw_va_base + self.fw_va_size + + # Set up UAT + self.uat = UAT(self.u.iface, self.u) + + # Allocator for RTKit/ASC objects + self.uat.allocator = Heap(self.kern_va_base + 0x80000000, + self.kern_va_base + 0x81000000, + self.PAGE_SIZE) + + self.asc = AGXASC(self.u, self.asc_dev.get_reg(0)[0], self, self.uat) + self.asc.verbose = 0 + self.asc.mgmt.verbose = 0 + + self.kobj = GPUAllocator(self, "kernel", + self.kern_va_base, 0x10000000, + AttrIndex=MemoryAttr.Shared, AP=1, guard_pages=4) + self.cmdbuf = GPUAllocator(self, "cmdbuf", + self.kern_va_base + 0x10000000, 0x10000000, + AttrIndex=MemoryAttr.Shared, AP=0, guard_pages=4) + self.kshared = GPUAllocator(self, "kshared", + self.kern_va_base + 0x20000000, 0x10000000, + AttrIndex=MemoryAttr.Shared, AP=1, guard_pages=4) + self.kshared2 = GPUAllocator(self, "kshared2", + self.kern_va_base + 0x30000000, 0x100000, + AttrIndex=MemoryAttr.Shared, AP=0, PXN=1, guard_pages=4) + + self.io_allocator = Heap(self.kern_va_base + 0x38000000, + self.kern_va_base + 0x40000000, + block=self.PAGE_SIZE) + + self.mon = None + self.event_mgr = GPUEventManager(self) + + self.p.iodev_set_usage(IODEV.FB, 0) + + self.initdata_hook = None + + # Early init, needed? 
+ self.poke_sgx() + + def poke_sgx(self): + self.sgx_base = self.sgx_dev.get_reg(0)[0] + self.p.read32(self.sgx_base + 0xd14000) + self.p.write32(self.sgx_base + 0xd14000, 0x70001) + + def find_object(self, addr, ctx=0): + all_objects = list(self.all_objects.items()) + all_objects.sort() + + idx = bisect.bisect_left(all_objects, ((ctx, addr + 1), "")) - 1 + if idx < 0 or idx >= len(all_objects): + return None, None + + (ctx, base), obj = all_objects[idx] + return base, obj + + def reg_object(self, obj, track=True): + self.all_objects[(obj._ctx, obj._addr)] = obj + if track: + if self.mon is not None: + obj.add_to_mon(self.mon) + self.tracked_objects[(obj._ctx, obj._addr)] = obj + + def unreg_object(self, obj): + del self.all_objects[(obj._ctx, obj._addr)] + if obj._addr in self.tracked_objects: + del self.tracked_objects[(obj._ctx, obj._addr)] + + def poll_objects(self): + for obj in self.tracked_objects.values(): + diff = obj.poll() + if diff is not None: + self.log(diff) + + def alloc_channels(self, cls, name, channel_id, count=1, ring_size=0x100, rx=False): + + # All channels have 0x100 items + item_count = ring_size + item_size = cls.item_size + ring_size = item_count * item_size + + self.log(f"Allocating {count} channel(s) for {name} ({item_count} * {item_size:#x} bytes each)") + + state_obj = self.kshared.new_buf(0x30 * count, f"Channel.{name}.state", track=False) + if rx: + ring_buf = self.kshared.new_buf(ring_size * count, f"Channel.{name}.ring", track=False) + else: + ring_buf = self.kobj.new_buf(ring_size * count, f"Channel.{name}.ring", track=False) + + info = ChannelInfo() + info.state_addr = state_obj._addr + info.ringbuffer_addr = ring_buf._addr + if name == "FWCtl": + self.fwctl_chinfo = info + else: + setattr(self.ch_info, name, info) + + return [cls(self, name + ("" if count == 1 else f"[{i}]"), channel_id, + state_obj._paddr + 0x30 * i, + ring_buf._paddr + ring_size * i, item_count) + for i in range(count)] + + def init_channels(self): + 
self.log("Initializing channels...") + self.ch_info = ChannelInfoSet() + self.ch = AGXChannels() + self.ch.queue = [] + + # Command queue submission channels + for index in range(4): + queue = AGXQueue() + self.ch.queue.append(queue) + for typeid, chtype in enumerate(("TA", "3D", "CL")): + name = f"{chtype}_{index}" + chan = self.alloc_channels(GPUCmdQueueChannel, name, + (index << 2) | typeid)[0] + setattr(queue, "q_" + chtype, chan) + + # Device control channel + self.ch.devctrl = self.alloc_channels(GPUDeviceControlChannel, "DevCtrl", 0x11)[0] + + # GPU -> CPU channels + self.ch.event = self.alloc_channels(GPUEventChannel, "Event", None, rx=True)[0] + self.ch.log = self.alloc_channels(GPULogChannel, "FWLog", None, 6, rx=True) + self.ch.ktrace = self.alloc_channels(GPUKTraceChannel, "KTrace", None, ring_size=0x200, rx=True)[0] + self.ch.stats = self.alloc_channels(GPUStatsChannel, "Stats", None, rx=True)[0] + + self.ch.fwctl = self.alloc_channels(GPUFWCtlChannel, "FWCtl", None, rx=False)[0] + + # For some reason, the FWLog channels have their rings in a different place... 
+ self.fwlog_ring = self.ch_info.FWLog.ringbuffer_addr + self.ch_info.FWLog.ringbuffer_addr = self.kshared.buf(0x150000, "FWLog_Dummy") + + def poll_channels(self): + for chan in self.ch.log: + chan.poll() + self.ch.ktrace.poll() + if self.show_stats: + self.ch.stats.poll() + self.ch.event.poll() + + def kick_firmware(self): + self.asc.db.doorbell(0x10) + + def show_irqs(self): + hw_state = self.aic_base + 0x4200 + irqs = [] + for irq in self.sgx_dev.interrupts: + v = int(bool((self.p.read32(hw_state + (irq // 32) * 4) & (1 << (irq % 32))))) + irqs.append(v) + self.log(f' SGX IRQ state: {irqs}') + + def timeout(self, msg): + if self.mon: + self.mon.poll() + self.poll_objects() + self.log(msg) + self.log(r' (\________/) ') + self.log(r' | | ') + self.log(r"'.| \ , / |.'") + self.log(r'--| / (( \ |--') + self.log(r".'| _-_- |'.") + self.log(r' |________| ') + self.log(r'') + self.log(r' Timeout nya~!!!!!') + self.log(r'') + self.log(f' Stamp index: {int(msg.stamp_index)}') + self.show_pending_stamps() + self.log(f' Fault info:') + self.log(self.initdata.regionC.fault_info) + + self.show_irqs() + self.check_fault() + self.recover() + + def faulted(self, msg): + if self.mon: + self.mon.poll() + self.poll_objects() + self.log(msg) + self.log(r' (\________/) ') + self.log(r' | | ') + self.log(r"'.| \ , / |.'") + self.log(r'--| / (( \ |--') + self.log(r".'| _-_- |'.") + self.log(r' |________| ') + self.log(r'') + self.log(r' Fault nya~!!!!!') + self.log(r'') + self.show_pending_stamps() + self.log(f' Fault info:') + self.log(self.initdata.regionC.fault_info) + + self.show_irqs() + self.check_fault() + self.recover() + + def show_pending_stamps(self): + self.initdata.regionC.pull() + self.log(f' Pending stamps:') + for i in self.initdata.regionC.pending_stamps: + if i.info or i.wait_value: + self.log(f" - #{i.info >> 3:3d}: {i.info & 0x7}/{i.wait_value:#x}") + i.info = 0 + i.wait_value = 0 + tmp = i.regmap() + tmp.info.val = 0 + tmp.wait_value.val = 0 + + 
#self.initdata.regionC.push() + + def check_fault(self): + fault_info = self.sgx.FAULT_INFO.reg + if fault_info.value == 0xacce5515abad1dea: + raise Exception("Got fault notification, but fault address is unreadable") + + self.log(f" Fault info: {fault_info}") + + if not fault_info.FAULTED: + return + + fault_addr = fault_info.ADDR + if fault_addr & 0x8000000000: + fault_addr |= 0xffffff8000000000 + base, obj = self.find_object(fault_addr) + info = "" + if obj is not None: + info = f" ({obj!s} + {fault_addr - base:#x})" + self.log(f" GPU fault at {fault_addr:#x}{info}") + self.log(f" Faulting unit: {agx_decode_unit(fault_info.UNIT)}") + + def recover(self): + status = self.fw_status + self.log(f" Halt count: {status.halt_count.val}") + halted = bool(status.halted.val) + self.log(f" Halted: {halted}") + if halted: + self.log(f" Attempting recovery...") + status.halted.val = 0 + status.resume.val = 1 + else: + raise Exception("Cannot recover") + self.show_irqs() + + def resume(self): + self.log("Starting ASC") + self.asc.start() + + self.log("Starting endpoints") + self.asc.start_ep(0x20) + self.asc.start_ep(0x21) + + def start(self): + self.resume() + + self.init_channels() + + self.log("Building initdata") + self.initdata = build_initdata(self) + if self.initdata_hook: + self.initdata_hook(self) + + self.fw_status = self.initdata.fw_status.regmap() + self.uat.flush_dirty() + + self.log("Sending initdata") + self.asc.fw.send_initdata(self.initdata._addr & 0xfff_ffffffff) + self.asc.work() + + self.log("Sending DC_Init") + self.ch.devctrl.send_init() + self.asc.work() + + self.log("Sending DC_UpdateIdleTS") + self.ch.devctrl.update_idle_ts() + self.asc.work() + + def stop(self): + self.asc.stop() + + def work(self): + self.asc.work() + + def wait_for_events(self, timeout=1.0): + now = time.time() + deadline = now + timeout + cnt = self.event_mgr.event_count + while now < deadline and self.event_mgr.event_count == cnt: + self.asc.work() + now = time.time() + if 
self.event_mgr.event_count == cnt: + raise Exception("Timed out waiting for events") + + def log(self, msg): + t = time.time() - self.start_time + print(f"[AGX][{t:10.03f}] " + str(msg)) diff --git a/tools/proxyclient/m1n1/agx/channels.py b/tools/proxyclient/m1n1/agx/channels.py new file mode 100644 index 0000000..c91f347 --- /dev/null +++ b/tools/proxyclient/m1n1/agx/channels.py @@ -0,0 +1,178 @@ +# SPDX-License-Identifier: MIT + +from construct import * +from ..fw.agx.channels import * +from ..fw.agx.cmdqueue import * + +class GPUChannel: + STATE_FIELDS = ChannelStateFields + + def __init__(self, agx, name, channel_id, state_addr, ring_addr, ring_size): + self.agx = agx + self.u = agx.u + self.name = name + self.channel_id = channel_id + self.iface = agx.u.iface + self.state_addr = state_addr + self.ring_addr = ring_addr + self.ring_size = ring_size + self.state = self.STATE_FIELDS(self.u, self.state_addr) + self.state.READ_PTR.val = 0 + self.state.WRITE_PTR.val = 0 + + @classmethod + @property + def item_size(cls): + return cls.MSG_CLASS.sizeof() + + def log(self, msg): + self.agx.log(f"[{self.name}] {msg}") + +class GPUTXChannel(GPUChannel): + def doorbell(self): + self.agx.asc.db.doorbell(self.channel_id) + + def send_message(self, msg): + wptr = self.state.WRITE_PTR.val + self.iface.writemem(self.ring_addr + self.item_size * wptr, + msg.build()) + self.state.WRITE_PTR.val = (wptr + 1) % self.ring_size + self.doorbell() + +class GPURXChannel(GPUChannel): + def poll(self): + wptr = self.state.WRITE_PTR.val + rptr = self.state.READ_PTR.val + + if wptr >= self.ring_size: + raise Exception(f"wptr = {wptr:#x} > {self.ring_size:#x}") + + while rptr != wptr: + msg = self.iface.readmem(self.ring_addr + self.item_size * rptr, + self.item_size) + self.handle_message(self.MSG_CLASS.parse(msg)) + rptr = (rptr + 1) % self.ring_size + self.state.READ_PTR.val = rptr + + def handle_message(self, msg): + self.log(f"Message: {msg}") + +class GPUCmdQueueChannel(GPUTXChannel): + 
MSG_CLASS = RunCmdQueueMsg + + def run(self, queue, event): + msg = RunCmdQueueMsg() + msg.queue_type = queue.TYPE + msg.cmdqueue = queue.info + msg.cmdqueue_addr = queue.info._addr + msg.head = queue.wptr + msg.event_number = event + msg.new_queue = 1 if queue.first_time else 0 + queue.first_time = False + #print(msg) + self.send_message(msg) + +class GPUDeviceControlChannel(GPUTXChannel): + MSG_CLASS = DeviceControlMsg + + def send_init(self): + self.send_message(DC_Init()) + + def dc_09(self, a, ptr, b): + # Writes to InitData.RegionB + msg = DC_09() + msg.unk_4 = a + msg.unkptr_c = ptr + msg.unk_14 = b + self.send_message(msg) + + def send_foo(self, t, d=None): + msg = DC_Any() + msg.msg_type = t + if d is not None: + msg.data = d + self.send_message(msg) + + def update_idle_ts(self): + self.send_message(DC_UpdateIdleTS()) + + def destroy_context(self, ctx): + msg = DC_DestroyContext() + msg.unk_4 = 0 + msg.unk_8 = 2 + msg.unk_c = 0 + msg.unk_10 = 0 + msg.unk_14 = 0xffff + msg.unk_18 = 0 + msg.context_addr = ctx.gpu_context._addr + print(msg) + self.send_message(msg) + + # Maybe related to stamps? 
class GPUContext:
    """Per-context GPU address space: page table base, allocators and fixed mappings.

    The context owns three `GPUAllocator` instances (`uobj`, `gobj`, `pobj`)
    and a dictionary of objects mapped at caller-chosen addresses via
    `new_at()`. The allocators are created with ``ctx=None``; the real
    context ID is filled in by `bind()`.
    """

    def __init__(self, agx):
        self.agx = agx
        self.uat = self.agx.uat
        self.u = self.agx.u
        self.p = self.agx.p
        self.verbose = False

        #self.job_list = agx.kshared.new(JobList)
        #self.job_list.first_job = 0
        #self.job_list.last_head = self.job_list._addr # Empty list has self as last_head
        #self.job_list.unkptr_10 = 0
        #self.job_list.push()

        self.gpu_context = agx.kobj.new(GPUContextData).push()

        # One zeroed page used as this context's TTBR0 translation table base.
        self.ttbr0_base = self.u.memalign(self.agx.PAGE_SIZE, self.agx.PAGE_SIZE)
        self.p.memset32(self.ttbr0_base, 0, self.agx.PAGE_SIZE)

        # Objects mapped at fixed addresses through new_at(), keyed by VA.
        self.objects = {}

        # 32K VA pages since buffer manager needs that
        self.uobj = GPUAllocator(agx, "Userspace", 0x1600000000, 0x100000000, ctx=None,
                                 guard_pages=16,
                                 va_block=32768, nG=1, AP=0, PXN=1, UXN=1)

        self.gobj = GPUAllocator(agx, "GEM", 0x1500000000, 0x100000000, ctx=None,
                                 guard_pages=16, nG=1, AP=0, PXN=1, UXN=1)

        self.pipeline_base = 0x1100000000
        self.pipeline_size = 1 << 32
        self.pobj = GPUAllocator(agx, "Pipelines", self.pipeline_base + 0x10000, self.pipeline_size,
                                 ctx=None, guard_pages=1, nG=1, AP=0, PXN=1, UXN=1)

    def bind(self, ctx_id):
        """Attach this context to UAT context slot `ctx_id`.

        Propagates the ID to the three allocators, registers the TTBR0 base
        with the UAT and maps a 0x4000-byte buffer named "thing" at
        0x6fffff8000.
        """
        self.ctx = ctx_id
        self.uobj.ctx = ctx_id
        self.gobj.ctx = ctx_id
        self.pobj.ctx = ctx_id
        self.uat.bind_context(ctx_id, self.ttbr0_base)
        self.thing = self.buf_at(0x6fffff8000, 0, 0x4000, "thing")

    def make_stream(self, base):
        """Return a non-recursive UAT I/O stream for this context starting at `base`."""
        return self.uat.iostream(self.ctx, base, recurse=False)

    def new_at(self, addr, objtype, name=None, track=True, **flags):
        """Create a `GPUObject` of `objtype` and map it at the fixed VA `addr`.

        Backing memory is allocated page-aligned and rounded up to a whole
        number of pages. `flags` override the default mapping flags
        (``AttrIndex=MemoryAttr.Shared``). Returns the new object, which is
        also recorded in `self.objects` and registered with `agx`.
        """
        obj = GPUObject(self, objtype)
        obj._stream = self.make_stream
        if name is not None:
            obj._name = name

        size_align = align_up(obj._size, self.agx.PAGE_SIZE)
        obj._addr = addr

        obj._paddr = self.agx.u.memalign(self.agx.PAGE_SIZE, size_align)
        #if isinstance(obj.val, ConstructClassBase):
            #obj.val._addr = obj._addr

        self.agx.log(f"[Context@{self.gpu_context._addr:#x}] Map {obj._name} size {obj._size:#x} @ {obj._addr:#x} ({obj._paddr:#x})")

        flags2 = {"AttrIndex": MemoryAttr.Shared}
        flags2.update(flags)
        obj._map_flags = flags2

        obj._size_align = size_align
        self.agx.uat.iomap_at(self.ctx, obj._addr, obj._paddr, size_align, **flags2)
        self.objects[obj._addr] = obj
        self.agx.reg_object(obj, track=track)

        return obj

    def buf_at(self, addr, is_pipeline, size, name=None, track=True):
        """Map a raw `size`-byte buffer at `addr`; AP is 1 for pipeline buffers, else 0."""
        return self.new_at(addr, Bytes(size), name, track=track,
                           AttrIndex=MemoryAttr.Shared, PXN=1,
                           nG=1, AP=(1 if is_pipeline else 0))

    def load_blob(self, addr, is_pipeline, filename, track=True):
        """Map the contents of `filename` at `addr` and push them to memory.

        The object is named after the file. Returns the pushed object.
        """
        # Read inside a context manager so the descriptor is closed promptly.
        with open(filename, "rb") as fd:
            data = fd.read()
        obj = self.new_at(addr, Bytes(len(data)), filename, track=track,
                          AttrIndex=MemoryAttr.Shared, PXN=1,
                          nG=1, AP=(1 if is_pipeline else 0))
        obj.val = data
        obj.push()

        return obj

    def free(self, obj):
        """Unmap `obj` from this context and drop it from the bookkeeping.

        NOTE(review): only the mapping is invalidated here; the backing
        allocation at `obj._paddr` is not released -- confirm this is intended.
        """
        obj._dead = True
        self.agx.uat.iomap_at(self.ctx, obj._addr, 0, obj._size_align, VALID=0)
        del self.objects[obj._addr]
        self.agx.unreg_object(obj)

    def free_at(self, addr):
        """Free the object previously mapped at `addr`.

        Raises:
            KeyError: if nothing is mapped at `addr`.
        """
        # Look the object up by the address passed in; the previous code
        # referenced an undefined `obj` and raised NameError on every call.
        self.free(self.objects[addr])
class GPUMicroSequence:
    """Builder for a sequence of microsequence operations.

    Operations are collected with `append()`; `finalize()` terminates the
    sequence with an `EndCmd`, serializes it into a kernel buffer and pushes
    it to memory.
    """

    def __init__(self, agx):
        self.agx = agx
        self.off = 0      # running byte offset of the next appended op
        self.ops = []
        self.obj = None   # backing buffer, created by finalize()

    def append(self, op):
        """Add `op` to the sequence and return the byte offset it was placed at."""
        off = self.off
        self.ops.append(op)
        self.off += op.sizeof()
        return off

    def finalize(self):
        """Append the terminating `EndCmd`, build and push the buffer; return it."""
        self.ops.append(EndCmd())
        self.size = sum(i.sizeof() for i in self.ops)
        self.obj = self.agx.kobj.new_buf(self.size, "GPUMicroSequence", track=False)
        self.obj.val = b"".join(i.build() for i in self.ops)
        self.obj.push()
        return self.obj

    def dump(self):
        """Hexdump the pushed buffer and print it re-parsed as a `MicroSequence`.

        Only valid after `finalize()`, which sets `self.obj` and `self.size`.
        """
        chexdump(self.agx.iface.readmem(self.obj._paddr, self.size))
        print(MicroSequence.parse_stream(self.agx.uat.iostream(0, self.obj._addr)))

    def __str__(self):
        # One header line followed by one "[index:size] = op" line per op.
        # The previous version also computed an indented copy of each op's
        # text that was never used; that dead work is dropped and the output
        # is unchanged.
        lines = [f"GPUMicroSequence: {len(self.ops)} ops\n"]
        lines.extend(
            f"[{i:2}:{op.sizeof():#x}] = {op!s}\n"
            for i, op in enumerate(self.ops)
        )
        return "".join(lines)
class GPUEventManager:
    """Allocator and dispatcher for firmware event slots.

    Event IDs range over ``0 .. MAX_EVENTS - 1``. `fired()` is given the
    firing bitmap and marks the corresponding `GPUEvent` objects as fired.
    """

    MAX_EVENTS = 128

    def __init__(self, agx):
        self.agx = agx

        self.event_count = 0                              # total events fired so far
        self.free_events = set(range(self.MAX_EVENTS))    # unallocated event IDs
        self.events = [None] * self.MAX_EVENTS            # ID -> event (or None)

    def allocate_event(self):
        """Allocate a free event ID and return a new `GPUEvent` for it.

        Raises:
            Exception: if all event IDs are in use.
        """
        if not self.free_events:
            raise Exception("No free events")
        ev_id = self.free_events.pop()

        ev = GPUEvent(ev_id)
        self.events[ev_id] = ev

        return ev

    def free_event(self, ev):
        """Release `ev`, making its ID available for allocation again."""
        self.events[ev.id] = None
        self.free_events.add(ev.id)

    def fired(self, flags):
        """Dispatch a firing bitmap.

        `flags` is a sequence of integers; bit `j` of element `i` stands for
        event ID ``i * 64 + j``. Each set bit fires the matching event and
        increments `event_count`.

        Raises:
            Exception: if a bit is set for an ID with no allocated event.
        """
        self.agx.log("= Events fired =")
        for i, v in enumerate(flags):
            for j in range(64):
                if v & (1 << j):
                    ev_id = i * 64 + j
                    ev = self.events[ev_id]
                    self.agx.log(f"Event fired: {ev_id}")
                    if ev is None:
                        # Report the offending ID: the message used to be a
                        # plain string (no f-prefix) naming `ev`, which is
                        # always None on this path.
                        raise Exception(f"Received spurious notification for event ID {ev_id}")
                    ev.fire()
                    self.event_count += 1
Special agent idle register die 1 + IOMapping(), # CRE registers + IOMapping(), # Streaming codec registers + IOMapping(), # + IOMapping(), # + ] + elif chip_id == 0x8112: + return [ + iomap(0x204d00000, 0x14000, 0x14000, 1), # Fender + iomap(0x20e100000, 0x4000, 0x4000, 0), # AICTimer + iomap(0x23b0c4000, 0x4000, 0x4000, 1), # AICSWInt + iomap(0x204000000, 0x20000, 0x20000, 1), # RGX + IOMapping(), # UVD + IOMapping(), # unused + IOMapping(), # DisplayUnderrunWA + iomap(0x23b2c0000, 0x1000, 0x1000, 0), # AnalogTempSensorControllerRegs + IOMapping(), # PMPDoorbell + iomap(0x204d80000, 0x8000, 0x8000, 1), # MetrologySensorRegs + iomap(0x204d61000, 0x1000, 0x1000, 1), # GMGIFAFRegs + iomap(0x200000000, 0xd6400, 0xd6400, 1), # MCache registers + IOMapping(), # AICBankedRegisters + IOMapping(), # PMGRScratch + IOMapping(), # NIA Special agent idle register die 0 + IOMapping(), # NIA Special agent idle register die 1 + iomap(0x204e00000, 0x10000, 0x10000, 0), # CRE registers + iomap(0x27d050000, 0x4000, 0x4000, 0), # Streaming codec registers + iomap(0x23b3d0000, 0x1000, 0x1000, 0), # + iomap(0x23b3c0000, 0x1000, 0x1000, 0), # + ] + elif chip_id in (0x6000, 0x6001, 0x6002): + mcc_cnt = {0x6002: 16, 0x6001: 8, 0x6000: 4} + return [ + iomap(0x404d00000, 0x1c000, 0x1c000, 1), # Fender + iomap(0x20e100000, 0x4000, 0x4000, 0), # AICTimer + iomap(0x28e104000, 0x4000, 0x4000, 1), # AICSWInt + iomap(0x404000000, 0x20000, 0x20000, 1), # RGX + IOMapping(), # UVD + IOMapping(), # unused + IOMapping(), # DisplayUnderrunWA + iomap(0x28e494000, 0x1000, 0x1000, 0), # AnalogTempSensorControllerRegs + IOMapping(), # PMPDoorbell + iomap(0x404d80000, 0x8000, 0x8000, 1), # MetrologySensorRegs + iomap(0x204d61000, 0x1000, 0x1000, 1), # GMGIFAFRegs + iomap(0x200000000, mcc_cnt[chip_id] * 0xd8000, 0xd8000, 1), # MCache registers + IOMapping(), # AICBankedRegisters + IOMapping(), # PMPDoorbell + iomap(0x2643c4000, 0x1000, 0x1000, 1), # NIA Special agent idle register die 0 + 
iomap(0x22643c4000, 0x1000, 0x1000, 1) if chip_id == 0x6002 else IOMapping(), # NIA Special agent idle register die 1 + IOMapping(), # CRE registers + IOMapping(), # Streaming codec registers + iomap(0x28e3d0000, 0x1000, 0x1000, 1), + iomap(0x28e3c0000, 0x2000, 0x2000, 0), + ] + + +CHIP_INFO = { + 0x8103: Container( + chip_id = 0x8103, + min_sram_volt = 850, + max_power = 19551, + max_freq_mhz = 1278, + unk_87c = -220, + unk_8cc = 9880, + unk_924 = [[0] * 8] * 8, + unk_e48 = [[0] * 8] * 8, + unk_e24 = 112, + gpu_fast_die0_sensor_mask64 = 0x12, + gpu_fast_die0_sensor_mask64_alt = 0x12, + gpu_fast_die0_sensor_present = 0x01, + shared1_tab = [ + -1, 0x7282, 0x50ea, 0x370a, 0x25be, 0x1c1f, 0x16fb + ] + ([-1] * 10), + shared1_a4 = 0xffff, + shared2_tab = [0x800, 0x1555, -1, -1, -1, -1, -1, -1, 0, 0], + shared2_unk_508 = 0xc0007, + unk_3cf4 = [1000.0, 0, 0, 0, 0, 0, 0, 0], + unk_3d14 = [45.0, 0, 0, 0, 0, 0, 0, 0], + unk_118ec = None, + hwdb_4e0 = 0, + hwdb_534 = 0, + num_cores = 8, + gpu_core = 11, + gpu_rev = 4, + hwdb_ab8 = 0x48, + hwdb_abc = 0x8, + hwdb_b30 = 0, + rel_max_powers = [0, 19, 26, 38, 60, 87, 100], + ), + 0x6001: Container( + chip_id = 0x6001, + min_sram_volt = 790, + max_power = 81415, + max_freq_mhz = 1296, + unk_87c = 900, + unk_8cc = 11000, + unk_924 = [[i, *([0] * 7)] for i in [ + 9.838, 9.819, 9.826, 9.799, + 0, 0, 0, 0, + ]], + unk_e48 = [[i, *([0] * 7)] for i in [ + 13, 13, 13, 13, 0, 0, 0, 0, + ]], + unk_e24 = 125, + gpu_fast_die0_sensor_mask64 = 0x80808080, + gpu_fast_die0_sensor_mask64_alt = 0x90909090, + gpu_fast_die0_sensor_present = 0x0f, + shared1_tab = [0] + ([0xffff] * 16), + shared1_a4 = 0xffff, + shared2_tab = [-1, -1, -1, -1, 0x2aa, 0xaaa, -1, -1, 0, 0], + shared2_unk_508 = 0xcc00001, + unk_3cf4 = [1314.0, 1330.0, 1314.0, 1288.0, 0, 0, 0, 0], + unk_3d14 = [21.0, 21.0, 22.0, 21.0, 0, 0, 0, 0], + unk_118ec = [ + 0, 1, 2, + 1, 1, 90, 75, 1, 1, + 1, 2, 90, 75, 1, 1, + 1, 1, 90, 75, 1, 1 + ], + hwdb_4e0 = 4, + hwdb_534 = 1, + num_cores = 32, 
+ gpu_core = 13, + gpu_rev = 5, + hwdb_ab8 = 0x2084, + hwdb_abc = 0x80, + hwdb_b30 = 0, + rel_max_powers = [0, 15, 20, 27, 36, 52, 100], + ), + 0x6002: Container( + chip_id = 0x6002, + min_sram_volt = 790, + max_power = 166743, + max_freq_mhz = 1296, + unk_87c = 900, + unk_8cc = 11000, + unk_924 = [[i, *([0] * 7)] for i in [ + 9.838, 9.819, 9.826, 9.799, + 9.799, 9.826, 9.819, 9.838, + ]], + unk_c30 = 0, + unk_e48 = [[i, *([0] * 7)] for i in [ + 13, 13, 13, 13, 13, 13, 13, 13, + ]], + unk_e24 = 125, + gpu_fast_die0_sensor_mask64 = 0x8080808080808080, + gpu_fast_die0_sensor_mask64_alt = 0x9090909090909090, + gpu_fast_die0_sensor_present = 0xff, + shared1_tab = [0] + ([0xffff] * 16), + shared1_a4 = 0xffff, + shared2_tab = [-1, -1, -1, -1, 0x2aa, 0xaaa, -1, -1, 0, 0], + shared2_unk_508 = 0xcc00001, + unk_3cf4 = [1244.0, 1260.0, 1242.0, 1214.0, + 1072.0, 1066.0, 1044.0, 1042.0], + unk_3d14 = [18.0, 18.0, 18.0, 17.0, 15.0, 15.0, 15.0, 14.0], + unk_8924 = 0, + unk_118ec = [ + 0, 1, 2, + 1, 1, 90, 75, 1, 1, + 1, 2, 90, 75, 1, 1, + 1, 1, 90, 75, 1, 1 + ], + hwdb_4e0 = 4, + hwdb_534 = 1, + num_cores = 64, + gpu_core = 13, + gpu_rev = 5, + hwdb_ab8 = 0x2084, + hwdb_abc = 0x80, + hwdb_b30 = 0, + rel_max_powers = [0, 15, 19, 25, 34, 50, 100], + ), + 0x8112: Container( + chip_id = 0x8112, + min_sram_volt = 780, + max_power = 22800, + max_freq_mhz = 1398, + unk_87c = 900, + unk_8cc = 11000, + unk_924 = [[ + 0.0, 0.0, 0.0, 0.0, + 5.3, 0.0, 5.3, 6.6, + ]] + ([[0] * 8] * 7), + unk_e48 = [[ + 0.0, 0.0, 0.0, 0.0, + 5.3, 0.0, 5.3, 6.6, + ]] + ([[0] * 8] * 7), + unk_e24 = 125, + gpu_fast_die0_sensor_mask64 = 0x6800, + gpu_fast_die0_sensor_mask64_alt = 0x6800, + gpu_fast_die0_sensor_present = 0x02, + shared1_tab = [0] + ([0xffff] * 16), + shared1_a4 = 0, + shared2_tab = [-1, -1, -1, -1, -1, -1, -1, -1, 0xaa5aa, 0], + shared2_unk_508 = 0xc00000, + unk_3cf4 = [1920.0, 0, 0, 0, 0, 0, 0, 0], + unk_3d14 = [74.0, 0, 0, 0, 0, 0, 0, 0], + unk_118ec = None, + hwdb_4e0 = 4, + hwdb_534 = 0, + 
num_cores = 10, + gpu_core = 15, + gpu_rev = 3, + hwdb_ab8 = 0x2048, + hwdb_abc = 0x4000, + hwdb_b30 = 1, + rel_max_powers = [0, 18, 27, 37, 52, 66, 82, 96, 100], + ), +} +def build_initdata(agx): + sgx = agx.u.adt["/arm-io/sgx"] + chosen = agx.u.adt["/chosen"] + chip_info = CHIP_INFO[chosen.chip_id] + + initdata = agx.kshared.new(InitData) + + initdata.ver_info = (1, 1, 16, 1) + + initdata.regionA = agx.kshared.new_buf(0x4000, "InitData_RegionA").push() + + regionB = agx.kobj.new(InitData_RegionB) + + regionB.channels = agx.ch_info + + regionB.stats_ta = agx.kobj.new(InitData_GPUGlobalStatsTA).push() + regionB.stats_3d = agx.kobj.new(InitData_GPUGlobalStats3D).push() + + # size: 0x180, Empty + # 13.0: grew + #regionB.stats_cp = agx.kobj.new_buf(0x180, "RegionB.unkptr_180").push() + regionB.stats_cp = agx.kobj.new_buf(0x980, "RegionB.unkptr_180").push() + + # size: 0x3b80, few floats, few ints, needed for init + regionB.hwdata_a = agx.kobj.new(AGXHWDataA(sgx, chip_info), track=False) + + # size: 0x80, empty + regionB.unk_190 = agx.kobj.new_buf(0x80, "RegionB.unkptr_190").push() + + # size: 0xc0, fw writes timestamps into this + regionB.unk_198 = agx.kobj.new_buf(0xc0, "RegionB.unkptr_198").push() + + # size: 0xb80, io stuff + hwdata = agx.kobj.new(AGXHWDataB(sgx, chip_info), track=False) + hwdata.io_mappings = build_iomappings(agx, chosen.chip_id) + + k = 1.02 #? 
+ count = sgx.perf_state_count + table_count = sgx.perf_state_table_count + base_pstate = sgx.getprop("gpu-perf-base-pstate", 3) + base_freq = sgx.perf_states[base_pstate].freq + max_freq = sgx.perf_states[count - 1].freq + for i in range(count): + ps = sgx.perf_states[i] + hwdata.frequencies[i] = ps.freq // 1000000 + + volt = [ps.volt] * 8 + for j in range(1, table_count): + volt[j] = sgx.perf_states[count * j + i].volt + sram_volt = [max(chip_info.min_sram_volt, i) for i in volt] + + hwdata.voltages[i] = volt + hwdata.voltages_sram[i] = sram_volt + + regionB.hwdata_a.unk_74[i] = k + hwdata.unk_9b4[i] = k + hwdata.rel_max_powers[i] = chip_info.rel_max_powers[i] + hwdata.rel_boost_freqs[i] = max(0, int((ps.freq - base_freq) / (max_freq - base_freq) * 100)) + + regionB.hwdata_a.push() + + regionB.hwdata_b = hwdata.push() + regionB.hwdata_b_addr2 = hwdata._addr + + regionB.fwlog_ring2 = agx.fwlog_ring + + # Unallocated, Size 0x1000 + regionB.unk_1b8 = agx.kobj.new_buf(0x1000, "RegionB.unkptr_1b8").push() + + # Unallocated, size 0x300 + regionB.unk_1c0 = agx.kobj.new_buf(0x300, "RegionB.unkptr_1c0").push() + + # Unallocated, unknown size + regionB.unk_1c8 = agx.kobj.new_buf(0x1000, "RegionB.unkptr_1c8").push() + + # Size: 0x4000 + regionB.buffer_mgr_ctl = agx.kshared2.new(InitData_BufferMgrCtl).push() + regionB.buffer_mgr_ctl_addr2 = regionB.buffer_mgr_ctl._addr + + regionB.unk_6a80 = 0 + regionB.gpu_idle = 0 + regionB.unk_6a9c = 0 + regionB.unk_ctr0 = 0 + regionB.unk_ctr1 = 0 + regionB.unk_6aa8 = 0 + regionB.unk_6aac = 0 + regionB.unk_ctr2 = 0 + regionB.unk_6ab4 = 0 + regionB.unk_6ab8 = 0 + regionB.unk_6abc = 0 + regionB.unk_6ac0 = 0 + regionB.unk_6ac4 = 0 + regionB.unk_ctr3 = 0 + regionB.unk_6acc = 0 + regionB.unk_6ad0 = 0 + regionB.unk_6ad4 = 0 + regionB.unk_6ad8 = 0 + regionB.unk_6adc = 0 + regionB.unk_6ae0 = 0 + regionB.unk_6ae4 = 0 + regionB.unk_6ae8 = 0 + regionB.unk_6aec = 0 + regionB.unk_6af0 = 0 + regionB.unk_ctr4 = 0 + regionB.unk_ctr5 = 0 + 
class GPUObject:
    """Host-side handle for a value stored in GPU-visible memory.

    Holds the local value (`val`), the type used to serialize it, and the
    virtual/physical addresses assigned by the owning allocator. `push()`
    writes the serialized value to `_paddr`; `pull()` reads it back.
    Attributes and items that are not private (leading underscore) are
    forwarded to `val`.
    """

    def __init__(self, allocator, objtype):
        """Create the local value for `objtype`.

        `objtype` may be an int (raw zeroed buffer of that many bytes), a
        `ConstructClassBase` instance or subclass, or any object providing
        `sizeof()` and `parse()`.
        """
        self._raw = False
        if isinstance(objtype, int):
            self.val = bytes(objtype)
            self._size = objtype
            # Was a bytes literal b"Bytes({objtype})", so the size was never
            # interpolated and the name was not a str like on other paths.
            self._name = f"Bytes({objtype})"
            self._raw = True
        elif isinstance(objtype, ConstructClassBase):
            self.val = objtype
            objtype = type(objtype)
            self._size = objtype.sizeof()
            self._name = objtype.__name__
        elif isinstance(objtype, type) and issubclass(objtype, ConstructClassBase):
            self._size = objtype.sizeof()
            self.val = objtype()
            self._name = objtype.__name__
        else:
            self._size = objtype.sizeof()
            self.val = objtype.parse(bytes(self._size))
            self._name = type(objtype).__name__

        self._alloc = allocator
        self._type = objtype
        self._addr = None
        self._data = None            # bytes written by the last push()
        self._dead = False
        self._map_flags = {}
        self._mon_val = None         # last value seen by push()/pull()/poll()
        self._skipped_pushes = 0
        self._compress_threshold = 65536
        self._strm = None
        self._read_phys = False

    def push(self, if_needed=False):
        """Serialize `val` and write it to memory; return `self`.

        With `if_needed=True` the write is skipped when the serialized bytes
        equal those of the previous push.
        """
        self._mon_val = self.val
        assert self._addr is not None

        if self._raw:
            data = self.val
        else:
            context = Container()
            context._parsing = False
            context._building = True
            context._sizing = False
            context._params = context
            # build locally and push as a block for efficiency
            ios = io.BytesIO()
            self._type._build(self.val, ios, context, "(pushing)")
            data = ios.getvalue()

        #if self._alloc.verbose:
            #t = time.time()
            #self._alloc.agx.log(f"[{self._name} @{self._addr:#x}] chk {self._size} bytes")
        if if_needed and data[:] == self._data:
            self._skipped_pushes += 1
            #if self._alloc.verbose:
                #t2 = time.time()
                #mbs = self._size / (t2 - t) / 1000000
                #self._alloc.agx.log(f"[{self._name} @{self._addr:#x}] chk done ({mbs:.02f} MB/s)")
            return self

        self._skipped_pushes = 0

        t = time.time()
        if data == bytes(self._size):
            # All-zero payload: a memset replaces the bulk transfer.
            if self._alloc.verbose:
                self._alloc.agx.log(f"[{self._name} @{self._addr:#x}] zeroing {self._size} bytes")
            self._alloc.agx.p.memset8(self._paddr, 0, self._size)
        elif self._size > self._compress_threshold:
            if self._alloc.verbose:
                self._alloc.agx.log(f"[{self._name} @{self._addr:#x}] pushing {self._size} bytes (compressed)")
            self._alloc.agx.u.compressed_writemem(self._paddr, data)
        else:
            if self._alloc.verbose:
                self._alloc.agx.log(f"[{self._name} @{self._addr:#x}] pushing {self._size} bytes")
            self._alloc.agx.iface.writemem(self._paddr, data)
        if self._alloc.verbose:
            t2 = time.time()
            mbs = self._size / (t2 - t) / 1000000
            self._alloc.agx.log(f"[{self._name} @{self._addr:#x}] push done ({mbs:.02f} MB/s)")
        #stream.write(data)
        if isinstance(self._type, type) and issubclass(self._type, ConstructClassBase):
            if self._strm is None:
                self._strm = self._alloc.make_stream(self._addr)
            self.val.set_addr(self._addr, self._strm)

        self._data = bytes(data)
        return self

    def _pull(self):
        """Read the object back from memory and return the parsed value.

        Raw objects are read from the physical address; others are parsed
        from a stream (a physical-memory snapshot when `_read_phys` is set).
        """
        if self._raw:
            assert self._paddr is not None
            return self._alloc.agx.iface.readmem(self._paddr, self._size)

        assert self._addr is not None
        context = Container()
        context._parsing = True
        context._building = False
        context._sizing = False
        context._params = context
        if self._alloc.verbose:
            self._alloc.agx.log(f"[{self._name} @{self._addr:#x}] pulling {self._size} bytes")
        if self._read_phys:
            stream = io.BytesIO()
            stream.write(self._alloc.agx.iface.readmem(self._paddr, self._size))
            stream.seek(0)
        else:
            stream = self._alloc.make_stream(self._addr)
        return self._type._parse(stream, context, f"(pulling {self._name})")

    def pull(self):
        """Refresh `val` from memory; return `self`."""
        self._mon_val = self.val = self._pull()
        return self

    def poll(self):
        """Re-read the object and describe what changed since the last look.

        Returns a description string when the value supports `diff()` and
        differs from the previously monitored value, otherwise None. `val`
        itself is left untouched.
        """
        prev_val = self._mon_val
        self._mon_val = cur_val = self._pull()
        if not hasattr(cur_val, "diff"):
            return None
        if cur_val != prev_val:
            diff = cur_val.diff(prev_val)
            assert diff is not None
            return f"GPUObject {self._name} ({self._size:#x} @ {self._addr:#x}): " + diff
        else:
            return None

    @property
    def _ctx(self):
        return self._alloc.ctx

    def add_to_mon(self, mon):
        """Register this object's VA range with `mon`, reading via its physical address."""
        mon.add(self._addr, self._size, self._name, offset=0,
                readfn=lambda a, s: self._alloc.agx.iface.readmem(a - self._addr + self._paddr, s))

    def _set_addr(self, addr, paddr=None):
        self._addr = addr
        self._paddr = paddr
        if isinstance(self.val, ConstructClassBase):
            self.val.set_addr(addr)

    def __getitem__(self, item):
        return self.val[item]

    def __setitem__(self, item, value):
        self.val[item] = value

    def __getattr__(self, attr):
        # Only reached for names missing from the instance: forward to the
        # wrapped value. Fetch `val` from __dict__ directly so that a
        # half-constructed instance (e.g. during copy/unpickle) raises
        # AttributeError instead of recursing forever.
        try:
            val = self.__dict__["val"]
        except KeyError:
            raise AttributeError(attr) from None
        return getattr(val, attr)

    def __setattr__(self, attr, val):
        # Private names and `val` live on the handle; all else goes to `val`.
        if attr.startswith("_") or attr == "val":
            self.__dict__[attr] = val
            return

        setattr(self.val, attr, val)

    def __str__(self):
        if isinstance(self.val, bytes) and len(self.val) > 128:
            s_val = f"<{len(self.val)} bytes>"
        else:
            s_val = str_value(self.val)
        return f"GPUObject {self._name} ({self._size:#x} @ {self._addr:#x}): " + s_val

    def free(self):
        """Release the object through its allocator; a second call is a no-op."""
        if self._dead:
            return
        self._dead = True
        self._alloc.free(self)
def unswizzle(agx, addr, w, h, psize, dump=None, grid=False):
    """Read a tiled image from memory and optionally dump it in linear order.

    The image is stored as 64x64-pixel tiles; within a tile the pixel index
    is formed by interleaving the bits of the x offset (even bit positions)
    and the y offset (odd bit positions). Each pixel is unpacked into four
    channel bytes, so `psize` must be 4. Pixels are emitted with the first
    and third channels swapped.

    Args:
        agx: object exposing `u.iface.readmem(addr, size)`.
        addr: address of the tiled image.
        w, h: image size in pixels.
        psize: bytes per pixel (see above).
        dump: if truthy, path the linear image is written to.
        grid: if true, overlay lines every 32 (grey) and 64 (white) pixels.

    Returns:
        None.
    """
    iface = agx.u.iface

    tw = 64
    th = 64
    # Tiles per row/column, rounded up (previously divided by a literal 64).
    ntx = (w + tw - 1) // tw
    nty = (h + th - 1) // th
    data = iface.readmem(addr, ntx * nty * psize * tw * th)
    new_data = []
    for y in range(h):
        ty = y // th
        for x in range(w):
            tx = x // tw
            toff = tw * th * psize * (ty * ntx + tx)
            j = x & (tw - 1)
            i = y & (th - 1)
            off = (
                ((j & 1) << 0) | ((i & 1) << 1) |
                ((j & 2) << 1) | ((i & 2) << 2) |
                ((j & 4) << 2) | ((i & 4) << 3) |
                ((j & 8) << 3) | ((i & 8) << 4) |
                ((j & 16) << 4) | ((i & 16) << 5) |
                ((j & 32) << 5) | ((i & 32) << 6))
            r, g, b, a = data[toff + psize * off: toff + psize * (off + 1)]
            if grid:
                if x % 64 == 0 or y % 64 == 0:
                    r, g, b, a = 255, 255, 255, 255
                elif x % 32 == 0 or y % 32 == 0:
                    r, g, b, a = 128, 128, 128, 255
            new_data.append(bytes([b, g, r, a]))
    data = b"".join(new_data)
    if dump:
        # Close the file deterministically instead of leaking the handle.
        with open(dump, "wb") as fd:
            fd.write(data[:w * h * psize])
    #iface.writemem(addr, data)
class GPUWork:
    """Bag of GPU objects tied to one unit of submitted work.

    Objects registered with `add()` are released together by `free()`.
    """

    def __init__(self, renderer):
        self.objects = []
        self.renderer = renderer

    def add(self, obj):
        """Register `obj` so that it is released by the next `free()`."""
        self.objects.append(obj)

    def free(self):
        """Call `free()` on every registered object, then forget them all."""
        for entry in self.objects:
            entry.free()
        self.objects = []
+ self.stamp_ta1 = agx.kshared.new(StampCounter, name="TA stamp 1") + self.stamp_ta1.value = self.stamp_value_ta + self.stamp_ta1.push() + + # complete? + self.stamp_ta2 = agx.kobj.new(StampCounter, name="TA stamp 2") + self.stamp_ta2.value = self.stamp_value_ta + self.stamp_ta2.push() + + ##### 3D stamps + + # start? + self.stamp_3d1 = agx.kshared.new(StampCounter, name="3D stamp 1") + self.stamp_3d1.value = self.stamp_value_3d + self.stamp_3d1.push() + + # complete? + self.stamp_3d2 = agx.kobj.new(StampCounter, name="3D stamp 2") + self.stamp_3d2.value = self.stamp_value_3d + self.stamp_3d2.push() + + + ##### Things userspace deals with for macOS + + #self.aux_fb = ctx.uobj.new_buf(0x8000, "Aux FB thing") + ##self.deflake_1 = ctx.uobj.new_buf(0x20, "Deflake 1") + ##self.deflake_2 = ctx.uobj.new_buf(0x280, "Deflake 2") + ##self.deflake_3 = ctx.uobj.new_buf(0x540, "Deflake 3") + #self.deflake = ctx.uobj.new_buf(0x7e0, "Deflake") + #self.unk_buf = ctx.uobj.new(Array(0x800, Int64ul), "Unknown Buffer") + #self.unk_buf.val = [0, *range(1, 0x400), *(0x400 * [0])] + #self.unk_buf.push() + + ##### Some kind of feedback/status buffer, GPU managed? 
+ + self.event_control = agx.kobj.new(EventControl) + self.event_control.event_count = agx.kobj.new(Int32ul, "event_count") + self.event_control.event_count.val = 0 + self.event_control.event_count.push() + + self.event_control.generation = 0 + self.event_control.cur_count = 0 + self.event_control.unk_10 = 0x50 + self.event_control.push() + + self.frames = 0 + + self.ev_ta = ev_ta = self.agx.event_mgr.allocate_event() + self.ev_3d = ev_3d = self.agx.event_mgr.allocate_event() + + self.work = [] + + def submit(self, cmdbuf, wait_for=None): + nclusters = 8 + + work = GPUWork(self) + self.work.append(work) + + self.buffer_mgr.increment() + + aux_fb = self.ctx.uobj.new_buf(0x20000, "Aux FB thing", track=False) + work.add(aux_fb) + + # t8103 + deflake_1_size = 0x540 + deflake_2_size = 0x280 + deflake_3_size = 0x20 + + # t6002 - 9 times larger instead of 8? works with 8... + deflake_1_size *= nclusters + deflake_2_size *= nclusters + deflake_3_size *= nclusters + + deflake_1 = self.ctx.uobj.new_buf(deflake_1_size, "Deflake 1", track=True) + deflake_2 = self.ctx.uobj.new_buf(deflake_2_size, "Deflake 2", track=True) + deflake_3 = self.ctx.uobj.new_buf(deflake_3_size, "Deflake 3", track=True) + work.add(deflake_1) + work.add(deflake_2) + work.add(deflake_3) + + unk_buf = self.ctx.uobj.new(Array(0x800, Int64ul), "Unknown Buffer", track=False) + work.add(unk_buf) + + unk_buf.val = [0, *range(2, 0x401), *(0x400 * [0])] + unk_buf.push() + + work.cmdbuf = cmdbuf + + self.frames += 1 + + work.ev_ta = ev_ta = self.ev_ta + work.ev_3d = ev_3d = self.ev_3d + + self.ev_ta.rearm() + self.ev_3d.rearm() + + self.agx.log(f"ev_ta: {ev_ta.id}") + self.agx.log(f"ev_3d: {ev_3d.id}") + + #self.event_control.base_stamp = self.stamp_value >> 8 + #self.event_control.push() + + self.prev_stamp_value_3d = self.stamp_value_3d + self.prev_stamp_value_ta = self.stamp_value_ta + self.stamp_value_3d += 0x100 + self.stamp_value_ta += 0x100 + self.event_control.event_count.val += 2 + 
self.event_control.event_count.push() + + work.stamp_value_3d = self.stamp_value_3d + work.stamp_value_ta = self.stamp_value_ta + + agx = self.agx + ctx = self.ctx + + work.width = width = cmdbuf.fb_width + work.height = height = cmdbuf.fb_height + + ##### TVB allocations / Tiler config + + tile_width = 32 + tile_height = 32 + tiles_x = ((width + tile_width - 1) // tile_width) + tiles_y = ((height + tile_height - 1) // tile_height) + tiles = tiles_x * tiles_y + + mtiles_x = 4 + mtiles_y = 4 + + mtile_x1 = align(((tiles_x + mtiles_x - 1) // mtiles_x), 4) + mtile_x2 = 2 * mtile_x1 + mtile_x3 = 3 * mtile_x1 + mtile_y1 = align(((tiles_y + mtiles_y - 1) // mtiles_y), 4) + mtile_y2 = 2 * mtile_y1 + mtile_y3 = 3 * mtile_y1 + + mtile_stride = mtile_x1 * mtile_y1 + + ## TODO: *samples + tiles_per_mtile_x = mtile_x1 + tiles_per_mtile_y = mtile_y1 + + tile_blocks_x = (tiles_x + 15) // 16 + tile_blocks_y = (tiles_y + 15) // 16 + tile_blocks = tile_blocks_x * tile_blocks_y + + tiling_params = TilingParameters() + # rgn_header_size + rgn_entry_size = 5 + tiling_params.size1 = (rgn_entry_size * tiles_per_mtile_x * tiles_per_mtile_y + 3) // 4 + # PPP_MULTISAMPLECTL + tiling_params.unk_4 = 0x88 + # PPP_CTRL + tiling_params.unk_8 = 0x203 # bit 0: GL clip mode + # PPP_SCREEN + tiling_params.x_max = width - 1 + tiling_params.y_max = height - 1 + # TE_SCREEN + tiling_params.tile_count = ((tiles_y-1) << 12) | (tiles_x-1) + # TE_MTILE1 + tiling_params.x_blocks = mtile_x3 | (mtile_x2 << 9) | (mtile_x1 << 18) + # TE_MTILE2 + tiling_params.y_blocks = mtile_y3 | (mtile_y2 << 9) | (mtile_y1 << 18) + tiling_params.size2 = mtile_stride + tiling_params.size3 = 2 * mtile_stride + tiling_params.unk_24 = 0x100 + tiling_params.unk_28 = 0x8000 + + tilemap_size = (4 * tiling_params.size1 * mtiles_x * mtiles_y) + + tmtiles_x = tiles_per_mtile_x * mtiles_x + tmtiles_y = tiles_per_mtile_y * mtiles_y + + tpc_entry_size = 8 + tpc_size = tpc_entry_size * tmtiles_x * tmtiles_y * nclusters + + if 
self.tpc_size < tpc_size: + self.tpc = ctx.uobj.new_buf(tpc_size, "TPC", track=True).push() + self.tpc_size = tpc_size + + depth_aux_buffer_addr = 0 + if cmdbuf.depth_buffer: + size = align_pot(max(width, tile_width)) * align_pot(max(height, tile_width)) // 32 + depth_aux_buffer = self.ctx.uobj.new_buf(size, "Depth Aux", track=True) + work.add(depth_aux_buffer) + depth_aux_buffer_addr = depth_aux_buffer._addr + + stencil_aux_buffer_addr = 0 + if cmdbuf.stencil_buffer: + size = align_pot(max(width, tile_width)) * align_pot(max(height, tile_width)) // 32 + stencil_aux_buffer = self.ctx.uobj.new_buf(size, "Stencil Aux", track=False) + work.add(stencil_aux_buffer) + stencil_aux_buffer_addr = stencil_aux_buffer._addr + + #tvb_tilemap_size = 0x80 * mtile_stride + tvb_tilemap_size = tilemap_size + tvb_tilemap = ctx.uobj.new_buf(tvb_tilemap_size, "TVB Tilemap", track=True).push() + work.tvb_tilemap_size = tvb_tilemap_size + work.tvb_tilemap = tvb_tilemap + work.add(tvb_tilemap) + + # rogue: 0x180 * 4? + tvb_heapmeta_size = 0x200 + #tvb_heapmeta_size = 0x600 + tvb_heapmeta = ctx.uobj.new_buf(tvb_heapmeta_size, "TVB Heap Meta", track=False).push() + work.add(tvb_heapmeta) + + unk_tile_buf1 = self.ctx.uobj.new_buf(tvb_tilemap_size * nclusters, "Unk tile buf 1", track=True) + print("tvb_tilemap_size", hex(tvb_tilemap_size)) + unk_tile_buf2 = self.ctx.uobj.new_buf(0x4 * nclusters, "Unk tile buf 2", track=True) + #size = 0xc0 * nclusters + size = 0xc80 + unk_tile_buf3 = self.ctx.uobj.new_buf(size, "Unk tile buf 3", track=True) + unk_tile_buf4 = self.ctx.uobj.new_buf(0x280 * nclusters, "Unk tile buf 4", track=True) + unk_tile_buf5 = self.ctx.uobj.new_buf(0x30 * nclusters, "Unk tile buf 5", track=True) + work.add(unk_tile_buf1) + work.add(unk_tile_buf2) + work.add(unk_tile_buf3) + work.add(unk_tile_buf4) + work.add(unk_tile_buf5) + + ##### Buffer stuff? + + # buffer related? 
+ bufferthing_buf = ctx.uobj.new_buf(0x80, "BufferThing.unkptr_18", track=True) + work.add(bufferthing_buf) + + work.buf_desc = buf_desc = agx.kobj.new(BufferThing, track=False) + work.add(buf_desc) + buf_desc.unk_0 = 0x0 + buf_desc.unk_8 = 0x0 + buf_desc.unk_10 = 0x0 + buf_desc.unkptr_18 = bufferthing_buf._addr + buf_desc.unk_20 = 0x0 + buf_desc.bm_misc_addr = self.buffer_mgr.misc_obj._addr + buf_desc.unk_2c = 0x0 + buf_desc.unk_30 = 0x0 + buf_desc.unk_38 = 0x0 + buf_desc.push() + + uuid_3d = cmdbuf.cmd_3d_id + uuid_ta = cmdbuf.cmd_ta_id + encoder_id = cmdbuf.encoder_id + + #print(barrier_cmd) + + #self.wq_ta.submit(ta_barrier_cmd) + + ##### 3D barrier command + + barrier_cmd = agx.kobj.new(WorkCommandBarrier, track=False) + work.add(barrier_cmd) + barrier_cmd.stamp = self.stamp_ta2 + barrier_cmd.wait_value = self.stamp_value_ta + barrier_cmd.stamp_self = self.stamp_value_3d + barrier_cmd.event = ev_ta.id + barrier_cmd.uuid = uuid_3d + + #print(barrier_cmd) + + self.wq_3d.submit(barrier_cmd) + + ##### 3D execution + + work.wc_3d = wc_3d = agx.kobj.new(WorkCommand3D, track=False) + work.add(work.wc_3d) + wc_3d.counter = 0 + wc_3d.context_id = self.ctx_id + wc_3d.unk_8 = 0 + wc_3d.event_control = self.event_control + wc_3d.buffer_mgr = self.buffer_mgr.info + wc_3d.buf_thing = buf_desc + wc_3d.unk_emptybuf_addr = self.unk_emptybuf._addr + wc_3d.tvb_tilemap = tvb_tilemap._addr + wc_3d.unk_40 = 0x88 + wc_3d.unk_48 = 0x1 + wc_3d.tile_blocks_y = mtile_y1 + wc_3d.tile_blocks_x = mtile_x1 + wc_3d.unk_50 = 0x0 + wc_3d.unk_58 = 0x0 + + TAN_60 = 1.732051 + wc_3d.merge_upper_x = TAN_60 / width + wc_3d.merge_upper_y = TAN_60 / height + wc_3d.unk_68 = 0x0 + wc_3d.tile_count = tiles + + wc_3d.unk_758 = Flag() + wc_3d.unk_75c = Flag() + wc_3d.unk_buf = WorkCommand1_UnkBuf() + wc_3d.busy_flag = Flag() + wc_3d.unk_buf2 = WorkCommand1_UnkBuf2() + wc_3d.unk_buf2.unk_0 = 0 + wc_3d.unk_buf2.unk_8 = 0 + wc_3d.unk_buf2.unk_10 = 1 + wc_3d.ts1 = TimeStamp(0) + wc_3d.ts2 = 
TimeStamp(self.ts3d_1._addr) + wc_3d.ts3 = TimeStamp(self.ts3d_2._addr) + wc_3d.unk_914 = 0 + wc_3d.unk_918 = 0 + wc_3d.unk_920 = 0 + wc_3d.unk_924 = 1 + # Ventura + wc_3d.unk_928_0 = 0 + wc_3d.unk_928_4 = 0 + wc_3d.ts_flag = TsFlag() + + # cmdbuf.ds_flags + # 0 - no depth + # 0x80000 - depth store enable + # 0x08000 - depth load enable + + # 0x00044 - compressed depth + + # 0x40000 - stencil store enable + # 0x04000 - stencil load enable + # 0x00110 - compressed stencil + + # Z store format + # 0x4000000 - Depth16Unorm + + # For Depth16Unorm: 0x40000 here also + # AFBI.[ 0. 4] unk1 = 0x4c000 + + # ASAHI_CMDBUF_SET_WHEN_RELOADING_Z_OR_S + # Actually set when loading *and* storing Z, OR loading *and* storing S + + # Structures embedded in WorkCommand3D + if True: + wc_3d.struct_1 = Start3DStruct1() + wc_3d.struct_1.store_pipeline_bind = cmdbuf.store_pipeline_bind + wc_3d.struct_1.store_pipeline_addr = cmdbuf.store_pipeline | 4 + wc_3d.struct_1.unk_8 = 0x0 + wc_3d.struct_1.unk_c = 0x0 + + TAN_60 = 1.732051 + wc_3d.struct_1.merge_upper_x = TAN_60 / width + wc_3d.struct_1.merge_upper_y = TAN_60 / height + + wc_3d.struct_1.unk_18 = 0x0 + # ISP_MTILE_SIZE + wc_3d.struct_1.tile_blocks_y = mtile_y1 + wc_3d.struct_1.tile_blocks_x = mtile_x1 + wc_3d.struct_1.unk_24 = 0x0 + wc_3d.struct_1.tile_counts = ((tiles_y-1) << 12) | (tiles_x-1) + wc_3d.struct_1.unk_2c = 0x8 + wc_3d.struct_1.depth_clear_val1 = cmdbuf.depth_clear_value + wc_3d.struct_1.stencil_clear_val1 = cmdbuf.stencil_clear_value + wc_3d.struct_1.unk_35 = 0x7 # clear flags? 2 = depth 4 = stencil? 
+ wc_3d.struct_1.unk_36 = 0x0 + wc_3d.struct_1.unk_38 = 0x0 + wc_3d.struct_1.unk_3c = 0x1 + wc_3d.struct_1.unk_40 = 0 + wc_3d.struct_1.unk_44_padding = bytes(0xac) + wc_3d.struct_1.depth_bias_array = Start3DArrayAddr(cmdbuf.depth_bias_array) + wc_3d.struct_1.scissor_array = Start3DArrayAddr(cmdbuf.scissor_array) + wc_3d.struct_1.visibility_result_buffer = 0x0 + wc_3d.struct_1.unk_118 = 0x0 + wc_3d.struct_1.unk_120 = [0] * 37 + wc_3d.struct_1.unk_reload_pipeline = Start3DClearPipelineBinding( + cmdbuf.partial_reload_pipeline_bind, cmdbuf.partial_reload_pipeline | 4) + wc_3d.struct_1.unk_258 = 0 + wc_3d.struct_1.unk_260 = 0 + wc_3d.struct_1.unk_268 = 0 + wc_3d.struct_1.unk_270 = 0 + wc_3d.struct_1.reload_pipeline = Start3DClearPipelineBinding( + cmdbuf.partial_reload_pipeline_bind, cmdbuf.partial_reload_pipeline | 4) + wc_3d.struct_1.depth_flags = cmdbuf.ds_flags | 0x44 + wc_3d.struct_1.unk_290 = 0x0 + wc_3d.struct_1.depth_buffer_ptr1 = cmdbuf.depth_buffer + wc_3d.struct_1.unk_2a0 = 0x0 + wc_3d.struct_1.unk_2a8 = 0x0 + wc_3d.struct_1.depth_buffer_ptr2 = cmdbuf.depth_buffer + wc_3d.struct_1.depth_buffer_ptr3 = cmdbuf.depth_buffer + wc_3d.struct_1.depth_aux_buffer_ptr = depth_aux_buffer_addr + wc_3d.struct_1.stencil_buffer_ptr1 = cmdbuf.stencil_buffer + wc_3d.struct_1.unk_2d0 = 0x0 + wc_3d.struct_1.unk_2d8 = 0x0 + wc_3d.struct_1.stencil_buffer_ptr2 = cmdbuf.stencil_buffer + wc_3d.struct_1.stencil_buffer_ptr3 = cmdbuf.stencil_buffer + wc_3d.struct_1.stencil_aux_buffer_ptr = stencil_aux_buffer_addr + wc_3d.struct_1.unk_2f8 = [0x0, 0x0] + wc_3d.struct_1.aux_fb_unk0 = 4 #0x8 # sometimes 4 + wc_3d.struct_1.unk_30c = 0x0 + wc_3d.struct_1.aux_fb = AuxFBInfo(0xc000, 0, width, height) + wc_3d.struct_1.unk_320_padding = bytes(0x10) + wc_3d.struct_1.unk_partial_store_pipeline = Start3DStorePipelineBinding( + cmdbuf.partial_store_pipeline_bind, cmdbuf.partial_store_pipeline | 4) + wc_3d.struct_1.partial_store_pipeline = Start3DStorePipelineBinding( + 
cmdbuf.partial_store_pipeline_bind, cmdbuf.partial_store_pipeline | 4) + wc_3d.struct_1.depth_clear_val2 = cmdbuf.depth_clear_value + wc_3d.struct_1.stencil_clear_val2 = cmdbuf.stencil_clear_value + wc_3d.struct_1.unk_375 = 3 + wc_3d.struct_1.unk_376 = 0x0 + wc_3d.struct_1.unk_378 = 0x10 + wc_3d.struct_1.unk_37c = 0x0 + wc_3d.struct_1.unk_380 = 0x0 + wc_3d.struct_1.unk_388 = 0x0 + wc_3d.struct_1.unk_390_0 = 0x0 # Ventura + wc_3d.struct_1.depth_dimensions = (width - 1) | ((height - 1) << 15) + + if True: + wc_3d.struct_2 = Start3DStruct2() + wc_3d.struct_2.unk_0 = 0xa000 + wc_3d.struct_2.clear_pipeline = Start3DClearPipelineBinding( + cmdbuf.load_pipeline_bind, cmdbuf.load_pipeline | 4) + wc_3d.struct_2.unk_18 = 0x88 + wc_3d.struct_2.scissor_array = cmdbuf.scissor_array + wc_3d.struct_2.depth_bias_array = cmdbuf.depth_bias_array + wc_3d.struct_2.aux_fb = wc_3d.struct_1.aux_fb + # ISP_ZLS_PIXELS + wc_3d.struct_2.depth_dimensions = wc_3d.struct_1.depth_dimensions + wc_3d.struct_2.visibility_result_buffer = 0x0 + # ISP_ZLSCTL + wc_3d.struct_2.depth_flags = cmdbuf.ds_flags + wc_3d.struct_2.unk_58_g14_0 = 0x4040404 + wc_3d.struct_2.unk_58_g14_8 = 0 + wc_3d.struct_2.depth_buffer_ptr1 = cmdbuf.depth_buffer + wc_3d.struct_2.depth_buffer_ptr2 = cmdbuf.depth_buffer + wc_3d.struct_2.unk_68_g14_0 = 0 + wc_3d.struct_2.stencil_buffer_ptr1 = cmdbuf.stencil_buffer + wc_3d.struct_2.stencil_buffer_ptr2 = cmdbuf.stencil_buffer + wc_3d.struct_2.unk_78 = [0] * 4 + wc_3d.struct_2.depth_aux_buffer_ptr1 = depth_aux_buffer_addr + wc_3d.struct_2.unk_a0 = 0 + wc_3d.struct_2.depth_aux_buffer_ptr2 = depth_aux_buffer_addr + wc_3d.struct_2.unk_b0 = 0 + wc_3d.struct_2.stencil_aux_buffer_ptr1 = stencil_aux_buffer_addr + wc_3d.struct_2.unk_c0 = 0 + wc_3d.struct_2.stencil_aux_buffer_ptr2 = stencil_aux_buffer_addr + wc_3d.struct_2.unk_d0 = 0 + wc_3d.struct_2.tvb_tilemap = tvb_tilemap._addr + wc_3d.struct_2.tvb_heapmeta_addr = tvb_heapmeta._addr + wc_3d.struct_2.unk_e8 = tiling_params.size1 << 24 + 
wc_3d.struct_2.tvb_heapmeta_addr2 = tvb_heapmeta._addr + # 0x10000 - clear empty tiles + # ISP_CTL (but bits seem to have moved) + wc_3d.struct_2.unk_f8 = 0x10280 #0x10280 # TODO: varies 0, 0x280, 0x10000, 0x10280 + wc_3d.struct_2.aux_fb_ptr = aux_fb._addr + wc_3d.struct_2.unk_108 = [0x0, 0x0, 0x0, 0x0, 0x0, 0x0] + wc_3d.struct_2.pipeline_base = self.ctx.pipeline_base + wc_3d.struct_2.unk_140 = 0x8c60 + wc_3d.struct_2.unk_148 = 0x0 + wc_3d.struct_2.unk_150 = 0x0 + wc_3d.struct_2.unk_158 = 0x1c + wc_3d.struct_2.unk_160 = 0 + wc_3d.struct_2.unk_168_padding = bytes(0x1d8) + wc_3d.struct_2.unk_198_padding = bytes(0x1a8) + + if True: + wc_3d.struct_6 = Start3DStruct6() + wc_3d.struct_6.tvb_overflow_count = 0x0 + wc_3d.struct_6.unk_8 = 0x0 # 1? + wc_3d.struct_6.unk_c = 0x0 # 1? + wc_3d.struct_6.unk_10 = 0x0 + wc_3d.struct_6.encoder_id = cmdbuf.encoder_id + wc_3d.struct_6.unk_1c = 0xffffffff + wc_3d.struct_6.unknown_buffer = unk_buf._addr + wc_3d.struct_6.unk_28 = 0x0 + wc_3d.struct_6.unk_30 = 0x0 + wc_3d.struct_6.unk_34 = 0x0 + + if True: + wc_3d.struct_7 = Start3DStruct7() + wc_3d.struct_7.unk_0 = 0x0 + wc_3d.struct_7.stamp1 = self.stamp_3d1 + wc_3d.struct_7.stamp2 = self.stamp_3d2 + wc_3d.struct_7.stamp_value = self.stamp_value_3d + wc_3d.struct_7.ev_3d = ev_3d.id + wc_3d.struct_7.evctl_index = 0x0 + wc_3d.struct_7.unk_24 = 1 + wc_3d.struct_7.uuid = uuid_3d + wc_3d.struct_7.prev_stamp_value = self.prev_stamp_value_3d >> 8 + wc_3d.struct_7.unk_30 = 0x0 + + wc_3d.set_addr() # Update inner structure addresses + #print("WC3D", hex(wc_3d._addr)) + #print(" s1", hex(wc_3d.struct_1._addr)) + #print(" s2", hex(wc_3d.struct_2._addr)) + #print(" s6", hex(wc_3d.struct_6._addr)) + #print(" s7", hex(wc_3d.struct_7._addr)) + + ms = GPUMicroSequence(agx) + + start_3d = Start3DCmd() + start_3d.struct1 = wc_3d.struct_1 # 0x44 bytes! + start_3d.struct2 = wc_3d.struct_2 # 0x168 bytes! 
+ start_3d.buf_thing = buf_desc + start_3d.stats_ptr = agx.initdata.regionB.stats_3d.stats._addr + start_3d.busy_flag_ptr = wc_3d.busy_flag._addr + start_3d.struct6 = wc_3d.struct_6 # 4 bytes! + start_3d.struct7 = wc_3d.struct_7 # 4 bytes! + start_3d.cmdqueue_ptr = self.wq_3d.info._addr + start_3d.workitem_ptr = wc_3d._addr + start_3d.context_id = self.ctx_id + start_3d.unk_50 = 0x1 + start_3d.event_generation = self.event_control.generation + start_3d.buffer_mgr_slot = self.buffer_mgr_slot + start_3d.unk_5c = 0x0 + start_3d.prev_stamp_value = self.prev_stamp_value_3d >> 8 + start_3d.unk_68 = 0x0 + start_3d.unk_buf_ptr = wc_3d.unk_758._addr + start_3d.unk_buf2_ptr = wc_3d.unk_buf2._addr + start_3d.unk_7c = 0x0 + start_3d.unk_80 = 0x0 + start_3d.unk_84 = 0x0 + start_3d.uuid = uuid_3d + start_3d.attachments = [] + start_3d.unk_194 = 0 + start_3d.unkptr_19c = self.event_control.unk_buf._addr + + work.fb = None + work.depth = None + + for i in cmdbuf.attachments[:cmdbuf.attachment_count]: + cache_lines = align_up(i.size, 128) // 128 + order = 1 # FIXME + start_3d.attachments.append(Attachment(i.pointer, cache_lines, 0x17, order)) # FIXME check + if work.fb is None and i.type == ASAHI_ATTACHMENT_C: + work.fb = i.pointer + if work.depth is None and i.type == ASAHI_ATTACHMENT_Z: + work.depth = i.pointer + start_3d.attachments += [Attachment(0, 0, 0, 0)] * (16 - len(start_3d.attachments)) + start_3d.num_attachments = cmdbuf.attachment_count + start_3d.unk_190 = 0x0 + + start_3d_offset = ms.append(start_3d) + + ts1 = TimestampCmd() + ts1.unk_1 = 0x0 + ts1.unk_2 = 0x0 + ts1.unk_3 = 0x80 + ts1.ts0_addr = wc_3d.ts1._addr + ts1.ts1_addr = wc_3d.ts2._addr + ts1.ts2_addr = wc_3d.ts2._addr + ts1.cmdqueue_ptr = self.wq_3d.info._addr + ts1.unk_24 = 0x0 + if Ver.check("V >= V13_0B4"): + ts1.unkptr_2c_0 = wc_3d.ts_flag._addr + ts1.uuid = uuid_3d + ts1.unk_30_padding = 0x0 + ms.append(ts1) + + ms.append(WaitForInterruptCmd(0, 1, 0)) + + ts2 = TimestampCmd() + ts2.unk_1 = 0x0 + 
ts2.unk_2 = 0x0 + ts2.unk_3 = 0x0 + ts2.ts0_addr = wc_3d.ts1._addr + ts2.ts1_addr = wc_3d.ts2._addr + ts2.ts2_addr = wc_3d.ts3._addr + ts2.cmdqueue_ptr = self.wq_3d.info._addr + ts2.unk_24 = 0x0 + if Ver.check("V >= V13_0B4"): + ts2.unkptr_2c_0 = wc_3d.ts_flag._addr + ts2.uuid = uuid_3d + ts2.unk_30_padding = 0x0 + ms.append(ts2) + + finish_3d = Finalize3DCmd() + finish_3d.uuid = uuid_3d + finish_3d.unk_8 = 0 + finish_3d.stamp = self.stamp_3d2 + finish_3d.stamp_value = self.stamp_value_3d + finish_3d.unk_18 = 0 + finish_3d.buf_thing = buf_desc + finish_3d.buffer_mgr = self.buffer_mgr.info + finish_3d.unk_2c = 1 + finish_3d.stats_ptr = agx.initdata.regionB.stats_3d.stats._addr + finish_3d.struct7 = wc_3d.struct_7 + finish_3d.busy_flag_ptr = wc_3d.busy_flag._addr + finish_3d.cmdqueue_ptr = self.wq_3d.info._addr + finish_3d.workitem_ptr = wc_3d._addr + finish_3d.unk_5c = self.ctx_id + finish_3d.unk_buf_ptr = wc_3d.unk_758._addr + finish_3d.unk_6c = 0 + finish_3d.unk_74 = 0 + finish_3d.unk_7c = 0 + finish_3d.unk_84 = 0 + finish_3d.unk_8c = 0 + finish_3d.unk_8c_g14 = 0 + finish_3d.restart_branch_offset = start_3d_offset - ms.off + finish_3d.unk_98 = 0 + finish_3d.unk_9c = bytes(0x10) + ms.append(finish_3d) + ms.finalize() + + work.add(ms.obj) + + wc_3d.microsequence_ptr = ms.obj._addr + wc_3d.microsequence_size = ms.size + + print(wc_3d) + self.wq_3d.submit(wc_3d) + + ##### TA init + + #print(ctx_info) + if wait_for is not None: + barrier_cmd = agx.kobj.new(WorkCommandBarrier, track=False) + work.add(barrier_cmd) + if not isinstance(wait_for, tuple): + barrier_cmd.stamp = wait_for.renderer.stamp_3d2 + barrier_cmd.wait_value = wait_for.stamp_value_3d + barrier_cmd.event = wait_for.ev_3d.id + else: + barrier_cmd.stamp_addr = wait_for[0] + barrier_cmd.wait_value = wait_for[1] + barrier_cmd.event = wait_for[2] + + barrier_cmd.stamp_self = self.stamp_value_ta + barrier_cmd.uuid = uuid_ta + + self.wq_ta.submit(barrier_cmd) + + if not self.buffer_mgr_initialized: + wc_initbm = 
agx.kobj.new(WorkCommandInitBM, track=False) + work.add(wc_initbm) + wc_initbm.context_id = self.ctx_id + wc_initbm.buffer_mgr_slot = self.buffer_mgr_slot + wc_initbm.unk_c = 0 + wc_initbm.unk_10 = self.buffer_mgr.info.block_count + wc_initbm.buffer_mgr = self.buffer_mgr.info + wc_initbm.stamp_value = self.stamp_value_ta + + self.wq_ta.submit(wc_initbm) + + self.buffer_mgr_initialized = True + + ##### TA execution + + work.wc_ta = wc_ta = agx.kobj.new(WorkCommandTA, track=False) + work.add(work.wc_ta) + wc_ta.context_id = self.ctx_id + wc_ta.counter = 1 + wc_ta.unk_8 = 0 + wc_ta.event_control = self.event_control + wc_ta.buffer_mgr_slot = self.buffer_mgr_slot + wc_ta.buffer_mgr = self.buffer_mgr.info + wc_ta.buf_thing = buf_desc + wc_ta.unk_emptybuf_addr = wc_3d.unk_emptybuf_addr + wc_ta.unk_34 = 0x0 + + wc_ta.unk_154 = bytes(0x268) + wc_ta.unk_3e8 = bytes(0x74) + wc_ta.unk_594 = WorkCommand0_UnkBuf() + + wc_ta.ts1 = TimeStamp(0) + wc_ta.ts2 = TimeStamp(self.tsta_1._addr) + wc_ta.ts3 = TimeStamp(self.tsta_2._addr) + wc_ta.unk_5c4 = 0 + wc_ta.unk_5c8 = 0 + wc_ta.unk_5cc = 0 + wc_ta.unk_5d0 = 0 + wc_ta.unk_5d4 = 1 #0x27 #1 + # Ventura + wc_ta.unk_5e0 = 0 + wc_ta.unk_5e4 = 0 + wc_ta.ts_flag = TsFlag() + + # Structures embedded in WorkCommandTA + if True: + wc_ta.tiling_params = tiling_params + + if True: + wc_ta.struct_2 = StartTACmdStruct2() + wc_ta.struct_2.unk_0 = 0x200 + wc_ta.struct_2.unk_8 = 0x1e3ce508 # fixed + wc_ta.struct_2.unk_c = 0x1e3ce508 # fixed + wc_ta.struct_2.tvb_tilemap = tvb_tilemap._addr + wc_ta.struct_2.tvb_cluster_tilemaps = unk_tile_buf1._addr + wc_ta.struct_2.tpc = self.tpc._addr + wc_ta.struct_2.tvb_heapmeta_addr = tvb_heapmeta._addr | 0x8000_0000_0000_0000 + wc_ta.struct_2.iogpu_unk_54 = 0x6b0003 # fixed + wc_ta.struct_2.iogpu_unk_55 = 0x3a0012 # fixed + wc_ta.struct_2.iogpu_unk_56 = 0x1 # fixed + wc_ta.struct_2.tvb_cluster_meta1 = unk_tile_buf2._addr | 0x4_0000_0000_0000 + wc_ta.struct_2.unk_48 = 0xa000 + wc_ta.struct_2.unk_50 = 0x88 # fixed 
+ wc_ta.struct_2.tvb_heapmeta_addr2 = tvb_heapmeta._addr + wc_ta.struct_2.unk_60 = 0x0 # fixed + wc_ta.struct_2.core_mask = 0xffffffffffffffff + #wc_ta.struct_2.unk_68 = 0xff << (8 * (self.buffer_mgr_slot % 8)) + wc_ta.struct_2.iogpu_deflake_1 = deflake_1._addr + wc_ta.struct_2.iogpu_deflake_2 = deflake_2._addr + wc_ta.struct_2.unk_80 = 0x1 # fixed + wc_ta.struct_2.iogpu_deflake_3 = deflake_3._addr | 0x4_0000_0000_0000 # check + wc_ta.struct_2.encoder_addr = cmdbuf.encoder_ptr + wc_ta.struct_2.tvb_cluster_meta2 = unk_tile_buf3._addr + wc_ta.struct_2.tvb_cluster_meta3 = unk_tile_buf4._addr + wc_ta.struct_2.tiling_control = 0xa040 #0xa041 # fixed + wc_ta.struct_2.unk_b0 = [0x0, 0x0, 0x0, 0x0, 0x0, 0x0] # fixed + wc_ta.struct_2.pipeline_base = self.ctx.pipeline_base + wc_ta.struct_2.tvb_cluster_meta4 = unk_tile_buf5._addr | 0x3000_0000_0000_0000 + wc_ta.struct_2.unk_f0 = 0x20 # fixed + wc_ta.struct_2.unk_f8 = 0x8c60 # fixed + wc_ta.struct_2.unk_100 = [0x0, 0x0, 0x0] # fixed + wc_ta.struct_2.unk_118 = 0x1c # fixed + + if True: + wc_ta.struct_3 = StartTACmdStruct3() + wc_ta.struct_3.unk_480 = [0x0, 0x0, 0x0, 0x0, 0x0, 0x0] # fixed + wc_ta.struct_3.unk_498 = 0x0 # fixed + wc_ta.struct_3.unk_4a0 = 0x0 # fixed + wc_ta.struct_3.iogpu_deflake_1 = deflake_1._addr + wc_ta.struct_3.unk_4ac = 0x0 # fixed + wc_ta.struct_3.unk_4b0 = 0x0 # fixed + wc_ta.struct_3.unk_4b8 = 0x0 # fixed + wc_ta.struct_3.unk_4bc = 0x0 # fixed + wc_ta.struct_3.unk_4c4_padding = bytes(0x48) + wc_ta.struct_3.unk_50c = 0x0 # fixed + wc_ta.struct_3.unk_510 = 0x0 # fixed + wc_ta.struct_3.unk_518 = 0x0 # fixed + wc_ta.struct_3.unk_520 = 0x0 # fixed + wc_ta.struct_3.unk_528 = 0x0 # fixed + wc_ta.struct_3.unk_52c = 0x0 # fixed + wc_ta.struct_3.unk_530 = 0x0 # fixed + wc_ta.struct_3.encoder_id = cmdbuf.encoder_id + wc_ta.struct_3.unk_538 = 0x0 # fixed + wc_ta.struct_3.unk_53c = 0xffffffff + wc_ta.struct_3.unknown_buffer = wc_3d.struct_6.unknown_buffer + wc_ta.struct_3.unk_548 = 0x0 # fixed + 
wc_ta.struct_3.unk_550 = [ + 0x0, 0x0, # fixed + 0x0, # 1 for boot stuff? + 0x0, 0x0, 0x0] # fixed + wc_ta.struct_3.stamp1 = self.stamp_ta1 + wc_ta.struct_3.stamp2 = self.stamp_ta2 + wc_ta.struct_3.stamp_value = self.stamp_value_ta + wc_ta.struct_3.ev_ta = ev_ta.id + wc_ta.struct_3.evctl_index = 0 + wc_ta.struct_3.unk_584 = 0x0 # 1 for boot stuff? + wc_ta.struct_3.uuid2 = uuid_ta + wc_ta.struct_3.prev_stamp_value = self.prev_stamp_value_ta >> 8 + wc_ta.struct_3.unk_590 = 0 # sometimes 1? + + wc_ta.set_addr() # Update inner structure addresses + #print("wc_ta", wc_ta) + + ms = GPUMicroSequence(agx) + + start_ta = StartTACmd() + start_ta.tiling_params = wc_ta.tiling_params + start_ta.struct2 = wc_ta.struct_2 # len 0x120 + start_ta.buffer_mgr = self.buffer_mgr.info + start_ta.buf_thing = buf_desc + start_ta.stats_ptr = agx.initdata.regionB.stats_ta.stats._addr + start_ta.cmdqueue_ptr = self.wq_ta.info._addr + start_ta.context_id = self.ctx_id + start_ta.unk_38 = 1 + start_ta.event_generation = self.event_control.generation + start_ta.buffer_mgr_slot = self.buffer_mgr_slot + start_ta.unk_48 = 0#1 #0 + start_ta.unk_50 = 0 + start_ta.struct3 = wc_ta.struct_3 + + start_ta.unkptr_5c = wc_ta.unk_594._addr + start_ta.unk_64 = 0x0 # fixed + start_ta.unk_68 = 0x0 # sometimes 1? + start_ta.uuid = uuid_ta + start_ta.unk_70 = 0x0 # fixed + start_ta.unk_74 = [ # fixed + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + ] + start_ta.unk_15c = 0x0 # fixed + start_ta.unk_160 = 0x0 # fixed + start_ta.unk_168 = 0x0 # fixed + start_ta.unk_16c = 0x0 # fixed + start_ta.unk_170 = 0x0 # fixed + start_ta.unk_178 = 0x0 # fixed? 
+ start_ta.unk_17c = 0x0 + start_ta.unkptr_180 = self.event_control.unk_buf._addr + start_ta.unk_188 = 0x0 + + start_ta_offset = ms.append(start_ta) + + ts1 = TimestampCmd() + ts1.unk_1 = 0x0 + ts1.unk_2 = 0x0 + ts1.unk_3 = 0x80 + ts1.ts0_addr = wc_ta.ts1._addr + ts1.ts1_addr = wc_ta.ts2._addr + ts1.ts2_addr = wc_ta.ts2._addr + ts1.cmdqueue_ptr = self.wq_ta.info._addr + ts1.unk_24 = 0x0 + if Ver.check("V >= V13_0B4"): + ts1.unkptr_2c_0 = wc_ta.ts_flag._addr + ts1.uuid = uuid_ta + ts1.unk_30_padding = 0x0 + ms.append(ts1) + + ms.append(WaitForInterruptCmd(1, 0, 0)) + + ts2 = TimestampCmd() + ts2.unk_1 = 0x0 + ts2.unk_2 = 0x0 + ts2.unk_3 = 0x0 + ts2.ts0_addr = wc_ta.ts1._addr + ts2.ts1_addr = wc_ta.ts2._addr + ts2.ts2_addr = wc_ta.ts3._addr + ts2.cmdqueue_ptr = self.wq_ta.info._addr + ts2.unk_24 = 0x0 + if Ver.check("V >= V13_0B4"): + ts2.unkptr_2c_0 = wc_ta.ts_flag._addr + ts2.uuid = uuid_ta + ts2.unk_30_padding = 0x0 + ms.append(ts2) + + finish_ta = FinalizeTACmd() + finish_ta.buf_thing = buf_desc + finish_ta.buffer_mgr = self.buffer_mgr.info + finish_ta.stats_ptr = agx.initdata.regionB.stats_ta.stats._addr + finish_ta.cmdqueue_ptr = self.wq_ta.info._addr + finish_ta.context_id = self.ctx_id + finish_ta.unk_28 = 0x0 # fixed + finish_ta.struct3 = wc_ta.struct_3 + finish_ta.unk_34 = 0x0 # fixed + finish_ta.uuid = uuid_ta + finish_ta.stamp = self.stamp_ta2 + finish_ta.stamp_value = self.stamp_value_ta + finish_ta.unk_48 = 0x0 # fixed + finish_ta.unk_50 = 0x0 # fixed + finish_ta.unk_54 = 0x0 # fixed + finish_ta.unk_58 = 0x0 # fixed + finish_ta.unk_60 = 0x0 # fixed + finish_ta.unk_64 = 0x0 # fixed + finish_ta.unk_68 = 0x0 # fixed + finish_ta.unk_6c_g14 = 0 # fixed + finish_ta.restart_branch_offset = start_ta_offset - ms.off + finish_ta.unk_70 = 0x0 # fixed + finish_ta.unk_74 = bytes(0x10) # Ventura + ms.append(finish_ta) + + ms.finalize() + + work.add(ms.obj) + + wc_ta.unkptr_45c = self.tpc._addr + wc_ta.tvb_size = tpc_size + wc_ta.microsequence_ptr = ms.obj._addr + 
def run(self):
    """Kick both work queues on this renderer's firmware channel.

    The 3D queue is run first, then the TA queue; each is passed its work
    queue object together with the id of the matching event.
    """
    agx = self.agx
    agx.log("Run queues")
    # Both queues live on the same channel entry, selected by self.queue.
    channel = agx.ch.queue[self.queue]
    channel.q_3D.run(self.wq_3d, self.ev_3d.id)
    channel.q_TA.run(self.wq_ta, self.ev_ta.id)
    agx.log("Run done")
class DRMAsahiShim:
    """Stand-in for the Asahi DRM driver that renders through a proxied GPU.

    Methods tagged by the ``IO``/``IOR``/``IOW``/``IOWR`` decorators are
    collected into ``ioctl_map`` at construction time; :meth:`ioctl` then
    dispatches raw ioctl requests to them.

    Environment variables read at construction:
      * ``ASAHI_SHIM_PULL`` - if set (non-empty), attachment buffers are
        pulled back from the GPU after every submission.
      * ``ASAHI_SHIM_DUMP`` - if set (non-empty), every submission is saved
        to ``shim_frameNNN.agx`` before it is run.
    """

    def __init__(self, memfd):
        """Create the shim.

        memfd: file descriptor that :meth:`create_bo` mmaps to back buffer
        objects.
        """
        self.memfd = memfd
        self.initialized = False
        # Raw request number -> (IOCTL descriptor, bound handler method).
        self.ioctl_map = {}
        for key in dir(self):
            f = getattr(self, key)
            ioctl = getattr(f, "_ioctl", None)
            if ioctl is not None:
                self.ioctl_map[ioctl.value] = ioctl, f
        # Buffer objects, keyed by their offset inside the memfd.
        self.bos = {}
        self.pull_buffers = bool(os.getenv("ASAHI_SHIM_PULL"))
        self.dump_frames = bool(os.getenv("ASAHI_SHIM_DUMP"))
        self.frame = 0
        self.agx = None

    def read_buf(self, ptr, size):
        """Return a ctypes ``c_ubyte`` array viewing *size* bytes at *ptr*."""
        return ctypes.cast(ptr, ctypes.POINTER(ctypes.c_ubyte * size))[0]

    def init_agx(self):
        """Enable the GPU clocks, create the AGX driver object and start it."""
        # Imported lazily: importing m1n1.setup provides the proxy objects.
        from m1n1.setup import p, u, iface

        p.pmgr_adt_clocks_enable("/arm-io/gfx-asc")
        p.pmgr_adt_clocks_enable("/arm-io/sgx")

        self.agx = agx = AGX(u)

        mon = RegMonitor(u, ascii=True, bufsize=0x8000000)
        agx.mon = mon

        # Only needed by the optional monitors below.
        sgx = agx.sgx_dev
        #mon.add(sgx.gpu_region_base, sgx.gpu_region_size, "contexts")
        #mon.add(sgx.gfx_shared_region_base, sgx.gfx_shared_region_size, "gfx-shared")
        #mon.add(sgx.gfx_handoff_base, sgx.gfx_handoff_size, "gfx-handoff")

        # Reboot the target when this interpreter exits.
        atexit.register(p.reboot)
        agx.start()

    def init(self):
        """Bring up the GPU, a context and a renderer on first use (idempotent)."""
        if self.initialized:
            return

        self.init_agx()
        self.ctx = GPUContext(self.agx)
        self.ctx.bind(0x17)
        self.renderer = GPURenderer(self.ctx, 0x40, bm_slot=10, queue=1)

        self.initialized = True

    @IOW(DRM_COMMAND_BASE + 0x00, drm_asahi_submit_t)
    def submit(self, fd, args):
        """Push all buffer objects, render the command buffer and wait for it.

        Returns 0.
        """
        sys.stdout.write(".")
        sys.stdout.flush()

        size = drm_asahi_cmdbuf_t.sizeof()
        cmdbuf = drm_asahi_cmdbuf_t.parse(self.read_buf(args.cmdbuf, size))

        self.log("Pushing objects...")
        for obj in self.bos.values():
            #if obj._skipped_pushes > 64:# and obj._addr > 0x1200000000 and obj._size > 131072:
                #continue
            obj.push(True)
        self.log("Push done")

        # Resolve attachments to buffer objects with one address lookup each
        # instead of rescanning every BO per attachment.  Only the first
        # attachment_count entries are considered, matching what
        # GPURenderer.submit does with the same command buffer.
        by_addr = {obj._addr: obj for obj in self.bos.values()}
        attachment_objs = [
            by_addr[att.pointer]
            for att in cmdbuf.attachments[:cmdbuf.attachment_count]
            if att.pointer in by_addr
        ]

        if self.dump_frames:
            name = f"shim_frame{self.frame:03d}.agx"
            f = GPUFrame(self.renderer.ctx)
            f.cmdbuf = cmdbuf
            for obj in self.bos.values():
                f.add_object(obj)
            f.save(name)

        self.renderer.submit(cmdbuf)
        self.renderer.run()
        self.renderer.wait()

        if self.pull_buffers:
            self.log("Pulling buffers...")
            for obj in attachment_objs:
                obj.pull()
                # Copy the pulled data into the mmap, then point val back at
                # the mmap so later pushes read from the mapping again.
                obj._map[:] = obj.val
                obj.val = obj._map
            self.log("Pull done")

        #print("HEAP STATS")
        #self.ctx.uobj.va.check()
        #self.ctx.gobj.va.check()
        #self.ctx.pobj.va.check()
        #self.agx.kobj.va.check()
        #self.agx.cmdbuf.va.check()
        #self.agx.kshared.va.check()
        #self.agx.kshared2.va.check()

        self.frame += 1
        return 0

    @IOW(DRM_COMMAND_BASE + 0x01, drm_asahi_wait_bo_t)
    def wait_bo(self, fd, args):
        """Log the request and return 0; nothing is waited on."""
        self.log("Wait BO!", args)
        return 0

    @IOWR(DRM_COMMAND_BASE + 0x02, drm_asahi_create_bo_t)
    def create_bo(self, fd, args):
        """Allocate a GPU object backed by an mmap of the memfd.

        On entry ``args.offset`` is the offset into the memfd; on return it
        holds the object's GPU address (relative to the context's
        ``pipeline_base`` for ``ASAHI_BO_PIPELINE`` objects).  Returns 0.
        """
        memfd_offset = args.offset

        if args.flags & ASAHI_BO_PIPELINE:
            alloc = self.renderer.ctx.pobj
        else:
            alloc = self.renderer.ctx.gobj

        obj = alloc.new(args.size, name=f"GBM offset {memfd_offset:#x}", track=False)
        obj._memfd_offset = memfd_offset
        obj._pushed = False
        obj.val = obj._map = mmap.mmap(self.memfd, args.size, offset=memfd_offset)
        self.bos[memfd_offset] = obj
        args.offset = obj._addr

        if args.flags & ASAHI_BO_PIPELINE:
            args.offset -= self.renderer.ctx.pipeline_base

        self.log(f"Create BO @ {memfd_offset:#x}")
        return 0

    @IOWR(DRM_COMMAND_BASE + 0x04, drm_asahi_get_param_t)
    def get_param(self, fd, args):
        """Log the request and return 0; *args* is passed back unmodified."""
        self.log("Get Param!", args)
        return 0

    @IOWR(DRM_COMMAND_BASE + 0x05, drm_asahi_get_bo_offset_t)
    def get_bo_offset(self, fd, args):
        """Log the request and return 0; *args* is passed back unmodified."""
        self.log("Get BO Offset!", args)
        return 0

    def bo_free(self, memfd_offset):
        """Free the buffer object created for *memfd_offset* and forget it.

        Raises KeyError if no such buffer object exists.
        """
        self.log(f"Free BO @ {memfd_offset:#x}")
        self.bos[memfd_offset].free()
        del self.bos[memfd_offset]
        sys.stdout.flush()

    def ioctl(self, fd, request, p_arg):
        """Dispatch a raw ioctl to its handler.

        fd: passed through to the handler.
        request: raw ioctl request number.
        p_arg: address of the argument structure.

        Returns the handler's return value, or ``-errno.ENOSYS`` when no
        handler is registered for *request*.
        """
        self.init()

        p_arg = ctypes.c_void_p(p_arg)

        if request not in self.ioctl_map:
            # c_void_p does not support format specs; format its integer
            # value instead (value is None for a NULL pointer).
            arg_addr = p_arg.value or 0
            self.log(f"Unknown ioctl: fd={fd} request={IOCTL(request)} arg={arg_addr:#x}")
            return -errno.ENOSYS

        ioctl, f = self.ioctl_map[request]

        size = ioctl.SIZE
        if ioctl.DIR & _IOC_WRITE:
            # Caller supplies data: parse the structure from its memory.
            args = f._arg_cls.parse(self.read_buf(p_arg, size))
            ret = f(fd, args)
        elif ioctl.DIR & _IOC_READ:
            # Output-only: hand the handler a zero-initialised structure.
            args = f._arg_cls.parse(bytes(size))
            ret = f(fd, args)
        else:
            ret = f(fd)

        if ioctl.DIR & _IOC_READ:
            # Copy the (possibly updated) structure back to the caller.
            data = args.build()
            assert len(data) == size
            ctypes.memmove(p_arg, data, size)

        sys.stdout.flush()
        return ret

    def log(self, s, *args):
        """Log *s* with a ``[Shim]`` prefix.

        Extra positional arguments are converted with ``str()`` and appended
        space-separated, like ``print``.  Output goes through ``agx.log``
        once the AGX object exists, and to stdout before that.
        """
        if args:
            s = " ".join(map(str, (s, *args)))
        if self.agx is None:
            print("[Shim] " + s)
        else:
            self.agx.log("[Shim] " + s)
# Pick default toolchain settings for the host: Homebrew LLVM on macOS, GNU
# binutils elsewhere (unprefixed when already running on aarch64).
uname = os.uname()

if uname.sysname == "Darwin":
    DEFAULT_ARCH = "aarch64-linux-gnu-"
    TOOLCHAIN = ("/opt/homebrew/opt/llvm/bin/" if uname.machine == "arm64"
                 else "/usr/local/opt/llvm/bin/")
    USE_CLANG = "1"
else:
    DEFAULT_ARCH = "" if uname.machine == "aarch64" else "aarch64-linux-gnu-"
    USE_CLANG = "0"
    TOOLCHAIN = ""

# The USE_CLANG and TOOLCHAIN environment variables override the defaults.
use_clang = os.environ.get("USE_CLANG", USE_CLANG).strip() == "1"
toolchain = os.environ.get("TOOLCHAIN", TOOLCHAIN)

# "%ARCH" is a placeholder replaced with the assembler class's ARCH when a
# command is run.
_tool_names = (
    ("clang --target=%ARCH", "ld.lld", "llvm-objcopy", "llvm-objdump", "llvm-nm")
    if use_clang else
    ("%ARCHgcc", "%ARCHld", "%ARCHobjcopy", "%ARCHobjdump", "%ARCHnm")
)
CC, LD, OBJCOPY, OBJDUMP, NM = (toolchain + tool for tool in _tool_names)

class AsmException(Exception):
    pass

class BaseAsm(object):
    """Assemble a source snippet at a fixed address using the external toolchain.

    Subclasses supply ARCH, CFLAGS, LDFLAGS, HEADER and FOOTER.  After
    construction the raw ``.text`` bytes are in ``data``, every symbol listed
    by ``nm`` is an attribute holding its address, and ``start``/``len``/``end``
    describe the blob.
    """

    def __init__(self, source, addr = 0):
        self.source = source
        # Private scratch directory, removed again in __del__.
        self._tmp = tempfile.mkdtemp() + os.sep
        self.addr = addr
        self.compile(source)

    def _cmdline(self, program, args):
        # Resolve the %ARCH placeholder and append the arguments.
        return program.replace("%ARCH", self.ARCH) + " " + args

    def _call(self, program, args):
        """Run a tool through the shell, raising if it exits non-zero."""
        subprocess.check_call(self._cmdline(program, args), shell=True)

    def _get(self, program, args):
        """Run a tool through the shell and return its stdout decoded as ASCII."""
        raw = subprocess.check_output(self._cmdline(program, args), shell=True)
        return raw.decode("ascii")

    def compile(self, source):
        """Assemble, link, extract ``.text`` and load the symbol table."""
        stem = self._tmp + "b"

        self.sfile = stem + ".S"
        with open(self.sfile, "w") as fd:
            for part in (self.HEADER, source, self.FOOTER):
                fd.write(part + "\n")

        self.ofile = stem + ".o"
        self.elffile = stem + ".elf"
        self.bfile = stem + ".b"
        self.nfile = stem + ".n"

        self._call(CC, f"{self.CFLAGS} -c -o {self.ofile} {self.sfile}")
        self._call(LD, f"{self.LDFLAGS} --Ttext={self.addr:#x} -o {self.elffile} {self.ofile}")
        self._call(OBJCOPY, f"-j.text -O binary {self.elffile} {self.bfile}")
        self._call(NM, f"{self.elffile} > {self.nfile}")

        with open(self.bfile, "rb") as fd:
            self.data = fd.read()

        # Each nm line is "<hex address> <type> <name>"; expose every symbol
        # as an attribute holding its address.
        with open(self.nfile) as fd:
            for entry in fd:
                value, _kind, symbol = entry.split()
                setattr(self, symbol, int(value, 16))

        # _start comes from the symbol table loaded just above.
        self.start = self._start
        self.len = len(self.data)
        self.end = self.start + self.len

    def objdump(self):
        """Print relocations and disassembly of the linked ELF."""
        self._call(OBJDUMP, f"-rd {self.elffile}")

    def disassemble(self):
        """Yield the objdump output lines whose first field ends in a colon."""
        listing = self._get(OBJDUMP, f"-zd {self.elffile}")

        for text in listing.split("\n"):
            # Skip blanks and the "/path: file format" banner.
            if not text or text.startswith("/"):
                continue
            fields = text.split()
            if fields and fields[0].endswith(":"):
                yield text

    def __del__(self):
        if self._tmp:
            shutil.rmtree(self._tmp)
            self._tmp = None
def recusive_reload(obj, token=None):
    """Walk *obj* and swap every class exposing ``_reloadcls`` for its reloaded version.

    The walk covers ``obj.subcon``, ``obj.subcons``, ``obj.cases`` (for
    Construct instances) and finally every attribute listed by ``dir(obj)``.

    *token* identifies one traversal: an object whose ``_token`` is already
    this token is skipped, and every visited object is stamped with it when
    its walk completes.  Top-level callers leave *token* as None so a fresh
    one is created (which also resets the global depth counter).
    """
    global g_depth

    if token is None:
        g_depth = 0
        token = object()

    cur_token = getattr(obj, "_token", None)
    if cur_token is token:
        # Already handled during this traversal.
        return

    g_depth += 1
    #print(" " * g_depth + f"> {obj}", id(obj), id(token))
    if isinstance(obj, Construct) and hasattr(obj, 'subcon'):
        # Single subcon types
        if inspect.isclass(obj.subcon):
            #print("> isclass")
            if hasattr(obj.subcon, "_reloadcls"):
                #print("> Recursive (subcon)")
                obj.subcon = obj.subcon._reloadcls(token=token)
        else:
            if isinstance(obj.subcon, Construct):
                recusive_reload(obj.subcon, token)
    if isinstance(obj, Construct) and hasattr(obj, 'subcons'):
        # Construct types that have lists
        new_subcons = []
        for item in obj.subcons:
            if inspect.isclass(item):
                if hasattr(item, "_reloadcls"):
                    #print("> Recursive (subcons)")
                    # Forward the token like every other branch does, so the
                    # reloaded class's subcon is walked as part of this same
                    # traversal instead of starting a new one.
                    item = item._reloadcls(token=token)
            else:
                if isinstance(item, Construct):
                    recusive_reload(item, token)
            new_subcons.append(item)
        obj.subcons = new_subcons

    if isinstance(obj, Construct) and hasattr(obj, 'cases'):
        # Construct types that have a mapping of cases
        # (iterate over a snapshot because entries are replaced in place)
        for i, item in list(obj.cases.items()):
            if inspect.isclass(item):
                if hasattr(item, "_reloadcls"):
                    #print("> Recursive (cases)")
                    obj.cases[i] = item._reloadcls(token=token)
            else:
                if isinstance(item, Construct):
                    recusive_reload(item, token)

    # Finally cover anything referenced through an ordinary attribute.
    for field in dir(obj):
        value = getattr(obj, field)
        if inspect.isclass(value):
            if hasattr(value, "_reloadcls"):
                #print("> Recursive (value)")
                setattr(obj, field, value._reloadcls(token=token))
        else:
            if isinstance(value, Construct):
                recusive_reload(value, token)

    obj._token = token

    g_depth -= 1
class DecDisplayedInteger(int):
    """An ``int`` subclass used purely as a marker type.

    It adds no behaviour of its own; code that formats values can test for
    this type with ``isinstance`` to choose decimal output.
    """

    @staticmethod
    def new(intvalue):
        """Return *intvalue* wrapped as a DecDisplayedInteger."""
        return DecDisplayedInteger(intvalue)
= subcon.subcon + if isinstance(subcon, Renamed): + name = subcon.name + subcon = subcon.subcon + cls._off[name] = off, sizeof + if sizeof is None: + break + off += sizeof + return cls + +class ConstructClassBase(Reloadable, metaclass=ReloadableConstructMeta): + """ Offers two benifits over regular construct + + 1. It's reloadable, and can recusrivly reload other refrenced ConstructClasses + 2. It's a class, so you can define methods + + Currently only supports parsing, but could be extended to support building + + Example: + Instead of: + MyStruct = Struct( + "field1" / Int32ul + ) + + class MyClass(ConstructClass): + subcon = Struct( + "field1" / Int32ul + ) + + """ + SHORT_NAME = None + + parsed = None + + def __init__(self): + self._pointers = set() + self._addr = None + self._meta = {} + + def regmap(self): + return ConstructRegMap(type(self), self._stream.to_accessor(), self._addr) + + @classmethod + def sizeof(cls, **contextkw): + context = Container(**contextkw) + context._parsing = False + context._building = False + context._sizing = True + context._params = context + return cls._sizeof(context, "(sizeof)") + + def Apply(self, dict=None, **kwargs): + if dict is None: + dict = kwargs + + for key in dict: + if not key.startswith('_'): + setattr(self, key, dict[key]) + self._keys += [key] + + def set_addr(self, addr=None, stream=None): + #print("set_addr", type(self), addr) + if addr is not None: + self._addr = addr + self._set_meta(self, stream) + + @classmethod + def _build(cls, obj, stream, context, path): + cls._build_prepare(obj) + + addr = stream.tell() + try: + new_obj = cls.subcon._build(obj, stream, context, f"{path} -> {cls.name}") + except ConstructClassException: + raise + except ConstructError: + raise + except Exception as e: + raise ConstructClassException(f"at {path} -> {cls.name}") from e + + # if obj is a raw value or Container, instance a proper object for it + if not isinstance(obj, ConstructClassBase): + obj = cls.__new__(cls) + + # 
update the object with anything that build updated (such as defaults) + obj._apply(new_obj) + + obj._addr = addr + cls._set_meta(obj, stream) + return obj + + @classmethod + def _sizeof(cls, context, path): + return cls.subcon._sizeof(context, f"{path} -> {cls.name}") + + @classmethod + def _reloadcls(cls, force=False, token=None): + #print(f"_reloadcls({cls})", id(cls)) + newcls = Reloadable._reloadcls.__func__(cls, force) + if hasattr(newcls, "subcon"): + recusive_reload(newcls.subcon, token) + return newcls + + def _apply(self, obj): + raise NotImplementedError() + + @classmethod + def _set_meta(cls, self, stream=None): + if stream is not None: + self._pointers = set() + self._meta = {} + self._stream = stream + + if isinstance(cls.subcon, Struct): + subaddr = int(self._addr) + for subcon in cls.subcon.subcons: + try: + sizeof = subcon.sizeof() + except: + break + if isinstance(subcon, Ver): + subcon = subcon.subcon + if isinstance(subcon, Renamed): + name = subcon.name + #print(name, subcon) + subcon = subcon.subcon + if stream is not None and getattr(stream, "meta_fn", None): + meta = stream.meta_fn(subaddr, sizeof) + if meta is not None: + self._meta[name] = meta + if isinstance(subcon, Pointer): + self._pointers.add(name) + continue + try: + #print(name, subcon) + val = self[name] + except: + pass + else: + if isinstance(val, ConstructClassBase): + val.set_addr(subaddr) + if isinstance(val, list): + for i in val: + if isinstance(i, ConstructClassBase): + i.set_addr(subaddr) + subaddr += i.sizeof() + + subaddr += sizeof + + @classmethod + def _parse(cls, stream, context, path): + #print(f"parse {cls} @ {stream.tell():#x} {path}") + addr = stream.tell() + obj = cls.subcon._parse(stream, context, path) + size = stream.tell() - addr + + # Don't instance Selects + if isinstance(cls.subcon, Select): + return obj + + # Skip calling the __init__ constructor, so that it can be used for building + # Use parsed instead, if you need a post-parsing constructor + self = 
class ROPointer(Pointer):
    """Pointer that is never written and is only followed on request.

    Building returns the object untouched.  Parsing dereferences the pointer
    only when the stream has a truthy ``recurse`` attribute; otherwise the
    field parses as None.
    """

    def _build(self, obj, stream, context, path):
        # Read-only: emit nothing, just hand the object back.
        return obj

    def _parse(self, stream, context, path):
        if getattr(stream, "recurse", False):
            return Pointer._parse(self, stream, context, path)
        return None
It's a class, so you can define methods + + Currently only supports parsing, but could be extended to support building + + Example: + Instead of: + MyStruct = Struct( + "field1" / Int32ul + ) + + class MyClass(ConstructClass): + subcon = Struct( + "field1" / Int32ul + ) + """ + + def diff(self, other, show_all=False): + return self.__str__(other=other, show_all=show_all) + + def __eq__(self, other): + return all(self[k] == other[k] for k in self + if (not k.startswith("_")) + and (k not in self._pointers) + and not callable(self[k])) + + def __str__(self, ignore=[], other=None, show_all=False) -> str: + + str = self.__class__.__name__ + if self._addr is not None: + str += f" @ 0x{self._addr:x}:" + + str += "\n" + + keys = list(self) + keys.sort(key = lambda x: self._off.get(x, (-1, 0))[0]) + + for key in keys: + if key in self._off: + offv, sizeof = self._off[key] + if offv == -1: + print(key, offv, sizeof) + continue + if key in ignore or key.startswith('_'): + continue + value = getattr(self, key) + need_diff = False + if other is not None: + if key in self._pointers or callable(value): + continue + other_value = getattr(other, key) + if not show_all and other_value == value: + continue + offv, sizeof = self._off[key] + if sizeof == 0: + continue + def _valdiff(value, other_value): + if hasattr(value, "diff"): + return value.diff(other_value) + elif isinstance(value, bytes) and isinstance(other_value, bytes): + pad = bytes() + if len(value) & 3: + pad = bytes(4 - (len(value) & 3)) + return chexdiff32(other_value+pad, value+pad, offset=offv, offset2=0) + else: + val_repr = str_value(value) + if other_value != value: + other_repr = str_value(other_value) + return f"\x1b[33;1;4m{val_repr}\x1b[m ← \x1b[34m{other_repr}\x1b[m" + return val_repr + + if isinstance(value, list): + val_repr = "{\n" + for i, (a, b) in enumerate(zip(value, other_value)): + if a == b: + continue + val_repr += f"[{i}] = " + textwrap.indent(_valdiff(a, b), " ") + "\n" + offv += sizeof // 
len(value) + val_repr += "}\n" + else: + val_repr = _valdiff(value, other_value) + + else: + val_repr = str_value(value) + off = "" + meta = "" + if key in self._off: + offv, sizeof = self._off[key] + if sizeof is not None: + sizeofs = f"{sizeof:3x}" + else: + sizeofs = " *" + off = f"\x1b[32m[{offv:3x}.{sizeofs}]\x1b[m " + if key in self._meta: + meta = f" \x1b[34m{self._meta[key]}\x1b[m" + if '\n' in val_repr: + val_repr = textwrap.indent(val_repr, f'\x1b[90m{self.short_name:>5s}.\x1b[m') + if not val_repr.endswith('\n'): + val_repr += '\n' + str += f"\x1b[90m{self.short_name:>5s}.{off}\x1b[95m{key}\x1b[m ={meta}\n{val_repr}" + else: + str += f"\x1b[90m{self.short_name:>5s}.{off}\x1b[95m{key}\x1b[m = {val_repr}{meta}\n" + + return str + + def _dump(self): + print(f"# {self.__class__.__name__}") + if self._addr is not None: + print(f"# Address: 0x{self._addr:x}") + + keys = list(self) + keys.sort(key = lambda x: self._off.get(x, (-1, 0))[0]) + for key in keys: + if key.startswith('_'): + continue + value = getattr(self, key) + val_repr = str_value(value, repr=True) + print(f"self.{key} = {val_repr}") + + + @classmethod + def _build_prepare(cls, obj): + if isinstance(cls.subcon, Struct): + for subcon in cls.subcon.subcons: + if isinstance(subcon, Ver): + subcon = subcon.subcon + if not isinstance(subcon, Renamed): + continue + name = subcon.name + subcon = subcon.subcon + if isinstance(subcon, Lazy): + subcon = subcon.subcon + if not isinstance(subcon, Pointer): + continue + addr_field = subcon.offset.__getfield__() + # Ugh. 
+ parent = subcon.offset._Path__parent(obj) + if not hasattr(obj, name) and hasattr(parent, addr_field): + # No need for building + setattr(obj, name, None) + elif hasattr(obj, name): + subobj = getattr(obj, name) + try: + addr = subobj._addr + except (AttributeError, KeyError): + addr = None + if addr is not None: + setattr(parent, addr_field, addr) + + @classmethod + def _parse(cls, stream, context, path): + self = ConstructClassBase._parse.__func__(cls, stream, context, path) + + for key in self: + if key.startswith('_'): + continue + try: + val = int(self[key]) + except: + continue + if (0x1000000000 <= val <= 0x1f00000000 or + 0xf8000000000 <= val <= 0xff000000000 or + 0xffffff8000000000 <= val <= 0xfffffff000000000): + g_struct_trace.add((val, f"{cls.name}.{key}")) + return self + + def _apply_classful(self, obj): + obj2 = dict(obj) + if isinstance(self.__class__.subcon, Struct): + for subcon in self.__class__.subcon.subcons: + name = subcon.name + if name is None: + continue + subcon = subcon.subcon + if isinstance(subcon, Lazy): + continue + if isinstance(subcon, Pointer): + subcon = subcon.subcon + if isinstance(subcon, Ver): + subcon = subcon.subcon + if isinstance(subcon, Array): + subcon = subcon.subcon + + if name not in obj2: + continue + + val = obj2[name] + if not isinstance(subcon, type) or not issubclass(subcon, ConstructClassBase): + continue + + def _map(v): + if not isinstance(v, subcon): + sc = subcon() + sc._apply(v) + return sc + return v + + if isinstance(val, list): + obj2[name] = list(map(_map, val)) + else: + obj2[name] = _map(val) + + self._apply(obj2) + + def _apply(self, obj): + self.update(obj) + + def items(self): + for k in list(self): + if k.startswith("_"): + continue + yield k, self[k] + + def addrof(self, name): + return self._addr + self._off[name][0] + + @classmethod + def offsetof(cls, name): + return cls._off[name][0] + + def clone(self): + obj = type(self)() + obj.update(self) + return obj + + @classmethod + def 
from_json(cls, fd): + d = json.load(fd) + obj = cls() + obj._apply_classful(d) + return obj + + @classmethod + def is_versioned(cls): + for subcon in cls.subcon.subcons: + if isinstance(subcon, Ver): + return True + while True: + try: + subcon = subcon.subcon + if isinstance(subcon, type) and issubclass(subcon, ConstructClass) and subcon.is_versioned(): + return True + except: + break + return False + + @classmethod + def to_rust(cls): + assert isinstance(cls.subcon, Struct) + s = [] + if cls.is_versioned(): + s.append("#[versions(AGX)]"), + + s += [ + "#[derive(Debug, Clone, Copy)]", + "#[repr(C, packed(4))]", + f"struct {cls.__name__} {{", + ] + pad = 0 + has_ver = False + for subcon in cls.subcon.subcons: + if isinstance(subcon, Ver): + if not has_ver: + s.append("") + s.append(f" #[ver({subcon.cond})]") + subcon = subcon.subcon + has_ver = True + else: + has_ver = False + + name = subcon.name + if name is None: + name = f"__pad{pad}" + pad += 1 + + array_len = [] + skip = False + while subcon: + if isinstance(subcon, Lazy): + skip = True + break + elif isinstance(subcon, Pointer): + skip = True + break + elif isinstance(subcon, Array): + array_len.append(subcon.count) + elif isinstance(subcon, (HexDump, Default, Renamed, Dec, Hex, Const)): + pass + else: + break + subcon = subcon.subcon + + if isinstance(subcon, Bytes): + array_len.append(subcon.length) + subcon = Int8ul + + if skip: + #s.append(f" // {name}: {subcon}") + continue + + TYPE_MAP = { + Int64ul: "u64", + Int32ul: "u32", + Int16ul: "u16", + Int8ul: "u8", + Int64sl: "i64", + Int32sl: "i32", + Int16sl: "i16", + Int8sl: "i8", + Float32l: "f32", + Float64l: "f64", + } + + t = TYPE_MAP.get(subcon, repr(subcon)) + + if isinstance(subcon, type) and issubclass(subcon, ConstructClass): + t = subcon.__name__ + if subcon.is_versioned(): + t += "::ver" + + for n in array_len[::-1]: + t = f"[{t}; {n:#x}]" + + s.append(f" {name}: {t},") + + if has_ver: + s.append("") + + s += ["}"] + return "\n".join(s) + +class 
class ConstructValueClass(ConstructClassBase):
    """ Same as Construct, but for subcons that are single values, rather than containers

    the value is stored as .value
    """

    def __eq__(self, other):
        """Compare by wrapped value; *other* must also expose ``.value``."""
        return self.value == other.value

    def __str__(self) -> str:
        """Return ``"<Class> @ 0x<addr>:\\t<value>"``, omitting the address part when unknown."""
        header = self.__class__.__name__
        # _addr is None on instances that have not been parsed or built yet;
        # formatting None with ":x" would raise, so only print a real address.
        addr = getattr(self, "_addr", None)
        if addr is not None:
            header += f" @ 0x{addr:x}:"
        return f"{header}\t{str_value(self.value)}"

    def __getitem__(self, i):
        """Allow ``obj["value"]`` as an alias for ``obj.value``; any other key raises."""
        if i == "value":
            return self.value
        raise Exception(f"Invalid index {i}")

    @classmethod
    def _build(cls, obj, stream, context, path):
        # The subcon operates on the bare value, not on the wrapper object.
        return super()._build(obj.value, stream, context, path)

    def _apply(self, obj):
        """Store the parsed/built result as the wrapped value."""
        self.value = obj
    # A single value has no nested fields to convert, so both spellings match.
    _apply_classful = _apply
+ try: + _version = sys.modules["m1n1.constructutils"].Ver._version + except (KeyError, AttributeError): + _version = {"V": os.environ.get("AGX_FWVER", "V12_3"), + "G": os.environ.get("AGX_GPU", "G13")} + + MATRIX = { + "V": ["V12_1", "V12_3", "V12_4", "V13_0B4"], + "G": ["G13", "G14"], + } + + def __init__(self, version, subcon): + self.cond = version + self.vcheck = self.parse_ver(version) + self._name = subcon.name + self.subcon = subcon + self.flagbuildnone = True + self.docs = "" + self.parsed = None + + @property + def name(self): + if self._active(): + return self._name + else: + return None + + @staticmethod + def _split_ver(s): + if not s: + return None + parts = re.split(r"[-,. ]", s) + parts2 = [] + for i in parts: + try: + parts2.append(int(i)) + except ValueError: + parts2.append(i) + if len(parts2) > 3 and parts2[-2] == "beta": + parts2[-3] -= 1 + parts2[-2] = 99 + return tuple(parts2) + + @classmethod + def parse_ver(cls, version): + expr = version.replace("&&", " and ").replace("||", " or ") + + base_loc = {j: i for row in cls.MATRIX.values() for i, j in enumerate(row)} + + def check_ver(ver): + loc = dict(base_loc) + for k, v in ver.items(): + loc[k] = cls.MATRIX[k].index(v) + return eval(expr, None, loc) + + return check_ver + + @classmethod + def check(cls, version): + return cls.parse_ver(version)(cls._version) + + def _active(self): + return self.vcheck(self._version) + + def _parse(self, stream, context, path): + if not self._active(): + return None + obj = self.subcon._parse(stream, context, path) + return obj + + def _build(self, obj, stream, context, path): + if not self._active(): + return None + return self.subcon._build(obj, stream, context, path) + + def _sizeof(self, context, path): + if not self._active(): + return 0 + return self.subcon._sizeof(context, path) + + @classmethod + def set_version_key(cls, key, version): + cls._version[key] = version + + @classmethod + def set_version(cls, u): + cls.set_version_key("V", u.version) + 
cls.set_version_key("G", u.adt["/arm-io"].soc_generation.replace("H", "G")) + +def show_struct_trace(log=print): + for addr, desc in sorted(list(g_struct_trace)): + log(f"{addr:>#18x}: {desc}") + +__all__ = ["ConstructClass", "ConstructValueClass", "Dec", "ROPointer", "show_struct_trace", "ZPadding", "Ver"] diff --git a/tools/proxyclient/m1n1/find_regs.py b/tools/proxyclient/m1n1/find_regs.py new file mode 100644 index 0000000..9982661 --- /dev/null +++ b/tools/proxyclient/m1n1/find_regs.py @@ -0,0 +1,99 @@ +# SPDX-License-Identifier: MIT +import struct + +from . import asm, sysreg +from .proxyutils import GuardedHeap + +__all__ = ["dynamic_regs", "impdef_regs", "static_regs", "find_regs"] + +def _all(): + for op1 in range(1 << 3): + for CRn in (0b1011, 0b1111): + for CRm in range(1 << 4): + for op2 in range(1 << 3): + yield 3, op1, CRn, CRm, op2 + +dynamic_regs = [ + sysreg.CNTVCT_ALIAS_EL0, + sysreg.CNTPCT_ALIAS_EL0, +] + +impdef_regs = list(_all()) +static_regs = [i for i in _all() if i not in dynamic_regs] + +def find_regs(u, regs=None, block=1024, call=None, values=True): + if regs is None: + regs = impdef_regs + + p = u.proxy + iface = u.iface + + data_len = 8 * block + + with GuardedHeap(u.heap) as heap: + data_buffer = heap.malloc(data_len) + + template = asm.ARMAsm(""" + mov x2, x0 + mrs x2, s3_0_c0_c0_0 + str x2, [x1], #8 + """, 0x1000) + + mov, mrs, st = struct.unpack("3I", template.data) + + + BAD = 0xacce5515abad1dea + OOPS = 0xdeadc0dedeadc0de + + iregs = iter(regs) + + while True: + insns = [] + bregs = [] + for i in iregs: + op0, op1, CRn, CRm, op2 = enc = sysreg.sysreg_parse(i) + bregs.append(enc) + assert op0 == 3 + + insns.extend((mov, mrs | (op1 << 16) | (CRn << 12) | (CRm << 8) | (op2 << 5), st)) + + if len(bregs) >= block: + break + + if not bregs: + break + + p.memset64(data_buffer, OOPS, data_len) + u.exec(insns, BAD, data_buffer, call=call, silent=True, ignore_exceptions=True) + data = iface.readmem(data_buffer, 8 * len(bregs)) + for reg, 
val in zip(bregs, struct.unpack(f"<{len(bregs)}Q", data)): + if val == OOPS: + raise Exception(f"Failed to execute reg-finder code at {reg}") + if val != BAD: + if values: + yield reg, val + else: + yield reg + +if __name__ == "__main__": + from m1n1.setup import * + + p.iodev_set_usage(IODEV.FB, 0) + + for reg, val in find_regs(u): + print(f"{sysreg_name(reg)} ({', '.join(map(str, reg))}) = 0x{val:x}") + + try: + u.msr(reg, val, silent=True) + except: + print(" - READONLY") + try: + u.mrs(reg, silent=True, call="el1") + except: + print(" - ### EL2 only ###") + try: + u.mrs(reg, silent=True, call="el0") + except: + pass + else: + print(" - *** EL0 accessible ***") diff --git a/tools/proxyclient/m1n1/fw/__init__.py b/tools/proxyclient/m1n1/fw/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/tools/proxyclient/m1n1/fw/__init__.py diff --git a/tools/proxyclient/m1n1/fw/afk/__init__.py b/tools/proxyclient/m1n1/fw/afk/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/tools/proxyclient/m1n1/fw/afk/__init__.py diff --git a/tools/proxyclient/m1n1/fw/afk/epic.py b/tools/proxyclient/m1n1/fw/afk/epic.py new file mode 100644 index 0000000..e95281d --- /dev/null +++ b/tools/proxyclient/m1n1/fw/afk/epic.py @@ -0,0 +1,292 @@ +# SPDX-License-Identifier: MIT + +import struct +from io import BytesIO +from construct import * +from ..common import * +from ...utils import * +from ..asc import StandardASC +from ..asc.base import * +from .rbep import AFKRingBufEndpoint + +EPICType = "EPICType" / Enum(Int32ul, + NOTIFY = 0, + COMMAND = 3, + REPLY = 4, + NOTIFY_ACK = 8, +) + +EPICCategory = "EPICCategory" / Enum(Int8ul, + REPORT = 0x00, + NOTIFY = 0x10, + REPLY = 0x20, + COMMAND = 0x30, +) + +EPICHeader = Struct( + "channel" / Int32ul, + "type" / EPICType, + "version" / Const(2, Int8ul), + "seq" / Int16ul, + "pad" / Const(0, Int8ul), + "unk" / Const(0, Int32ul), + "timestamp" / Default(Int64ul, 0), +) + +EPICSubHeader = Struct( + "length" 
/ Int32ul, + "version" / Default(Int8ul, 4), + "category" / EPICCategory, + "type" / Hex(Int16ul), + "timestamp" / Default(Int64ul, 0), + "seq" / Int16ul, + "unk" / Default(Hex(Int16ul), 0), + "inline_len" / Hex(Int32ul), +) + +EPICAnnounce = Struct( + "name" / Padded(32, CString("utf8")), + "props" / Optional(OSSerialize()) +) + +EPICSetProp = Struct( + "name_len" / Int32ul, + "name" / Aligned(4, CString("utf8")), + "value" / OSSerialize() +) + +EPICCmd = Struct( + "retcode" / Default(Hex(Int32ul), 0), + "rxbuf" / Hex(Int64ul), + "txbuf" / Hex(Int64ul), + "rxlen" / Hex(Int32ul), + "txlen" / Hex(Int32ul), + "rxcookie" / Optional(Default(Bool(Int8ul), False)), + "txcookie" / Optional(Default(Bool(Int8ul), False)), +) + + +class EPICError(Exception): + pass + + +class EPICService: + RX_BUFSIZE = 0x4000 + TX_BUFSIZE = 0x4000 + + def __init__(self, ep): + self.iface = ep.asc.iface + self.ep = ep + self.ready = False + self.chan = None + self.seq = 0 + + def log(self, msg): + print(f"[{self.ep.name}.{self.SHORT}] {msg}") + + def init(self, props): + self.log(f"Init: {props}") + self.props = props + self.rxbuf, self.rxbuf_dva = self.ep.asc.ioalloc(self.RX_BUFSIZE) + self.txbuf, self.txbuf_dva = self.ep.asc.ioalloc(self.TX_BUFSIZE) + self.ready = True + + def wait(self): + while not self.ready: + self.ep.asc.work() + + def handle_report(self, category, type, seq, fd): + self.log(f"Report {category}/{type} #{seq}") + chexdump(fd.read()) + + def handle_notify(self, category, type, seq, fd): + retcode = struct.unpack("<I", fd.read(4))[0] + self.log(f"Notify {category}/{type} #{seq} ({retcode})") + data = fd.read() + chexdump(data) + print("Send ACK") + + data = data[:0x50] + b"\x01\x00\x00\x00" + data[0x54:] + + pkt = struct.pack("<I", 0) + data + self.ep.send_epic(self.chan, EPICType.NOTIFY_ACK, EPICCategory.REPLY, type, seq, pkt, len(data)) + + def handle_reply(self, category, type, seq, fd): + off = fd.tell() + data = fd.read() + if len(data) == 4: + retcode = 
struct.unpack("<I", data)[0] + if retcode: + raise EPICError(f"IOP returned errcode {retcode:#x}") + else: + self.reply = retcode + return + fd.seek(off) + cmd = EPICCmd.parse_stream(fd) + payload = fd.read() + self.log(f"Response {type:#x} #{seq}: {cmd.retcode:#x}") + if cmd.retcode != 0: + raise EPICError(f"IOP returned errcode {cmd.retcode:#x}") + if payload: + self.log("Inline payload:") + chexdump(payload) + assert cmd.rxbuf == self.rxbuf_dva + self.reply = self.iface.readmem(self.rxbuf, cmd.rxlen) + + def handle_cmd(self, category, type, seq, fd): + cmd = EPICCmd.parse_stream(fd) + self.log(f"Command {type:#x} #{seq}: {cmd.retcode:#x}") + + def send_cmd(self, type, data, retlen=None): + if retlen is None: + retlen = len(data) + cmd = Container() + cmd.rxbuf = self.rxbuf_dva + cmd.txbuf = self.txbuf_dva + cmd.txlen = len(data) + cmd.rxlen = retlen + self.iface.writemem(self.txbuf, data) + self.reply = None + pkt = EPICCmd.build(cmd) + self.ep.send_epic(self.chan, EPICType.COMMAND, EPICCategory.COMMAND, type, self.seq, pkt) + self.seq += 1 + while self.reply is None: + self.ep.asc.work() + return self.reply + +class EPICStandardService(EPICService): + def call(self, group, cmd, data=b'', replen=None): + msg = struct.pack("<2xHIII48x", group, cmd, len(data), 0x69706378) + data + if replen is not None: + replen += 64 + resp = self.send_cmd(0xc0, msg, replen) + if not resp: + return + rgroup, rcmd, rlen, rmagic = struct.unpack("<2xHIII", resp[:16]) + assert rmagic == 0x69706378 + assert rgroup == group + assert rcmd == cmd + return resp[64:64+rlen] + + def getLocation(self, unk=0): + return struct.unpack("<16xI12x", self.call(4, 4, bytes(32))) + + def getUnit(self, unk=0): + return struct.unpack("<16xI12x", self.call(4, 5, bytes(32))) + + def open(self, unk=0): + self.call(4, 6, struct.pack("<16xI12x", unk)) + + def close(self): + self.call(4, 7, bytes(16)) + +class AFKSystemService(EPICService): + NAME = "system" + SHORT = "system" + + def getProperty(self, prop, 
val): + pass + #self.send_cmd(0x40, msg, 0) + + def setProperty(self, prop, val): + msg = { + "name_len": (len(prop) + 3) & ~3, + "name": prop, + "value": val, + } + msg = EPICSetProp.build(msg) + self.send_cmd(0x43, msg, 0) + +class EPICEndpoint(AFKRingBufEndpoint): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.serv_map = {} + self.chan_map = {} + self.serv_names = {} + self.hseq = 0 + + for i in self.SERVICES: + self.serv_names[i.NAME] = i + + def handle_ipc(self, data): + fd = BytesIO(data) + hdr = EPICHeader.parse_stream(fd) + sub = EPICSubHeader.parse_stream(fd) + + if self.verbose > 2: + self.log(f"Ch {hdr.channel} Type {hdr.type} Ver {hdr.version} Seq {hdr.seq}") + self.log(f" Len {sub.length} Ver {sub.version} Cat {sub.category} Type {sub.type:#x} Seq {sub.seq}") + + if sub.category == EPICCategory.REPORT: + self.handle_report(hdr, sub, fd) + if sub.category == EPICCategory.NOTIFY: + self.handle_notify(hdr, sub, fd) + elif sub.category == EPICCategory.REPLY: + self.handle_reply(hdr, sub, fd) + elif sub.category == EPICCategory.COMMAND: + self.handle_cmd(hdr, sub, fd) + + def wait_for(self, name): + while True: + srv = getattr(self, name, None) + if srv is not None and srv.ready: + break + self.asc.work() + + def handle_report(self, hdr, sub, fd): + if sub.type == 0x30: + init = EPICAnnounce.parse_stream(fd) + if init.props is None: + init.props = {} + name = init.name + if "EPICName" in init.props: + name = init.props["EPICName"] + key = name + str(init.props.get("EPICUnit", "")) + if name in self.serv_names: + srv = self.serv_names[name](self) + short = srv.SHORT + str(init.props.get("EPICUnit", "")) + setattr(self, short, srv) + srv.init(init.props) + srv.chan = hdr.channel + self.chan_map[hdr.channel] = srv + self.serv_map[key] = srv + self.log(f"New service: {key} on channel {hdr.channel} (short name: {short})") + else: + self.log(f"Unknown service {key} on channel {hdr.channel}") + else: + if hdr.channel not in 
self.chan_map: + self.log(f"Ignoring report on channel {hdr.channel}") + else: + self.chan_map[hdr.channel].handle_report(sub.category, sub.type, sub.seq, fd) + + def handle_notify(self, hdr, sub, fd): + self.chan_map[hdr.channel].handle_notify(sub.category, sub.type, sub.seq, fd) + + def handle_reply(self, hdr, sub, fd): + self.chan_map[hdr.channel].handle_reply(sub.category, sub.type, sub.seq, fd) + + def handle_cmd(self, hdr, sub, fd): + self.chan_map[hdr.channel].handle_cmd(sub.category, sub.type, sub.seq, fd) + + def send_epic(self, chan, ptype, category, type, seq, data, inline_len=0): + hdr = Container() + hdr.channel = chan + hdr.type = ptype + hdr.seq = self.hseq + self.hseq += 1 + + sub = Container() + sub.length = len(data) + sub.category = category + sub.type = type + sub.seq = seq + sub.inline_len = inline_len + pkt = EPICHeader.build(hdr) + EPICSubHeader.build(sub) + data + super().send_ipc(pkt) + +class AFKSystemEndpoint(EPICEndpoint): + SHORT = "system" + + SERVICES = [ + AFKSystemService, + ] diff --git a/tools/proxyclient/m1n1/fw/afk/rbep.py b/tools/proxyclient/m1n1/fw/afk/rbep.py new file mode 100644 index 0000000..872d75f --- /dev/null +++ b/tools/proxyclient/m1n1/fw/afk/rbep.py @@ -0,0 +1,225 @@ +# SPDX-License-Identifier: MIT + +import struct + +from ..common import * +from ...utils import * +from ..asc.base import * + + +class AFKEPMessage(Register64): + TYPE = 63, 48 + +class AFKEP_GetBuf(AFKEPMessage): + TYPE = 63, 48, Constant(0x89) + SIZE = 31, 16 + TAG = 15, 0 + +class AFKEP_GetBuf_Ack(AFKEPMessage): + TYPE = 63, 48, Constant(0xa1) + DVA = 47, 0 + +class AFKEP_InitRB(AFKEPMessage): + OFFSET = 47, 32 + SIZE = 31, 16 + TAG = 15, 0 + +class AFKEP_Send(AFKEPMessage): + TYPE = 63, 48, Constant(0xa2) + WPTR = 31, 0 + +class AFKEP_Recv(AFKEPMessage): + TYPE = 63, 48, Constant(0x85) + WPTR = 31, 0 + +class AFKEP_Init(AFKEPMessage): + TYPE = 63, 48, Constant(0x80) + +class AFKEP_Init_Ack(AFKEPMessage): + TYPE = 63, 48, Constant(0xa0) + +class 
AFKEP_Start(AFKEPMessage): + TYPE = 63, 48, Constant(0xa3) + +class AFKEP_Start_Ack(AFKEPMessage): + TYPE = 63, 48, Constant(0x86) + +class AFKEP_Shutdown(AFKEPMessage): + TYPE = 63, 48, Constant(0xc0) + +class AFKEP_Shutdown_Ack(AFKEPMessage): + TYPE = 63, 48, Constant(0xc1) + + +class AFKError(Exception): + pass + +class AFKRingBuf(Reloadable): + BLOCK_SIZE = 0x40 + + def __init__(self, ep, base, size): + self.ep = ep + self.base = base + + bs, unk = struct.unpack("<II", self.read_buf(0, 8)) + assert (bs + 3 * self.BLOCK_SIZE) == size + self.bufsize = bs + self.rptr = 0 + self.wptr = 0 + + def read_buf(self, off, size): + return self.ep.iface.readmem(self.base + off, size) + + def write_buf(self, off, data): + return self.ep.iface.writemem(self.base + off, data) + + def get_rptr(self): + return self.ep.asc.p.read32(self.base + self.BLOCK_SIZE) + + def get_wptr(self): + return self.ep.asc.p.read32(self.base + 2 * self.BLOCK_SIZE) + + def update_rptr(self, rptr): + self.ep.asc.p.write32(self.base + self.BLOCK_SIZE, self.rptr) + + def update_wptr(self, rptr): + self.ep.asc.p.write32(self.base + 2 * self.BLOCK_SIZE, self.wptr) + + def read(self): + self.wptr = self.get_wptr() + + while self.wptr != self.rptr: + hdr = self.read_buf(3 * self.BLOCK_SIZE + self.rptr, 16) + self.rptr += 16 + magic, size = struct.unpack("<4sI", hdr[:8]) + assert magic in [b"IOP ", b"AOP "] + if size > (self.bufsize - self.rptr): + hdr = self.read_buf(3 * self.BLOCK_SIZE, 16) + self.rptr = 16 + magic, size = struct.unpack("<4sI", hdr[:8]) + assert magic in [b"IOP ", b"AOP "] + + payload = self.read_buf(3 * self.BLOCK_SIZE + self.rptr, size) + self.rptr = (align_up(self.rptr + size, self.BLOCK_SIZE)) % self.bufsize + self.update_rptr(self.rptr) + yield hdr[8:] + payload + self.wptr = self.get_wptr() + + self.update_rptr(self.rptr) + + def write(self, data): + hdr2, data = data[:8], data[8:] + self.rptr = self.get_rptr() + + if self.wptr < self.rptr and self.wptr + 0x10 >= self.rptr: + raise 
AFKError("Ring buffer is full") + + hdr = struct.pack("<4sI", b"IOP ", len(data)) + hdr2 + self.write_buf(3 * self.BLOCK_SIZE + self.wptr, hdr) + + if len(data) > (self.bufsize - self.wptr - 16): + if self.rptr < 0x10: + raise AFKError("Ring buffer is full") + self.write_buf(3 * self.BLOCK_SIZE, hdr) + self.wptr = 0 + + if self.wptr < self.rptr and self.wptr + 0x10 + len(data) >= self.rptr: + raise AFKError("Ring buffer is full") + + self.write_buf(3 * self.BLOCK_SIZE + self.wptr + 0x10, data) + self.wptr = align_up(self.wptr + 0x10 + len(data), self.BLOCK_SIZE) % self.bufsize + + self.update_wptr(self.wptr) + return self.wptr + +class AFKRingBufEndpoint(ASCBaseEndpoint): + BASE_MESSAGE = AFKEPMessage + SHORT = "afkep" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.txq = None + self.rxq = None + self.iface = self.asc.iface + self.alive = False + self.started = False + self.iobuffer = None + self.verbose = 2 + self.msgid = 0 + + def start(self): + self.send(AFKEP_Init()) + + @msg_handler(0xa0, AFKEP_Init_Ack) + def Init_Ack(self, msg): + self.alive = True + return True + + @msg_handler(0x89, AFKEP_GetBuf) + def GetBuf(self, msg): + size = msg.SIZE * AFKRingBuf.BLOCK_SIZE + + if self.iobuffer: + print("WARNING: trying to reset iobuffer!") + + self.iobuffer, self.iobuffer_dva = self.asc.ioalloc(size) + self.asc.p.write32(self.iobuffer, 0xdeadbeef) + self.send(AFKEP_GetBuf_Ack(DVA=self.iobuffer_dva)) + self.log(f"Buffer: phys={self.iobuffer:#x} dva={self.iobuffer_dva:#x} size={size:#x}") + return True + + def stop(self): + self.log("Shutting down") + self.send(AFKEP_Shutdown()) + while self.alive: + self.asc.work() + + @msg_handler(0xc1, AFKEP_Shutdown_Ack) + def Shutdown_Ack(self, msg): + self.alive = False + self.log("Shutdown ACKed") + return True + + @msg_handler(0x8a, AFKEP_InitRB) + def InitTX(self, msg): + self.txq = self.init_rb(msg) + if self.rxq and self.txq: + self.start_queues() + return True + + @msg_handler(0x8b, 
AFKEP_InitRB) + def InitRX(self, msg): + self.rxq = self.init_rb(msg) + if self.rxq and self.txq: + self.start_queues() + return True + + def init_rb(self, msg): + off = msg.OFFSET * AFKRingBuf.BLOCK_SIZE + size = msg.SIZE * AFKRingBuf.BLOCK_SIZE + + return AFKRingBuf(self, self.iobuffer + off, size) + + def start_queues(self): + self.send(AFKEP_Start()) + + @msg_handler(0x86, AFKEP_Start_Ack) + def Start_Ack(self, msg): + self.started = True + return True + + @msg_handler(0x85, AFKEP_Recv) + def Recv(self, msg): + for data in self.rxq.read(): + if self.verbose >= 3: + self.log(f"<RX rptr={self.rxq.rptr:#x}") + chexdump(data) + self.handle_ipc(data) + return True + + def handle_ipc(self, data): + pass + + def send_ipc(self, data): + wptr = self.txq.write(data) + self.send(AFKEP_Send(WPTR = wptr)) diff --git a/tools/proxyclient/m1n1/fw/agx/__init__.py b/tools/proxyclient/m1n1/fw/agx/__init__.py new file mode 100644 index 0000000..e436720 --- /dev/null +++ b/tools/proxyclient/m1n1/fw/agx/__init__.py @@ -0,0 +1,98 @@ +# SPDX-License-Identifier: MIT +from ...utils import * +from ...malloc import Heap + +from ..asc import StandardASC +from ..asc.base import ASCBaseEndpoint, msg_handler + +from .initdata import InitData, IOMapping + +__all__ = [] + +class GpuMsg(Register64): + TYPE = 63, 48 + +class InitMsg(GpuMsg): + TYPE = 63, 48, Constant(0x81) + UNK = 47, 44, Constant(0) + INITDATA = 43, 0 + +class EventMsg(GpuMsg): + TYPE = 63, 48, Constant(0x42) + UNK = 47, 0, Constant(0) + +class DoorbellMsg(GpuMsg): + TYPE = 63, 48, Constant(0x83) + CHANNEL = 15, 0 + +class FWCtlMsg(GpuMsg): + TYPE = 63, 48, Constant(0x84) + +class HaltMsg(GpuMsg): + TYPE = 63, 48, Constant(0x85) + +class FirmwareEP(ASCBaseEndpoint): + BASE_MESSAGE = GpuMsg + SHORT = "fw" + + @msg_handler(0x42) + def event(self, msg): + #self.log("Received event") + self.asc.agx.poll_channels() + return True + + def send_initdata(self, addr): + self.log(f"Sending initdata @ {addr:#x}") + msg = 
InitMsg(INITDATA=addr) + self.send(msg) + +class DoorbellEP(ASCBaseEndpoint): + BASE_MESSAGE = DoorbellMsg + SHORT = "db" + + def doorbell(self, channel): + #self.log(f"Sending doorbell ch={channel}") + msg = DoorbellMsg(CHANNEL = channel) + self.send(msg) + + def fwctl_doorbell(self): + msg = FWCtlMsg() + self.send(msg) + +class AGXASC(StandardASC): + ENDPOINTS = { + 0x20: FirmwareEP, + 0x21: DoorbellEP, + } + + def __init__(self, u, base, agx, uat): + super().__init__(u, base) + self.agx = agx + self.uat = uat + + def addr(self, addr): + base, obj = self.agx.find_object(addr) + if base is None: + return super().addr(addr) + + return f"{addr:#x} ({obj._name} [{obj._size:#x}] @ {base:#x} + {addr - base:#x})" + + def iomap(self, addr, size): + return self.uat.iomap(0, addr, size) + + def ioalloc(self, size): + paddr = self.u.memalign(0x4000, size) + dva = self.iomap(paddr, size) + return paddr, dva + + def ioread(self, dva, size, ctx=0): + return self.uat.ioread(ctx, dva & 0xFFFFFFFFFF, size) + + def iowrite(self, dva, data, ctx=0): + return self.uat.iowrite(ctx, dva & 0xFFFFFFFFFF, data) + + def iotranslate(self, dva, size, ctx=0): + return self.uat.iotranslate(ctx, dva & 0xFFFFFFFFFF, size) + +__all__.extend(k for k, v in globals().items() + if (callable(v) or isinstance(v, type)) and v.__module__ == __name__) diff --git a/tools/proxyclient/m1n1/fw/agx/channels.py b/tools/proxyclient/m1n1/fw/agx/channels.py new file mode 100644 index 0000000..6704d20 --- /dev/null +++ b/tools/proxyclient/m1n1/fw/agx/channels.py @@ -0,0 +1,481 @@ + +import random + +from m1n1.utils import * +from m1n1.constructutils import * +from construct import * +from .cmdqueue import * + +__all__ = ["channelNames", "channelRings", "DeviceControlMsg", "EventMsg", "StatsMsg"] + +class RunCmdQueueMsg(ConstructClass): + subcon = Struct ( + "queue_type" / Default(Int32ul, 0), + "cmdqueue_addr" / Default(Hex(Int64ul), 0), + "cmdqueue" / Lazy(ROPointer(this.cmdqueue_addr, CommandQueueInfo)), + "head" 
/ Default(Int32ul, 0), + "event_number" / Default(Int32ul, 0), + "new_queue" / Default(Int32ul, 0), + "data" / HexDump(Default(Bytes(0x18), bytes(0x18))), + ) + + TYPES = { + 0: "SubmitTA", + 1: "Submit3D", + 2: "SubmitCompute", + } + + def __str__(self, *args, **kwargs): + s = super().__str__(*args, **kwargs) + "\n" + + if self.cmdqueue_addr == 0: + return s + "<Empty RunCmdQueueMsg>" + + r = random.randrange(2**64) + s += f"{self.TYPES[self.queue_type]}(0x{self.cmdqueue_addr & 0xfff_ffffffff:x}, {self.head}, ev={self.event_number}, new={self.new_queue}) //{r:x}" + return s + +class DC_DestroyContext(ConstructClass): + subcon = Struct ( + "msg_type" / Const(0x17, Int32ul), + "unk_4" / Hex(Int32ul), + "unk_8" / Hex(Int32ul), + "unk_c" / Hex(Int32ul), + "unk_10" / Hex(Int32ul), + "unk_14" / Hex(Int32ul), + "unk_18" / Hex(Int32ul), + "context_addr" / Hex(Int64ul), + "rest" / HexDump(Default(Bytes(0xc), bytes(0xc))) + ) + +class DC_Write32(ConstructClass): + subcon = Struct ( + "msg_type" / Const(0x18, Int32ul), + "addr" / Hex(Int64ul), + "data" / Int32ul, + "unk_10" / Int32ul, + "unk_14" / Int32ul, + "unk_18" / Int32ul, + "unk_1c" / Int32ul, + "rest" / HexDump(Default(Bytes(0x10), bytes(0x10))) + ) + +class DC_Write32B(ConstructClass): + subcon = Struct ( + "msg_type" / Const(0x13, Int32ul), + "addr" / Hex(Int64ul), + "data" / Int32ul, + "unk_10" / Int32ul, + "unk_14" / Int32ul, + "unk_18" / Int32ul, + "unk_1c" / Int32ul, + "rest" / HexDump(Default(Bytes(0x10), bytes(0x10))) + ) + +class DC_Init(ConstructClass): + subcon = Struct ( + "msg_type" / Const(0x19, Int32ul), + "data" / HexDump(Default(Bytes(0x2c), bytes(0x2c))) + ) + +class DC_09(ConstructClass): + subcon = Struct ( + "msg_type" / Const(0x9, Int32ul), + "unk_4" / Int64ul, + "unkptr_c" / Int64ul, + "unk_14" / Int64ul, + "data" / HexDump(Default(Bytes(0x14), bytes(0x14))) + ) + +class DC_Any(ConstructClass): + subcon = Struct ( + "msg_type" / Int32ul, + "data" / HexDump(Default(Bytes(0x2c), bytes(0x2c))) + ) 
+ +class DC_1e(ConstructClass): + subcon = Struct ( + "msg_type" / Const(0x1e, Int32ul), + "unk_4" / Int64ul, + "unk_c" / Int64ul, + "data" / HexDump(Default(Bytes(0x1c), bytes(0x1c))) + ) + +class DC_UpdateIdleTS(ConstructClass): + subcon = Struct ( + "msg_type" / Const(0x23, Int32ul), + "data" / HexDump(Default(Bytes(0x2c), bytes(0x2c))), + ) + +class UnknownMsg(ConstructClass): + subcon = Struct ( + "msg_type" / Hex(Int32ul), + "data" / HexDump(Bytes(0x2c)), + ) + +DeviceControlMsg = FixedSized(0x30, Select( + DC_DestroyContext, + DC_Init, + DC_UpdateIdleTS, + DC_1e, + DC_Write32, + UnknownMsg, +)) + +class StatsMsg_Power(ConstructClass): + subcon = Struct ( + "msg_type" / Hex(Const(0x00, Int32ul)), + ZPadding(0x18), # ??? why the hole? never written... + "power" / Hex(Int64ul), + ZPadding(0xc), # Confirmed padding + ) + + def __str__(self): + return f"Power: {self.power / 8192.0:.3f} mW" + +class StatsMsg_PowerOn(ConstructClass): + subcon = Struct ( + "msg_type" / Hex(Const(0x02, Int32ul)), + "power_off_ticks" / Dec(Int64ul), + ZPadding(0x24), # Confirmed padding + ) + def __str__(self): + t = self.power_off_ticks / 24000000 + return f"Power ON: spent {t:.04}s powered off ({self.power_off_ticks} ticks)" + +class StatsMsg_PowerOff(ConstructClass): + subcon = Struct ( + "msg_type" / Hex(Const(0x03, Int32ul)), + "power_on_ticks" / Dec(Int64ul), + ZPadding(0x24), # Confirmed padding + ) + def __str__(self): + t = self.power_on_ticks / 24000000 + return f"Power OFF: spent {t:.04}s powered on ({self.power_on_ticks} ticks)" + +class StatsMsg_Util(ConstructClass): + subcon = Struct ( + "msg_type" / Hex(Const(0x04, Int32ul)), + "timestamp" / Hex(Int64ul), + "util1" / Dec(Int32ul), + "util2" / Dec(Int32ul), + "util3" / Dec(Int32ul), + "util4" / Dec(Int32ul), + ZPadding(0x14), # Confirmed padding + ) + def __str__(self): + return f"Utilization: {self.util1:>3d}% {self.util2:>3d}% {self.util3:>3d}% {self.util4:>3d}%" + +class StatsMsg_AvgPower(ConstructClass): + subcon = 
Struct ( + "msg_type" / Hex(Const(0x09, Int32ul)), + "active_cs" / Dec(Int64ul), + "unk2" / Hex(Int32ul), + "unk3" / Hex(Int32ul), + "unk4" / Hex(Int32ul), + "avg_power" / Dec(Int32ul), + ZPadding(0x14), # Confirmed padding + ) + + def __str__(self): + return f"Activity: Active {self.active_cs * 10:6d} ms Avg Pwr {self.avg_power:4d} mW ({self.unk2:d} {self.unk3:d} {self.unk4:d})" + +class StatsMsg_Temp(ConstructClass): + subcon = Struct ( + "msg_type" / Hex(Const(0x0a, Int32ul)), + ZPadding(8), # Not written + "raw_value" / Hex(Int32ul), + "scale" / Hex(Int32ul), + "tmin" / Hex(Int32ul), + "tmax" / Hex(Int32ul), + ZPadding(0x14), # Confirmed padding + ) + + def __str__(self): + temp = self.raw_value / float(self.scale) / 64.0 + return f"Temp: {temp:.2f}°C s={self.scale:d} tmin={self.tmin:d} tmax={self.tmax:d}" + +class StatsMsg_PowerState(ConstructClass): + subcon = Struct ( + "msg_type" / Hex(Const(0x0b, Int32ul)), + "timestamp" / Hex(Int64ul), + "last_busy_ts" / Hex(Int64ul), + "active" / Hex(Int32ul), + "poweroff" / Dec(Int32ul), + "unk2" / Dec(Int32ul), + "pstate" / Dec(Int32ul), + "unk4" / Dec(Int32ul), + "unk5" / Dec(Int32ul), + ZPadding(4), # Confirmed padding + ) + + def __str__(self): + act = "ACT" if self.active else " " + off = "OFF" if self.poweroff else " " + + return f"PowerState: {act} {off} ps={int(self.pstate)} {self.unk4} {self.unk2} {self.unk5}" + +class StatsMsg_FWBusy(ConstructClass): + subcon = Struct ( + "msg_type" / Hex(Const(0x0c, Int32ul)), + "timestamp" / Hex(Int64ul), + "flag" / Int32ul, + ZPadding(0x20), # Confirmed padding + ) + + def __str__(self): + return f"FW active: {bool(self.flag)}" + +class StatsMsg_PState(ConstructClass): + subcon = Struct ( + "msg_type" / Hex(Const(0x0d, Int32ul)), + ZPadding(8), # Not written + "ps_min" / Dec(Int32ul), + "unk1" / Dec(Int32ul), + "ps_max" / Dec(Int32ul), + "unk3" / Dec(Int32ul), + ZPadding(0x14), # Confirmed padding + ) + def __str__(self): + return f"PState: {self.ps_min:d}..{self.ps_max:d} 
({self.unk1:d}/{self.unk3:d})" + +class StatsMsg_TempSensor(ConstructClass): + subcon = Struct ( + "msg_type" / Hex(Const(0x0e, Int32ul)), + ZPadding(4), # Not written + "sensor_id" / Hex(Int32ul), + "raw_value" / Hex(Int32ul), + "scale" / Dec(Int32ul), + "tmin" / Dec(Int32ul), + "tmax" / Dec(Int32ul), + ZPadding(0x14), # Confirmed padding + ) + def __str__(self): + temp = self.raw_value / float(self.scale) / 64.0 + return f"TempSensor: #{self.sensor_id:d} {temp:.2f}°C s={self.scale:d} tmin={self.tmin:d} tmax={self.tmax:d}" + +StatsMsg = FixedSized(0x30, Select( + StatsMsg_Power, + StatsMsg_PowerOn, + StatsMsg_PowerOff, + StatsMsg_Util, + StatsMsg_AvgPower, + StatsMsg_Temp, + StatsMsg_PowerState, + StatsMsg_FWBusy, + StatsMsg_PState, + StatsMsg_TempSensor, + UnknownMsg, +)) + +class FWLogMsg(ConstructClass): + subcon = Struct ( + "msg_type" / Hex(Const(0x03, Int32ul)), + "seq_no" / Hex(Int32ul), + "timestamp" / Hex(Int64ul), + "msg" / PaddedString(0xc8, "ascii") + ) + +class FaultMsg(ConstructClass): + subcon = Struct ( + "msg_type" / Hex(Const(0, Int32ul)), + "unk_4" / HexDump(Bytes(0x34)), + ) + +class FlagMsg(ConstructClass): + subcon = Struct ( + "msg_type" / Hex(Const(1, Int32ul)), + "firing" / Array(2, Hex(Int64ul)), + "unk_14" / Hex(Int16ul), + "tail" / Bytes(0x38 - 0x18), + ) + +class TimeoutMsg(ConstructClass): + subcon = Struct ( + "msg_type" / Hex(Const(4, Int32ul)), + "counter" / Hex(Int32ul), + "unk_8" / Hex(Int32ul), + "stamp_index" / Hex(Int32sl), + "unkpad_16" / HexDump(Bytes(0x38 - 0x10)), + ) + +EventMsg = FixedSized(0x38, Select( + FaultMsg, + FlagMsg, + TimeoutMsg, + HexDump(Bytes(0x38)), +)) + +TRACE_MSGS = { + (0x00, 0x00, 0): ("StartTA", "uuid", None, "unk", "cmdqueue"), + (0x00, 0x01, 0): ("FinishTA", "uuid", None, "unk", "cmdqueue"), + (0x00, 0x04, 0): ("Start3D", "uuid", "partial_render", "unk", "cmdqueue"), + (0x00, 0x05, 0): ("Finish3D_unk", "uuid", "unk", "flag", "buf_related"), + (0x00, 0x06, 0): ("Finish3D", "uuid", None, "unk", 
"cmdqueue"), + (0x00, 0x07, 0): ("StartCP", "uuid", None, "unk", "cmdqueue"), + (0x00, 0x08, 0): ("FinishCP", "uuid", None, "unk", "cmdqueue"), + (0x00, 0x0a, 0): ("StampUpdateTA", "value", "ev_id", "addr", "uuid"), + (0x00, 0x0c, 0): ("StampUpdate3D", "value", "ev_id", "addr", "uuid"), + (0x00, 0x0e, 0): ("StampUpdateCL", "value", "ev_id", "addr", "uuid"), + (0x00, 0x10, 1): ("TAPreproc1", "unk"), + (0x00, 0x10, 2): ("TAPreproc2", "unk1", "unk2"), + (0x00, 0x17, 0): ("Finish3D2", "uuid", None, "unk", "cmdqueue"), + (0x00, 0x28, 0): ("EvtNotify", "firing0", "firing1", "firing2", "firing3"), + (0x00, 0x2f, 0): ("Finish3D_unk2", "uuid", "unk"), + (0x00, 0x1e, 0): ("CleanupPB", "uuid", "unk2", "slot"), + (0x01, 0x0a, 0): ("Postproc", "cmdid", "event_ctl", "stamp_val", "uuid"), + (0x01, 0x0b, 0): ("EvtComplete", None, "event_ctl"), + (0x01, 0x0d, 0): ("EvtDequeued", "next", "event_ctl"), + (0x01, 0x16, 0): ("InitAttachment", "idx", "flags", "addr", "size"), + (0x01, 0x18, 0): ("ReInitAttachment", "idx", "flags", "addr", "size"), +} + +class KTraceMsg(ConstructClass): + THREADS = [ + "irq", + "bg", + "smpl", + "pwr", + "rec", + "kern", + ] + subcon = Struct ( + "msg_type" / Hex(Const(5, Int32ul)), + "timestamp" / Hex(Int64ul), + "args" / Array(4, Int64ul), + "code" / Int8ul, + "channel" / Int8ul, + "pad" / Const(0, Int8ul), + "thread" / Int8ul, + "unk_flag" / Int64ul, + ) + def __str__(self): + ts = self.timestamp / 24000000 + code = (self.channel, self.code, self.unk_flag) + if code in TRACE_MSGS: + info = TRACE_MSGS[code] + args = info[0] + ": " + " ".join(f"{k}={v:#x}" for k, v in zip(info[1:], self.args) if k is not None) + else: + args = "UNK: " + ", ".join(hex(i) for i in self.args) + return f"TRACE: [{ts:10.06f}][{self.THREADS[self.thread]:4s}] {self.channel:2x}:{self.code:2x} ({self.unk_flag}) {args}" + +class FWCtlMsg(ConstructClass): + subcon = Struct ( + "addr" / Int64ul, + "unk_8" / Int32ul, + "context_id" / Int32ul, + "unk_10" / Int16ul, + "unk_12" / 
Int16ul, + ) + +channelNames = [ + "TA_0", "3D_0", "CL_0", + "TA_1", "3D_1", "CL_1", + "TA_2", "3D_2", "CL_2", + "TA_3", "3D_3", "CL_3", + "DevCtrl", + "Event", "FWLog", "KTrace", "Stats", + + ## Not really in normal order + "FWCtl" +] + +# Exclude FWCtl +CHANNEL_COUNT = len(channelNames) - 1 + +channelRings = ( + [[(RunCmdQueueMsg, 0x30, 0x100)]] * 12 + [ + [(DeviceControlMsg, 0x30, 0x100)], + [(EventMsg, 0x38, 0x100)], + [ + (FWLogMsg, 0xd8, 0x100), # unk 0 + (FWLogMsg, 0xd8, 0x100), # init log + (FWLogMsg, 0xd8, 0x100), # unk 2 + (FWLogMsg, 0xd8, 0x100), # warnings? + (FWLogMsg, 0xd8, 0x100), # unk 4 + (FWLogMsg, 0xd8, 0x100), # unk 5 + ], + [(KTraceMsg, 0x38, 0x200)], + [(HexDump(Bytes(0x60)), 0x60, 0x100)], + [(FWCtlMsg, 0x14, 0x100)], + ] +) + +class ChannelStateFields(RegMap): + _SIZE = 0x30 + + READ_PTR = 0x00, Register32 + WRITE_PTR = 0x20, Register32 + +class FWControlStateFields(RegMap): + _SIZE = 0x20 + + READ_PTR = 0x00, Register32 + WRITE_PTR = 0x10, Register32 + +class Channel(Reloadable): + def __init__(self, u, uat, info, ring_defs, base=None, state_fields=ChannelStateFields): + self.uat = uat + self.u = u + self.p = u.proxy + self.iface = u.iface + + self.ring_defs = ring_defs + self.info = info + + self.accessor = uat.ioaccessor(0) + self.state_addr = info.state_addr + self.state = [] + self.rb_base = [] + self.rb_maps = [] + + if base is None: + p = info.ringbuffer_addr + else: + p = base + for i, (msg, size, count) in enumerate(ring_defs): + assert msg.sizeof() == size + + self.state.append(state_fields(self.accessor, self.state_addr + 0x30 * i)) + m = uat.iotranslate(0, p, size * count) + self.rb_base.append(p) + self.rb_maps.append(m) + p += size * count + + def get_message(self, ring, index, meta_fn=None): + msgcls, size, count = self.ring_defs[ring] + + assert index < count + addr = self.rb_base[ring] + index * size + stream = self.uat.iostream(0, addr) + stream.meta_fn = meta_fn + return msgcls.parse_stream(stream) + + def 
clear_message(self, ring, index): + msgcls, size, count = self.ring_defs[ring] + + self.put_message(ring, index, b"\xef\xbe\xad\xde" * (size // 4)) + + def put_message(self, ring, index, obj): + msgcls, size, count = self.ring_defs[ring] + + assert index < count + if isinstance(obj, bytes): + data = obj + else: + data = obj.build() + self.uat.iowrite(0, self.rb_base[ring] + index * size, data) + +class ChannelInfo(ConstructClass): + subcon = Struct( + "state_addr" / Hex(Int64ul), + "ringbuffer_addr" / Hex(Int64ul), + ) + +class ChannelInfoSet(ConstructClass): + CHAN_COUNT = CHANNEL_COUNT + + subcon = Struct(*[ name / ChannelInfo for name in channelNames[:CHAN_COUNT]]) + +__all__.extend(k for k, v in globals().items() + if (callable(v) or isinstance(v, type)) and v.__module__ == __name__) diff --git a/tools/proxyclient/m1n1/fw/agx/cmdqueue.py b/tools/proxyclient/m1n1/fw/agx/cmdqueue.py new file mode 100644 index 0000000..bd90a05 --- /dev/null +++ b/tools/proxyclient/m1n1/fw/agx/cmdqueue.py @@ -0,0 +1,516 @@ +# SPDX-License-Identifier: MIT +from m1n1.constructutils import * +from construct import * +from .microsequence import * +from ...utils import RegMap, Register32 + +__all__ = [] + +class WorkCommandBarrier(ConstructClass): + """ + sent before WorkCommand3D on the Submit3d queue. + Might be for initilzing the tile buckets? + + Example: + 00000004 0c378018 ffffffa0 00000c00 00000006 00000900 08002c9a 00000000 + 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 + """ + subcon = Struct( + "magic" / Const(0x4, Int32ul), + "stamp_addr" / Int64ul, + "stamp" / ROPointer(this.stamp_addr, StampCounter), + "wait_value" / Int32ul, + "event" / Int32ul, # Event number that signals a stamp check + "stamp_self" / Int32ul, + "uuid" / Int32ul, + "unk" / Default(Int32ul, 0), + ) + +class WorkCommandInitBM(ConstructClass): + """ + occationally sent before WorkCommandTA on the SubmitTA queue. 
+ + Example: + 00000004 0c378018 ffffffa0 00000c00 00000006 00000900 08002c9a 00000000 + 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 + """ + subcon = Struct( + "magic" / Const(0x6, Hex(Int32ul)), + "context_id" / Hex(Int32ul), # Might be context? + "buffer_mgr_slot" / Hex(Int32ul), # 0 + "unk_c" / Hex(Int32ul), # 0 + "unk_10" / Hex(Int32ul), # 0x30 + "buffer_mgr_addr" / Int64ul, + "buffer_mgr" / ROPointer(this.buffer_mgr_addr, BufferManagerInfo), + "stamp_value" / Hex(Int32ul), # 0x100 + ) + +class LinkedListHead(ConstructClass): + subcon = Struct( + "prev" / Int64ul, + "next" / Int64ul, + ) + + def __init__(self): + super().__init__() + self.prev = 0 + self.next = 0 + +class EventControlUnkBuf(ConstructValueClass): + subcon = HexDump(Bytes(0x8)) + + def __init__(self): + super().__init__() + self.value = b"\xff" * 8 + +class EventControl(ConstructClass): + subcon = Struct( + "event_count_addr" / Int64ul, + "event_count" / ROPointer(this.event_count_addr, Int32ul), + "generation" / Int32ul, + "cur_count" / Int32ul, + "unk_10" / Int32ul, + "unk_14" / Int32ul, + "unk_18" / Int64ul, + "unk_20" / Int32ul, + "vm_slot" / Int32ul, + "has_ta" / Int32ul, + "pstamp_ta" / Array(4, Int64ul), + "has_3d" / Int32ul, + "pstamp_3d" / Array(4, Int64ul), + "has_cp" / Int32ul, + "pstamp_cp" / Array(4, Int64ul), + "in_list" / Int32ul, + Ver("G >= G14", "unk_98_g14_0" / HexDump(Bytes(0x14))), + "list_head" / LinkedListHead, + Ver("G >= G14", "unk_a8_g14_0" / Padding(4)), + Ver("V >= V13_0B4", "unk_buf" / EventControlUnkBuf), + ) + + def __init__(self): + super().__init__() + self.unk_14 = 0 + self.unk_18 = 0 + self.unk_20 = 0 + self.vm_slot = 0 + self.has_ta = 0 + self.pstamp_ta = [0]*4 + self.has_3d = 0 + self.pstamp_3d = [0]*4 + self.has_cp = 0 + self.pstamp_cp = [0]*4 + self.in_list = 0 + self.unk_98_g14_0 = bytes(0x14) + self.list_head = LinkedListHead() + self.unk_buf = EventControlUnkBuf() + +class WorkCommandCP(ConstructClass): + """ + For compute + + 
Example: + 00000000 00000003 00000000 00000004 0c3d80c0 ffffffa0 00000000 00000000 00000000 + 00000020 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 + 00000040 * + 00000060 00000000 00000000 00088000 00000015 00078000 00000015 000a6300 00000015 + 00000080 000a6308 00000015 000a6310 00000015 000a6318 00000015 00000000 00000011 + 000000a0 00008c60 00000000 00000041 00000000 000e8000 00000015 00000040 00000000 + 000000c0 00000001 00000000 0000001c 00000000 00000000 00000000 00000000 00000000 + 000000e0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 + 00000100 * + 000001e0 00000000 00000000 0c311cc0 ffffffa0 00000240 00000000 00000000 00000000 + 00000200 00000000 00000000 00000000 00000000 00000000 00000000 00088000 00000015 + 00000220 00078024 00000015 00000000 00000000 00000000 00000000 00000000 00000000 + 00000240 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 + 00000260 110022b3 00000000 ffffffff 00000500 00000015 00000000 00000000 00000000 + 00000280 000c8014 ffffffa0 0c378014 ffffffa0 00003b00 00000005 00000000 00000000 + 000002a0 120022b8 00000000 00000000 00000000 00029030 ffffffa0 00029038 ffffffa0 + 000002c0 00000000 00000000 00000000 00000000 00000015 00000000 00000000 00000000 + 000002e0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 + """ + + subcon = Struct( + "addr" / Tell, + "magic" / Const(0x3, Hex(Int32ul)), + "unk_4" / Hex(Int32ul), + "context_id" / Hex(Int32ul), + "event_control_addr" / Hex(Int64ul), + "event_control" / ROPointer(this.event_control_addr, EventControl), + + # This struct embeeds some data that the Control List has pointers back to, but doesn't + # seem to be actually part of this struct + Padding(0x1e8 - 0x14), + + # offset 000001e8 + "microsequence_ptr" / Hex(Int64ul), + "microsequence_size" / Hex(Int32ul), + "microsequence" / ROPointer(this.microsequence_ptr, MicroSequence), + ) + +class WorkCommand0_UnkBuf(ConstructValueClass): + 
subcon = HexDump(Bytes(0x18)) + + def __init__(self): + self.value = bytes(0x18) + +class WorkCommand1_UnkBuf(ConstructValueClass): + subcon = HexDump(Bytes(0x110)) + + def __init__(self): + self.value = bytes(0x110) + +class WorkCommand1_UnkBuf2(ConstructClass): + subcon = Struct( + "unk_0" / Int64ul, + "unk_8" / Int64ul, + "unk_10" / Int64ul, + ) + +class Flag(ConstructValueClass): + subcon = Hex(Int32ul) + + def __init__(self): + self.value = 0 + +class WorkCommand3D(ConstructClass): + """ + For 3D + + Example: 0xfa00c095640 + 00000000 00000001 00000004 00000000 0c2d5f00 ffffffa0 000002c0 0c3d80c0 ffffffa0 + 00000020 0c3e0000 ffffffa0 0c3e0100 ffffffa0 0c3e09c0 ffffffa0 01cb0000 00000015 + 00000040 00000088 00000000 00000001 0010000c 00000000 00000000 00000000 00000000 + 00000060 3a8de3be 3abd2fa8 00000000 00000000 0000076c 00000000 0000a000 00000000 + 00000080 ffff8002 00000000 00028044 00000000 00000088 00000000 005d0000 00000015 + 000000a0 00758000 00000015 0000c000 00000000 00000640 000004b0 0257863f 00000000 + 000000c0 00000000 00000000 00000154 00000000 011d0000 00000015 011d0000 00000015 + 000000e0 0195c000 00000015 0195c000 00000015 00000000 00000000 00000000 00000000 + 00000100 00000000 00000000 00000000 00000000 0193c000 00000015 00000000 00000000 + 00000120 0193c000 00000015 00000000 00000000 01b64000 00000015 00000000 00000000 + 00000140 01b64000 00000015 00000000 00000000 01cb0000 00000015 01cb4000 00000015 + 00000160 c0000000 00000003 01cb4000 00000015 00010280 00000000 00a38000 00000015 + 00000180 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 + 000001a0 00000000 00000000 00000000 00000000 00000000 00000011 00008c60 00000000 + 000001c0 00000000 00000000 00000000 00000000 0000001c 00000000 00000000 00000000 + 000001e0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 + 00000200 * + 000003c0 00000012 00028084 00000000 00000000 3a8de3be 3abd2fa8 00000000 00000000 + 000003e0 0010000c 00000000 00025031 
00000004 3f800000 00000700 00000000 00000001 + """ + + subcon = Struct( + "addr" / Tell, + "magic" / Const(0x1, Hex(Int32ul)), + Ver("V >= V13_0B4", "counter" / Int64ul), + "context_id" / Hex(Int32ul), + "unk_8" / Hex(Int32ul), + "microsequence_ptr" / Hex(Int64ul), # Command list + "microsequence_size" / Hex(Int32ul), + "microsequence" / ROPointer(this.microsequence_ptr, MicroSequence), + "event_control_addr" / Hex(Int64ul), + "event_control" / ROPointer(this.event_control_addr, EventControl), + "buffer_mgr_addr" / Int64ul, + "buffer_mgr" / ROPointer(this.buffer_mgr_addr, BufferManagerInfo), + "buf_thing_addr" / Int64ul, + "buf_thing" / ROPointer(this.buf_thing_addr, BufferThing), + "unk_emptybuf_addr" / Hex(Int64ul), + "tvb_tilemap" / Hex(Int64ul), + "unk_40" / Hex(Int64ul), + "unk_48" / Hex(Int32ul), + "tile_blocks_y" / Hex(Int16ul), # * 4 + "tile_blocks_x" / Hex(Int16ul), # * 4 + "unk_50" / Hex(Int64ul), + "unk_58" / Hex(Int64ul), + "merge_upper_x" / Hex(Float32l), + "merge_upper_y" / Hex(Float32l), + "unk_68" / Hex(Int64ul), + "tile_count" / Hex(Int64ul), + + # Embedded structures that are also pointed to by other stuff + "struct_2" / Start3DStruct2, + "struct_1" / Start3DStruct1, + "unk_758" / Flag, + "unk_75c" / Flag, + "unk_buf" / WorkCommand1_UnkBuf, + "busy_flag" / Flag, + "struct_6" / Start3DStruct6, + "struct_7" / Start3DStruct7, + "unk_buf2" / WorkCommand1_UnkBuf2, + "ts1" / TimeStamp, + "ts2" / TimeStamp, + "ts3" / TimeStamp, + "unk_914" / Int32ul, + "unk_918" / Int64ul, + "unk_920" / Int32ul, + "unk_924" / Int32ul, + Ver("V >= V13_0B4", "unk_928_0" / Int32ul), + Ver("V >= V13_0B4", "unk_928_4" / Int8ul), + Ver("V >= V13_0B4", "ts_flag" / TsFlag), + Ver("V >= V13_0B4", "unk_5e6" / Default(Int16ul, 0)), + Ver("V >= V13_0B4", "unk_5e8" / Default(HexDump(Bytes(0x20)), bytes(0x20))), + "pad_928" / Default(HexDump(Bytes(0x18)), bytes(0x18)), + ) + +class WorkCommand0_UnkBuf(ConstructValueClass): + subcon = HexDump(Bytes(0x18)) + + def __init__(self): + 
super().__init__() + self.value = bytes(0x18) + +class WorkCommandTA(ConstructClass): + """ + For TA + + Example: + 00000000 00000000 00000004 00000000 0c3d80c0 ffffffa0 00000002 00000000 0c3e0000 + 00000020 ffffffa0 0c3e0100 ffffffa0 0c3e09c0 ffffffa0 00000000 00000200 00000000 + 00000040 1e3ce508 1e3ce508 01cb0000 00000015 00000000 00000000 00970000 00000015 + 00000060 01cb4000 80000015 006b0003 003a0012 00000001 00000000 00000000 00000000 + 00000080 0000a000 00000000 00000088 00000000 01cb4000 00000015 00000000 00000000 + 000000a0 0000ff00 00000000 007297a0 00000015 00728120 00000015 00000001 00000000 + 000000c0 00728000 00040015 009f8000 00000015 00000000 00000000 00000000 00000000 + 000000e0 0000a441 00000000 00000000 00000000 00000000 00000000 00000000 00000000 + 00000100 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000011 + 00000120 00000000 00000000 0000001c 00000000 00008c60 00000000 00000000 00000000 + 00000140 00000000 00000000 00000000 00000000 0000001c 00000000 00000000 00000000 + 00000160 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 + 00000180 * + 000003a0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 000000f0 + 000003c0 00000088 00000202 04af063f 00025031 00404030 00303024 000000c0 00000180 + 000003e0 00000100 00008000 00000000 00000000 00000000 00000000 00000000 00000000 + """ + + subcon = Struct( + "addr" / Tell, + "magic" / Const(0x0, Hex(Int32ul)), + Ver("V >= V13_0B4", "counter" / Int64ul), + "context_id" / Hex(Int32ul), + "unk_8" / Hex(Int32ul), + "event_control_addr" / Hex(Int64ul), + "event_control" / ROPointer(this.event_control_addr, EventControl), + "buffer_mgr_slot" / Hex(Int64ul), + "buffer_mgr_addr" / Int64ul, + "buffer_mgr" / ROPointer(this.buffer_mgr_addr, BufferManagerInfo), + "buf_thing_addr" / Int64ul, + "buf_thing" / ROPointer(this.buf_thing_addr, BufferThing), + "unk_emptybuf_addr" / Hex(Int64ul), + "unk_34" / Hex(Int32ul), + + # Embedded structures that are also 
pointed to by other stuff + "struct_2" / StartTACmdStruct2, # 0x11c bytes + "unk_154" / HexDump(Bytes(0x268)), # unknown + "tiling_params" / TilingParameters, # 0x2c bytes + "unk_3e8" / HexDump(Bytes(0x74)), # unknown + + "unkptr_45c" / Int64ul, + "tvb_size" / Int64ul, + "microsequence_ptr" / Hex(Int64ul), + "microsequence_size" / Hex(Int32ul), + "microsequence" / ROPointer(this.microsequence_ptr, MicroSequence), + "ev_3d" / Int32ul, + "stamp_value" / Int32ul, + + "struct_3" / StartTACmdStruct3, # 0x114 bytes + + "unk_594" / WorkCommand0_UnkBuf, + + "ts1" / TimeStamp, + "ts2" / TimeStamp, + "ts3" / TimeStamp, + + "unk_5c4" / Int32ul, + "unk_5c8" / Int32ul, + "unk_5cc" / Int32ul, + "unk_5d0" / Int32ul, + "unk_5d4" / Int8ul, + "pad_5d5" / Default(HexDump(Bytes(0x3)), bytes(0x3)), + Ver("V >= V13_0B4", "unk_5e0" / Int32ul), + Ver("V >= V13_0B4", "unk_5e4" / Int8ul), + Ver("V >= V13_0B4", "ts_flag" / TsFlag), + Ver("V >= V13_0B4", "unk_5e6" / Default(Int16ul, 0)), + Ver("V >= V13_0B4", "unk_5e8" / Default(HexDump(Bytes(0x18)), bytes(0x18))), + "pad_5d8" / Default(HexDump(Bytes(0x8)), bytes(0x8)), + Ver("V >= V13_0B4", "pad_5e0" / Default(HexDump(Bytes(0x18)), bytes(0x18))), + ) + +class UnknownWorkCommand(ConstructClass): + subcon = Struct( + "magic" / Hex(Int32ul), + "unk_4" / Hex(Int32ul), + "unk_8" / Hex(Int32ul), + "unk_c" / Hex(Int32ul), + "unk_10" / Hex(Int32ul), + "unk_14" / Hex(Int32ul), + "unk_18" / Hex(Int32ul), + "unk_1c" / Hex(Int32ul), + ) + +class CmdBufWork(ConstructClass): + subcon = Struct( + "cmdid" / Peek(Int32ul), + "cmd" / Switch(this.cmdid, { + 0: WorkCommandTA, + 1: WorkCommand3D, + 3: WorkCommandCP, + 4: WorkCommandBarrier, + 6: WorkCommandInitBM, + }) + ) + +class JobList(ConstructClass): + subcon = Struct( + "first_job" / Default(Int64ul, 0), + "last_head" / Int64ul, + "unkptr_10" / Default(Int64ul, 0), + ) + +class GPUContextData(ConstructClass): + subcon = Struct( + "unk_0" / Int8ul, + "unk_1" / Int8ul, + "unk_2" / Default(Bytes(3), 
class CommandQueuePointerMap(RegMap):
    """Register-style view of three of the CommandQueuePointers fields.

    The offsets match the position of the same-named fields in the
    CommandQueuePointers structure (0x10 bytes per slot).
    """

    GPU_DONEPTR = 0x00, Register32
    GPU_RPTR = 0x30, Register32
    CPU_WPTR = 0x40, Register32


class CommandQueuePointers(ConstructClass):
    """Ring buffer pointer block of a command queue.

    Every value is a 32-bit integer followed by 12 bytes of zero padding, so
    each field occupies its own 0x10-byte slot.
    """

    subcon = Struct(
        "gpu_doneptr" / Int32ul,
        ZPadding(12),
        "unk_10" / Int32ul,
        ZPadding(12),
        "unk_20" / Int32ul,
        ZPadding(12),
        "gpu_rptr" / Int32ul,
        ZPadding(12),
        "cpu_wptr" / Int32ul,
        ZPadding(12),
        "rb_size" / Int32ul,
        ZPadding(12),
    )

    def __init__(self):
        super().__init__()
        # All pointers start at zero ...
        for pointer in ("gpu_doneptr", "unk_10", "unk_20", "gpu_rptr", "cpu_wptr"):
            setattr(self, pointer, 0)
        # ... and the ring buffer size defaults to 0x500.
        self.rb_size = 0x500
+ #"gpu_buf" / ROPointer(this.gpu_buf_addr, HexDump(Bytes(0x2c18))), + "gpu_rptr1" / Hex(Int32ul), + "gpu_rptr2" / Hex(Int32ul), + "gpu_rptr3" / Hex(Int32ul), + "event_id" / Int32sl, + "unk_30" / Hex(Int32ul), # read by CPU + "unk_34" / Hex(Int32ul), + "unk_38" / Hex(Int64ul), + "unk_40" / Hex(Int32ul), # 1 + "unk_44" / Hex(Int32ul), # 0 + "unk_48" / Hex(Int32ul), # 1, 2 + "unk_4c" / Int32sl, # -1 + "uuid" / Hex(Int32ul), # Counts up for each new process or command queue + "unk_54" / Int32sl, + "unk_58" / Hex(Int64ul), # 0 + "busy" / Hex(Int32ul), # 1 = gpu busy + "pad1" / ZPadding(0x20), + "blocked_on_barrier" / Hex(Int32ul), + "unk_88" / Int32ul, + "unk_8c" / Int32ul, + "unk_90" / Int32ul, + "unk_94" / Int32ul, + "pending" / Int32ul, + "unk_9c" / Int32ul, + "gpu_context_addr" / Hex(Int64ul), # GPU managed context, shared between 3D and TA. Passed to DC_DestroyContext + "gpu_context" / ROPointer(this.gpu_context_addr, GPUContextData), + "unk_a8" / Int64ul + # End of struct + ) + + def __init__(self): + super().__init__() + self.gpu_rptr1 = 0 + self.gpu_rptr2 = 0 + self.gpu_rptr3 = 0 + self.event_id = -1 + self.unk_4c = -1 + self.uuid = 0xdeadbeef # some kind of ID + self.unk_54 = -1 + self.unk_58 = 0x0 + self.busy = 0x0 + self.blocked_on_barrier = 0x0 + self.unk_88 = 0 + self.unk_8c = 0 + self.unk_90 = 0 + self.unk_94 = 0 + self.pending = 0 + self.unk_9c = 0 + self.set_prio(0) + self.unk_a8 = 0 + + def set_prio(self, p): + if p == 0: + self.unk_30 = 0 + self.unk_34 = 0 # 0-3? 
class GFXHandoff:
    """AP-side access to the GFX handoff register block (GFXHandoffStruct).

    The block lives at the ``gfx_handoff_base`` property of the
    ``/arm-io/sgx`` ADT node. It carries a Dekker-style lock shared with the
    firmware plus 0x41 flush slots (state / address / size), which are driven
    by the cache-flush and unmap helpers below.

    All register accesses go through the accessor's ``.val`` attribute.
    """

    def __init__(self, u):
        """Bind to the handoff registers of the device reachable through *u*."""
        self.u = u
        self.sgx_dev = self.u.adt["/arm-io/sgx"]
        self.base = self.sgx_dev.gfx_handoff_base
        self.reg = GFXHandoffStruct(u, self.base)
        self.is_locked = False
        self.initialized = False

    @contextmanager
    def lock(self):
        """Dekker's algorithm lock.

        Context manager; the lock is held for the duration of the ``with``
        body and is always released, even if the body raises. Not reentrant.
        """
        assert not self.is_locked

        # Note: This *absolutely* needs barriers everywhere.
        # Those are implicit in proxyclient for every operation.

        self.reg.LOCK_AP.val = 1
        while self.reg.LOCK_FW.val != 0:
            # Read and write through `.val` here as everywhere else in this
            # class: comparing the accessor object itself against 0 is not
            # guaranteed to read the register.
            if self.reg.TURN.val != 0:
                # Not our turn: back off until the firmware hands it over.
                self.reg.LOCK_AP.val = 0
                while self.reg.TURN.val != 0:
                    pass
                self.reg.LOCK_AP.val = 1

        self.is_locked = True
        try:
            yield
        finally:
            # Give the turn to the firmware, then drop our flag.
            self.reg.TURN.val = 1
            self.reg.LOCK_AP.val = 0
            self.is_locked = False

    def initialize(self):
        """Perform the one-time handoff setup; later calls are no-ops.

        Publishes the AP magic, waits (under the lock) for the firmware to
        publish the same magic, then clears every flush slot.
        """
        if self.initialized:
            return

        print("[Handoff] Initializing...")

        self.reg.MAGIC_AP.val = PPL_MAGIC
        # NOTE(review): this used to assign `self.reg.UNK`, but the register
        # map declares no UNK register (only UNK2 and UNK3). UNK2 is assumed
        # to be the intended target; it is 8 bits wide, so the all-ones value
        # is 0xff. Confirm against firmware behaviour.
        self.reg.UNK2.val = 0xff
        self.reg.UNK3.val = 0

        with self.lock():
            print("[Handoff] Waiting for FW PPL init...")
            while self.reg.MAGIC_FW.val != PPL_MAGIC:
                pass

            for i in range(0x41):
                self.reg.FLUSH_STATE[i].val = 0
                self.reg.FLUSH_ADDR[i].val = 0
                self.reg.FLUSH_SIZE[i].val = 0

        self.initialized = True
        print("[Handoff] Initialized!")

    # The order here is:
    # - Remap memory as shared
    # - TLBI
    # - prepare_cacheflush()
    # - issue FWCtl request
    # - wait for completion (ring or wait_cacheflush?)
    # - Unmap memory
    # - TLBI
    # - complete_cacheflush()
    def prepare_cacheflush(self, base, size, context=0x40):
        """Fill flush slot *context* with (*base*, *size*) and set its state to 1.

        The slot must currently be idle (state 0).
        """
        assert self.reg.FLUSH_STATE[context].val == 0

        self.reg.FLUSH_ADDR[context].val = base
        self.reg.FLUSH_SIZE[context].val = size
        # The state is written last, after address and size are in place.
        self.reg.FLUSH_STATE[context].val = 1

    def wait_cacheflush(self, context=0x40):
        """Busy-wait until flush slot *context* leaves state 1."""
        while self.reg.FLUSH_STATE[context].val == 1:
            pass

    def complete_cacheflush(self, context=0x40):
        """Return flush slot *context* from state 2 to idle (state 0)."""
        assert self.reg.FLUSH_STATE[context].val == 2
        self.reg.FLUSH_STATE[context].val = 0

    # probably not necessary?
    # order is:
    # - Remap memory as shared
    # - (no TLBI?)
    # - prepare_unmap()
    # - unmap
    # - TLBI
    # - complete_unmap()
    def prepare_unmap(self, base, size, context):
        """Fill flush slot *context* for an unmap and set its state to 2.

        The address written is the low 48 bits of *base* tagged with 0xdead in
        the top 16 bits. The slot must currently be idle (state 0).
        """
        assert self.reg.FLUSH_STATE[context].val == 0
        self.reg.FLUSH_ADDR[context].val = 0xdead000000000000 | (base & 0xffffffffffff)
        self.reg.FLUSH_SIZE[context].val = size
        self.reg.FLUSH_STATE[context].val = 2

    def complete_unmap(self, context):
        """Return flush slot *context* from state 2 to idle (state 0)."""
        assert self.reg.FLUSH_STATE[context].val == 2
        self.reg.FLUSH_STATE[context].val = 0
__init__(self, unk_0=0, unk_4=0, t1=None, t2=None, t3=None): + self.unk_0 = unk_0 + self.unk_4 = unk_4 + if not t1: + self.t1 = [0] * 16 + else: + self.t1 = t1 + [t1[0]] * (16 - len(t1)) + if not t2: + self.t2 = [0] * 16 + else: + self.t2 = t2 + [t2[0]] * (16 - len(t2)) + if t3 is None: + self.t3 = [[0] * 16] * 8 + else: + self.t3 = ([(i + [0x3ffffff] * (16 - len(i))) for i in t3] + + [[0x3ffffff] * 16] * (8 - len(t3))) + + +class AGXHWDataShared2T8112(ConstructClass): + subcon = Struct( + "unk_0" / Array(5, Int32ul), + "unk_14" / Int32ul, + "unk_18" / Array(8, Int32ul), + "curve1" / AGXHWDataShared2Curve, + "curve2" / AGXHWDataShared2Curve, + ) + + def __init__(self, chip_info): + self.unk_0 = [0] * 5 + self.unk_18 = [0] * 8 + + if chip_info.chip_id == 0x8112: + self.unk_14 = 0x6000000 + self.curve1 = AGXHWDataShared2Curve( + 0, 0x20000000, + [-1], [0x0f07], [[]] + ) + self.curve2 = AGXHWDataShared2Curve( + 7, 0x80000000, + [ + -1, 25740, 17429, 12550, 9597, 7910, 6657, 5881, 5421 + ], [ + 0x0F07, 0x04C0, 0x06C0, 0x08C0, + 0x0AC0, 0x0C40, 0x0DC0, 0x0EC0, + 0x0F80 + ], [ + [0x03FFFFFF, 107, 101, 94, 87, 82, 77, 73, 71], + [0x03FFFFFF, 38240, 36251, 33562, + 31368, 29379, 27693, 26211, 25370], + [0x03FFFFFF, 123933, 117485, 108771, + 101661, 95217, 89751, 84948, 82222], + ] + ) + else: + self.unk_14 = 0 + self.curve1 = AGXHWDataShared2Curve() + self.curve2 = AGXHWDataShared2Curve() + +class AGXHWDataShared3(ConstructClass): + subcon = Struct( + "unk_0" / Int32ul, + "unk_4" / Int32ul, + "unk_8" / Int32ul, + "table" / Array(16, Int32ul), + "unk_4c" / Int32ul, + ) + + def __init__(self, chip_info): + if chip_info.chip_id == 0x8112: + self.unk_0 = 1 + self.unk_4 = 500 + self.unk_8 = 5 + self.table = [ + 10700, 10700, 10700, 10700, + 10700, 6000, 1000, 1000, + 1000, 10700, 10700, 10700, + 10700, 10700, 10700, 10700, + ] + self.unk_4c = 1 + else: + self.unk_0 = 0 + self.unk_4 = 0 + self.unk_8 = 0 + self.table = [0] * 16 + self.unk_4c = 0 + +class 
AGXHWDataShared2(ConstructClass): + subcon = Struct( + "table" / Array(10, Int32sl), + "unk_28" / HexDump(Bytes(0x10)), + "unk_38" / AGXHWDataShared2T8112, + "unk_500" / Int32ul, + "unk_504" / Int32ul, + "unk_508" / Int32ul, + "unk_50c" / Int32ul, + "unk_510" / Int32ul, + ) + + def __init__(self, chip_info): + super().__init__() + self.table = chip_info.shared2_tab + self.unk_20 = bytes(8) + self.unk_28 = b"\xff" * 16 + self.unk_38 = AGXHWDataShared2T8112(chip_info) + self.unk_500 = 0 + self.unk_504 = 0 + self.unk_508 = chip_info.shared2_unk_508 + self.unk_50c = 0 + self.unk_510 = 0 + +class AGXHWDataA130Extra(ConstructClass): + subcon = Struct( + "unk_0" / HexDump(Bytes(0x38)), + "unk_38" / Dec(Int32ul), + "unk_3c" / Dec(Int32ul), + "unk_40" / Dec(Int32ul), + "unk_44" / Int32ul, + "unk_48" / Int32ul, + "unk_4c" / Dec(Int32ul), + "unk_50" / Int32ul, + "unk_54" / Dec(Int32ul), + "unk_58" / Int32ul, + "unk_5c" / Int32ul, + "unk_60" / Float32l, + "unk_64" / Float32l, + "unk_68" / Float32l, + "unk_6c" / Float32l, + "unk_70" / Float32l, + "unk_74" / Float32l, + "unk_78" / Float32l, + "unk_7c" / Float32l, + "unk_80" / Float32l, + "unk_84" / Float32l, + "unk_88" / Int32ul, + "unk_8c" / Dec(Int32ul), + "unk_90" / Dec(Int32ul), + "unk_94" / Int32ul, + "unk_98" / Int32ul, + "unk_9c" / Float32l, + "unk_a0" / Dec(Int32ul), + "unk_a4" / Int32ul, + "unk_a8" / Dec(Int32ul), + "unk_ac" / Dec(Int32ul), + "unk_b0" / Dec(Int32ul), + "unk_b4" / Int32ul, + "unk_b8" / Dec(Int32ul), + "unk_bc" / Int32ul, + "unk_c0" / Int32ul, + "unk_c4" / Float32l, + "unk_c8" / HexDump(Bytes(0x4c)), + "unk_114" / Float32l, + "unk_118" / Int32ul, + "unk_11c" / Int32ul, + "unk_120" / Int32ul, + "unk_124" / Dec(Int32ul), + "unk_128" / Dec(Int32ul), + "unk_12c" / HexDump(Bytes(0x8c)), + ) + + def __init__(self): + super().__init__() + self.unk_0 = bytes(0x38) + self.unk_38 = 4 + self.unk_3c = 8000 + self.unk_40 = 2500 + self.unk_44 = 0x0 + self.unk_48 = 0xffffffff + self.unk_4c = 50 + self.unk_50 = 0x0 + 
self.unk_54 = 50 + self.unk_58 = 0x1 + self.unk_5c = 0x0 + self.unk_60 = 0.88888888 + self.unk_64 = 0.66666666 + self.unk_68 = 0.111111111 + self.unk_6c = 0.33333333 + self.unk_70 = -0.4 + self.unk_74 = -0.8 + self.unk_78 = 0.0 + self.unk_7c = 65536.0 + self.unk_80 = -5.0 + self.unk_84 = -10.0 + self.unk_88 = 0x0 + self.unk_8c = 40 + self.unk_90 = 600 + self.unk_94 = 0x0 + self.unk_98 = 0x0 + self.unk_9c = 8000.0 + self.unk_a0 = 1400 + self.unk_a4 = 0x0 + self.unk_a8 = 72 + self.unk_ac = 24 + self.unk_b0 = 1728000 + self.unk_b4 = 0x0 + self.unk_b8 = 576000 + self.unk_bc = 0x0 + self.unk_c0 = 0x0 + self.unk_c4 = 65536.0 + self.unk_c8 = bytes(0x4c) + self.unk_114 = 65536.0 + self.unk_118 = 0x0 + self.unk_11c = 0x0 + self.unk_120 = 0x0 + self.unk_124 = 40 + self.unk_128 = 600 + self.unk_12c = bytes(0x8c) + +class AGXHWDataT81xx(ConstructClass): + subcon = Struct( + "unk_d8c" / Int32ul, + "unk_d90" / Int32ul, + "unk_d94" / Int32ul, + "unk_d98" / Int32ul, + "unk_d9c" / Float32l, + "unk_da0" / Int32ul, + "unk_da4" / Float32l, + "unk_da8" / Int32ul, + "unk_dac" / Float32l, + "unk_db0" / Int32ul, + "unk_db4" / Int32ul, + "unk_db8" / Float32l, + "unk_dbc" / Float32l, + "unk_dc0" / Int32ul, + "unk_dc4" / Int32ul, + "unk_dc8" / Int32ul, + "unk_dcc" / Int32ul, + ) + def __init__(self, sgx, chip_info): + if chip_info.chip_id in (0x8103, 0x8112): + self.unk_d8c = 0x80000000 + self.unk_d90 = 4 + self.unk_d94 = 0 + self.unk_d98 = 0 + self.unk_d9c = 0.6 + self.unk_da0 = 0 + self.unk_da4 = 0.4 + self.unk_da8 = 0 + self.unk_dac = 0.38552 + self.unk_db0 = 0 + self.unk_db4 = 0 + self.unk_db8 = 65536.0 + self.unk_dbc = 13.56 + self.unk_dc0 = 0 + self.unk_dc4 = 0 + self.unk_dc8 = 0 + self.unk_dcc = 100 * sgx.gpu_num_perf_states + else: + self.unk_d8c = 0 + self.unk_d90 = 0 + self.unk_d94 = 0 + self.unk_d98 = 0 + self.unk_d9c = 0 + self.unk_da0 = 0 + self.unk_da4 = 0 + self.unk_da8 = 0 + self.unk_dac = 0 + self.unk_db0 = 0 + self.unk_db4 = 0 + self.unk_db8 = 0 + self.unk_dbc = 0 + 
class PowerZone(ConstructClass):
    """One power zone entry: a target plus filter constants derived from it.

    Constructed without ``tc`` the entry is all zeros. Otherwise the filter
    fields are derived from the time constant ``tc``, the ``target`` value,
    the ``off`` offset and the sample period ``period_ms``.
    """

    subcon = Struct(
        "val" / Float32l,
        "target" / Dec(Int32ul),
        "target_off" / Dec(Int32ul),
        "filter_tc_x4" / Dec(Int32ul),
        "filter_tc_xperiod" / Dec(Int32ul),
        Ver("V >= V13_0B4", "unk_10" / Dec(Int32ul)),
        Ver("V >= V13_0B4", "unk_14" / Dec(Int32ul)),
        "filter_tc_neginv" / Float32l,
        "filter_tc_inv" / Float32l,
        "pad" / Int32ul,
    )

    def __init__(self, tc=None, target=None, off=None, period_ms=None):
        self.val = 0.0
        self.pad = 0

        if tc is None:
            # Unused zone: every remaining field is zero.
            for field in (
                "target",
                "target_off",
                "filter_tc_x4",
                "filter_tc_xperiod",
                "unk_10",
                "unk_14",
                "filter_tc_neginv",
                "filter_tc_inv",
            ):
                setattr(self, field, 0)
            return

        inv_tc = 1 / tc
        self.target = target
        self.target_off = self.target - off
        self.filter_tc_x4 = tc * 4
        self.filter_tc_xperiod = tc * period_ms
        self.unk_10 = 1320000000
        self.unk_14 = 0
        self.filter_tc_neginv = inv_tc
        self.filter_tc_inv = 1 - inv_tc
HexDump(Bytes(0x644 - 0x1cc)), + "pad_644" / HexDump(Bytes(8)), + + "unk_64c" / Int32ul, + "unk_650" / Int32ul, + "pad_654" / Int32ul, + "pwr_filter_a_neg" / Float32l, + "pad_65c" / Int32ul, + "pwr_filter_a" / Float32l, + "pad_664" / Int32ul, + "pwr_integral_gain" / Float32l, + "pad_66c" / Int32ul, + "pwr_integral_min_clamp" / Float32l, + "max_power_1" / Float32l, + "pwr_proportional_gain" / Float32l, + "pad_67c" / Int32ul, + "pwr_pstate_related_k" / Float32l, + "pwr_pstate_max_dc_offset" / Int32sl, + "unk_688" / Int32ul, + "max_pstate_scaled_2" / Int32ul, + "pad_690" / Int32ul, + "unk_694" / Int32ul, + "max_power_2" / Int32ul, + + "pad_69c" / HexDump(Bytes(0x18)), + + "unk_6b4" / Int32ul, + Ver("V >= V13_0B4", "unk_6b8_0" / HexDump(Bytes(0x10))), + "max_pstate_scaled_3" / Int32ul, + "unk_6bc" / Int32ul, + + "pad_6c0" / HexDump(Bytes(0x14)), + + "ppm_filter_tc_periods_x4" / Int32ul, + "unk_6d8" / Int32ul, + + "pad_6dc" / Int32ul, + + "ppm_filter_a_neg" / Float32l, + "pad_6e4" / Int32ul, + "ppm_filter_a" / Float32l, + "pad_6ec" / Int32ul, + "ppm_ki_dt" / Float32l, + "pad_6f4" / Int32ul, + "pwr_integral_min_clamp_2" / Int32ul, + "unk_6fc" / Float32l, + "ppm_kp" / Float32l, + "pad_704" / Int32ul, + + "unk_708" / Int32ul, + "pwr_min_duty_cycle" / Int32ul, + "max_pstate_scaled_4" / Int32ul, + "unk_714" / Int32ul, + + "pad_718" / Int32ul, + + "unk_71c" / Float32l, + "max_power_3" / Int32ul, + + "cur_power_mw_2" / Int32ul, + + "ppm_filter_tc_ms" / Int32ul, + "unk_72c" / Int32ul, + Ver("V >= V13_0B4", "unk_730_0" / Int32ul), + Ver("V >= V13_0B4", "unk_730_4" / Int32ul), + Ver("V >= V13_0B4", "unk_730_8" / Int32ul), + Ver("V >= V13_0B4", "unk_730_c" / Int32ul), + "unk_730" / Float32l, + "unk_734" / Int32ul, + + "unk_738" / Int32ul, + "unk_73c" / Int32ul, + "unk_740" / Int32ul, + "unk_744" / Int32ul, + "unk_748" / Array(4, Float32l), + "unk_758" / Int32ul, + "perf_tgt_utilization" / Int32ul, + "pad_760" / Int32ul, + "perf_boost_min_util" / Int32ul, + "perf_boost_ce_step" / 
Int32ul, + "perf_reset_iters" / Int32ul, + "pad_770" / Int32ul, + "unk_774" / Int32ul, + "unk_778" / Int32ul, + "perf_filter_drop_threshold" / Int32ul, + + "perf_filter_a_neg" / Float32l, + "perf_filter_a2_neg" / Float32l, + "perf_filter_a" / Float32l, + "perf_filter_a2" / Float32l, + "perf_ki" / Float32l, + "perf_ki2" / Float32l, + "perf_integral_min_clamp" / Float32l, + "unk_79c" / Float32l, + "perf_kp" / Float32l, + "perf_kp2" / Float32l, + "boost_state_unk_k" / Float32l, + + "base_pstate_scaled_2" / Dec(Int32ul), + "max_pstate_scaled_5" / Dec(Int32ul), + "base_pstate_scaled_3" / Dec(Int32ul), + + "pad_7b8" / Int32ul, + + "perf_cur_utilization" / Float32l, + "perf_tgt_utilization_2" / Int32ul, + + "pad_7c4" / HexDump(Bytes(0x18)), + + "unk_7dc" / Int32ul, + Ver("V >= V13_0B4", "unk_7e0_0" / HexDump(Bytes(0x10))), + "base_pstate_scaled_4" / Dec(Int32ul), + "pad_7e4" / Int32ul, + + "unk_7e8" / HexDump(Bytes(0x14)), + + "unk_7fc" / Float32l, + "pwr_min_duty_cycle_2" / Float32l, + "max_pstate_scaled_6" / Float32l, + "max_freq_mhz" / Int32ul, + "pad_80c" / Int32ul, + "unk_810" / Int32ul, + "pad_814" / Int32ul, + "pwr_min_duty_cycle_3" / Int32ul, + "unk_81c" / Int32ul, + "pad_820" / Int32ul, + "min_pstate_scaled_4" / Float32l, + "max_pstate_scaled_7" / Dec(Int32ul), + "unk_82c" / Int32ul, + "unk_alpha_neg" / Float32l, + "unk_alpha" / Float32l, + "unk_838" / Int32ul, + "unk_83c" / Int32ul, + "pad_840" / HexDump(Bytes(0x86c - 0x838 - 8)), + + "unk_86c" / Int32ul, + "fast_die0_sensor_mask64" / Int64ul, + "fast_die0_release_temp_cc" / Int32ul, + "unk_87c" / Int32sl, + "unk_880" / Int32ul, + "unk_884" / Int32ul, + "pad_888" / Int32ul, + "unk_88c" / Int32ul, + "pad_890" / Int32ul, + "unk_894" / Float32l, + "pad_898" / Int32ul, + "fast_die0_ki_dt" / Float32l, + "pad_8a0" / Int32ul, + "unk_8a4" / Int32ul, + "unk_8a8" / Float32l, + "fast_die0_kp" / Float32l, + "pad_8b0" / Int32ul, + "unk_8b4" / Int32ul, + "pwr_min_duty_cycle_4" / Int32ul, + "max_pstate_scaled_8" / 
Dec(Int32ul), + "max_pstate_scaled_9" / Dec(Int32ul), + "fast_die0_prop_tgt_delta" / Int32ul, + "unk_8c8" / Int32ul, + "unk_8cc" / Int32ul, + "pad_8d0" / HexDump(Bytes(0x14)), + Ver("V >= V13_0B4", "unk_8e4_0" / HexDump(Bytes(0x10))), + "unk_8e4" / Int32ul, + "unk_8e8" / Int32ul, + "max_pstate_scaled_10" / Dec(Int32ul), + "unk_8f0" / Int32ul, + "unk_8f4" / Int32ul, + "pad_8f8" / Int32ul, + "pad_8fc" / Int32ul, + "unk_900" / HexDump(Bytes(0x24)), + "unk_924" / Array(8, Array(8, Float32l)), + "unk_a24" / Array(8, Array(8, Float32l)), + "unk_b24" / HexDump(Bytes(0x70)), + "max_pstate_scaled_11" / Dec(Int32ul), + "freq_with_off" / Int32ul, + "unk_b9c" / Int32ul, + "unk_ba0" / Int64ul, + "unk_ba8" / Int64ul, + "unk_bb0" / Int32ul, + "unk_bb4" / Int32ul, + "pad_bb8" / HexDump(Bytes(0xc2c - 0xbb8)), + + "unk_c2c" / Int32ul, + "power_zone_count" / Int32ul, + "max_power_4" / Int32ul, + "max_power_5" / Int32ul, + "max_power_6" / Int32ul, + "unk_c40" / Int32ul, + "unk_c44" / Float32l, + "avg_power_target_filter_a_neg" / Float32l, + "avg_power_target_filter_a" / Float32l, + "avg_power_target_filter_tc_x4" / Dec(Int32ul), + "avg_power_target_filter_tc_xperiod" / Dec(Int32ul), + Ver("V >= V13_0B4", "base_clock_mhz" / Int32ul), + Ver("V >= V13_0B4", "unk_c58_4" / Int32ul), + "power_zones" / Array(5, PowerZone), + "avg_power_filter_tc_periods_x4" / Dec(Int32ul), + "unk_cfc" / Int32ul, + "unk_d00" / Int32ul, + "avg_power_filter_a_neg" / Float32l, + "unk_d08" / Int32ul, + "avg_power_filter_a" / Float32l, + "unk_d10" / Int32ul, + "avg_power_ki_dt" / Float32l, + "unk_d18" / Int32ul, + "unk_d1c" / Int32ul, + "unk_d20" / Float32l, + "avg_power_kp" / Float32l, + "unk_d28" / Int32ul, + "unk_d2c" / Int32ul, + "avg_power_min_duty_cycle" / Int32ul, + "max_pstate_scaled_12" / Int32ul, + "max_pstate_scaled_13" / Int32ul, + "unk_d3c" / Int32ul, + "max_power_7" / Float32l, + "max_power_8" / Int32ul, + "unk_d48" / Int32ul, + "unk_d4c" / Int32ul, + "unk_d50" / Int32ul, + Ver("V >= V13_0B4", 
"base_clock_mhz_2" / Int32ul), + Ver("V >= V13_0B4", "unk_d54_4" / HexDump(Bytes(0xc))), + "unk_d54" / HexDump(Bytes(0x10)), + "max_pstate_scaled_14" / Int32ul, + "unk_d68" / Bytes(0x24), + + "t81xx_data" / AGXHWDataT81xx, + + "unk_dd0" / HexDump(Bytes(0x40)), + Ver("V >= V13_0B4", "unk_e10_0" / AGXHWDataA130Extra), + "unk_e10" / HexDump(Bytes(0xc)), + "fast_die0_sensor_mask64_2" / Int64ul, + "unk_e24" / Int32ul, + "unk_e28" / Int32ul, + "unk_e2c" / HexDump(Bytes(0x1c)), + "unk_e48" / Array(8, Array(8, Float32l)), + "unk_f48" / Array(8, Array(8, Float32l)), + "pad_1048" / HexDump(Bytes(0x5e4)), + "fast_die0_sensor_mask64_alt" / Int64ul, + "fast_die0_sensor_present" / Int32ul, + Ver("V < V13_0B4", "unk_1638" / Array(2, Int32ul)), + "unk_1640" / HexDump(Bytes(0x2000)), + "unk_3640" / Int32ul, + "hws1" / AGXHWDataShared1, + Ver("V >= V13_0B4", "unk_pad1" / HexDump(Bytes(0x20))), + "hws2" / AGXHWDataShared2, + "unk_3c04" / Int32ul, + "hws3" / AGXHWDataShared3, + "unk_3c58" / HexDump(Bytes(0x3c)), + "unk_3c94" / Int32ul, + "unk_3c98" / Int64ul, + "unk_3ca0" / Int64ul, + "unk_3ca8" / Int64ul, + "unk_3cb0" / Int64ul, + "ts_last_idle" / Int64ul, + "ts_last_poweron" / Int64ul, + "ts_last_poweroff" / Int64ul, + "unk_3cd0" / Int64ul, + "unk_3cd8" / Int64ul, + Ver("V >= V13_0B4", "unk_3ce0_0" / Int32ul), + "unk_3ce0" / Int32ul, + "unk_3ce4" / Int32ul, + "unk_3ce8" / Int32ul, + "unk_3cec" / Int32ul, + "unk_3cf0" / Int32ul, + "unk_3cf4" / Array(8, Float32l), + "unk_3d14" / Array(8, Float32l), + "unk_3d34" / HexDump(Bytes(0x38)), + Ver("V >= V13_0B4", "unk_3d6c" / HexDump(Bytes(0x38))), + ) + + def __init__(self, sgx, chip_info): + super().__init__() + + base_clock_khz = 24000 + base_clock_mhz = base_clock_khz * 1000 + period_ms = sgx.gpu_power_sample_period + + self.unk_0 = 0 + self.clocks_per_period = base_clock_khz * period_ms + self.clocks_per_period_2 = base_clock_khz * period_ms + self.unk_8 = 0 + self.pwr_status = 4 + self.unk_10 = 1.0 + self.unk_14 = 0 + self.unk_18 = 0 + 
self.unk_1c = 0 + self.unk_20 = 0 + self.unk_24 = 0 + self.actual_pstate = 1 + self.tgt_pstate = 1 + self.unk_30 = 0 + self.cur_pstate = 0 + self.unk_38 = 0 + self.unk_3c_0 = 0 + self.base_pstate_scaled = 100 * sgx.getprop("gpu-perf-base-pstate", 3) + self.unk_40 = 1 + self.max_pstate_scaled = 100 * sgx.gpu_num_perf_states + self.unk_48 = 0 + self.min_pstate_scaled = 100 + self.freq_mhz = 0.0 + self.unk_54 = bytes(0x20) + self.unk_74_0 = 0 + # perf related + self.unk_74 = [0] * 16 + + self.unk_b4 = bytes(0x100) + self.unk_1b4 = 0 + self.temp_c = 0 + self.avg_power_mw = 0 + self.update_ts = 0 + self.unk_1c8 = 0 + self.unk_1cc = bytes(0x644 - 0x1cc) + + self.pad_644 = bytes(8) + + self.unk_64c = 625 + self.unk_650 = 0 + self.pad_654 = 0 + self.pwr_filter_a_neg = 1 - 1 / sgx.getprop("gpu-pwr-filter-time-constant", 313) + self.pad_65c = 0 + self.pwr_filter_a = 1 - self.pwr_filter_a_neg + self.pad_664 = 0 + self.pwr_integral_gain = sgx.getprop("gpu-pwr-integral-gain", 0.0202129) + self.pad_66c = 0 + self.pwr_integral_min_clamp = sgx.getprop("gpu-pwr-integral-min-clamp", 0) + self.max_power_1 = chip_info.max_power + self.pwr_proportional_gain = sgx.getprop("gpu-pwr-proportional-gain", 5.2831855) + self.pad_67c = 0 + self.pwr_pstate_related_k = -self.max_pstate_scaled / chip_info.max_power + self.pwr_pstate_max_dc_offset = sgx.gpu_pwr_min_duty_cycle - self.max_pstate_scaled + self.unk_688 = 0 + self.max_pstate_scaled_2 = self.max_pstate_scaled + self.pad_690 = 0 + self.unk_694 = 0 + self.max_power_2 = chip_info.max_power + self.pad_69c = bytes(0x18) + self.unk_6b4 = 0 + self.unk_6b8_0 = bytes(0x10) + self.max_pstate_scaled_3 = self.max_pstate_scaled + self.unk_6bc = 0 + self.pad_6c0 = bytes(0x14) + + # Note: integer rounding here + ppm_filter_tc_periods = sgx.gpu_ppm_filter_time_constant_ms // period_ms + self.ppm_filter_tc_periods_x4 = ppm_filter_tc_periods * 4 + self.unk_6d8 = 0 + self.pad_6dc = 0 + self.ppm_filter_a_neg = 1 - 1 / ppm_filter_tc_periods + self.pad_6e4 = 
0 + self.ppm_filter_a = 1 - self.ppm_filter_a_neg + self.pad_6ec = 0 + self.ppm_ki_dt = sgx.gpu_ppm_ki * (period_ms / 1000) + self.pad_6f4 = 0 + self.pwr_integral_min_clamp_2 = self.pwr_integral_min_clamp + if Ver.check("V >= V13_0B4") or chip_info.chip_id != 0x8103: + self.unk_6fc = 65536.0 + else: + self.unk_6fc = 0 + self.ppm_kp = sgx.gpu_ppm_kp + self.pad_704 = 0 + self.unk_708 = 0 + self.pwr_min_duty_cycle = sgx.gpu_pwr_min_duty_cycle + self.max_pstate_scaled_4 = self.max_pstate_scaled + self.unk_714 = 0 + self.pad_718 = 0 + self.unk_71c = 0.0 + self.max_power_3 = chip_info.max_power + self.cur_power_mw_2 = 0x0 + self.ppm_filter_tc_ms = sgx.gpu_ppm_filter_time_constant_ms + self.unk_72c = 0 + self.unk_730_0 = 0x232800 + self.unk_730_4 = 0 + self.unk_730_8 = 0 + self.unk_730_c = 0 + self.unk_730 = 0.0 + self.unk_734 = 0 + self.unk_738 = 0 + self.unk_73c = 0 + self.unk_740 = 0 + self.unk_744 = 0 + self.unk_748 = [0.0, 0.0, 0.0, 0.0] + self.unk_758 = 0 + self.perf_tgt_utilization = sgx.gpu_perf_tgt_utilization + self.pad_760 = 0 + self.perf_boost_min_util = sgx.getprop("gpu-perf-boost-min-util", 100) + + self.perf_boost_ce_step = sgx.getprop("gpu-perf-boost-ce-step", 25) + self.perf_reset_iters = sgx.getprop("gpu-perf-reset-iters", 6) + self.pad_770 = 0x0 + self.unk_774 = 6 + self.unk_778 = 1 + self.perf_filter_drop_threshold = sgx.gpu_perf_filter_drop_threshold + self.perf_filter_a_neg = 1 - 1 / sgx.gpu_perf_filter_time_constant + self.perf_filter_a2_neg = 1 - 1 / sgx.gpu_perf_filter_time_constant2 + self.perf_filter_a = 1 - self.perf_filter_a_neg + self.perf_filter_a2 = 1 - self.perf_filter_a2_neg + self.perf_ki = sgx.getprop("gpu-perf-integral-gain", 7.895683288574219) + self.perf_ki2 = sgx.gpu_perf_integral_gain2 + self.perf_integral_min_clamp = sgx.gpu_perf_integral_min_clamp + self.unk_79c = 95.0 + self.perf_kp = sgx.getprop("gpu-perf-proportional-gain", 14.707962989807129) + self.perf_kp2 = sgx.gpu_perf_proportional_gain2 + base_state = 
sgx.getprop("gpu-perf-base-pstate", 3) + max_state = sgx.gpu_num_perf_states + boost_states = max_state - base_state + self.boost_state_unk_k = boost_states / 0.95 + self.base_pstate_scaled_2 = 100 * sgx.getprop("gpu-perf-base-pstate", 3) + self.max_pstate_scaled_5 = self.max_pstate_scaled + self.base_pstate_scaled_3 = 100 * sgx.getprop("gpu-perf-base-pstate", 3) + self.pad_7b8 = 0x0 + self.perf_cur_utilization = 0.0 + self.perf_tgt_utilization_2 = sgx.gpu_perf_tgt_utilization + self.pad_7c4 = bytes(0x18) + self.unk_7dc = 0x0 + self.unk_7e0_0 = bytes(0x10) + self.base_pstate_scaled_4 = 100 * sgx.getprop("gpu-perf-base-pstate", 3) + self.pad_7e4 = 0x0 + self.unk_7e8 = bytes(0x14) + self.unk_7fc = 65536.0 + self.pwr_min_duty_cycle_2 = sgx.gpu_pwr_min_duty_cycle + self.max_pstate_scaled_6 = self.max_pstate_scaled + self.max_freq_mhz = sgx.perf_states[sgx.gpu_num_perf_states].freq // 1000000 + self.pad_80c = 0x0 + self.unk_810 = 0x0 + self.pad_814 = 0x0 + self.pwr_min_duty_cycle_3 = sgx.gpu_pwr_min_duty_cycle + self.unk_81c = 0x0 + self.pad_820 = 0x0 + self.min_pstate_scaled_4 = 100.0 + self.max_pstate_scaled_7 = self.max_pstate_scaled + self.unk_82c = 0x0 + self.unk_alpha_neg = 0.8 + self.unk_alpha = 1 - self.unk_alpha_neg + self.unk_838 = 0x0 + self.unk_83c = 0x0 + self.pad_840 = bytes(0x2c) + self.unk_86c = 0x0 + self.fast_die0_sensor_mask64 = chip_info.gpu_fast_die0_sensor_mask64 + self.fast_die0_release_temp_cc = 100 * sgx.getprop("gpu-fast-die0-release-temp", 80) + self.unk_87c = chip_info.unk_87c + self.unk_880 = 0x4 + self.unk_884 = 0x0 + self.pad_888 = 0x0 + self.unk_88c = 0x0 + self.pad_890 = 0x0 + self.unk_894 = 1.0 + self.pad_898 = 0x0 + self.fast_die0_ki_dt = sgx.gpu_fast_die0_integral_gain * (period_ms / 1000) + self.pad_8a0 = 0x0 + self.unk_8a4 = 0x0 + self.unk_8a8 = 65536.0 + self.fast_die0_kp = sgx.gpu_fast_die0_proportional_gain + self.pad_8b0 = 0x0 + self.unk_8b4 = 0x0 + self.pwr_min_duty_cycle_4 = sgx.gpu_pwr_min_duty_cycle + 
self.max_pstate_scaled_8 = self.max_pstate_scaled + self.max_pstate_scaled_9 = self.max_pstate_scaled + self.fast_die0_prop_tgt_delta = 100 * sgx.getprop("gpu-fast-die0-prop-tgt-delta", 0) + self.unk_8c8 = 0 + self.unk_8cc = chip_info.unk_8cc + self.pad_8d0 = bytes(0x14) + self.unk_8e4_0 = bytes(0x10) + self.unk_8e4 = 0 + self.unk_8e8 = 0 + self.max_pstate_scaled_10 = self.max_pstate_scaled + self.unk_8f0 = 0 + self.unk_8f4 = 0 + self.pad_8f8 = 0 + self.pad_8fc = 0 + self.unk_900 = bytes(0x24) + self.unk_924 = chip_info.unk_924 + self.unk_a24 = chip_info.unk_924 + self.unk_b24 = bytes(0x70) + self.max_pstate_scaled_11 = self.max_pstate_scaled + self.freq_with_off = 0x0 + self.unk_b9c = 0 + self.unk_ba0 = 0 + self.unk_ba8 = 0 + self.unk_bb0 = 0 + self.unk_bb4 = 0 + self.pad_bb8 = bytes(0x74) + self.unk_c2c = 1 + + self.power_zones = [PowerZone()] * 5 + power_zone_count = 0 + for i in range(5): + if sgx.getprop(f"gpu-power-zone-target-{i}", None) is None: + break + self.power_zones[i] = PowerZone( + sgx.getprop(f"gpu-power-zone-filter-tc-{i}", None), + sgx.getprop(f"gpu-power-zone-target-{i}", None), + sgx.getprop(f"gpu-power-zone-target-offset-{i}", None), + period_ms + ) + power_zone_count += 1 + + self.power_zone_count = power_zone_count + self.max_power_4 = chip_info.max_power + self.max_power_5 = chip_info.max_power + self.max_power_6 = chip_info.max_power + self.unk_c40 = 0 + self.unk_c44 = 0.0 + self.avg_power_target_filter_a_neg = 1 - 1 / sgx.gpu_avg_power_target_filter_tc + self.avg_power_target_filter_a = 1 / sgx.gpu_avg_power_target_filter_tc + self.avg_power_target_filter_tc_x4 = 4 * sgx.gpu_avg_power_target_filter_tc + self.avg_power_target_filter_tc_xperiod = period_ms * sgx.gpu_avg_power_target_filter_tc + self.base_clock_mhz = base_clock_mhz + self.unk_c58_4 = 0 + + # Note: integer rounding + avg_power_filter_tc_periods = sgx.gpu_avg_power_filter_tc_ms // period_ms + self.avg_power_filter_tc_periods_x4 = avg_power_filter_tc_periods * 4 + self.unk_cfc 
= 0 + self.unk_d00 = 0 + self.avg_power_filter_a_neg = 1 - 1 / avg_power_filter_tc_periods + self.unk_d08 = 0 + self.avg_power_filter_a = 1 - self.avg_power_filter_a_neg + self.unk_d10 = 0 + self.avg_power_ki_dt = sgx.gpu_avg_power_ki_only * (period_ms / 1000) + self.unk_d18 = 0 + self.unk_d1c = 0 + self.unk_d20 = 65536.0 + self.avg_power_kp = sgx.gpu_avg_power_kp + self.unk_d28 = 0 + self.unk_d2c = 0 + self.avg_power_min_duty_cycle = sgx.gpu_avg_power_min_duty_cycle + self.max_pstate_scaled_12 = self.max_pstate_scaled + self.max_pstate_scaled_13 = self.max_pstate_scaled + self.unk_d3c = 0 + self.max_power_7 = chip_info.max_power + self.max_power_8 = chip_info.max_power + self.unk_d48 = 0 + self.unk_d4c = sgx.gpu_avg_power_filter_tc_ms + self.unk_d50 = 0 + self.base_clock_mhz_2 = base_clock_mhz + self.unk_d54_4 = bytes(0xc) + self.unk_d54 = bytes(0x10) + self.max_pstate_scaled_14 = self.max_pstate_scaled + self.unk_d68 = bytes(0x24) + + self.t81xx_data = AGXHWDataT81xx(sgx, chip_info) + + self.unk_dd0 = bytes(0x40) + + self.unk_e10_0 = AGXHWDataA130Extra() + self.unk_e10 = bytes(0xc) + self.fast_die0_sensor_mask64_2 = chip_info.gpu_fast_die0_sensor_mask64 + self.unk_e24 = chip_info.unk_e24 + self.unk_e28 = 1 + self.unk_e2c = bytes(0x1c) + self.unk_e48 = chip_info.unk_e48 + self.unk_f48 = chip_info.unk_e48 + self.pad_1048 = bytes(0x5e4) + self.fast_die0_sensor_mask64_alt = chip_info.gpu_fast_die0_sensor_mask64_alt + self.fast_die0_sensor_present = chip_info.gpu_fast_die0_sensor_present + self.unk_1638 = [0, 1] + self.unk_1640 = bytes(0x2000) + self.unk_3640 = 0 + self.hws1 = AGXHWDataShared1(chip_info) + self.unk_pad1 = bytes(0x20) + self.hws2 = AGXHWDataShared2(chip_info) + self.unk_3c04 = 0 + self.hws3 = AGXHWDataShared3(chip_info) + self.unk_3c58 = bytes(0x3c) + self.unk_3c94 = 0 # flag + self.unk_3c98 = 0 # timestamp? + self.unk_3ca0 = 0 # timestamp? 
class IOMapping(ConstructClass):
    """One I/O mapping entry: a physical range mapped at a virtual address.

    The wire layout is given by ``subcon``; ``readwrite`` is rendered through
    ``_MAPTYPE`` when the entry is printed.
    """

    # Display names for the known ``readwrite`` values.
    _MAPTYPE = {
        0: "RO",
        1: "RW",
    }

    subcon = Struct(
        "phys_addr" / Int64ul,
        "virt_addr" / Int64ul,
        "size" / Int32ul,
        # Usually the same as size, but for MCC this is the size of a single
        # register range.
        "range_size" / Int32ul,
        "readwrite" / Int64ul
    )

    def __init__(self, phys=0, addr=0, size=0, range_size=0, readwrite=0):
        """Create a mapping; all fields default to 0 (an invalid entry)."""
        self.phys_addr, self.virt_addr = phys, addr
        self.size, self.range_size = size, range_size
        self.readwrite = readwrite

    def __str__(self):
        """Return a one-line description, prefixed with a newline.

        An entry whose ``virt_addr`` is 0 is reported as invalid. When a
        hypervisor object is reachable through ``self._stream.uat.hv``, the
        physical address is shown as ``device+offset``; otherwise it is shown
        as a plain hex address.
        """
        if self.virt_addr == 0:
            return "\n<IOMapping: Invalid>"

        # The stream (or its uat/hv chain) may be missing entirely.
        try:
            hv = self._stream.uat.hv
        except AttributeError:
            hv = None

        if hv:
            dev, dev_range = hv.device_addr_tbl.lookup(self.phys_addr)
            offset = self.phys_addr - dev_range.start
            target = f"{dev}+{offset:#x}"
        else:
            target = f"{self.phys_addr:#x}"

        # Unknown readwrite values are printed as-is.
        mode = self._MAPTYPE.get(self.readwrite, self.readwrite)
        return (f"\nIO Mapping: {mode} {self.virt_addr:#x} -> "
                f"{target} = {self.phys_addr:#x} ({self.size:#x} / {self.range_size:#x})")
"yuv_matrices" / Array(63, Array(3, Array(4, Int16sl)))), + "pad_1c8" / HexDump(Bytes(8)), + "io_mappings" / Array(0x14, IOMapping), + Ver("V >= V13_0B4", "unk_450_0" / HexDump(Bytes(0x68))), + "chip_id" / Int32ul, + "unk_454" / Int32ul, + "unk_458" / Int32ul, + "unk_45c" / Int32ul, + "unk_460" / Int32ul, + "unk_464" / Int32ul, + "unk_468" / Int32ul, + "unk_46c" / Int32ul, + "unk_470" / Int32ul, + "unk_474" / Int32ul, + "unk_478" / Int32ul, + "unk_47c" / Int32ul, + "unk_480" / Int32ul, + "unk_484" / Int32ul, + "unk_488" / Int32ul, + "unk_48c" / Int32ul, + "base_clock_khz" / Int32ul, + "power_sample_period" / Int32ul, + "pad_498" / ZPadding(4), + + "unk_49c" / Int32ul, + "unk_4a0" / Int32ul, + "unk_4a4" / Int32ul, + "pad_4a8" / ZPadding(4), + + "unk_4ac" / Int32ul, + "pad_4b0" / ZPadding(8), + + "unk_4b8" / Int32ul, + "unk_4bc" / ZPadding(4), + + "unk_4c0" / Int32ul, + "unk_4c4" / Int32ul, + "unk_4c8" / Int32ul, + "unk_4cc" / Int32ul, + "unk_4d0" / Int32ul, + "unk_4d4" / Int32ul, + "unk_4d8" / ZPadding(4), + + "unk_4dc" / Int32ul, + "unk_4e0" / Int64ul, + "unk_4e8" / Int32ul, + "unk_4ec" / Int32ul, + "unk_4f0" / Int32ul, + "unk_4f4" / Int32ul, + "unk_4f8" / Int32ul, + "unk_4fc" / Int32ul, + "unk_500" / Int32ul, + Ver("V >= V13_0B4", "unk_504_0" / Int32ul), + "unk_504" / Int32ul, + "unk_508" / Int32ul, + "unk_50c" / Int32ul, + "unk_510" / Int32ul, + "unk_514" / Int32ul, + "unk_518" / Int32ul, + "unk_51c" / Int32ul, + "unk_520" / Int32ul, + "unk_524" / Int32ul, + "unk_528" / Int32ul, + "unk_52c" / Int32ul, + "unk_530" / Int32ul, + "unk_534" / Int32ul, + "unk_538" / Int32ul, + Ver("V >= V13_0B4", "unk_53c_0" / Int32ul), + "num_frags" / Int32ul, + "unk_540" / Int32ul, + "unk_544" / Int32ul, + "unk_548" / Int32ul, + "unk_54c" / Int32ul, + "unk_550" / Int32ul, + "unk_554" / Int32ul, + "gpu_region_base" / Int64ul, + "gpu_core" / Int32ul, + "gpu_rev" / Int32ul, + "num_cores" / Int32ul, + "max_pstate" / Int32ul, + Ver("V < V13_0B4", "num_pstates" / Int32ul), + "frequencies" 
/ Array(16, Dec(Int32ul)), + "voltages" / Array(16, Array(8, Dec(Int32ul))), + "voltages_sram" / Array(16, Array(8, Dec(Int32ul))), + "unk_9b4" / Array(16, Float32l), + "unk_9f4" / Array(16, Int32ul), + "rel_max_powers" / Array(16, Dec(Int32ul)), + "rel_boost_freqs" / Array(16, Dec(Int32ul)), + Ver("V < V13_0B4", "min_sram_volt" / Dec(Int32ul)), + Ver("V < V13_0B4", "unk_ab8" / Int32ul), + Ver("V < V13_0B4", "unk_abc" / Int32ul), + Ver("V < V13_0B4", "unk_ac0" / Int32ul), + + Ver("V >= V13_0B4", "unk_ac4_0" / HexDump(Bytes(0x1f0))), + + "pad_ac4" / ZPadding(8), + "unk_acc" / Int32ul, + "unk_ad0" / Int32ul, + "pad_ad4" / ZPadding(16), + "unk_ae4" / Array(4, Int32ul), + "pad_af4" / ZPadding(4), + "unk_af8" / Int32ul, + "unk_afc" / Int32ul, + "unk_b00" / Int32ul, + "unk_b04" / Int32ul, + "unk_b08" / Int32ul, + "unk_b0c" / Int32ul, + "unk_b10" / Int32ul, + "pad_b14" / ZPadding(8), + "unk_b1c" / Int32ul, + "unk_b20" / Int32ul, + "unk_b24" / Int32ul, + "unk_b28" / Int32ul, + "unk_b2c" / Int32ul, + "unk_b30" / Int32ul, + "unk_b34" / Int32ul, + Ver("V >= V13_0B4", "unk_b38_0" / Int32ul), + Ver("V >= V13_0B4", "unk_b38_4" / Int32ul), + "unk_b38" / Array(6, Int64ul), + "unk_b68" / Int32ul, + Ver("V >= V13_0B4", "unk_b6c" / HexDump(Bytes(0xd0))), + Ver("V >= V13_0B4", "unk_c3c" / Int32ul), + ) + + def __init__(self, sgx, chip_info): + # Userspace VA map related + self.unk_0 = 0x13_00000000 + self.unk_8 = 0x14_00000000 + self.unk_10 = 0x1_00000000 + self.unk_18 = 0xffc00000 + self.unk_20 = 0x11_00000000 + self.unk_28 = 0x11_00000000 + # userspace address? + self.unk_30 = 0x6f_ffff8000 + self.pad_40 = bytes(0x20) + # unmapped? + self.unkptr_38 = 0xffffffa0_11800000 + self.pad_1c8 = bytes(8) + + # Note: these are rounded poorly, need to recompute. 
+ self.yuv_matrices = [ + [ # BT.601 full range -> RGB + [ 0x2000, -0x8, 0x2cdb, -0x2cd3], + [ 0x2000, -0xb00, -0x16da, 0x21da], + [ 0x2000, 0x38b6, 0x8, -0x38be], + ], + [ # BT.709 full range -> RGB + [ 0x2000, -0x1, 0x3264, -0x3263], + [ 0x2000, -0x5fe, -0xefb, 0x14f9], + [ 0x2000, 0x3b61, 0x1, -0x3b62], + ], + [ # BT.2020 full range -> RGB + [ 0x2000, 0x0, 0x2f30, -0x2f30], + [ 0x2000, -0x544, -0x1248, 0x178c], + [ 0x2000, 0x3c34, -0x1, -0x3c33], + ], + [ # BT.601 limited range -> RGB + [ 0x2568, -0x9, 0x3343, -0x37e7], + [ 0x2568, -0xc92, -0x1a1e, 0x2203], + [ 0x2568, 0x40cf, 0x9, -0x4585], + ], + [ # BT.709 limited range -> RGB + [ 0x2568, -0x1, 0x3997, -0x3e43], + [ 0x2568, -0x6d9, -0x111f, 0x134b], + [ 0x2568, 0x43dd, 0x1, -0x488b], + ], + [ # BT.2020 limited range -> RGB + [ 0x2568, 0x0, 0x35ee, -0x3a9b], + [ 0x2568, -0x604, -0x14e5, 0x163c], + [ 0x2568, 0x44ce, -0x1, -0x497a], + ], + [ # Unknown YUV->RGB + [ 0x24cb, 0x0, 0x2cfa, -0x3676], + [ 0x24cb, -0xb0a, -0x16e9, 0x1877], + [ 0x24cb, 0x38d9, 0x0, -0x4255], + ], + [ # null + [ 0x0, 0x0, 0x0, 0x0], + [ 0x0, 0x0, 0x0, 0x0], + [ 0x0, 0x0, 0x0, 0x0], + ], + [ # RGB -> BT.601 full range + [ 0x2645, 0x4b23, 0xe98, 0x0], + [-0x15a1, -0x2a5e, 0x3fff, 0x4000], + [ 0x4000, -0x35a2, -0xa5e, 0x4000], + ], + [ # RGB -> BT.709 full range + [ 0x1b37, 0x5b8c, 0x93d, 0x0], + [ -0xeac, -0x3155, 0x4000, 0x4000], + [ 0x4000, -0x3a24, -0x5dd, 0x4000], + ], + [ # RGB -> BT.2020 full range + [ 0x21a0, 0x56c9, 0x797, 0x0], + [-0x11de, -0x2e22, 0x4000, 0x4000], + [ 0x4000, -0x3adb, -0x526, 0x4000], + ], + [ # RGB -> BT.601 limited range + [ 0x20bd, 0x4047, 0xc7c, 0x800], + [-0x12ed, -0x2513, 0x3800, 0x4000], + [ 0x3800, -0x2eee, -0x912, 0x4000], + ], + [ # RGB -> BT.709 limited range + [ 0x1748, 0x4e51, 0x7e7, 0x800], + [ -0xcd6, -0x2b2a, 0x3800, 0x4000], + [ 0x3800, -0x32df, -0x521, 0x4000], + ], + [ # RGB -> BT.2020 limited range + [ 0x1cc4, 0x4a3e, 0x67e, 0x800], + [ -0xfa3, -0x285e, 0x3800, 0x4000], + [ 0x3800, -0x337f, 
-0x481, 0x4000], + ], + [ # Unknown (identity?) + [-0x8000, 0x0, 0x0, 0x0], + [ 0x0, -0x8000, 0x0, 0x0], + [ 0x0, 0x0, -0x8000, 0x0], + ], + ] + if Ver.check("V >= V13_0B4"): + self.yuv_matrices = [ + *self.yuv_matrices[:8], + *(24 * [[[0,0,0,0]]*3]), + *self.yuv_matrices[8:], + *(24 * [[[0,0,0,0]]*3]), + ] + + self.unk_450_0 = bytes(0x68) + + self.chip_id = chip_info.chip_id + self.unk_454 = 0x1 + self.unk_458 = 0x1 + self.unk_45c = 0x0 + self.unk_460 = 0x1 + self.unk_464 = 0x1 + self.unk_468 = 0x1 + self.unk_46c = 0x0 + self.unk_470 = 0x0 + self.unk_474 = 0x0 + self.unk_478 = 0x0 + self.unk_47c = 0x1 + self.unk_480 = 0x0 + self.unk_484 = 0x1 + self.unk_488 = 0x0 + self.unk_48c = 0x1 + self.base_clock_khz = 24000 + self.power_sample_period = sgx.gpu_power_sample_period + self.unk_49c = 0x1 + self.unk_4a0 = 0x1 + self.unk_4a4 = 0x1 + self.unk_4ac = 0x0 + self.unk_4b8 = 0x0 + self.unk_4c0 = 0x1f + self.unk_4c4 = 0x0 + self.unk_4c8 = 0x0 + self.unk_4cc = 0x0 + self.unk_4d0 = 0x0 + self.unk_4d4 = 0x0 + self.unk_4dc = 0x0 + self.unk_4e0 = chip_info.hwdb_4e0 + self.unk_4e8 = 0x0 + self.unk_4ec = 0x0 + self.unk_4f0 = 0x1 + self.unk_4f4 = 0x1 + self.unk_4f8 = 0x0 + self.unk_4fc = 0x0 + self.unk_500 = 0x0 + self.unk_504_0 = 0 + self.unk_504 = 0x31 + self.unk_508 = 0x0 + self.unk_50c = 0x0 + self.unk_510 = 0x0 + self.unk_514 = 0x0 + self.unk_518 = 0x0 + self.unk_51c = 0x0 + self.unk_520 = 0x0 + self.unk_524 = 0x1 # use_secure_cache_flush + self.unk_528 = 0x0 + self.unk_52c = 0x0 + self.unk_530 = 0x0 + self.unk_534 = chip_info.hwdb_534 + self.unk_538 = 0x0 + self.unk_53c_0 = 0 + self.num_frags = chip_info.num_cores + self.unk_540 = 0x0 + self.unk_544 = 0x0 + self.unk_548 = 0x0 + self.unk_54c = 0x0 + self.unk_550 = 0x0 + self.unk_554 = 0x1 + self.gpu_region_base = sgx.gpu_region_base + self.gpu_core = chip_info.gpu_core + self.gpu_rev = chip_info.gpu_rev + self.num_cores = chip_info.num_cores + self.max_pstate = sgx.gpu_num_perf_states + self.num_pstates = 
class InitData_BufferMgrCtl(ConstructValueClass):
    """Buffer manager control area: an array of 126 opaque 16-byte entries."""
    subcon = Array(126, Bytes(0x10))

    def __init__(self):
        # Every entry starts out zero-filled.
        self.value = [bytes(0x10) for _ in range(126)]


class InitData_GPUQueueStatsTA(ConstructClass):
    """Per-queue record inside the TA statistics block (0x18 bytes)."""
    subcon = Struct(
        "busy" / Int32ul,
        "unk_4" / Int32ul,
        "cur_cmdqueue" / Int64ul,
        "cur_count" / Int32ul,
        "unk_14" / Int32ul,
    )

    def __init__(self):
        """Initialise every field to zero, in declaration order."""
        for field in ("busy", "unk_4", "cur_cmdqueue", "cur_count", "unk_14"):
            setattr(self, field, 0)
class InitData_GPUQueueStats3D(ConstructClass):
    """Per-queue record inside the 3D statistics block (0x28 bytes)."""
    subcon = Struct(
        "busy" / Int32ul,
        "cur_cmdqueue" / Int64ul,
        "unk_c" / Int32ul,
        "unk_10" / Int32ul,
        "unk_14" / HexDump(Bytes(0x28 - 0x14)),
    )

    def __init__(self):
        """Zero the scalar fields and zero-fill the trailing opaque bytes."""
        for field in ("busy", "cur_cmdqueue", "unk_c", "unk_10"):
            setattr(self, field, 0)
        self.unk_14 = bytes(0x14)


class InitData_GPUStats3D(ConstructClass):
    """3D statistics block: four per-queue records plus counters and stamps."""
    subcon = Struct(
        "unk_0" / Bytes(0x18),
        "queues" / Array(4, InitData_GPUQueueStats3D),
        "unk_d0" / HexDump(Bytes(0x38)),
        "tvb_overflows_1" / Int32ul,
        "tvb_overflows_2" / Int32ul,
        "unk_f8" / Int32ul,
        "unk_fc" / Int32ul,
        "cur_stamp_id" / Int32sl,
        "unk_104" / Bytes(0x14),
        "unk_118" / Int32sl,
        "unk_11c" / Int32ul,
        "unk_120" / Int32ul,
        "unk_124" / Int32ul,
        "unk_128" / Int32ul,
        "unk_12c" / Int32ul,
        "unk_timestamp" / Int64ul,
        "unk_134" / Bytes(0x1c0 - 0x134),
        # Only present on V13_0B4 and later.
        Ver("V >= V13_0B4", "unk_1c0" / HexDump(Bytes(0x800))),
    )

    def __init__(self):
        """Populate the initial state.

        Counters start at 0, byte ranges are zero-filled, and the two signed
        fields (``cur_stamp_id``, ``unk_118``) start at -1.
        """
        initial = (
            ("unk_0", bytes(0x18)),
            ("queues", [InitData_GPUQueueStats3D() for _ in range(4)]),
            # NOTE(review): unk_68 and cur_cmdqueue have no entry in subcon;
            # they look like leftovers but are kept so the instance exposes
            # the same attributes as before.
            ("unk_68", 0),
            ("cur_cmdqueue", 0),
            ("unk_d0", bytes(0x38)),
            ("tvb_overflows_1", 0),
            ("tvb_overflows_2", 0),
            ("unk_f8", 0),
            ("unk_fc", 0),
            ("cur_stamp_id", -1),
            ("unk_104", bytes(0x14)),
            ("unk_118", -1),
            ("unk_11c", 0),
            ("unk_120", 0),
            ("unk_124", 0),
            ("unk_128", 0),
            ("unk_12c", 0),
            ("unk_timestamp", 0),
            ("unk_134", bytes(0x1c0 - 0x134)),
            ("unk_1c0", bytes(0x800)),
        )
        for field, value in initial:
            setattr(self, field, value)


class InitData_GPUGlobalStatsTA(ConstructClass):
    """Global TA statistics: a command total followed by the TA stats block."""
    subcon = Struct(
        "total_cmds" / Int32ul,
        "stats" / InitData_GPUStatsTA,
    )

    def __init__(self):
        """Start with no commands counted and a freshly initialised stats block."""
        self.total_cmds = 0
        self.stats = InitData_GPUStatsTA()
class InitData_RegionB(ConstructClass):
    """Init-data region B: channel info plus pointers to the shared blocks.

    Each ``*_addr`` / ``unkptr_*`` field is a 64-bit address. The field that
    follows it is a read-only pointer view (``ROPointer``) of the data at that
    address; the ``Lazy`` ones are only dereferenced on demand.
    """
    subcon = Struct(
        "channels" / ChannelInfoSet,
        "pad_110" / ZPadding(0x50),
        "unk_160" / Default(Int64ul, 0),
        "unk_168" / Default(Int64ul, 0),
        "stats_ta_addr" / Int64ul,
        "stats_ta" / ROPointer(this.stats_ta_addr, InitData_GPUGlobalStatsTA),
        "stats_3d_addr" / Int64ul,
        "stats_3d" / ROPointer(this.stats_3d_addr, InitData_GPUGlobalStats3D),
        "stats_cp_addr" / Int64ul,
        "stats_cp" / ROPointer(this.stats_cp_addr, Bytes(0x140)),
        "hwdata_a_addr" / Int64ul,
        "hwdata_a" / ROPointer(this.hwdata_a_addr, AGXHWDataA),
        "unkptr_190" / Int64ul, # size: 0x80, empty
        "unk_190" / ROPointer(this.unkptr_190, Bytes(0x80)),
        "unkptr_198" / Int64ul, # size: 0xc0, fw writes timestamps into this
        "unk_198" / ROPointer(this.unkptr_198, Bytes(0xc0)),
        "hwdata_b_addr" / Int64ul, # size: 0xb80, io stuff
        "hwdata_b" / ROPointer(this.hwdata_b_addr, AGXHWDataB),
        "hwdata_b_addr2" / Int64ul, # repeat of 1a0
        "fwlog_ring2" / Int64ul,
        "unkptr_1b8" / Int64ul, # Unallocated, Size 0x1000
        "unk_1b8" / Lazy(ROPointer(this.unkptr_1b8, Bytes(0x1000))),
        "unkptr_1c0" / Int64ul, # Unallocated, size 0x300
        "unk_1c0" / Lazy(ROPointer(this.unkptr_1c0, Bytes(0x300))),
        "unkptr_1c8" / Int64ul, # Unallocated, unknown size
        "unk_1c8" / Lazy(ROPointer(this.unkptr_1c8, Bytes(0x1000))),
        "unk_1d0" / Int32ul,
        "unk_1d4" / Int32ul,
        "unk_1d8" / HexDump(Bytes(0x3c)),
        "buffer_mgr_ctl_addr" / Int64ul, # Size: 0x4000
        "buffer_mgr_ctl" / ROPointer(this.buffer_mgr_ctl_addr, InitData_BufferMgrCtl),
        "buffer_mgr_ctl_addr2" / Int64ul, # Size: 0x4000
        # Written to by DC_09
        "unk_224" / HexDump(Bytes(0x685c)),
        "unk_6a80" / Int32ul,
        "gpu_idle" / Int32ul,
        "unkpad_6a88" / HexDump(Bytes(0x14)),
        "unk_6a9c" / Int32ul,
        "unk_ctr0" / Int32ul,
        "unk_ctr1" / Int32ul,
        "unk_6aa8" / Int32ul,
        "unk_6aac" / Int32ul,
        "unk_ctr2" / Int32ul,
        "unk_6ab4" / Int32ul,
        "unk_6ab8" / Int32ul,
        "unk_6abc" / Int32ul,
        "unk_6ac0" / Int32ul,
        "unk_6ac4" / Int32ul,
        "unk_ctr3" / Int32ul,
        "unk_6acc" / Int32ul,
        "unk_6ad0" / Int32ul,
        "unk_6ad4" / Int32ul,
        "unk_6ad8" / Int32ul,
        "unk_6adc" / Int32ul,
        "unk_6ae0" / Int32ul,
        "unk_6ae4" / Int32ul,
        "unk_6ae8" / Int32ul,
        "unk_6aec" / Int32ul,
        "unk_6af0" / Int32ul,
        "unk_ctr4" / Int32ul,
        "unk_ctr5" / Int32ul,
        "unk_6afc" / Int32ul,
        "pad_6b00" / HexDump(Bytes(0x38)),
        "unk_6b38" / Int32ul,
        "pad_6b3c" / HexDump(Bytes(0x84)),
    )

    def __init__(self):
        """Set the fields that have fixed initial values.

        Pointer fields and counters not assigned here are left for the caller
        to fill in.
        """
        super().__init__()
        self.unk_1d0 = 0
        self.unk_1d4 = 0
        self.unk_1d8 = bytes(0x3c)
        self.unk_224 = bytes(0x685c)
        self.unkpad_6a88 = bytes(0x14)
        self.pad_6b00 = bytes(0x38)
        self.unk_6b38 = 0xff
        self.pad_6b3c = bytes(0x84)

    def mon(self, add_fn):
        """Report the memory regions referenced by this structure.

        ``add_fn`` is called as ``add_fn(address, size, label)`` once per
        region. The addresses are read from the fields declared in ``subcon``;
        the labels keep their offset-based names (``unkptr_170`` is the
        pointer stored at offset 0x170, and so on).
        """
        regions = (
            (self.stats_ta_addr, 0x140, "unkptr_170"),
            (self.stats_3d_addr, 0x1c0, "unkptr_178"),
            (self.stats_cp_addr, 0x140, "unkptr_180"),
            (self.hwdata_a_addr, 0x3b80, "unkptr_188"),
            (self.unkptr_190, 0x80, "unkptr_190"),
            (self.unkptr_198, 0xc0, "unkptr_198"),
            (self.hwdata_b_addr, 0xb80, "unkptr_1a0"),
            (self.fwlog_ring2, 0x51000, "fwlog_ring2"),
            (self.buffer_mgr_ctl_addr, 0x4000, "unkptr_214"),
        )
        for addr, size, label in regions:
            add_fn(addr, size, label)

        # unkptr_1b8 (0x1000), unkptr_1c0 (0x300) and unkptr_1c8 (0x1000) are
        # unallocated during init, so they are deliberately not reported.


class InitData_PendingStamp(ConstructClass):
    """A pending stamp slot: an ``info`` word and the value being waited for."""
    subcon = Struct(
        "info" / Int32ul,
        "wait_value" / Int32ul,
    )

    def __init__(self):
        """Create an empty slot (both words zero)."""
        super().__init__()
        self.info = 0
        self.wait_value = 0

    def __bool__(self):
        """Return True when either word is non-zero, i.e. the slot is in use."""
        return bool(self.info or self.wait_value)
class RCPowerZone(ConstructClass):
    """Power zone entry: target, target minus offset, and time constant."""
    subcon = Struct(
        "target" / Dec(Int32ul),
        "target_off" / Dec(Int32ul),
        "tc" / Dec(Int32ul),
    )

    def __init__(self, tc=None, target=None, off=None):
        """Build a zone, or an all-zero placeholder when ``tc`` is None.

        With a ``tc`` given, ``target_off`` is stored as ``target - off``.
        """
        if tc is None:
            # Unused slot: target and off are ignored.
            target, target_off, tc = 0, 0, 0
        else:
            target_off = target - off

        self.target = target
        self.target_off = target_off
        self.tc = tc
Float32l, + "fast_die0_ki_dt" / Float32l, + "unk_89d4" / HexDump(Bytes(0xc)), + "unk_89e0" / Int32ul, + "max_power_2" / Int32ul, + "ppm_kp" / Float32l, + "ppm_ki_dt" / Float32l, + "unk_89f0" / Int32ul, + Ver("V >= V13_0B4", "unk_89f4_0" / HexDump(Bytes(0x8))), + Ver("V >= V13_0B4", "unk_89f4_8" / Int32ul), + Ver("V >= V13_0B4", "unk_89f4_c" / HexDump(Bytes(0x50))), + "hws1" / AGXHWDataShared1, + "hws2" / AGXHWDataShared2, + "hws3" / AGXHWDataShared3, + "unk_9004" / HexDump(Bytes(8)), + Ver("V >= V13_0B4", "unk_900c_0" / HexDump(Bytes(0x28))), + "unk_900c" / Int32ul, + Ver("V >= V13_0B4", "unk_9010_0" / Int32ul), + Ver("V >= V13_0B4", "unk_9010_4" / HexDump(Bytes(0x14))), + "unk_9010" / HexDump(Bytes(0x2c)), + "unk_903c" / Int32ul, + "unk_9040" / HexDump(Bytes(0xc0)), + "unk_9100" / HexDump(Bytes(0x6f00)), + "unk_10000" / HexDump(Bytes(0xe50)), + "unk_10e50" / Int32ul, + "unk_10e54" / HexDump(Bytes(0x2c)), + "fault_control" / Int32ul, + "do_init" / Int32ul, + "unk_10e88" / HexDump(Bytes(0x188)), + "idle_ts" / Int64ul, + "idle_unk" / Int64ul, + "unk_11020" / Int32ul, + "unk_11024" / Int32ul, + "unk_11028" / Int32ul, + Ver("V >= V13_0B4", "unk_1102c_0" / Int32ul), + Ver("V >= V13_0B4", "unk_1102c_4" / Int32ul), + Ver("V >= V13_0B4", "unk_1102c_8" / Dec(Int32ul)), + Ver("V >= V13_0B4", "unk_1102c_c" / Int32ul), + Ver("V >= V13_0B4", "unk_1102c_10" / Int32ul), + "unk_1102c" / Int32ul, + "idle_to_off_delay_ms" / Int32ul, + "fender_idle_to_off_delay_ms" / Int32ul, + "fw_early_wake_timeout_ms" / Int32ul, + "pending_stamps" / Array(0x110, InitData_PendingStamp), + "unk_117bc" / Int32ul, + "fault_info" / InitData_FaultInfo, + "counter" / Int32ul, + "unk_118dc" / Int32ul, + Ver("V >= V13_0B4", "unk_118e0_0" / HexDump(Bytes(0x9c))), + "unk_118e0" / Dec(Int32ul), + Ver("V >= V13_0B4", "unk_118e4_0" / Dec(Int32ul)), + "unk_118e4" / Int32ul, + "unk_118e8" / Int32ul, + "unk_118ec" / Array(0x15, Int8ul), + "unk_11901" / HexDump(Bytes(0x43f)), + Ver("V >= V13_0B4", "unk_11d40" / 
HexDump(Bytes(0x19c))), + Ver("V >= V13_0B4", "unk_11edc" / Int32ul), + Ver("V >= V13_0B4", "unk_11ee0" / HexDump(Bytes(0x1c))), + Ver("V >= V13_0B4", "unk_11efc" / Int32ul), + ) + + def __init__(self, sgx, chip_info): + period_ms = sgx.gpu_power_sample_period + avg_power_filter_tc_periods = sgx.gpu_avg_power_filter_tc_ms // period_ms + + self.ktrace_enable = 0# 0xffffffff + self.unk_4 = bytes(0x24) + self.unk_28_0 = 1 # debug + self.unk_28 = 1 + self.unk_2c_0 = 0 + self.unk_2c = 1 + self.unk_30 = 0 + self.unk_34 = 120 + self.unk_38 = bytes(0x1c) + self.unk_54 = 0xffff + self.unk_56 = 40 + self.unk_58 = 0xffff + self.unk_5a = 0 + self.unk_5e = 1 + self.unk_62 = 0 + self.unk_66_0 = bytes(0xc) + self.unk_66 = 1 + self.unk_6a = bytes(0x16) + self.unk_80 = bytes(0xf80) + self.unk_1000 = bytes(0x7000) + self.unk_8000 = bytes(0x900) + self.unk_8900_0 = 0 + self.unk_8900 = 1 + # Accessed with OSIncrementAtomic/OSDecrementAtomic + self.unk_atomic = 0 + self.max_power = chip_info.max_power + self.max_pstate_scaled = 100 * sgx.gpu_num_perf_states + self.max_pstate_scaled_2 = 100 * sgx.gpu_num_perf_states + self.unk_8914 = 0 + self.unk_8918 = 0 + self.max_pstate_scaled_3 = 100 * sgx.gpu_num_perf_states + self.unk_8920 = 0 + + self.power_zones = [RCPowerZone()] * 5 + power_zone_count = 0 + for i in range(5): + if sgx.getprop(f"gpu-power-zone-target-{i}", None) is None: + break + self.power_zones[i] = RCPowerZone( + sgx.getprop(f"gpu-power-zone-filter-tc-{i}", None), + sgx.getprop(f"gpu-power-zone-target-{i}", None), + sgx.getprop(f"gpu-power-zone-target-offset-{i}", None), + ) + power_zone_count += 1 + + self.power_zone_count = power_zone_count + self.avg_power_filter_tc_periods = avg_power_filter_tc_periods + self.avg_power_ki_dt = sgx.gpu_avg_power_ki_only * (period_ms / 1000) + self.avg_power_kp = sgx.gpu_avg_power_kp + self.avg_power_min_duty_cycle = sgx.gpu_avg_power_min_duty_cycle + self.avg_power_target_filter_tc = sgx.gpu_avg_power_target_filter_tc + self.unk_8978 = 
bytes(0x44) + self.unk_89bc_0 = bytes(0x3c) + self.unk_89bc = chip_info.unk_8cc + self.fast_die0_release_temp = 100 * sgx.getprop("gpu-fast-die0-release-temp", 80) + self.unk_89c4 = chip_info.unk_87c + self.fast_die0_prop_tgt_delta = 100 * sgx.getprop("gpu-fast-die0-prop-tgt-delta", 0) + self.fast_die0_kp = sgx.gpu_fast_die0_proportional_gain + self.fast_die0_ki_dt = sgx.gpu_fast_die0_integral_gain * (period_ms / 1000) + self.unk_89d4 = bytes(0xc) + self.unk_89e0 = 1 + self.max_power_2 = chip_info.max_power + self.ppm_kp = sgx.gpu_ppm_kp + self.ppm_ki_dt = sgx.gpu_ppm_ki * (period_ms / 1000) + self.unk_89f0 = 0 + self.unk_89f4_0 = bytes(8) + self.unk_89f4_8 = 1 + self.unk_89f4_c = bytes(0x50) + self.hws1 = AGXHWDataShared1(chip_info) + self.hws2 = AGXHWDataShared2(chip_info) + self.hws3 = AGXHWDataShared3(chip_info) + self.unk_9004 = bytes(8) + self.unk_900c_0 = bytes(0x28) + self.unk_900c = 1 + self.unk_9010_0 = 1 + self.unk_9010_4 = bytes(0x14) + self.unk_9010 = bytes(0x2c) + if Ver.check("V >= V13_0B4"): + self.unk_903c = 1 + else: + self.unk_903c = 0 + self.unk_9040 = bytes(0xc0) + self.unk_9100 = bytes(0x6f00) + self.unk_10000 = bytes(0xe50) + self.unk_10e50 = 0 + self.unk_10e54 = bytes(0x2c) + self.unk_10e80_0 = bytes(0xed4) + self.unk_10e80_ed0 = 0 + self.unk_10e80_ed4 = bytes(0x2c) + #self.fault_control = 0xb + self.fault_control = 0 + self.do_init = 1 + self.unk_10e88 = bytes(0x188) + self.idle_ts = 0 + self.idle_unk = 0 + self.unk_11020 = 40 + self.unk_11024 = 10 + self.unk_11028 = 250 + self.unk_1102c_0 = 1 + self.unk_1102c_4 = 1 + self.unk_1102c_8 = 100 + self.unk_1102c_c = 1 + self.unk_1102c_10 = 0 + self.unk_1102c = 0 + self.idle_to_off_delay_ms = sgx.getprop("gpu-idle-off-delay-ms", 2) + self.fender_idle_to_off_delay_ms = sgx.getprop("gpu-fender-idle-off-delay-ms", 40) + self.fw_early_wake_timeout_ms = sgx.getprop("gpu-fw-early-wake-timeout-ms", 5) + self.pending_stamps = [InitData_PendingStamp() for i in range(0x110)] + self.unk_117bc = 0 + 
class UatLevelInfo(ConstructClass):
    """Descriptor for one UAT page-table level, as embedded in InitData.

    The constructor derives ``index_mask`` from ``num_entries`` and
    ``index_shift``; every ``unk_*`` field is filled with the constant
    that the original author observed.
    """
    subcon = Struct(
        "unk_3" / Int8ul,          # always 8
        "unk_1" / Int8ul,          # always 14, page bits?
        "unk_2" / Int8ul,          # always 14, also page bits?
        "index_shift" / Int8ul,
        "num_entries" / Int16ul,
        "unk_4" / Int16ul,         # 0x4000, table size?
        "unk_8" / Int64ul,         # always 1
        "phys_mask" / Int64ul,
        "index_mask" / Int64ul,
    )

    def __init__(self, index_shift, num_entries, phys_mask):
        """Populate the level descriptor.

        index_shift -- stored as-is and used as the shift for index_mask
        num_entries -- stored as-is; index_mask covers num_entries - 1
        phys_mask   -- stored as-is
        """
        # Mask of (num_entries - 1) moved up to the level's bit position.
        shifted_index_mask = (num_entries - 1) << index_shift

        self.index_shift = index_shift
        # Chained assignment stores unk_1 first, then unk_2.
        self.unk_1 = self.unk_2 = 14
        self.unk_3 = 8
        # Original author's note: probably only 16k pages are supported.
        self.unk_4 = 0x4000
        self.num_entries = num_entries
        self.unk_8 = 1
        self.phys_mask = phys_mask
        self.index_mask = shifted_index_mask
+ "regionC" / ROPointer(this.regionC_addr, InitData_RegionC), + "fw_status_addr" / Int64ul, # allocation size: 0x4000, but probably only 0x80 bytes long + "fw_status" / ROPointer(this.fw_status_addr, InitData_FWStatus), + "uat_page_size" / Int16ul, + "uat_page_bits" / Int8ul, + "uat_num_levels" / Int8ul, + "uat_level_info" / Array(3, UatLevelInfo), + "pad_8c" / HexDump(Default(Bytes(0x14), bytes(0x14))), + "host_mapped_fw_allocations" / Int32ul, # must be 1 + "unk_ac" / Int32ul, + "unk_b0" / Int32ul, + "unk_b4" / Int32ul, + "unk_b8" / Int32ul, + ) + + def __init__(self): + super().__init__() + self.unk_ac = 0 + self.unk_b0 = 0 + self.unk_b4 = 0 + self.unk_b8 = 0 + +__all__.extend(k for k, v in globals().items() + if (callable(v) or isinstance(v, type)) and v.__module__ == __name__) diff --git a/tools/proxyclient/m1n1/fw/agx/microsequence.py b/tools/proxyclient/m1n1/fw/agx/microsequence.py new file mode 100644 index 0000000..475597f --- /dev/null +++ b/tools/proxyclient/m1n1/fw/agx/microsequence.py @@ -0,0 +1,800 @@ +""" +I think these are executed by a simple state machine on the firmware's arm core, +and the typical result is a commandlist submitting to one of the gpu's hardware +command processors. + +It seems like a common pattern is: + 1. Start (3D or Compute) + 2. Timestamp + 3. Wait For Interrupts + 4. Timestamp again + 5. Finish (3D or Compute) + 6. 
class StampCounter(ConstructValueClass):
    """32-bit stamp value, displayed in hex.

    The constructor accepts an optional initial value so that it matches
    the other single-value wrappers in this module (TimeStamp, TsFlag,
    WrappedPointer). Calling it with no argument still yields 0.
    """
    subcon = Hex(Int32ul)

    def __init__(self, value=0):
        self.value = value
class AuxFBInfo(ConstructClass):
    """Auxiliary framebuffer info record.

    Holds two unknown 32-bit words followed by width and height (shown in
    decimal). On V >= V13_0B4 an extra 64-bit ``unk3`` field follows.
    """
    subcon = Struct(
        "unk1" / Int32ul,
        "unk2" / Int32ul,
        "width" / Dec(Int32ul),
        "height" / Dec(Int32ul),
        Ver("V >= V13_0B4", "unk3" / Int64ul),
    )

    def __init__(self, unk1, unk2, width, height):
        """Store the caller-provided fields; ``unk3`` is always 0x100000."""
        super().__init__()
        # Tuple targets are assigned left to right, i.e. in field order.
        self.unk1, self.unk2 = unk1, unk2
        self.width, self.height = width, height
        self.unk3 = 0x100000
class BufferThing(ConstructClass):
    """Buffer bookkeeping record referenced by the TA/3D start and finalize commands.

    Most fields are unknown. ``bm_misc`` is not stored inline: it is a
    read-only dereference of ``bm_misc_addr``, parsed as BufferManagerMisc.
    """
    subcon = Struct(
        "unk_0" / Int64ul,
        "unk_8" / Int64ul,
        "unk_10" / Int64ul,
        "unkptr_18" / Int64ul,
        "unk_20" / Int32ul,
        # Pointer plus the structure it points at.
        "bm_misc_addr" / Int64ul,
        "bm_misc" / ROPointer(this.bm_misc_addr, BufferManagerMisc),
        "unk_2c" / Int32ul,
        "unk_30" / Int64ul,
        "unk_38" / Int64ul,
    )
class Start3DStruct7(ConstructClass):
    """Stamp/event bookkeeping block referenced by Start3DCmd and Finalize3DCmd.

    ``stamp1`` and ``stamp2`` are read-only dereferences of the wrapped
    pointers ``stamp1_addr`` and ``stamp2_addr``, each parsed as a
    StampCounter.
    """
    subcon = Struct(
        "unk_0" / Int64ul,
        "stamp1_addr" / WrappedPointer, # same contents as below
        "stamp1" / ROPointer(this.stamp1_addr.value, StampCounter),
        "stamp2_addr" / WrappedPointer, # same as FinalizeComputeCmd.stamp - some kind of fence/token
        "stamp2" / ROPointer(this.stamp2_addr.value, StampCounter),
        "stamp_value" / Int32ul,
        "ev_3d" / Int32ul,
        "evctl_index" / Int32ul,
        "unk_24" / Int32ul,
        "uuid" / Int32ul,
        "prev_stamp_value" / Int32ul,
        "unk_30" / Int32ul,
    )

    def __init__(self):
        """Create the struct with null (0) stamp pointers."""
        super().__init__()
        # The *_addr fields are declared as WrappedPointer, so the
        # placeholders use that class rather than StampCounter (the type of
        # the pointed-to value). Both default to .value == 0.
        self.stamp1_addr = WrappedPointer()
        self.stamp2_addr = WrappedPointer()
WorkCommand3D + 0x78 + "struct2" / ROPointer(this.struct2_addr, Start3DStruct2), + "buf_thing_addr" / Int64ul, + "buf_thing" / ROPointer(this.buf_thing_addr, BufferThing), + "stats_ptr" / Int64ul, + "busy_flag_ptr" / Int64ul, # 4 bytes + "struct6_addr" / Int64ul, # 0x3c bytes + "struct6" / ROPointer(this.struct6_addr, Start3DStruct6), + "struct7_addr" / Int64ul, # 0x34 bytes + "struct7" / ROPointer(this.struct7_addr, Start3DStruct7), + "cmdqueue_ptr" / Int64ul, # points back to the CommandQueueInfo that this command came from + "workitem_ptr" / Int64ul, # points back at the WorkItem that this command came from + "context_id" / Int32ul, + "unk_50" / Int32ul, + "event_generation" / Int32ul, + "buffer_mgr_slot" / Int32ul, + "unk_5c" / Int32ul, + "prev_stamp_value" / Int64ul, # 0 + "unk_68" / Int32ul, # 0 + "unk_buf_ptr" / Int64ul, + "unk_buf2_ptr" / Int64ul, # 0x18 bytes + "unk_7c" / Int32ul, + "unk_80" / Int32ul, + "unk_84" / Int32ul, + "uuid" / Int32ul, # uuid for tracking + "attachments" / Array(16, Attachment), + "num_attachments" / Int32ul, + "unk_190" / Int32ul, + Ver("V >= V13_0B4", "unk_194" / Int64ul), + Ver("V >= V13_0B4", "unkptr_19c" / Int64ul), + ) + + +class Finalize3DCmd(ConstructClass): + subcon = Struct( # 0x9c bytes + "magic" / Const(0x25, Int32ul), + "uuid" / Int32ul, # uuid for tracking + "unk_8" / Int32ul, # 0 + "stamp_addr" / Int64ul, + "stamp" / ROPointer(this.stamp_addr, StampCounter), + "stamp_value" / Int32ul, + "unk_18" / Int32ul, + "buf_thing_addr" / Int64ul, + "buf_thing" / ROPointer(this.buf_thing_addr, BufferThing), + "buffer_mgr_addr" / Int64ul, + "buffer_mgr" / ROPointer(this.buffer_mgr_addr, BufferManagerInfo), + "unk_2c" / Int64ul, # 1 + "stats_ptr" / Int64ul, + "struct7_addr" / Int64ul, + "struct7" / ROPointer(this.struct7_addr, Start3DStruct7), + "busy_flag_ptr" / Int64ul, + "cmdqueue_ptr" / Int64ul, + "workitem_ptr" / Int64ul, + "unk_5c" / Int64ul, + "unk_buf_ptr" / Int64ul, # Same as Start3DCmd.unkptr_6c + "unk_6c" / Int64ul, # 0 
class StartTACmdStruct3(ConstructClass):
    """Third auxiliary structure referenced by StartTACmd and FinalizeTACmd.

    Field names carry the offsets used by the original author (0x480
    onward). ``stamp1`` and ``stamp2`` are read-only dereferences of the
    wrapped pointers ``stamp1_addr`` and ``stamp2_addr``, each parsed as a
    StampCounter.
    """
    subcon = Struct(
        "unk_480" / Array(6, Int32ul),
        "unk_498" / Int64ul,
        "unk_4a0" / Int32ul,
        "iogpu_deflake_1" / Int64ul,
        "unk_4ac" / Int32ul,
        "unk_4b0" / Int64ul,
        "unk_4b8" / Int32ul,
        "unk_4bc" / Int64ul,
        "unk_4c4_padding" / HexDump(Bytes(0x48)),
        "unk_50c" / Int32ul,
        "unk_510" / Int64ul,
        "unk_518" / Int64ul,
        "unk_520" / Int64ul,
        "unk_528" / Int32ul,
        "unk_52c" / Int32ul,
        "unk_530" / Int32ul,
        "encoder_id" / Int32ul,
        "unk_538" / Int32ul,
        "unk_53c" / Int32ul,
        "unknown_buffer" / Int64ul,
        "unk_548" / Int64ul,
        "unk_550" / Array(6, Int32ul),
        "stamp1_addr" / WrappedPointer, # same contents as below
        "stamp1" / ROPointer(this.stamp1_addr.value, StampCounter),
        "stamp2_addr" / WrappedPointer, # same as FinalizeComputeCmd.stamp - some kind of fence/token
        "stamp2" / ROPointer(this.stamp2_addr.value, StampCounter),
        "stamp_value" / Int32ul,
        "ev_ta" / Int32ul,
        "evctl_index" / Int32ul, # 0-3
        "unk_584" / Int32ul,
        "uuid2" / Int32ul,
        "prev_stamp_value" / Int32ul,
        "unk_590" / Int32ul,
    )

    def __init__(self):
        """Create the struct with null (0) stamp pointers."""
        super().__init__()
        # The *_addr fields are declared as WrappedPointer, so the
        # placeholders use that class rather than StampCounter (the type of
        # the pointed-to value). Both default to .value == 0.
        self.stamp1_addr = WrappedPointer()
        self.stamp2_addr = WrappedPointer()
class ComputeInfo(ConstructClass):
    """Userspace-facing pointer block referenced by StartComputeCmd.

    Original author's note: only the cmdlist and pipeline_base fields are
    strictly needed to launch a basic compute shader.
    The trailing comments record values the original author observed.
    """
    subcon = Struct(
        "args" / Int64ul,           # ComputeArgs
        "cmdlist" / Int64ul,        # CommandList from userspace
        "unkptr_10" / Int64ul,      # size 8, null
        "unkptr_18" / Int64ul,      # size 8, null
        "unkptr_20" / Int64ul,      # size 8, null
        "unkptr_28" / Int64ul,
        "pipeline_base" / Int64ul,  # 0x11_00000000: Used for certain "short" pointers like pipelines (and shaders?)
        "unk_38" / Int64ul,         # always 0x8c60.
        "unk_40" / Int32ul,         # 0x41
        "unk_44" / Int32ul,         # 0
        "unkptr_48" / Int64ul,      # related to threadgroups / thread layout
        "unk_50" / Int32ul,         # 0x40 - Size?
        "unk_54" / Int32ul,         # 0
        "unk_58" / Int32ul,         # 1
        "unk_5c" / Int32ul,         # 0
        "unk_60" / Int32ul,         # 0x1c
    )
class FinalizeComputeCmd(ConstructClass):
    """Microsequence command 0x2a: finalize a compute job.

    Without the G14-only field the members add up to 0x64 bytes. The
    trailing comments are the original author's observations.
    """
    subcon = Struct(
        "magic" / Const(0x2a, Int32ul),
        "unkptr_4" / Int64ul,       # same as StartComputeCmd.unkptr_14
        "cmdqueue_ptr" / Int64ul,   # points back to the submitinfo
        "unk_14" / Int32ul,         # Context ID?
        "unk_18" / Int32ul,
        "unkptr_1c" / Int64ul,      # same as the StartComputeCmd pointer at 0x3c
        "unk_24" / Int32ul,
        "uuid" / Int32ul,           # uuid for tracking?
        "stamp" / Int64ul,
        "stamp_value" / Int32ul,    # Gets written to unkptr_2c (after stamp?)
        "unk_38" / Int32ul,
        "unk_3c" / Int32ul,
        "unk_40" / Int32ul,
        "unk_44" / Int32ul,
        "unk_48" / Int32ul,
        "unk_4c" / Int32ul,
        "unk_50" / Int32ul,
        "unk_54" / Int32ul,
        "unk_58" / Int32ul,
        Ver("G >= G14", "unk_5c_g14" / Int64ul),
        "restart_branch_offset" / Int32sl,  # relative offset from start of Finalize to StartComputeCmd
        "unk_60" / Int32ul,
    )
class MicroSequence(ConstructValueClass):
    """A sequence of microsequence commands, parsed up to and including EndCmd.

    Each element peeks one byte as ``cmdid`` and uses it to choose the
    command class for ``cmd``. Parsing stops once the most recently parsed
    element has cmdid 0x18 (EndCmd). An unknown cmdid is a parse error.
    """
    subcon = RepeatUntil(
        lambda obj, lst, ctx: lst[-1].cmdid == 0x18,
        Struct(
            "cmdid" / Peek(Int8ul),
            "cmd" / Switch(
                this.cmdid,
                {
                    0x01: WaitForInterruptCmd,
                    0x18: EndCmd,
                    0x19: TimestampCmd,
                    0x22: StartTACmd,
                    0x23: FinalizeTACmd,
                    0x24: Start3DCmd,
                    0x25: Finalize3DCmd,
                    0x29: StartComputeCmd,
                    0x2a: FinalizeComputeCmd,
                },
                default=Error,
            ),
        ),
    )

    def __str__(self):
        """Render one command per line between braces.

        The closing line is "}" when an EndCmd was reached and "?" when the
        sequence ran out without one.
        """
        rendered = ["{"]
        saw_end = False
        for entry in self.value:
            rendered.append(str(entry.cmd))
            if isinstance(entry.cmd, EndCmd):
                saw_end = True
                break
        rendered.append("}" if saw_end else "?")
        # Every line, including the last, is newline-terminated.
        return "\n".join(rendered) + "\n"
class ASCArgumentSection:
    """Editable view of an ASC boot-argument blob.

    The blob is a packed list of records, each laid out as
    ``key (4 ASCII bytes) | length (u32 little-endian) | payload (length bytes)``.
    ``index`` maps each key to the ``(offset, length)`` of its payload.
    Field sizes are fixed: assignments overwrite payloads in place and
    never change the size of the blob.
    """

    def __init__(self, bytes_):
        self.blob = bytearray(bytes_)
        self.index = self.build_index()

    def build_index(self):
        """Scan the blob and return ``{key: (payload_offset, payload_length)}``.

        Raises ValueError if a record header or payload runs past the end
        of the blob.
        """
        blob = self.blob
        total = len(blob)
        fields = {}
        off = 0
        while off < total:
            if off + 8 > total:
                # Not enough bytes left for a complete key/length header.
                raise ValueError('blob overran during parsing')
            key = blob[off:off + 4].decode('ascii')
            length = int.from_bytes(blob[off + 4:off + 8], byteorder='little')
            fields[key] = (off + 8, length)
            off += 8 + length

        if off > total:
            raise ValueError('blob overran during parsing')

        return fields

    def items(self):
        """Yield ``(key, payload)`` pairs in index order."""
        for key, (off, length) in self.index.items():
            yield key, self.blob[off:off + length]

    def __getitem__(self, key):
        off, length = self.index[key]
        return bytes(self.blob[off:off + length])

    def __setitem__(self, key, value):
        """Overwrite the payload of an existing field.

        Integers are encoded little-endian at the field's width and strings
        are encoded as ASCII. Values shorter than the field are padded with
        zero bytes so that the blob keeps its size and the offsets of all
        other fields stay valid.

        Raises KeyError for an unknown key and ValueError if the value does
        not fit in the field.
        """
        off, length = self.index[key]

        if isinstance(value, int):
            value = value.to_bytes(length, byteorder='little')
        elif isinstance(value, str):
            value = value.encode('ascii')

        if len(value) > length:
            raise ValueError(f'field {key:s} overflown')

        # A slice assignment with a shorter value would shrink the
        # bytearray and shift every later field, so pad to the full width.
        self.blob[off:off + length] = bytes(value).ljust(length, b'\x00')

    def update(self, keyvals):
        """Assign every ``key: value`` pair of the mapping ``keyvals``."""
        for key, val in keyvals.items():
            self[key] = val

    def keys(self):
        return self.index.keys()

    def dump(self):
        """Print every field as ``key = payload``."""
        for key, val in self.items():
            print(f"{key:4s} = {val}")

    def dump_diff(self, other, logger):
        """Report, via ``logger``, each field whose payload differs in ``other``.

        Both sections must have the same layout.
        """
        assert self.index == other.index

        for key in self.keys():
            if self[key] != other[key]:
                logger(f"\t{key:4s} = {self[key]} -> {other[key]}")

    def to_bytes(self):
        return bytes(self.blob)
@reg_calltype
class WrappedCall(EPICCall):
    """EPIC call of type 0x20 that wraps a second, inner call type.

    Concrete wrapped calls register themselves with ``reg_subclass`` under
    their ``CALLTYPE``; ``from_stream`` parses the common header to find
    that value and then re-parses the payload with the matching ARGS struct.
    """
    SUBCLASSES = {}
    TYPE = 0x20
    HDR = Struct(
        "blank" / Const(0x0, Int32ul),
        "unk1" / Hex(Const(0xffffffff, Int32ul)),
        "calltype" / Hex(Int32ul),
        "blank2" / ZPadding(16),
        "pad" / Hex(Int32ul),
        "len" / Hex(Int64ul),
        "residue" / HexDump(GreedyBytes),
    )

    @classmethod
    def from_stream(cls, f):
        """Read all of ``f`` and build the registered subclass for its calltype.

        Raises ValueError if no subclass is registered for the calltype.
        """
        raw = f.read()
        calltype = int(cls.HDR.parse(raw).calltype)
        target = cls.SUBCLASSES.get(calltype, None)
        if target is None:
            raise ValueError(f"unknown calltype {calltype:#x}")
        parsed_args = target.ARGS.parse(raw)
        return target(parsed_args)

    @classmethod
    def reg_subclass(cls, cls2):
        """Class decorator: register ``cls2`` under ``int(cls2.CALLTYPE)``."""
        key = int(cls2.CALLTYPE)
        cls.SUBCLASSES[key] = cls2
        return cls2

    @classmethod
    def matches(cls, hdr, sub):
        """True for NOTIFY-category sub-headers whose type equals TYPE."""
        is_notify = sub.category == EPICCategory.NOTIFY
        return is_notify and sub.type == cls.TYPE

    def check_retcode(self):
        """Dump the call and raise ValueError if the response retcode is non-zero."""
        if not self.rets.retcode:
            return
        self.dump()
        raise ValueError(f"retcode {self.rets.retcode} in {str(type(self))} (call dumped, see above)")
+ ) + RETS = Struct( + "retcode" / Default(Hex(Int32ul), 0), + "unk" / HexDump(GreedyBytes), + ) + +@WrappedCall.reg_subclass +class ProbeDevice(WrappedCall): + CALLTYPE = 0xc3_00_00_01 + ARGS = Struct( + "blank" / Const(0x0, Int32ul), + "unk1" / Hex(Const(0xffffffff, Int32ul)), + "calltype" / Hex(Const(0xc3000001, Int32ul)), + "blank2" / ZPadding(16), + "pad" / Padding(4), + "len" / Hex(Const(0x28, Int64ul)), + "devno" / Int32ul, + ) + RETS = Struct( + "retcode" / Default(Hex(Int32ul), 0), + "devid" / FourCC, + "blank2" / Const(0x0, Int32ul), + "unk1" / Const(8, Int32ul), + "blank3" / Const(0x0, Int32ul), + "unk2" / Hex(Const(0x01_0d_1c_20, Int32ul)), + "blank4" / Const(0x0, Int32ul), + "remainder" / HexDump(GreedyBytes), + ) + +PDMConfig = Struct( + "unk1" / Int32ul, + "clockSource" / FourCC, + "pdmFrequency" / Int32ul, + "unk3_clk" / Int32ul, + "unk4_clk" / Int32ul, + "unk5_clk" / Int32ul, + "channelPolaritySelect" / Hex(Int32ul), + "unk7" / Hex(Int32ul), + "unk8" / Hex(Int32ul), + "unk9" / Hex(Int16ul), + "ratios" / Struct( + "r1" / Int8ul, + "r2" / Int8ul, + "r3" / Int8ul, + "pad" / Default(Int8ul, 0), + ), + "filterLengths" / Hex(Int32ul), + "coeff_bulk" / Int32ul, + #"coefficients" / Struct( + # "c1" / Int32sl[this._.ratios.r3 * 4 + 4], + # "c2" / Int32sl[this._.ratios.r2 * 4 + 4], + # "c3" / Int32sl[this._.ratios.r1 * 4 + 4], + #), + #"junk" / Padding( + # this.coeff_bulk * 4 - 48 \ + # - (this.ratios.r1 + this.ratios.r2 + this.ratios.r3) * 16 + #), + "coefficients" / Int32sl[ + (this.ratios.r1 + this.ratios.r2 + this.ratios.r3) * 4 + 12 + ], + "junk" / Padding( + lambda this: max(0, + this.coeff_bulk * 4 - 48 \ + - (this.ratios.r1 + this.ratios.r2 + this.ratios.r3) * 16 + ) + ), + "unk10" / Int32ul, # maybe + "micTurnOnTimeMs" / Int32ul, + "blank" / ZPadding(16), + "unk11" / Int32ul, + "micSettleTimeMs" / Int32ul, + "blank2" / ZPadding(69), +) + +DecimatorConfig = Struct( + "latency" / Int32ul, + "ratios" / Struct( + "r1" / Int8ul, + "r2" / Int8ul, + "r3" 
/ Int8ul, + "pad" / Default(Int8ul, 0), + ), + "filterLengths" / Hex(Int32ul), + "coeff_bulk" / Int32ul, + "coefficients" / Int32sl[ + (this.ratios.r1 + this.ratios.r2 + this.ratios.r3) * 4 + 12 + ], + "junk" / Padding( + lambda this: max(0, + this.coeff_bulk * 4 - 48 \ + - (this.ratios.r1 + this.ratios.r2 + this.ratios.r3) * 16 + ) + ), +) + +PowerSetting = Struct( + "devid" / FourCC, + "cookie" / Int32ul, + "pad" / Padding(4), + "blank" / ZPadding(8), + "target_pstate" / FourCC, + "unk2" / Int32ul, + "blank2" / ZPadding(20), +) + +DEVPROPS = { + ('hpai', 202): PowerSetting, + ('lpai', 202): PowerSetting, + ('hpai', 200): FourCC, + ('lpai', 200): FourCC, + ('pdm0', 200): PDMConfig, + ('pdm0', 210): DecimatorConfig, + ('lpai', 301): Struct( + "unk1" / Int32ul, + "unk2" / Int32ul, + "unk3" / Int32ul, + "unk4" / Int32ul, + ), +} + +@WrappedCall.reg_subclass +class GetDeviceProp(WrappedCall): + CALLTYPE = 0xc3_00_00_04 + ARGS = Struct( + "blank" / Const(0x0, Int32ul), + "unk1" / Hex(Const(0xffffffff, Int32ul)), + "calltype" / Hex(Const(0xc3000004, Int32ul)), + "blank2" / ZPadding(16), + "pad" / Padding(4), + "len" / Hex(Const(0x30, Int64ul)), + "devid" / FourCC, + "modifier" / Int32ul, + "unk6" / Hex(Const(0x01, Int32ul)), + ) + RETS = Struct( + "retcode" / Default(Hex(Int32ul), 0), + "len" / Optional(Int32ul), + "data" / Switch(lambda s: (s._params.devid, s._params.modifier), + DEVPROPS, + default=HexDump(GreedyBytes)) + ) + + def read_resp(self, f): + self.rets = self.RETS.parse_stream(f, + devid=self.args.devid, modifier=self.args.modifier + ) + +@WrappedCall.reg_subclass +class SetDeviceProp(WrappedCall): + CALLTYPE = 0xc3_00_00_05 + ARGS = Struct( + "blank" / Const(0x0, Int32ul), + "unk1" / Hex(Const(0xffffffff, Int32ul)), + "calltype" / Hex(Const(0xc3000005, Int32ul)), + "blank2" / ZPadding(16), + "pad" / Padding(4), + "len" / Hex(Int64ul), # len(this.data) + 0x30 + "devid" / FourCC, + "modifier" / Int32ul, + "len2" / Hex(Int32ul), # len(this.data) + "data" / 
@reg_calltype
class IndirectCall(EPICCall):
    """EPIC COMMAND call whose real payload lives in buffers read through the DART.

    ARGS and RETS are both EPICCmd; the buffers they point at are fetched
    with read_txbuf() / read_rxbuf() and decoded with unwrap().
    """

    ARGS = EPICCmd
    RETS = EPICCmd

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Raw buffer contents, filled in by read_txbuf() / read_rxbuf().
        self.txbuf = None
        self.rxbuf = None

    @classmethod
    def matches(cls, hdr, sub):
        """Tell whether subheader *sub* belongs to the COMMAND category."""
        return sub.category == EPICCategory.COMMAND

    def read_txbuf(self, ep):
        """Fetch the request buffer described by self.args and log a hexdump."""
        request = self.args
        ep.dart.invalidate_cache()
        self.txbuf = ep.dart.ioread(0, request.txbuf, request.txlen)

        # dump the command data for offline replays of traces
        ep.log(f"===COMMAND TX DATA=== addr={request.txbuf:#x}")
        chexdump(self.txbuf)
        ep.log("===END DATA===")

    def read_rxbuf(self, ep):
        """Fetch the reply buffer described by self.rets and log a hexdump."""
        reply = self.rets
        ep.dart.invalidate_cache()
        self.rxbuf = ep.dart.ioread(0, reply.rxbuf, reply.rxlen)

        ep.log(f"===COMMAND RX DATA=== addr={reply.rxbuf:#x}")
        chexdump(self.rxbuf)
        ep.log("===END DATA===")

    def unwrap(self):
        """Decode txbuf/rxbuf as a WrappedCall and return it with its response read."""

        def prefixed(buf):
            # Both buffers are parsed with four zero bytes placed in front.
            stream = BytesIO()
            stream.write(b"\x00\x00\x00\x00")
            stream.write(buf)
            stream.seek(0)
            return stream

        call = WrappedCall.from_stream(prefixed(self.txbuf))
        call.read_resp(prefixed(self.rxbuf))
        return call
class StandardASC(ASC):
    """ASC client with the standard system endpoints and DART-aware I/O helpers.

    Endpoint classes are collected from the ENDPOINTS tables of this class
    and its subclasses; the most-derived class wins on conflicts.
    """

    ENDPOINTS = {
        0: ASCManagementEndpoint,
        1: ASCCrashLogEndpoint,
        2: ASCSysLogEndpoint,
        3: ASCKDebugEndpoint,
        4: ASCIOReportingEndpoint,
        8: ASCOSLogEndpoint,
        0xa: ASCDummyEndpoint, # tracekit
    }

    def __init__(self, u, asc_base, dart=None, stream=0):
        """Set up the management endpoint and the device-address window.

        dart:   optional DART object used for mapping and translated access;
                when absent, addresses are passed through unchanged.
        stream: DART stream index handed to the DART helpers.
        """
        super().__init__(u, asc_base)
        self.remote_eps = set()
        self.add_ep(0, ASCManagementEndpoint(self, 0))
        self.dart = dart
        self.stream = stream
        self.eps = []
        self.epcls = {}
        # Window [dva_offset, dva_offset + dva_size) of device addresses.
        self.dva_offset = 0
        self.dva_size = 1 << 32
        # When True, addresses outside the window are accessed untranslated.
        self.allow_phys = False

        # Walk the MRO so subclasses can override or extend endpoint numbers;
        # stop at the first class that does not define ENDPOINTS.
        for cls in type(self).mro():
            eps = getattr(cls, "ENDPOINTS", None)
            if eps is None:
                break
            for k, v in eps.items():
                if k not in self.epcls:
                    self.epcls[k] = v

    def addr(self, addr):
        """Format an address for display."""
        return f"{addr:#x}"

    def _outside_window(self, dva):
        """Return True when *dva* must bypass the DART.

        That is only the case when allow_phys is set AND the address lies
        outside the device-address window.  The previous inline test,
        ``allow_phys and dva < lo or dva >= hi``, parsed as
        ``(allow_phys and dva < lo) or dva >= hi`` and therefore bypassed
        the DART for high addresses even with allow_phys disabled.
        """
        if not self.allow_phys:
            return False
        return dva < self.dva_offset or dva >= (self.dva_offset + self.dva_size)

    def iomap(self, addr, size):
        """Map *addr* through the DART and return the device address.

        Without a DART the address is returned unchanged.
        """
        if self.dart is None:
            return addr
        dva = self.dva_offset | self.dart.iomap(self.stream, addr, size)

        self.dart.invalidate_streams(1)
        return dva

    def ioalloc(self, size):
        """Allocate 0x4000-aligned memory, map it, and return (paddr, dva)."""
        paddr = self.u.memalign(0x4000, size)
        dva = self.iomap(paddr, size)
        return paddr, dva

    def ioread(self, dva, size):
        """Read *size* bytes at device address *dva*."""
        if self._outside_window(dva):
            return self.iface.readmem(dva, size)

        if self.dart:
            return self.dart.ioread(self.stream, dva & 0xFFFFFFFFF, size)
        else:
            return self.iface.readmem(dva, size)

    def iowrite(self, dva, data):
        """Write *data* at device address *dva*."""
        if self._outside_window(dva):
            return self.iface.writemem(dva, data)

        if self.dart:
            return self.dart.iowrite(self.stream, dva & 0xFFFFFFFFF, data)
        else:
            return self.iface.writemem(dva, data)

    def iotranslate(self, dva, size):
        """Translate a device range into a list of (address, size) pairs."""
        if self._outside_window(dva):
            return [(dva, size)]

        if self.dart:
            return self.dart.iotranslate(self.stream, dva & 0xFFFFFFFFF, size)
        else:
            return [(dva, size)]

    def start_ep(self, epno):
        """Instantiate, register and start the endpoint numbered *epno*.

        Raises Exception if no class is known for that endpoint number.
        """
        if epno not in self.epcls:
            raise Exception(f"Unknown endpoint {epno:#x}")

        epcls = self.epcls[epno]
        ep = epcls(self, epno)
        self.add_ep(epno, ep)
        print(f"Starting endpoint #{epno:#x} ({ep.name})")
        self.mgmt.start_ep(epno)
        ep.start()

    def start(self):
        """Boot the ASC, request power-up by message and wait up to 3 s."""
        super().boot()
        self.mgmt.start()
        self.mgmt.wait_boot(3)

    def stop(self, state=0x10):
        """Stop endpoints numbered 0x10 and above, then power down to *state*.

        The endpoint map is reset to just a fresh management endpoint; for
        states below 0x10 the ASC is additionally shut down.
        """
        for ep in list(self.epmap.values())[::-1]:
            if ep.epnum < 0x10:
                continue
            ep.stop()
        self.mgmt.stop(state=state)
        self.epmap = {}
        self.add_ep(0, ASCManagementEndpoint(self, 0))
        if state < 0x10:
            self.shutdown()

    def boot(self):
        """Boot the ASC and wait up to 1 s for startup to complete."""
        print("Booting ASC...")
        super().boot()
        self.mgmt.wait_boot(1)
def handle_msg(self, msg0, msg1): + msg0 = self.BASE_MESSAGE(msg0) + handler = self.msghandler.get(msg0.TYPE, None) + regtype = self.msgtypes.get(msg0.TYPE, self.BASE_MESSAGE) + + if handler is None: + return False + return handler(regtype(msg0.value)) + + def send(self, msg): + self.asc.send(msg, ASCMessage1(EP=self.epnum)) + + def start(self): + pass + + def stop(self): + pass + + def log(self, msg): + print(f"[{self.name}] {msg}") diff --git a/tools/proxyclient/m1n1/fw/asc/crash.py b/tools/proxyclient/m1n1/fw/asc/crash.py new file mode 100644 index 0000000..7f590c8 --- /dev/null +++ b/tools/proxyclient/m1n1/fw/asc/crash.py @@ -0,0 +1,248 @@ +# SPDX-License-Identifier: MIT +from .base import * +from ...utils import * +from construct import * +from ...sysreg import * + +class CrashLogMessage(Register64): + TYPE = 63, 52 + SIZE = 51, 44 + DVA = 43, 0 + +CrashHeader = Struct( + "type" / Const("CLHE", FourCC), + "ver" / Int32ul, + "total_size" / Int32ul, + "flags" / Int32ul, + Padding(16) +) + +CrashCver = Struct( + "uuid" / Bytes(16), + "version" / CString("utf8"), +) + +CrashCstr = Struct( + "id" / Int32ul, + "string" / CString("utf8"), +) + +CrashCtim = Struct( + "time" / Int64ul, +) + +CrashCmbx = Struct( + "hdr" / Array(4, Hex(Int32ul)), + "type" / Int32ul, + "unk" / Int32ul, + "index" / Int32ul, + "messages" / GreedyRange(Struct( + "endpoint" / Hex(Int64ul), + "message" / Hex(Int64ul), + "timestamp" / Hex(Int32ul), + Padding(4), + )), +) + +CrashCcst = Struct( + "task" / Int32ul, + "unk" / Int32ul, + "stack" / GreedyRange(Int64ul) +) + +CrashCasC = Struct( + "l2c_err_sts" / Hex(Int64ul), + "l2c_err_adr" / Hex(Int64ul), + "l2c_err_inf" / Hex(Int64ul), + "lsu_err_sts" / Hex(Int64ul), + "fed_err_sts" / Hex(Int64ul), + "mmu_err_sts" / Hex(Int64ul) +) + +CrashCrg8 = Struct( + "unk_0" / Int32ul, + "unk_4" / Int32ul, + "regs" / Array(31, Hex(Int64ul)), + "sp" / Int64ul, + "pc" / Int64ul, + "psr" / Int64ul, + "cpacr" / Int64ul, + "fpsr" / Int64ul, + "fpcr" / Int64ul, + 
"unk" / Array(64, Hex(Int64ul)), + "far" / Int64ul, + "unk_X" / Int64ul, + "esr" / Int64ul, + "unk_Z" / Int64ul, +) + +CrashEntry = Struct( + "type" / FourCC, + Padding(4), + "flags" / Hex(Int32ul), + "len" / Int32ul, + "payload" / FixedSized(lambda ctx: ctx.len - 16 if ctx.type != "CLHE" else 16, + Switch(this.type, { + "Cver": CrashCver, + "Ctim": CrashCtim, + "Cmbx": CrashCmbx, + "Cstr": CrashCstr, + "Crg8": CrashCrg8, + "Ccst": CrashCcst, + "CasC": CrashCasC, + }, default=GreedyBytes)), +) + +CrashLog = Struct( + "header" / CrashHeader, + "entries" / RepeatUntil(this.type == "CLHE", CrashEntry), +) + +class CrashLogParser: + def __init__(self, data=None, asc=None): + self.asc = asc + if data is not None: + self.parse(data) + + def parse(self, data): + self.data = CrashLog.parse(data) + pass + + def default(self, entry): + print(f"# {entry.type} flags={entry.flags:#x}") + chexdump(entry.payload) + print() + + def Ccst(self, entry): + print(f"Call stack (task {entry.payload.task}:") + for i in entry.payload.stack: + if not i: + break + print(f" - {i:#x}") + print() + + def CasC(self, entry): + print(f"Async error info:") + print(entry.payload) + print() + + def Cver(self, entry): + print(f"RTKit Version: {entry.payload.version}") + print() + + def Crg8(self, entry): + print(f"Exception info:") + + ctx = entry.payload + + addr = self.asc.addr + + spsr = SPSR(ctx.psr) + esr = ESR(ctx.esr) + elr = ctx.pc + far_phys = self.asc.iotranslate(ctx.far, 1)[0][0] + elr_phys = self.asc.iotranslate(ctx.pc, 1)[0][0] + sp_phys = self.asc.iotranslate(ctx.sp, 1)[0][0] + + print(f" == Exception taken from {spsr.M.name} ==") + el = spsr.M >> 2 + print(f" SPSR = {spsr}") + print(f" ELR = {addr(elr)}" + (f" (0x{elr_phys:x})" if elr_phys else "")) + print(f" ESR = {esr}") + print(f" FAR = {addr(ctx.far)}" + (f" (0x{far_phys:x})" if far_phys else "")) + print(f" SP = {ctx.sp:#x}" + (f" (0x{sp_phys:x})" if sp_phys else "")) + + for i in range(0, 31, 4): + j = min(30, i + 3) + print(f" 
class ASCCrashLogEndpoint(ASCBaseEndpoint):
    """Crash log endpoint: negotiates a buffer, then dumps it on a crash.

    The same message type (0x1) is used twice: the first message sets up the
    buffer, any later one is treated as a crash notification.
    """

    SHORT = "crash"
    BASE_MESSAGE = CrashLogMessage

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.iobuffer = None
        self.iobuffer_dva = None
        # Becomes True once the buffer handshake has been handled.
        self.started = False

    @msg_handler(0x1)
    def Handle(self, msg):
        """Dispatch a type-1 message according to the handshake state."""
        if self.started:
            return self.handle_crashed(msg)
        else:
            return self.handle_getbuf(msg)

    def handle_getbuf(self, msg):
        """Record a preallocated buffer, or allocate one and report it back."""
        size = align(0x1000 * msg.SIZE, 0x4000)

        if msg.DVA:
            self.iobuffer_dva = msg.DVA
            self.log(f"buf prealloc at dva {self.iobuffer_dva:#x}")
        else:
            self.iobuffer, self.iobuffer_dva = self.asc.ioalloc(size)
            self.log(f"buf {self.iobuffer:#x} / {self.iobuffer_dva:#x}")
            self.send(CrashLogMessage(TYPE=1, SIZE=size // 0x1000, DVA=self.iobuffer_dva))

        self.started = True
        return True

    def crash_soft(self):
        """Send raw message 0x40 to the endpoint."""
        self.send(0x40)

    def crash_hard(self):
        """Send raw message 0x22 to the endpoint."""
        self.send(0x22)

    def handle_crashed(self, msg):
        """Save the crash log to crash.bin, print it, and raise.

        Always raises Exception("ASC crashed!") once the dump is written.
        """
        size = 0x1000 * msg.SIZE

        self.log(f"Crashed!")
        crashdata = self.asc.ioread(msg.DVA, size)
        # Close the file deterministically so the dump is on disk before the
        # parser (which may itself fail) and the final raise run.
        with open("crash.bin", "wb") as fd:
            fd.write(crashdata)
        clog = CrashLogParser(crashdata, self.asc)
        clog.dump()
        raise Exception("ASC crashed!")
class ASCKDebugEndpoint(ASCBaseEndpoint):
    """KDebug endpoint: hands two freshly allocated buffers to the firmware."""

    SHORT = "kdebug"
    BASE_MESSAGE = KDebugMessage

    @msg_handler(1, KDebugGetBufMessage)
    def GetBuf(self, msg):
        """Allocate both buffers and announce each with a SendBuf message."""
        # First buffer: 0x20 bytes per requested entry, rounded up to 0x4000.
        first_size = align_up(msg.COUNT * 0x20, 0x4000)
        paddr, iova = self.asc.ioalloc(first_size)
        self.iobuffer0, self.iobuffer0_iova = paddr, iova
        self.send(KDebugSendBufMessage(TYPE=1, DVA=self.iobuffer0_iova))

        # Second buffer has a fixed size.
        paddr, iova = self.asc.ioalloc(0x2000)
        self.iobuffer1, self.iobuffer1_iova = paddr, iova
        self.send(KDebugSendBufMessage(TYPE=2, DVA=self.iobuffer1_iova))
        return True

    @msg_handler(2, KDebugPreallocBuf1Message)
    def SetBuf1(self, msg):
        """Accept the preallocated-buffer-1 message without replying."""
        return True

    @msg_handler(3, KDebugPreallocBuf2Message)
    def SetBuf2(self, msg):
        """Accept the preallocated-buffer-2 message without replying."""
        return True

    def start(self):
        """Forget any previous buffers and send the start message."""
        for attr in ("iobuffer0", "iobuffer1", "iobuffer0_iova", "iobuffer1_iova"):
            setattr(self, attr, None)
        self.send(KDebugStart())
class ASCManagementEndpoint(ASCBaseEndpoint):
    """Management endpoint: version hello, endpoint map and power states."""

    BASE_MESSAGE = ManagementMessage
    SHORT = "mgmt"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.syslog_started = False
        # Last power states acknowledged by the firmware.
        self.iop_power_state = 0
        self.ap_power_state = 0
        self.verbose = 1

    @msg_handler(1, Mgmt_Hello)
    def Hello(self, msg):
        """Answer the hello by selecting the highest advertised version."""
        self.log(f"Supported versions {msg.MIN_VER} .. {msg.MAX_VER}")
        # FIXME: we pick the highest version, we should negotiate
        chosen = msg.MAX_VER
        self.send(Mgmt_HelloAck(MIN_VER=chosen, MAX_VER=chosen))
        return True

    @msg_handler(8, Mgmt_EPMap)
    def EPMap(self, msg):
        """Record advertised endpoints, ack, and on the last chunk start them.

        Only endpoints numbered 1..0xf are started here; afterwards the AP
        power state is announced through boot_done().
        """
        base = 32 * msg.BASE
        for bit in range(32):
            if not msg.BITMAP & (1 << bit):
                continue
            epno = base + bit
            self.asc.eps.append(epno)
            if self.verbose > 0:
                self.log(f"Adding endpoint {epno:#x}")

        more = 0 if msg.LAST else 1
        self.send(Mgmt_EPMap_Ack(BASE=msg.BASE, LAST=msg.LAST, MORE=more))

        if msg.LAST:
            for ep in self.asc.eps:
                if ep != 0 and ep < 0x10:
                    self.asc.start_ep(ep)
            self.boot_done()

        return True

    @msg_handler(0xb, Mgmt_SetAPPower)
    def APPowerAck(self, msg):
        """Remember the AP power state reported by the firmware."""
        if self.verbose > 0:
            self.log(f"AP power state is now {msg.STATE:#x}")
        self.ap_power_state = msg.STATE
        return True

    @msg_handler(7, Mgmt_IOPPowerAck)
    def IOPPowerAck(self, msg):
        """Remember the IOP power state reported by the firmware."""
        if self.verbose > 0:
            self.log(f"IOP power state is now {msg.STATE:#x}")
        self.iop_power_state = msg.STATE
        return True

    @msg_handler(4, Mgmt_Pong)
    def Pong(self, msg):
        """Accept a pong; nothing else to do."""
        return True

    def start(self):
        """Request IOP power state 0x220."""
        self.log("Starting via message")
        self.send(Mgmt_SetIOPPower(STATE=0x220))

    def wait_boot(self, timeout=None):
        """Process messages until both power states read 0x20.

        timeout: seconds to wait, or None to wait indefinitely.
        Raises ASCTimeout when the deadline passes first.
        """
        deadline = timeout
        if deadline is not None:
            deadline += time.time()
        while not (self.iop_power_state == 0x20 and self.ap_power_state == 0x20):
            self.asc.work()
            if deadline and time.time() > deadline:
                raise ASCTimeout("Boot timed out")
        self.log("Startup complete")

    def start_ep(self, epno):
        """Send StartEP for *epno* with FLAG=2."""
        self.send(Mgmt_StartEP(EP=epno, FLAG=2))

    def stop_ep(self, epno):
        """Send StartEP for *epno* with FLAG=1."""
        self.send(Mgmt_StartEP(EP=epno, FLAG=1))

    def boot_done(self):
        """Announce AP power state 0x20."""
        self.send(Mgmt_SetAPPower(STATE=0x20))

    def ping(self):
        """Send a ping message."""
        self.send(Mgmt_Ping())

    def stop(self, state=0x10):
        """Drop the AP power state to 0x10, then the IOP power state to *state*.

        Blocks, processing messages, until each change is acknowledged.
        """
        self.log("Stopping via message")
        self.send(Mgmt_SetAPPower(STATE=0x10))
        while self.ap_power_state == 0x20:
            self.asc.work()
        self.send(Mgmt_SetIOPPower(STATE=state))
        while not self.iop_power_state == state:
            self.asc.work()
class ASCSysLogEndpoint(ASCBaseEndpoint):
    """Syslog endpoint: negotiates a log buffer and prints entries from it."""

    BASE_MESSAGE = SyslogMessage
    SHORT = "syslog"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Entry payload size and entry count, as announced by Init.
        self.entrysize = None
        self.count = None
        self.iobuffer = None
        self.iobuffer_dva = None
        self.started = False

    @msg_handler(8, Syslog_Init)
    def Init(self, msg):
        """Record the entry size and count announced by the firmware."""
        self.entrysize = msg.ENTRYSIZE
        self.count = msg.COUNT
        self.log(f"count {self.count}, entrysize {self.entrysize}")
        return True

    @msg_handler(1, Syslog_GetBuf)
    def GetBuf(self, msg):
        """Record a preallocated buffer, or allocate one and report it back."""
        size = align(0x1000 * msg.SIZE, 0x4000)

        if self.iobuffer:
            # Routed through self.log so the warning carries the endpoint
            # prefix like every other message from this class.
            self.log("WARNING: trying to reset iobuffer!")

        if msg.DVA:
            self.iobuffer_dva = msg.DVA
            self.log(f"buf prealloc at dva {self.iobuffer_dva:#x}")
        else:
            self.iobuffer, self.iobuffer_dva = self.asc.ioalloc(size)
            self.log(f"buf {self.iobuffer:#x} / {self.iobuffer_dva:#x}")
            self.send(Syslog_GetBuf(SIZE=size // 0x1000, DVA=self.iobuffer_dva))

        self.started = True
        return True

    @staticmethod
    def _cstr(raw):
        """Decode the NUL-terminated prefix of *raw* as ASCII.

        Bytes outside ASCII are replaced rather than raising, so a single
        odd log line cannot abort message handling.
        """
        return raw.split(b"\x00")[0].decode("ascii", errors="replace")

    @msg_handler(5, Syslog_Log)
    def Log(self, msg):
        """Read entry msg.INDEX from the buffer, print it and echo the message."""
        # Each entry is a 0x20-byte header (two u32 + 24-byte context)
        # followed by entrysize bytes of message text.
        stride = 0x20 + self.entrysize
        log = self.asc.ioread(self.iobuffer_dva + msg.INDEX * stride, stride)
        hdr, unk, context, logmsg = struct.unpack(f"<II24s{self.entrysize}s", log)
        context = self._cstr(context)
        logmsg = self._cstr(logmsg).rstrip("\n")
        self.log(f"* [{context}]{logmsg}")
        # The received message is sent back unchanged.
        self.send(msg)
        return True
class OSSerialize(Construct):
    """Construct for a tagged binary object serialization.

    The stream starts with the 32-bit word 0xd3.  Every object begins with a
    32-bit little-endian tag: bit 31 marks the last element of its container,
    bits 24..28 hold the type and the low 24 bits hold a size/count/value.
    Objects are aligned to 4 bytes.
    """

    def _parse(self, stream, context, path, recurse=False):
        hdr = Int32ul.parse_stream(stream)
        if hdr != 0xd3:
            raise Exception("Bad header")

        obj, last = self.parse_obj(stream)
        assert last
        return obj

    def parse_obj(self, stream, level=0):
        """Parse one object; return (value, last_flag).

        Raises Exception for an unknown type code.
        """
        # align to 32 bits
        pos = stream.tell()
        if pos & 3:
            stream.read(4 - (pos & 3))

        tag = Int32ul.parse_stream(stream)

        last = bool(tag & 0x80000000)
        otype = (tag >> 24) & 0x1f
        size = tag & 0xffffff

        #print(f"{' '*level} @{stream.tell():#x} {otype} {last} {size}")

        if otype == 1:
            # dictionary: `size` key/value pairs
            d = {}
            for i in range(size):
                k, l = self.parse_obj(stream, level + 1)
                assert not l
                v, l = self.parse_obj(stream, level + 1)
                assert l == (i == size - 1)
                d[k] = v
        elif otype == 2:
            # array: `size` elements
            d = []
            for i in range(size):
                v, l = self.parse_obj(stream, level + 1)
                assert l == (i == size - 1)
                d.append(v)
        elif otype == 4:
            # number: always read as 64 bits
            d = Int64ul.parse_stream(stream)
        elif otype == 9:
            d = stream.read(size).decode("utf-8")
        elif otype == 10:
            d = stream.read(size)
        elif otype == 11:
            # boolean: the value lives in the size field
            d = bool(size)
        else:
            raise Exception(f"Unknown tag {otype}")

        #print(f"{' '*level} => {d}")
        return d, last

    def build_obj(self, obj, stream, last=True, level=0):
        """Serialize *obj* (dict, list, bool, int, str or bytes) to *stream*.

        Raises Exception for any other type.
        """
        tag = 0
        if last:
            tag |= 0x80000000

        if isinstance(obj, dict):
            tag |= (1 << 24) | len(obj)
            Int32ul.build_stream(tag, stream)
            for i, (k, v) in enumerate(obj.items()):
                self.build_obj(k, stream, False, level + 1)
                self.build_obj(v, stream, i == len(obj) - 1, level + 1)
        elif isinstance(obj, list):
            tag |= (2 << 24) | len(obj)
            Int32ul.build_stream(tag, stream)
            for i, v in enumerate(obj):
                self.build_obj(v, stream, i == len(obj) - 1, level + 1)
        elif isinstance(obj, bool):
            # Must be tested before int: bool is a subclass of int, so with
            # the int test first this branch was unreachable and booleans
            # were written as 64-bit numbers (type 4) instead of type 11.
            tag |= (11 << 24) | int(obj)
            Int32ul.build_stream(tag, stream)
        elif isinstance(obj, int):
            # size field carries the width in bits
            tag |= (4 << 24) | 64
            Int32ul.build_stream(tag, stream)
            Int64ul.build_stream(obj, stream)
        elif isinstance(obj, str):
            obj = obj.encode("utf-8")
            tag |= (9 << 24) | len(obj)
            Int32ul.build_stream(tag, stream)
            stream.write(obj)
        elif isinstance(obj, bytes):
            tag |= (10 << 24) | len(obj)
            Int32ul.build_stream(tag, stream)
            stream.write(obj)
        else:
            raise Exception(f"Cannot encode {obj!r}")

        # pad with zero bytes up to the next 32-bit boundary
        pos = stream.tell()
        if pos & 3:
            stream.write(bytes(4 - (pos & 3)))

    def _build(self, obj, stream, context, path):
        Int32ul.build_stream(0xd3, stream)
        self.build_obj(obj, stream)

    def _sizeof(self, context, path):
        # variable-length encoding: no static size
        return None
class DCPAVControllerService(EPICStandardService):
    """EPIC service "dcpav-controller-epic": display power and hotplug calls.

    Every method issues self.call(8, <subcommand>, <payload>); payloads that
    carry a value place it as a little-endian u32 after 16 bytes of padding.
    """

    NAME = "dcpav-controller-epic"
    SHORT = "dcpav"

    def setPower(self, power):
        """Issue subcommand 0x8 with *power* in the payload."""
        self.call(8, 0x8, struct.pack("<16xI12x", power))

    def getPower(self, power=None):
        """Issue subcommand 0x9 and return the unpacked reply as a 1-tuple.

        *power* is ignored; it used to be a required argument even though a
        getter has nothing to send, so it is now optional and kept only for
        callers that still pass it.
        """
        return struct.unpack("<16xI12x", self.call(8, 0x9, bytes(32)))

    def wakeDisplay(self):
        """Issue subcommand 0xa with an empty 16-byte payload."""
        self.call(8, 0xa, bytes(16))

    def sleepDisplay(self):
        """Issue subcommand 0xb with an empty 16-byte payload."""
        self.call(8, 0xb, bytes(16))

    def forceHotPlugDetect(self):
        """Issue subcommand 0xc with an empty 16-byte payload."""
        self.call(8, 0xc, bytes(16))

    def setVirtualDeviceMode(self, mode):
        """Issue subcommand 0xd with *mode* in the payload."""
        self.call(8, 0xd, struct.pack("<16xI12x", mode))
class DCPCallChannel(Reloadable):
    """Outgoing call channel living in a window of the DCP shared memory.

    ``buf`` is the window's offset inside the shared memory region and
    ``bufsize`` its length.  Calls may nest: each call appends its packet at
    ``self.off`` and restores the offset once it has completed.
    """

    def __init__(self, dcpep, name, buf, bufsize):
        self.dcp = dcpep
        self.name = name
        self.buf = buf
        self.bufsize = bufsize
        self.off = 0        # next free offset inside the window
        self.pending = []   # stack of DCPCallState for calls in flight

    def ack(self):
        """Mark the most recently issued call as complete.

        Raises:
            Exception: if no call is pending.
        """
        if not self.pending:
            raise Exception("ACK with no calls pending")

        self.pending[-1].complete = True

    def call(self, ctx, tag, inbuf, out_len):
        """Issue one call and block until it is acknowledged.

        The packet written to shared memory is the byte-reversed ASCII
        ``tag``, two little-endian u32 lengths (input, output) and ``inbuf``.
        The reply is read back from the ``out_len`` bytes that follow the
        input data (header offset 12 + input length).

        Returns:
            The ``out_len`` reply bytes read from shared memory.
        """
        in_len = len(inbuf)
        data = tag.encode("ascii")[::-1] + struct.pack("<II", in_len, out_len) + inbuf
        data_size = len(data) + out_len
        assert (self.off + data_size) <= self.bufsize

        self.dcp.asc.iface.writemem(self.dcp.shmem + self.buf + self.off, data)

        state = DCPCallState(off=self.off, tag=tag, in_len=in_len, in_data=data, out_len=out_len,
                             out_addr=self.buf + self.off + 12 + in_len)

        self.off += align_up(data_size, 0x40)
        self.pending.append(state)

        print(f"len={data_size:#x} {in_len}")
        self.dcp.send(DCPEp_Msg(LEN=data_size, OFF=state.off, CTX=ctx, ACK=0))

        # ack() flips state.complete from inside asc.work().
        while not state.complete:
            self.dcp.asc.work()

        print(f"off={state.out_addr:#x} len={out_len}")
        out_data = self.dcp.asc.iface.readmem(self.dcp.shmem + state.out_addr, out_len)

        # Pop outside the assert: under ``python -O`` asserts are removed, and
        # the pop used to disappear with it, leaving stale entries on the stack.
        finished = self.pending.pop()
        assert finished is state
        self.off = state.off

        return out_data
@msg_handler(2, DCPEp_Msg)
def Rx(self, msg):
    """Dispatch an incoming DCPEp_Msg to the matching channel.

    ACK messages complete a call we issued: CMD/CB contexts are acknowledged
    on the CMD channel, OOBCMD/OOBCB on the OOBCMD channel.  Non-ACK messages
    are callbacks and go to the CB, OOBCB or ASYNC callback channel.

    The out-of-band callback branch used to test for ``CallContext.OOBCMD``
    only, so a message carrying ``CallContext.OOBCB`` fell through to the
    "unknown channel" error.  ``OOBCB`` is now accepted; ``OOBCMD`` is still
    routed as before so existing behaviour is preserved.

    Raises:
        Exception: if the context does not map to any channel.
    """
    if msg.ACK:
        if msg.CTX in (CallContext.CMD, CallContext.CB):
            self.ch_cmd.ack()
        elif msg.CTX in (CallContext.OOBCMD, CallContext.OOBCB):
            self.ch_oobcmd.ack()
        else:
            raise Exception(f"Unknown RX ack channel {msg.CTX}")
    else:
        if msg.CTX == CallContext.CB:
            self.ch_cb.cb(msg)
        elif msg.CTX in (CallContext.OOBCB, CallContext.OOBCMD):
            self.ch_oobcb.cb(msg)
        elif msg.CTX == CallContext.ASYNC:
            self.ch_async.cb(msg)
        else:
            raise Exception(f"Unknown RX callback channel {msg.CTX}")
    return True
class DCPIBootService(EPICService):
    """Commands of the "disp0-service" EPIC service.

    Each public method wraps one opcode; the framing is done by send_cmd().
    """
    NAME = "disp0-service"
    SHORT = "disp0"

    def send_cmd(self, op, data=b'', replen=None):
        """Frame ``data`` with a 16-byte header and send it as EPIC command 0xc0.

        The header is four little-endian u32 values: opcode, total length
        (header included) and two zeros.  When ``replen`` is given, 8 bytes
        are added to it for the reply header.

        Returns:
            The reply payload between offset 8 and the length announced in
            the reply header, or None when the reply is empty.
        """
        header = struct.pack("<IIII", op, 16 + len(data), 0, 0)
        reply_size = None if replen is None else replen + 8
        resp = super().send_cmd(0xc0, header + data, reply_size)
        if not resp:
            return
        _reply_op, reply_len = struct.unpack("<II", resp[:8])
        return resp[8:reply_len]

    def setPower(self, power):
        """Opcode 2: send a single byte, 1 if ``power`` is truthy else 0."""
        flag = b"\x01" if power else b"\x00"
        self.send_cmd(2, flag)

    def getModeCount(self):
        """Opcode 3: return ``(hpd, timing_cnt, color_cnt)`` with ``hpd`` as bool."""
        reply = self.send_cmd(3, b"", 12)
        fields = struct.unpack("<B3xII", reply)
        return bool(fields[0]), fields[1], fields[2]

    def getTimingModes(self):
        """Opcode 4: return the parsed list of TimingMode entries."""
        reply = self.send_cmd(4, replen=4096)
        return TimingModeList.parse(reply).list

    def getColorModes(self):
        """Opcode 5: return the parsed list of ColorMode entries."""
        reply = self.send_cmd(5, replen=4096)
        return ColorModeList.parse(reply).list

    def setMode(self, timing_mode, color_mode):
        """Opcode 6: send the built TimingMode followed by the built ColorMode."""
        timing_blob = TimingMode.build(timing_mode)
        color_blob = ColorMode.build(color_mode)
        self.send_cmd(6, timing_blob + color_blob)

    def swapBegin(self):
        """Opcode 15: return the reply parsed as SwapInfo."""
        reply = self.send_cmd(15, replen=128)
        return SwapInfo.parse(reply)

    def swapSetLayer(self, layer_id, info, src_rect, dst_rect):
        """Opcode 16: send a SwapSetLayer record.

        ``src_rect`` and ``dst_rect`` are 4-item sequences ordered
        ``(w, h, x, y)``.
        """
        layer = Container()
        layer.layer_id = layer_id
        layer.layer_info = info
        src_w, src_h, src_x, src_y = src_rect
        layer.src_w = src_w
        layer.src_h = src_h
        layer.src_x = src_x
        layer.src_y = src_y
        dst_w, dst_h, dst_x, dst_y = dst_rect
        layer.dst_w = dst_w
        layer.dst_h = dst_h
        layer.dst_x = dst_x
        layer.dst_y = dst_y
        payload = SwapSetLayer.build(layer)
        return self.send_cmd(16, payload, replen=128)

    def swapSetTimestamp(self):
        """Placeholder for opcode 17; sends nothing."""
        return None

    def swapEnd(self):
        """Opcode 18: send twelve zero bytes and return the reply payload."""
        return self.send_cmd(18, bytes(12), replen=128)

    # Opcode 19 (swapWait) is not implemented; the previously sketched payload
    # was struct.pack("<IIII", 1, swap_id, 0, swap_id) with replen=128.
class NULL:
    """Type of the ``NULL`` marker object; both text forms are "NULL"."""

    def __repr__(self):
        return "NULL"

    # str() and repr() render identically.
    __str__ = __repr__


# Rebind the name to a single instance; from here on ``NULL`` is the marker
# object and the class is only reachable through ``type(NULL)``.
NULL = NULL()
def get_field_val(self, i, in_vals, out_vals=None, nullobj=None):
    """Return the value of argument ``i``, preferring outputs over inputs.

    The value is looked up by argument name in ``out_vals`` first and falls
    back to ``in_vals``.  For nullable (pointer) arguments the companion
    ``<name>_null`` entry of ``in_vals`` decides whether ``nullobj`` is
    substituted; for arrays of pointers the substitution is per element.

    Returns None when no value is available, or when a nullable argument has
    a value but no ``<name>_null`` entry.
    """
    name, _field = self.args[i]
    is_nullable = self.nullable[i]
    is_ptr_array = self.array_of_p[i]

    value = out_vals.get(name, None) if out_vals else None
    if value is None and in_vals:
        value = in_vals.get(name, None)

    # Nothing to substitute for plain arguments or missing values.
    if not is_nullable or value is None:
        return value

    null_flags = in_vals.get(name + "_null", None)
    if null_flags is None:
        return None

    if not is_ptr_array:
        return nullobj if null_flags else value

    merged = [nullobj if flag else item for item, flag in zip(value, null_flags)]
    # Keep the construct list type when the input used it.
    if isinstance(value, ListContainer):
        merged = ListContainer(merged)
    return merged
def is_long(self, arg):
    """Tell whether ``arg`` should be printed in long form.

    A dict is always long.  A list or bytes value is long when it has more
    than four items or when any item is itself long.  Everything else is
    short.
    """
    if not isinstance(arg, (list, bytes)):
        # list and bytes are handled below, so only dict can qualify here.
        return isinstance(arg, dict)

    if len(arg) > 4:
        return True

    for item in arg:
        if self.is_long(item):
            return True
    return False
enumerate(self.args): + if name == "ret": + continue + + dir = self.dir[i] + + val = self.get_field_val(i, in_vals, out_vals, nullobj=NULL) + is_null = val is NULL + if is_null: + val = None + + if dir == "inout": + if val is not None and not isinstance(val, list): + val = ByRef(val) + out_vals[name] = val + elif dir == "out" and not is_null: + val = ByRef(None) + out_vals[name] = val + + if self.as_kwargs: + kwargs[name] = val + else: + args.append(val) + + retval = func(*args, **kwargs) + + if self.rtype is None: + assert retval is None + else: + assert retval is not None + out_vals["ret"] = retval + + out_vals = {k: v.val if isinstance(v, ByRef) else v for k, v in out_vals.items()} + + context = dict(in_vals) + + if "obj" in context: + del context["obj"] + + out_data = self.out_struct.build(out_vals, **context) + return out_data + + + def call(self, call, *args, **kwargs): + if args and kwargs: + raise Exception("Cannot use both args and kwargs") + + if args: + for arg, (name, field) in zip(args, self.args): + kwargs[name] = arg + + in_vals = {} + out_refs = {} + + for i, (name, field) in enumerate(self.args): + if name == "ret": + continue + + val = kwargs[name] + dir = self.dir[i] + nullable = self.nullable[i] + array_of_p = self.array_of_p[i] + + if nullable: + if not array_of_p: + in_vals[name + "_null"] = val is None + else: + defaults = field.parse(b"\x00" * field.sizeof()) + in_vals[name + "_null"] = [i is None for i in val] + val = [v if v is not None else defaults[i] for i, v in enumerate(val)] + else: + assert val is not None + + if val is None: + continue + + if dir == "out": + assert isinstance(val, ByRef) + out_refs[name] = val + elif dir == "inout": + if isinstance(val, ByRef): + in_vals[name] = val.val + out_refs[name] = val + elif val is not None: + in_vals[name] = val + elif val is not None: + in_vals[name] = val + + in_data = self.in_struct.build(in_vals) + print(f"{self.name}({self.fmt_args(in_vals)})") + + out_data = call(in_data) + out_vals 
class IPCObject:
    """Base class for IPC interface declarations made of ``Method`` attributes."""

    @classmethod
    def methods(cls):
        """Return ``{attribute_name: (cls, method)}`` for every Method attribute.

        The class hierarchy is walked from the most basic class to ``cls`` so
        that a subclass definition replaces an inherited one of the same
        name.  The original loop iterated over ``cls.mro()`` but always read
        ``cls.__dict__``, so attributes declared on base classes were never
        collected.
        """
        ret = {}
        for klass in reversed(cls.mro()):
            ret.update({k: (cls, v) for k, v in klass.__dict__.items() if isinstance(v, Method)})

        return ret
Hex(Default(Int32ul, 0)), + "unk_110" / UnkBytes(0x1b8), + "unk_2c8" / Hex(Default(Int32ul, 0)), + "unk_2cc" / UnkBytes(0x14), + "unk_2e0" / Hex(Default(Int32ul, 0)), + "unk_2e2" / UnkBytes(0x2), + "bl_unk" / Hex(Int64ul), # seen: 0x0, 0x1, 0x101, 0x1_0000, 0x101_010101 + "bl_val" / Hex(Int32ul), # range 0x10000000 - approximately 0x7fe07fc0 for 4 - 510 nits + "bl_power" / Hex(Int8ul), # constant 0x40, 0x00: backlight off + "unk_2f3" / UnkBytes(0x2d), +) + +assert IOMFBSwapRec.sizeof() == 0x320 + +MAX_PLANES = 3 + +ComponentTypes = Struct( + "count" / Int8ul, + "types" / SizedArray(7, "count", Int8ul), +) + +#ComponentTypes = Bytes(8) + +PlaneInfo = Struct( + "width" / Int32ul, + "height" / Int32ul, + "base" / Hex(Int32ul), + "offset" / Hex(Int32ul), + "stride" / Hex(Int32ul), + "size" / Hex(Int32ul), + "tile_size" / Int16ul, + "tile_w" / Int8ul, + "tile_h" / Int8ul, + "unk1" / UnkBytes(0xd), + "unk2" / Hex(Int8ul), + "unk3" / UnkBytes(0x26), +) + +assert PlaneInfo.sizeof() == 0x50 + +IOSurface = Struct( + "is_tiled" / bool_, + "unk_1" / bool_, + "unk_2" / bool_, + "plane_cnt" / Int32ul, + "plane_cnt2" / Int32ul, + "format" / FourCC, + "unk_f" / Default(Hex(Int32ul), 0), + "xfer_func" / Int8ul, + "colorspace" / Int8ul, + "stride" / Int32ul, + "pix_size" / Int16ul, + "pel_w" / Int8ul, + "pel_h" / Int8ul, + "offset" / Default(Hex(Int32ul), 0), + "width" / Int32ul, + "height" / Int32ul, + "buf_size" / Hex(Int32ul), + "unk_2d" / Default(Int32ul, 0), + "unk_31" / Default(Int32ul, 0), + "surface_id" / Int32ul, + "comp_types" / Default(SizedArray(MAX_PLANES, "plane_cnt", ComponentTypes), []), + "has_comp" / Bool(Int64ul), + "planes" / Default(SizedArray(MAX_PLANES, "plane_cnt", PlaneInfo), []), + "has_planes" / Bool(Int64ul), + "compression_info" / Default(SizedArray(MAX_PLANES, "plane_cnt", UnkBytes(0x34)), []), + "has_compr_info" / Bool(Int64ul), + "unk_1f5" / Int32ul, + "unk_1f9" / Int32ul, + "padding" / UnkBytes(7), +) + +assert IOSurface.sizeof() == 0x204 + 
# Method table for the UPPipeAP_H13P interface.  Each attribute name is the
# tag of one method; the first argument of Call/Callback is the return type
# (``void`` is None), the second the method name, the rest its arguments.
class UPPipeAP_H13P(IPCObject):
    # A-tags are declared with Call.
    A000 = Call(bool_, "late_init_signal")
    A029 = Call(void, "setup_video_limits")
    A034 = Call(void, "update_notify_clients_dcp", Array(14, uint))
    A035 = Call(bool_, "is_hilo")
    A036 = Call(bool_, "apt_supported")
    A037 = Call(uint, "get_dfb_info", InOutPtr(uint), InOutPtr(Array(4, ulong)), InOutPtr(uint))
    A038 = Call(uint, "get_dfb_compression_info", InOutPtr(uint))

    # D-tags are declared with Callback.
    D000 = Callback(bool_, "did_boot_signal")
    D001 = Callback(bool_, "did_power_on_signal")
    D002 = Callback(void, "will_power_off_signal")
    D003 = Callback(void, "rt_bandwidth_setup_ap", config=OutPtr(rt_bw_config_t))
# Method table for the IOMobileFramebufferAP interface.  Each attribute name
# is the tag of one method; the first argument of Call/Callback is the return
# type (``void`` is None), the second the method name, the rest its arguments
# (either all positional or all keyword, never mixed).
class IOMobileFramebufferAP(IPCObject):
    # A-tags are declared with Call.
    A401 = Call(uint32_t, "start_signal")

    A407 = Call(uint32_t, "swap_start", swap_id=InOutPtr(uint), client=InOutPtr(IOUserClient))
    A408 = Call(uint32_t, "swap_submit_dcp",
                swap_rec=InPtr(IOMFBSwapRec),
                surfaces=Array(4, InPtr(IOSurface)),
                surfAddr=Array(4, Hex(ulong)),
                unkBool=bool_,
                unkFloat=Float64l,
                unkInt=uint,
                unkOutBool=OutPtr(bool_))

    A410 = Call(uint32_t, "set_display_device", uint)
    A411 = Call(bool_, "is_main_display")
    # NOTE: A438 is listed here, out of numeric order.
    A438 = Call(uint32_t, "swap_set_color_matrix", matrix=InOutPtr(IOMFBColorFixedMatrix), func=uint32_t, unk=uint)
#"A438": "IOMobileFramebufferAP::swap_set_color_matrix(IOMFBColorFixedMatrix*, IOMFBColorMatrixFunction, unsigned int)",

    A412 = Call(uint32_t, "set_digital_out_mode", uint, uint)
    A413 = Call(uint32_t, "get_digital_out_state", InOutPtr(uint))
    A414 = Call(uint32_t, "get_display_area", InOutPtr(ulong))
    A419 = Call(uint32_t, "get_gamma_table", InOutPtr(Bytes(0xc0c)))
    A422 = Call(uint32_t, "set_matrix", uint, InPtr(Array(3, Array(3, ulong))))
    A423 = Call(uint32_t, "set_contrast", InOutPtr(Float32l))
    A426 = Call(uint32_t, "get_color_remap_mode", InOutPtr(uint32_t))
    A427 = Call(uint32_t, "setBrightnessCorrection", uint)

    A435 = Call(uint32_t, "set_block_dcp", arg1=uint64_t, arg2=uint, arg3=uint, arg4=Array(8, ulong), arg5=uint, data=SizedBytes(0x1000, "length"), length=ulong)
    A439 = Call(uint32_t, "set_parameter_dcp", param=IOMFBParameterName, value=Lazy(SizedArray(4, "count", ulong)), count=uint)

    A440 = Call(uint, "display_width")
    A441 = Call(uint, "display_height")
    A442 = Call(void, "get_display_size", OutPtr(uint), OutPtr(uint))
    A443 = Call(int_, "do_create_default_frame_buffer")
    A444 = Call(void, "printRegs")
    A447 = Call(int_, "enable_disable_video_power_savings", uint)
    A454 = Call(void, "first_client_open")
    A455 = Call(void, "last_client_close_dcp", OutPtr(uint))
    A456 = Call(bool_, "writeDebugInfo", ulong)
    A457 = Call(void, "flush_debug_flags", uint)
    A458 = Call(bool_, "io_fence_notify", uint, uint, ulong, IOMFBStatus)
    A460 = Call(bool_, "setDisplayRefreshProperties")
    A463 = Call(void, "flush_supportsPower", bool_)
    A464 = Call(uint, "abort_swaps_dcp", InOutPtr(IOMobileFramebufferUserClient))

    A467 = Call(uint, "update_dfb", surf=InPtr(IOSurface))
    A468 = Call(uint32_t, "setPowerState", ulong, bool_, OutPtr(uint))
    A469 = Call(bool_, "isKeepOnScreen")

    # D-tags are declared with Callback.
    # D552 and D561 share the method name "setProperty_dict" and the same
    # argument layout.
    D552 = Callback(bool_, "setProperty_dict", key=string(0x40), value=InPtr(Padded(0x1000, OSDictionary())))
    D561 = Callback(bool_, "setProperty_dict", key=string(0x40), value=InPtr(Padded(0x1000, OSDictionary())))
    D563 = Callback(bool_, "setProperty_int", key=string(0x40), value=InPtr(uint64_t))
    D565 = Callback(bool_, "setProperty_bool", key=string(0x40), value=InPtr(Bool(uint32_t)))
    D567 = Callback(bool_, "setProperty_str", key=string(0x40), value=string(0x40))

    D574 = Callback(IOMFBStatus, "powerUpDART", bool_)

    D575 = Callback(bool_, "get_dot_pitch", OutPtr(uint))
    D576 = Callback(void, "hotPlug_notify_gated", ulong)
    D577 = Callback(void, "powerstate_notify", bool_, bool_)
    D578 = Callback(bool_, "idle_fence_create", IdleCachingState)
    D579 = Callback(void, "idle_fence_complete")

    D581 = Callback(void, "swap_complete_head_of_line", uint, bool_, uint, bool_)
    D582 = Callback(bool_, "create_default_fb_surface", uint, uint)
    D583 = Callback(bool_, "serializeDebugInfoCb", ulong, InPtr(uint64_t), uint)
    D584 = Callback(void, "clear_default_surface")

    D588 = Callback(void, "resize_default_fb_surface_gated")
    D589 = Callback(void, "swap_complete_ap_gated", swap_id=uint, unkBool=bool_, swap_data=InPtr(SwapCompleteData), swap_info=SwapInfoBlob, unkUint=uint)

    D591 = Callback(void, "swap_complete_intent_gated", swap_id=uint, unkB=bool_, unkInt=uint32_t, width=uint, height=uint)
    D593 = Callback(void, "enable_backlight_message_ap_gated", bool_)
    D594 = Callback(void, "setSystemConsoleMode", bool_)

    D596 = Callback(bool_, "isDFBAllocated")
    D597 = Callback(bool_, "preserveContents")
    D598 = Callback(void, "find_swap_function_gated")
class Call:
    """Record of one observed call (request and, later, reply) with printers.

    NOTE(review): this rebinds the module-level name ``Call`` that earlier
    referred to the ``Call(Method)`` subclass used to declare the interface
    tables above; those tables are already built by the time this class is
    defined.  The name is kept because external code may import it.
    """

    def __init__(self, dir, chan, off, msg, in_size, out_size, in_data=b''):
        self.dir = dir
        self.chan = chan
        self.msg = msg
        self.off = off
        self.in_size = in_size
        self.out_size = out_size
        self.in_data = in_data
        self.out_data = None
        self.complete = False
        self.ret = None
        # Parsed values.  They used to be created only by print_req(), so
        # print_reply() raised AttributeError when it was called first.
        self.in_vals = {}
        self.out_vals = {}

    def ack(self, out_data):
        """Store the reply bytes and mark the call as complete."""
        self.out_data = out_data
        self.complete = True

    def print_req(self, indent=""):
        """Print the request line, decoding the arguments when the tag is known.

        Unknown tags are printed with their raw in/out sizes.  When the input
        size does not match the declared input structure, the field layout
        and a hexdump are printed instead and ``in_vals`` is left empty.
        """
        log = f"{indent}{self.dir}{SHORT_CHANNELS[self.chan]}[{self.off:#x}] {self.msg} "

        cls, method = ALL_METHODS.get(self.msg, (None, None))
        if cls is None:
            print(log + f"{self.in_size:#x}/{self.out_size:#x}")
            return

        log += f"{cls.__name__}::{method.name}("
        in_size = method.in_struct.sizeof()

        if in_size != len(self.in_data):
            print(f"{log} !! Expected {in_size:#x} bytes, got {len(self.in_data):#x} bytes (in)")
            dump_fields(method.in_fields)
            chexdump(self.in_data)
            self.in_vals = {}
            return

        self.in_vals = method.parse_input(self.in_data)

        log += f"{method.fmt_args(self.in_vals)})"

        print(log)

        method.print_long_args(indent, self.in_vals)

    def print_reply(self, indent=""):
        """Print the reply line; requires that ack() has been called.

        The direction marker is the reverse of the request's.  A decoded
        ``ret`` value is moved from ``out_vals`` to ``self.ret``.
        """
        assert self.complete
        log = f"{indent}{RDIR[self.dir]}{SHORT_CHANNELS[self.chan]}[{self.off:#x}] {self.msg} "

        cls, method = ALL_METHODS.get(self.msg, (None, None))
        if cls is None:
            print(log + f"{self.in_size:#x}/{self.out_size:#x}")
            return

        log += f"{cls.__name__}::{method.name}("
        out_size = method.out_struct.sizeof()

        if out_size != len(self.out_data):
            print(f"{log} !! Expected {out_size:#x} bytes, got {len(self.out_data):#x} bytes (out)")
            dump_fields(method.out_fields)
            chexdump(self.out_data)
            return

        self.out_vals = method.parse_output(self.out_data, self.in_vals)

        log += f"{method.fmt_args(self.in_vals, self.out_vals)})"

        if "ret" in self.out_vals:
            self.ret = self.out_vals.ret
            del self.out_vals["ret"]
            log += f" = {self.ret!r}"

        print(log)

        method.print_long_args(indent, self.in_vals, self.out_vals)
class DCPBaseManager:
    """Route DCP callbacks to same-named methods and expose DCP calls as attributes.

    Every entry of ipc.ALL_METHODS is indexed twice: by tag (for incoming
    callbacks) and by method name (for outgoing calls made via attribute access).
    """

    def __init__(self, dcpep):
        self.dcpep = dcpep
        self.dcp = dcpep.asc
        dcpep.mgr = self

        self.name_map = {}
        self.tag_map = {}

        # Nesting depth of callbacks currently being handled.
        self.in_callback = 0

        for k, (cls, v) in ipc.ALL_METHODS.items():
            self.name_map[v.name] = k, v
            self.tag_map[k] = v

    def handle_cb(self, state):
        """Dispatch the callback described by state (.tag, .in_data) and return its result.

        Raises Exception for an unknown tag or when no method named after the
        callback exists on this object; exceptions from the handler propagate.
        """
        method = self.tag_map.get(state.tag, None)
        if method is None:
            raise Exception(f"Unknown callback {state.tag}")

        func = getattr(self, method.name, None)

        if func is None:
            raise Exception(f"Unimplemented callback {method!s} [{state.tag}]")

        self.in_callback += 1
        try:
            return method.callback(func, state.in_data)
        except Exception:
            print(f"Exception in callback {method.name}")
            raise
        finally:
            # Always undo the nesting count; otherwise one failing callback
            # would make every later call use CallContext.CB.
            self.in_callback -= 1

    def __getattr__(self, attr):
        """Return a callable performing the DCP call named attr.

        Only reached for attributes not found normally. Names that are unknown,
        or whose tag starts with "D", raise AttributeError.
        """
        # Read through __dict__ so a lookup made before __init__ has populated
        # name_map cannot re-enter __getattr__ and recurse forever.
        name_map = self.__dict__.get("name_map")
        if name_map is None:
            raise AttributeError(f"Unknown method {attr}")

        tag, method = name_map.get(attr, (None, None))
        if method is None or tag.startswith("D"):
            raise AttributeError(f"Unknown method {attr}")

        out_len = method.out_struct.sizeof()
        # Calls issued while handling a callback use the callback context.
        if self.in_callback:
            ctx = CallContext.CB
        else:
            ctx = CallContext.CMD
        rpc = functools.partial(self.dcpep.ch_cmd.call, ctx, tag, out_len=out_len)
        return functools.partial(method.call, rpc)
print(f"setProperty({key} = {value!r})") + return True + + setProperty_dict = setProperty_int = setProperty_bool = setProperty_str = setProperty + + def swap_complete_ap_gated(self, swap_id, unkBool, swap_data, swap_info, unkUint): + swap_data_ptr = "NULL" if swap_data is None else "..." + print(f"swap_complete_ap_gated({swap_id}, {unkBool}, {swap_data_ptr}, ..., {unkUint}") + if swap_data is not None: + chexdump(swap_data) + chexdump(swap_info) + self.swaps += 1 + self.frame = swap_id + + def swap_complete_intent_gated(self, swap_id, unkB, unkInt, width, height): + print(f"swap_complete_intent_gated({swap_id}, {unkB}, {unkInt}, {width}, {height}") + self.swaps += 1 + self.frame = swap_id + + def enable_backlight_message_ap_gated(self, unkB): + print(f"enable_backlight_message_ap_gated({unkB})") + + # wrapper for set_digital_out_mode to print information on the setted modes + def SetDigitalOutMode(self, color_id, timing_id): + color_mode = [x for x in self.dcpav_prop['ColorElements'] if x['ID'] == color_id][0] + timing_mode = [x for x in self.dcpav_prop['TimingElements'] if x['ID'] == timing_id][0] + pprint.pprint(color_mode) + pprint.pprint(timing_mode) + self.set_digital_out_mode(color_id, timing_id) + + ## UPPipeAP_H13P methods + + def did_boot_signal(self): + return True + + def did_power_on_signal(self): + return True + + def will_power_off_signal(self): + return + + def rt_bandwidth_setup_ap(self, config): + print("rt_bandwidth_setup_ap(...)") + if self.compatible == 't8103': + config.val = { + "reg1": 0x23b738014, # reg[5] in disp0/dispext0, plus 0x14 - part of pmgr + "reg2": 0x23bc3c000, # reg[6] in disp0/dispext0 - part of pmp/pmgr + "bit": 2, + } + elif self.compatible == 't600x': + config.val = { + "reg1": 0x28e3d0000 + 0x988, # reg[4] in disp0/dispext0, plus 0x988 + "reg2": 0x0, + "bit": 0, + } + else: + raise ValueError(self.compatible) + + ## UnifiedPipeline2 methods + + def match_pmu_service(self): + pass + + def set_number_property(self, key, 
value): + pass + + def create_provider_service(self): + return True + + def is_dark_boot(self): + return False + + def read_edt_data(self, key, count, value): + return False + + def UNK_get_some_field(self): + return 0 + + def start_hardware_boot(self): + self.set_create_DFB() + self.do_create_default_frame_buffer() + self.setup_video_limits() + self.flush_supportsPower(True) + self.late_init_signal() + self.setDisplayRefreshProperties() + return True + + def setDCPAVPropStart(self, length): + print(f"setDCPAVPropStart({length:#x})") + self.dcpav_prop_len = length - 1 # off by one? + self.dcpav_prop_off = 0 + self.dcpav_prop_data = [] + return True + + def setDCPAVPropChunk(self, data, offset, length): + print(f"setDCPAVPropChunk(..., {offset:#x}, {length:#x})") + assert offset == self.dcpav_prop_off + self.dcpav_prop_data.append(data) + self.dcpav_prop_off += len(data) + return True + + def setDCPAVPropEnd(self, key): + print(f"setDCPAVPropEnd({key!r})") + blob = b"".join(self.dcpav_prop_data) + assert self.dcpav_prop_len == len(blob) + self.dcpav_prop[key] = ipc.OSSerialize().parse(blob) + self.dcpav_prop_data = self.dcpav_prop_len = self.dcpav_prop_off = None + #pprint.pprint(self.dcpav_prop[key]) + return True + + def set_boolean_property(self, key, value): + print(f"set {key!r} = {value}") + + def removeProperty(self, key): + print(f"removeProperty({key!r})") + + def powerstate_notify(self, unk1, unk2): + print(f"powerstate_notify({unk1}, {unk2})") + + def create_default_fb_surface(self, width, height): + print(f"create_default_fb_surface({width}x{height})") + return True + + def powerUpDART(self, unk): + print(f"powerUpDART({unk})") + return 0 + + def hotPlug_notify_gated(self, unk): + print(f"hotPlug_notify_gated({unk})") + + def is_waking_from_hibernate(self): + return False + + ## UPPipe2 methods + + def match_pmu_service_2(self): + return True + + def match_backlight_service(self): + return True + + def get_calendar_time_ms(self): + return time.time_ns() 
// 1000_000 + + def update_backlight_factor_prop(self, value): + pass + + def map_buf(self, buf, vaddr, dva, unk): + print(f"map buf {buf}, {unk}") + paddr, dcpdva, dvasize = self.bufs[buf] + vaddr.val = 0 + dva.val = self.dcp.disp_dart.iomap(4, paddr, dvasize) + print(f"mapped to dva {dva}") + return 0 + + def update_backlight_factor_prop(self, unk): + print(f"update_backlight_factor_prop {unk}") + + ## ServiceRelay methods + + def sr_setProperty(self, obj, key, value): + self.service_prop.setdefault(obj, {})[key] = value + print(f"sr_setProperty({obj}/{key} = {value!r})") + return True + + def sr_getClockFrequency(self, obj, arg): + print(f"sr_getClockFrequency({obj}, {arg})") + return 533333328 + + sr_setProperty_dict = sr_setProperty_int = sr_setProperty_bool = sr_setProperty_str = sr_setProperty + + def sr_get_uint_prop(self, obj, key, value): + value.val = 0 + return False + + def sr_set_uint_prop(self, obj, key, value): + print(f"sr_set_uint_prop({obj}, {key} = {value})") + + def set_fx_prop(self, obj, key, value): + print(f"set_fx_prop({obj}, {key} = {value})") + + def sr_mapDeviceMemoryWithIndex(self, obj, index, flags, addr, length): + assert obj == "PROV" + addr.val, length.val = self.dcp.u.adt["/arm-io/disp0"].get_reg(index) + print(f"sr_mapDeviceMemoryWithIndex({obj}, {index}, {flags}, {addr.val:#x}, {length.val:#x})") + return 0 + + ## PropRelay methods + + def pr_publish(self, prop_id, value): + self.pr_prop[prop_id] = value + print(f"pr_publish({prop_id}, {value!r})") + + ## MemDescRelay methods: + + def allocate_buffer(self, unk0, size, unk1, paddr, dva, dvasize): + print(f"allocate_buffer({unk0}, {size}, {unk1})") + + dvasize.val = align_up(size, 4096) + paddr.val = self.dcp.u.memalign(0x4000, size) + dva.val = self.dcp.dart.iomap(0, paddr.val, size) + + self.mapid += 1 + print(f"Allocating {self.mapid} as {hex(paddr.val)} / {hex(dva.val)}") + + self.bufs[self.mapid] = (paddr.val, dva.val, dvasize.val) + + return self.mapid + + def 
def parse_log(fd):
    """Parse a DCP call log and yield a Call object for every CALL and ACK line.

    `fd` is any iterable of text lines. A CALL line creates a new Call and
    pushes it on the per-channel stack; an ACK line pops the most recent Call
    of its channel, completes it and yields that same object again. Blank
    lines are skipped.

    Raises Exception for an unknown record type.
    """
    op_stack = {}
    for line in fd:
        # Tolerate empty lines instead of failing on the two-way split below.
        if not line.strip():
            continue
        # Only strip the line terminator: a trailing space marks an empty
        # hex payload field and must survive the split.
        line = line.rstrip("\r\n")

        optype, args = line.split(" ", 1)
        if optype == "CALL":
            # The second field is not used (it was previously bound to `msg`
            # and immediately overwritten by the fifth field).
            d, _unused, chan, off, msg, in_size, out_size, in_data = args.split(" ")
            op = Call(d, chan, int(off, 0), msg, int(in_size, 0), int(out_size, 0),
                      bytes.fromhex(in_data))
            op_stack.setdefault(chan, []).append(op)
        elif optype == "ACK":
            _d, _msg, chan, off, out_data = args.split(" ")
            op = op_stack[chan].pop()
            # An ACK must answer the innermost outstanding call on its channel.
            assert int(off, 0) == op.off
            op.ack(bytes.fromhex(out_data))
        else:
            raise Exception(f"Unknown log cmd {optype}")

        yield op
class InitBlock(ConstructClass):
    """One typed, length-prefixed block carried inside an InitMsg."""
    subcon = Struct(
        "type" / Int16ul,
        "subtype" / Int16ul,
        # Size in bytes of the payload that follows.
        "length" / Int16ul,
        # The payload is confined to exactly `length` bytes and decoded by `type`:
        # 0 -> HIDDescriptor, 1 -> GPIOInit, 2 -> empty; any other type is kept
        # as raw bytes.
        "payload" / FixedSized(this.length,
            Switch(this.type, {
                0: HIDDescriptor,
                1: GPIOInit,
                2: Bytes(0),
            }, default=GreedyBytes))
    )
class MTPInterface:
    """Base class for one logical MTP interface multiplexed over an MTPProtocol.

    Subclasses provide NAME (used in log prefixes and __str__). Incoming
    packets are decoded in packet() and routed to report()/ack()/unk().
    """

    def __init__(self, proto, iface):
        self.proto = proto
        self.iface = iface
        self.tx_seq = 0
        self.initialized = False
        self.gpios = {}

    def send(self, msg):
        """Send raw message bytes on this interface with the next 8-bit sequence number."""
        self.proto.send(self.iface, self.tx_seq & 0xff, msg)
        self.tx_seq += 1

    def get_report(self, idx):
        """Send a one-byte message with header flags 0x81 carrying `idx`."""
        msg = TXMessage()
        msg.hdr.flags = 0x81
        msg.hdr.length = 1
        msg.hdr.retcode = 0
        msg.msg = idx
        self.send(msg.build())

    def log(self, s):
        """Log `s` through the protocol, prefixed with this interface's NAME."""
        self.proto.log(f"[{self.NAME}] " + s)

    def initialize(self):
        """Ask the comm interface to enable this device."""
        self.proto.comm.enable_device(self.iface)

    def report(self, msg):
        """Handle a message with header flags 0x00; the default just logs it."""
        self.log(f"report: {msg.hex()}")

    def ack(self, retcode, msg=None):
        """Handle a message with header flags 0x80; the default just logs it.

        packet() calls this as ack(retcode, msg). The old one-argument form
        ack(msg) is still accepted so existing direct callers keep working.
        """
        if msg is None:
            retcode, msg = None, retcode
        self.log(f"ack: {msg.hex()}")

    def unk(self, msg):
        """Handle a message with header flags 0x40; the default just logs it."""
        self.log(f"unk: {msg.hex()}")

    def packet(self, pkt):
        """Decode one received packet and dispatch it on its header flags.

        Packets with any other flags value are ignored.
        """
        msg = RXMessage.parse(pkt)
        mtype = msg.hdr.flags
        if mtype == 0x00:
            self.report(msg.msg)
        elif mtype == 0x80:
            self.ack(msg.hdr.retcode, msg.msg)
        elif mtype == 0x81:
            self.log(f"REPORT")
            chexdump(msg.msg, print_fn=self.log)
        elif mtype == 0x40:
            self.unk(msg.msg)

    def __str__(self):
        return f"{self.iface}/{self.NAME}"
class MTPProtocol:
    """Packet framing over a dockchannel plus demultiplexing to MTP interfaces.

    Packets are an 8-byte header, a payload and a 32-bit checksum; received
    payloads are handed to the interface object registered for the device id.
    """

    INTERFACES = [
        MTPCommInterface,
        MTPMultitouchInterface,
        MTPKeyboardInterface,
        MTPSTMInterface,
        MTPActuatorInterface,
        MTPTPAccelInterface,
    ]

    def __init__(self, u, node, mtp, dockchannel, smc):
        self.node = node
        self.smc = smc
        self.u = u
        self.mtp = mtp
        self.dockchannel = dockchannel
        self.iface = {}

        # Interface 0 is always the comm interface.
        self.get_interface(0, "comm")

    def get_interface(self, iface, name):
        """Return the interface object for id `iface`, creating it by NAME if needed.

        A new object is also exposed as an attribute named after `name` (with
        "-" replaced by "_"). Returns None, after logging, for an unknown name.
        """
        try:
            return self.iface[iface]
        except KeyError:
            pass

        impl = next((c for c in self.INTERFACES if c.NAME == name), None)
        if impl is None:
            self.log(f"Unknown interface name {name}")
            return None

        instance = impl(self, iface)
        self.iface[iface] = instance
        setattr(self, name.replace("-", "_"), instance)
        return instance

    def checksum(self, d):
        """Return 0xffffffff minus the sum of the little-endian 32-bit words of `d`, mod 2**32."""
        assert len(d) % 4 == 0
        words = struct.unpack(f"<{len(d) // 4}I", d)
        return (0xffffffff - sum(words)) & 0xffffffff

    def read_pkt(self):
        """Read one packet from the dockchannel and return (device id, payload).

        A checksum mismatch is logged but the payload is still returned.
        """
        self.mtp.work_pending()
        header = self.dockchannel.read(8)
        hlen, mtype, size, ctr, devid, pad = struct.unpack("<BBHBBH", header)
        assert hlen == 8
        payload = self.dockchannel.read(size)
        (received,) = struct.unpack("<I", self.dockchannel.read(4))
        computed = self.checksum(header + payload)
        if computed != received:
            self.log(f"Checksum error: expected {computed:08x}, got {received:08x}")
        return devid, payload

    def send(self, iface, seq, msg):
        """Frame `msg` (zero-padded to a multiple of 4 bytes) and write it to the dockchannel."""
        remainder = len(msg) % 4
        if remainder:
            msg += bytes(4 - remainder)
        header = struct.pack("<BBHBBH", 8, 0x11, len(msg), seq, iface, 0)
        trailer = struct.pack("<I", self.checksum(header + msg))
        self.dockchannel.write(header + msg + trailer)
        self.mtp.work_pending()

    def work_pending(self):
        """Process packets for as long as the dockchannel reports received data."""
        self.mtp.work_pending()
        while self.dockchannel.rx_count != 0:
            self.work()
            self.mtp.work_pending()

    def work(self):
        """Read one packet and hand it to the interface it is addressed to."""
        devid, pkt = self.read_pkt()
        self.iface[devid].packet(pkt)

    def wait_init(self, name):
        """Process packets until the interface attribute `name` exists and is initialized."""
        self.log(f"Waiting for {name}...")
        while not (hasattr(self, name) and getattr(self, name).initialized):
            self.work()

    def log(self, m):
        print("[MTP]" + m)
class PMPEndpoint(ASCBaseEndpoint):
    """ASC endpoint that performs the PMP start-up handshake.

    Startup allocates a shared buffer and sends PMP_Configure; the configure
    ack then drives the Init1 / Init2 / Unk message exchange, each step
    waiting for its ack flag to be set by the matching handler.
    """
    BASE_MESSAGE = PMPMessage
    SHORT = "pmpep"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Shared buffer address and its device virtual address (set in Startup).
        self.shmem = self.shmem_dva = None
        # Handshake progress flags, set by the *_Ack handlers below.
        self.init_complete = False
        self.init1_acked = False
        self.init2_acked = False
        self.unk_acked = False

    @msg_handler(0x00, PMP_Startup)
    def Startup(self, msg):
        """Allocate the 0x10000-byte shared buffer and send the initial configuration."""
        self.log("Starting up")

        self.shmem, self.shmem_dva = self.asc.ioalloc(0x10000)

        self.send_init_config()
        return True

    def send_init_config(self):
        """Fill the shared buffer, send PMP_Configure and wait for its ack."""
        self.asc.p.memset32(self.shmem, 0, 0x10000)
        # The ADT's DRAM energy model configuration goes at offset 0x2000.
        dram_config = self.asc.u.adt["arm-io/pmp/iop-pmp-nub"].energy_model_dram_configs
        self.asc.iface.writemem(self.shmem + 0x2000, dram_config)

        node = self.asc.u.adt["arm-io/pmp"]

        # Build a table of (dva, size) pairs for the node's reg entries from
        # index 3 onwards, mapping each non-empty one at consecutive DVAs.
        maps = []
        dva = 0xc0000000
        for i in range(3, len(node.reg)):
            addr, size = node.get_reg(i)
            if size == 0:
                # Keep the table slot, but mark it empty.
                maps.append(struct.pack("<QQ", 0, 0))
                continue

            self.asc.dart.iomap_at(0, dva, addr, size)
            self.log(f"map {addr:#x} -> {dva:#x} [{size:#x}]")
            maps.append(struct.pack("<QQ", dva, size))
            dva += align(size, 0x4000)

        chexdump(b"".join(maps))

        # The mapping table goes at offset 0xe000 of the shared buffer.
        self.asc.iface.writemem(self.shmem + 0xe000, b"".join(maps))
        self.send(PMP_Configure(DVA=self.shmem_dva))

        while not self.init_complete:
            self.asc.work()
        return True

    @msg_handler(0x20, PMP_Configure_Ack)
    def Configure_Ack(self, msg):
        """Dump what is now in the shared buffer, then run the Init1/Init2/Unk exchange."""
        self.init_complete = True

        props = self.asc.iface.readmem(self.shmem, 0x2000)
        devinfo = self.asc.iface.readmem(self.shmem + 0x4000, 0x1000)
        status = self.asc.iface.readmem(self.shmem + 0xc000, 0x100)

        print("PMP Props:")
        chexdump(props)
        print("PMP Device Info:")
        chexdump(devinfo)
        print("PMP Status:")
        chexdump(status)

        # Each step blocks, pumping the ASC, until the matching ack arrives.
        self.send(PMP_Init1(UNK1=1, UNK2=3))
        while not self.init1_acked:
            self.asc.work()

        self.send(PMP_Init2(UNK1=1, UNK2=0))
        while not self.init2_acked:
            self.asc.work()

        self.send(PMP_Unk(UNK1=0x3bc, UNK2=2))
        while not self.unk_acked:
            self.asc.work()

        return True

    @msg_handler(0x201, PMP_Init1_Ack)
    def Init1_Ack(self, msg):
        self.init1_acked = True
        return True

    @msg_handler(0x203, PMP_Init2_Ack)
    def Init2_Ack(self, msg):
        self.init2_acked = True
        return True

    @msg_handler(0x110, PMP_Unk_Ack)
    def Unk_Ack(self, msg):
        self.unk_acked = True
        return True
class SMCEndpoint(ASCBaseEndpoint):
    """ASC endpoint for SMC key access through a shared memory buffer.

    Commands carry a 4-bit message id; cmd() waits until handle_msg() has
    stored the matching SMCResult. Keys are four-character ASCII strings.
    """
    BASE_MESSAGE = SMCMessage
    SHORT = "smcep"
    TYPE_MAP = {
        "ui64": ("<Q", None),
        "ui32": ("<I", None),
        "ui16": ("<H", None),
        "ui8 ": ("<B", None),
        "si64": ("<q", None),
        "si32": ("<i", None),
        "si16": ("<h", None),
        "si8 ": ("<b", None),
        "flag": ("<B", None),
        "flt ": ("<f", None),
        "hex_": (None, hexdump),
        "ch8*": (None, lambda c: c.split(b"\x00")[0]),
        "ioft": ("<Q", None),
    }

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.shmem = None
        self.msgid = 0
        self.outstanding = set()
        self.ret = {}

    @staticmethod
    def _key_id(key):
        """Convert an ASCII key string to its big-endian integer form."""
        return int.from_bytes(key.encode("ascii"), byteorder="big")

    def _result_bytes(self, ret, size):
        """Return a command's data: inline VALUE for up to 4 bytes, else from shared memory."""
        if size > 4:
            return self.asc.iface.readmem(self.shmem, ret.SIZE)
        return struct.pack("<I", ret.VALUE)[:size]

    def start(self):
        """Send SMCInitialize and wait until the first message has set self.shmem."""
        self.send(SMCInitialize(ID = 0))
        self.msgid += 1 # important!
        while self.shmem is None:
            self.asc.work()

    def new_msgid(self):
        """Reserve and return the next 4-bit message id."""
        mid = self.msgid & 0xF
        self.msgid += 1
        assert(mid not in self.outstanding)
        self.outstanding.add(mid)
        return mid

    def cmd(self, cmd):
        """Send `cmd`, wait for its result and return it; raise SMCError if RESULT is non-zero."""
        cmd.ID = self.new_msgid()
        self.send(cmd)
        while cmd.ID in self.outstanding:
            self.asc.work()
        result = self.ret[cmd.ID]
        if result.RESULT == 0:
            return result
        raise SMCError(f"SMC error {result}", result)

    def write(self, key, data):
        """Write `data` to `key`, passing the bytes through shared memory."""
        key_id = self._key_id(key)
        self.asc.iface.writemem(self.shmem, data)
        self.cmd(SMCWriteKey(KEY = key_id, SIZE = len(data)))

    def read(self, key, size):
        """Read `size` bytes from `key` and return them as bytes."""
        key_id = self._key_id(key)
        ret = self.cmd(SMCReadKey(KEY = key_id, SIZE = size))
        return self._result_bytes(ret, size)

    def rw(self, key, data, outsize):
        """Write `data` to `key` and read back `outsize` bytes in a single command."""
        key_id = self._key_id(key)
        self.asc.iface.writemem(self.shmem, data)
        ret = self.cmd(SMCReadWriteKey(KEY=key_id, RSIZE=outsize, WSIZE=len(data)))
        return self._result_bytes(ret, outsize)

    def get_key_by_index(self, index):
        """Return the key name stored at `index`."""
        ret = self.cmd(SMCGetKeyByIndex(INDEX = index))
        return ret.VALUE.to_bytes(4, byteorder="little").decode("ascii")

    def get_key_info(self, key):
        """Return (length, type code, flags) for `key`, read from shared memory."""
        self.cmd(SMCGetKeyInfo(KEY = self._key_id(key)))
        info = self.asc.iface.readmem(self.shmem, 6)
        length, type, flags = struct.unpack("B4sB", info)
        return length, type.decode("ascii"), flags

    def _read_scalar(self, key, fmt, size):
        """Read `size` bytes from `key` and unpack them as one value of struct format `fmt`."""
        (value,) = struct.unpack(fmt, self.read(key, size))
        return value

    def read64(self, key):
        return self._read_scalar(key, "<Q", 8)

    def read32(self, key):
        return self._read_scalar(key, "<I", 4)

    def read16(self, key):
        return self._read_scalar(key, "<H", 2)

    def read8(self, key):
        return self._read_scalar(key, "<B", 1)

    def read32b(self, key):
        # Big-endian variant of read32.
        return self._read_scalar(key, ">I", 4)

    def write64(self, key, data):
        self.write(key, struct.pack("<Q", data))

    def write32(self, key, data):
        self.write(key, struct.pack("<I", data))

    def write16(self, key, data):
        self.write(key, struct.pack("<H", data))

    def write8(self, key, data):
        self.write(key, struct.pack("<B", data))

    def rw32(self, key, data):
        (value,) = struct.unpack("<I", self.rw(key, struct.pack("<I", data), 4))
        return value

    def read_type(self, key, size, typecode):
        """Read `key` and decode it according to its entry in TYPE_MAP.

        Unknown type codes return the raw bytes.
        """
        fmt, func = self.TYPE_MAP.get(typecode, (None, None))

        val = self.read(key, size)

        if fmt:
            val = struct.unpack(fmt, val)[0]
        if func:
            val = func(val)
        return val

    def handle_msg(self, msg0, msg1):
        """Handle one incoming message; always returns True.

        The first message received supplies the shared memory address. Later
        ones are results: notifications are logged, anything else completes
        the outstanding command with the same id.
        """
        if self.shmem is None:
            self.log("Starting up")
            self.shmem = msg0
            return True

        msg = SMCResult(msg0)
        ret = msg.RESULT
        mid = msg.ID
        if ret == SMC_NOTIFICATION:
            self.log(f"Notification: {msg.VALUE:#x}")
            return True

        self.outstanding.discard(mid)
        self.ret[mid] = msg
        return True
def __init__(self, u, node=None, pins=None, regs=None, div=1, cpu=1, on_pin_change=True, on_reg_change=True):
    """Prepare a GPIO/register capture that runs on a secondary CPU.

    Args:
        u: utility object; its ``proxy``, ``iface``, ``adt``, ``malloc``
            and ``mrs`` members are used here.
        node: ADT path of the GPIO block. Its first ``reg`` entry supplies
            the base address. With ``None`` the base stays 0 and
            pin-change triggering is forced off.
        pins: mapping of signal name -> pin number (at most 32 entries).
            ``start`` reads each pin at ``base + 4 * pin``.
        regs: mapping of signal name -> register address, or an
            ``(address, register class)`` tuple as stored by
            ``load_regmap``.
        div: counter ticks added between samples by the trace routine;
            must be positive.
        cpu: index of the CPU that runs the trace routine.
        on_pin_change: only record a sample when the pin bitmap changed.
        on_reg_change: only record a sample when a traced register changed.
    """
    # Build a fresh dict per instance. The previous ``{}`` defaults were
    # shared by every instance, and load_regmap() writes into self.regs.
    # Dicts passed in by the caller are still stored by reference.
    if pins is None:
        pins = {}
    if regs is None:
        regs = {}

    self.u = u
    self.p = u.proxy
    self.iface = u.iface
    self.cpu = cpu
    self.base = 0
    if node is not None:
        self.base = u.adt[node].get_reg(0)[0]
    else:
        # Without a GPIO node there are no pin registers to compare.
        on_pin_change = False
    self.node = node
    self.pins = pins
    self.regs = regs
    # Pin states are packed one bit per pin into a 32-bit sample word.
    assert len(pins) <= 32
    assert div > 0
    self.div = div
    # Buffer for the assembled trace routine; the data buffer is
    # allocated later by start().
    self.cbuf = self.u.malloc(0x1000)
    self.dbuf = None
    self.on_pin_change = on_pin_change
    self.on_reg_change = on_reg_change
    self.p.mmu_init_secondary(cpu)
    # Counter frequency, used by vcd() to convert ticks to nanoseconds.
    self.tfreq = u.mrs(CNTFRQ_EL0)
def vcd(self):
    """Render the captured samples as Value Change Dump (VCD) text.

    Reads ``self.pins``, ``self.regs``, ``self.data`` (as produced by
    ``complete``: tuples of timestamp, pin bitmap, then one value per
    traced register) and ``self.tfreq``.

    Returns:
        The VCD document as a single string. When no samples were
        captured only the declaration header is returned.
    """
    out = []
    out.append("""
$timescale 1ns $end
$scope module gpio $end
""")
    sym = 0
    keys = []    # VCD identifiers of the pins, in bit order
    rkeys = []   # per register: (identifier, register class or None, field map)

    for name in self.pins:
        keys.append(f"s{sym}")
        out.append(f"$var wire 1 s{sym} {name} $end\n")
        sym += 1
    for name, reg in self.regs.items():
        out.append(f"$var reg 32 s{sym} {name} [31:0] $end\n")
        if isinstance(reg, tuple):
            # (address, register class): also declare one signal per field.
            subkeys = {}
            rcls = reg[1]
            rkeys.append((f"s{sym}", rcls, subkeys))
            sym += 1
            for fname in rcls().fields.keys():
                fdef = getattr(rcls, fname)
                if isinstance(fdef, tuple):
                    width = fdef[0] - fdef[1] + 1
                else:
                    width = 1
                out.append(f"$var reg {width} s{sym} {name}.{fname} [{width-1}:0] $end\n")
                subkeys[fname] = (width, f"s{sym}")
                sym += 1
        else:
            rkeys.append((f"s{sym}", None, None))
            sym += 1
    out.append("""
$enddefinitions $end
$dumpvars
""")

    # An empty capture used to crash on self.data[0]; emit the header only.
    if not self.data:
        return "".join(out)

    def pin_states(bitmap):
        # One "<bit><identifier>" entry per pin, newline separated.
        return "\n".join(f"{(bitmap >> i) & 1}{k}" for i, k in enumerate(keys)) + "\n"

    # Timestamps are 32-bit counter values; make them relative to the
    # first sample and let the mask absorb counter wrap-around.
    first_ts = self.data[0][0]

    for sample in self.data:
        ticks = (sample[0] - first_ts) & 0xffffffff
        bitmap = sample[1]
        ns = max(0, 1000000000 * ticks // self.tfreq)
        out.append(f"#{ns}\n")
        out.append(pin_states(bitmap))
        for (key, rcls, subkeys), regval in zip(rkeys, sample[2:]):
            out.append(f"b{regval:032b} {key}\n")
            if rcls:
                decoded = rcls(regval)
                for field, (width, fkey) in subkeys.items():
                    fval = getattr(decoded, field)
                    out.append(f"b{fval:0{width}b} {fkey}\n")

    # Repeat the final pin state 10% later so the last sample has a
    # visible duration in the viewer.
    ns += ns // 10
    out.append(f"#{ns}\n" + pin_states(bitmap))

    return "".join(out)
class KernelRegmapAccessor:
    """Register accessor backed by a kernel regmap's debugfs directory.

    The directory is expected to hold a ``range`` file (lines of
    ``<first>-<last>`` in hex) and a ``registers`` file (fixed-length lines
    of ``<reg>: <value>`` in hex). Values are read by seeking to the line
    of the wanted register and written as ``"<reg> <value>"``.
    """

    def __init__(self, name):
        """Locate the regmap called *name* and read its layout."""
        self.path = self._find_path(name)
        self.read_ranges()
        self.read_linelen()

    @classmethod
    def _find_path(cls, name):
        """Resolve *name* to a regmap directory.

        Tried in order: *name* as a path, *name* as an entry under
        ``/sys/kernel/debug/regmap``, then any entry whose ``name`` file
        contains *name*.

        Raises:
            ValueError: no matching regmap was found.
        """
        basedir = Path("/sys/kernel/debug/regmap")

        if (path := Path(name)).exists():
            return path
        elif (path := basedir.joinpath(name)).exists():
            return path
        elif name in (available := cls._list_regmaps(basedir)):
            return available[name]
        else:
            raise ValueError(f"kernel regmap not found: {name}")

    @classmethod
    def _list_regmaps(cls, basedir):
        """Map the content of each entry's ``name`` file to its directory."""
        # read_bytes() closes the file again; the former open().read()
        # chain left one handle per regmap for the garbage collector.
        return {
            p.joinpath("name").read_bytes().strip().decode(): p
            for p in basedir.iterdir() if p.is_dir()
        }

    def open_node(self, name, mode="rb", **kwargs):
        """Open the file *name* inside the regmap directory."""
        return self.path.joinpath(name).open(mode, **kwargs)

    def read_ranges(self):
        """Parse ``range`` into ``self.ranges`` (last register inclusive)."""
        with self.open_node("range") as f:
            self.ranges = [
                range(int(first, 16), int(last, 16) + 1)
                for first, last in (line.strip().split(b"-") for line in f)
            ]

    def read_linelen(self):
        """Derive line length and value width from the first register line."""
        with self.open_node("registers", buffering=0) as f:
            line = f.read(64).split(b"\n")[0]
            valstr = line.split(b":")[1].strip()
            # +1 accounts for the newline removed by split().
            self.linelen = len(line) + 1
            # Each hex digit of the value carries four bits.
            self.working_width = len(valstr) * 4

    def _find_off(self, reg):
        """Return the byte offset of *reg*'s line in ``registers``.

        Raises:
            ValueError: *reg* is not covered by any range (including the
                case where no ranges were listed at all).
        """
        lines_before = 0
        for r in self.ranges:
            if reg in r:
                return (lines_before + reg - r.start) * self.linelen
            if reg < r.start:
                # Ranges are walked in file order; reg sits in a gap.
                break
            lines_before += r.stop - r.start
        raise ValueError(f"register {reg:04x} out of range")

    def _read(self, reg, width=None):
        """Read one register of exactly ``working_width`` bits."""
        assert width == self.working_width
        with self.open_node("registers", buffering=0) as f:
            f.seek(self._find_off(reg))
            line = f.read(self.linelen)
            regstr, valstr = line.split(b":")
            # Guards against the file layout differing from what the
            # offset computation assumed.
            assert int(regstr, 16) == reg
            return int(valstr, 16)

    def read(self, reg, width=None):
        """Read *width* bits starting at *reg*, lowest register first.

        *width* defaults to the regmap's native width and must be a
        multiple of it. Wider reads are assembled from consecutive
        registers, stepping the register number by the native width in
        bytes.
        """
        if width is None:
            width = self.working_width
        assert width % self.working_width == 0
        ret = 0
        for off in range(0, width // 8, self.working_width // 8):
            ret |= self._read(reg + off, self.working_width) << (8 * off)
        return ret

    def _write(self, reg, val, width=None):
        """Write one register of exactly ``working_width`` bits."""
        assert width == self.working_width
        with self.open_node("registers", mode="wb") as f:
            f.write(f"{reg:x} {val:x}".encode())

    def write(self, reg, val, width=None):
        """Write *width* bits of *val* starting at *reg*, lowest part first.

        *width* defaults to the regmap's native width and must be a
        multiple of it. Each register receives only its own slice of
        *val*.
        """
        if width is None:
            width = self.working_width
        assert width % self.working_width == 0
        # Without the mask the first chunk of a wide write would carry
        # the bits that belong to the following registers.
        mask = (1 << self.working_width) - 1
        for off in range(0, width // 8, self.working_width // 8):
            self._write(reg + off, (val >> (8 * off)) & mask, self.working_width)
import xnutools, shell + +from .gdbserver import * +from .types import * +from .virtutils import * +from .virtio import * + +__all__ = ["HV"] + +class HV(Reloadable): + PAC_MASK = 0xfffff00000000000 + + PTE_VALID = 1 << 0 + + PTE_MEMATTR_UNCHANGED = 0b1111 << 2 + PTE_S2AP_RW = 0b11 << 6 + PTE_SH_NS = 0b11 << 8 + PTE_ACCESS = 1 << 10 + PTE_ATTRIBUTES = PTE_ACCESS | PTE_SH_NS | PTE_S2AP_RW | PTE_MEMATTR_UNCHANGED + + SPTE_TRACE_READ = 1 << 63 + SPTE_TRACE_WRITE = 1 << 62 + SPTE_TRACE_UNBUF = 1 << 61 + SPTE_MAP = 0 << 50 + SPTE_HOOK = 1 << 50 + SPTE_PROXY_HOOK_R = 2 << 50 + SPTE_PROXY_HOOK_W = 3 << 50 + SPTE_PROXY_HOOK_RW = 4 << 50 + + MSR_REDIRECTS = { + SCTLR_EL1: SCTLR_EL12, + TTBR0_EL1: TTBR0_EL12, + TTBR1_EL1: TTBR1_EL12, + TCR_EL1: TCR_EL12, + ESR_EL1: ESR_EL12, + FAR_EL1: FAR_EL12, + AFSR0_EL1: AFSR0_EL12, + AFSR1_EL1: AFSR1_EL12, + MAIR_EL1: MAIR_EL12, + AMAIR_EL1: AMAIR_EL12, + CONTEXTIDR_EL1: CONTEXTIDR_EL12, + ACTLR_EL1: ACTLR_EL12, + AMX_CTL_EL1: AMX_CTL_EL12, + SPRR_CONFIG_EL1: SPRR_CONFIG_EL12, + SPRR_PERM_EL1: SPRR_PERM_EL12, + SPRR_PERM_EL0: SPRR_PERM_EL02, + SPRR_UNK1_EL1: SPRR_UNK1_EL12, + SPRR_UMASK0_EL1: SPRR_UMASK0_EL12, + APCTL_EL1: APCTL_EL12, + APSTS_EL1: APSTS_EL12, + KERNELKEYLO_EL1: KERNELKEYLO_EL12, + KERNELKEYHI_EL1: KERNELKEYHI_EL12, + GXF_CONFIG_EL1: GXF_CONFIG_EL12, + GXF_ABORT_EL1: GXF_ABORT_EL12, + GXF_ENTER_EL1: GXF_ENTER_EL12, + VBAR_GL1: VBAR_GL12, + SPSR_GL1: SPSR_GL12, + ASPSR_GL1: ASPSR_GL12, + ESR_GL1: ESR_GL12, + ELR_GL1: ELR_GL12, + } + + AIC_EVT_TYPE_HW = 1 + IRQTRACE_IRQ = 1 + + def __init__(self, iface, proxy, utils): + self.iface = iface + self.p = proxy + self.u = utils + self.pac_mask = self.PAC_MASK + self.user_pac_mask = self.PAC_MASK + self.vbar_el1 = None + self.want_vbar = None + self.vectors = [None] + self._bps = [None, None, None, None, None] + self._bp_hooks = dict() + self._wps = [None, None, None, None] + self._wpcs = [0, 0, 0, 0] + self.sym_offset = 0 + self.symbols = [] + self.symbol_dict = {} + self.sysreg 
= {0: {}} + self.novm = False + self._in_handler = False + self._sigint_pending = False + self._in_shell = False + self._gdbserver = None + self.vm_hooks = [None] + self.interrupt_map = {} + self.mmio_maps = DictRangeMap() + self.dirty_maps = BoolRangeMap() + self.tracer_caches = {} + self.shell_locals = {} + self.xnu_mode = False + self._update_shell_locals() + self.wdt_cpu = None + self.smp = True + self.hook_exceptions = False + self.started_cpus = set() + self.started = False + self.ctx = None + self.hvcall_handlers = {} + self.switching_context = False + self.show_timestamps = False + self.virtio_devs = {} + + def _reloadme(self): + super()._reloadme() + self._update_shell_locals() + + def _update_shell_locals(self): + self.shell_locals.update({ + "hv": self, + "iface": self.iface, + "p": self.p, + "u": self.u, + "trace": trace, + "TraceMode": TraceMode, + }) + + for attr in dir(self): + a = getattr(self, attr) + if callable(a): + self.shell_locals[attr] = getattr(self, attr) + + self.shell_locals["ctx"] = self.context + + def log(self, s, *args, show_cpu=True, **kwargs): + if self.ctx is not None and show_cpu: + ts="" + if self.show_timestamps: + ts = f"[{self.u.mrs(CNTPCT_EL0):#x}]" + print(ts+f"[cpu{self.ctx.cpu_id}] " + s, *args, **kwargs) + if self.print_tracer.log_file: + print(f"# {ts}[cpu{self.ctx.cpu_id}] " + s, *args, file=self.print_tracer.log_file, **kwargs) + else: + print(s, *args, **kwargs) + if self.print_tracer.log_file: + print("# " + s, *args, file=self.print_tracer.log_file, **kwargs) + + def unmap(self, ipa, size): + assert self.p.hv_map(ipa, 0, size, 0) >= 0 + + def map_hw(self, ipa, pa, size): + '''map IPA (Intermediate Physical Address) to actual PA''' + #print(f"map_hw {ipa:#x} -> {pa:#x} [{size:#x}]") + if (ipa & 0x3fff) != (pa & 0x3fff): + self.map_sw(ipa, pa, size) + return + + ipa_p = align_up(ipa) + if ipa_p != ipa: + self.map_sw(ipa, pa, min(ipa_p - ipa, size)) + pa += ipa_p - ipa + size -= ipa_p - ipa + + if size <= 0: + return 
+ + size_p = align_down(size) + if size_p > 0: + #print(f"map_hw real {ipa_p:#x} -> {pa:#x} [{size_p:#x}]") + assert self.p.hv_map(ipa_p, pa | self.PTE_ATTRIBUTES | self.PTE_VALID, size_p, 1) >= 0 + + if size_p != size: + self.map_sw(ipa_p + size_p, pa + size_p, size - size_p) + + def map_sw(self, ipa, pa, size): + #print(f"map_sw {ipa:#x} -> {pa:#x} [{size:#x}]") + assert self.p.hv_map(ipa, pa | self.SPTE_MAP, size, 1) >= 0 + + def map_hook(self, ipa, size, read=None, write=None, **kwargs): + index = len(self.vm_hooks) + self.vm_hooks.append((read, write, ipa, kwargs)) + self.map_hook_idx(ipa, size, index, read is not None, write is not None) + + def map_hook_idx(self, ipa, size, index, read=False, write=False, flags=0): + if read: + if write: + t = self.SPTE_PROXY_HOOK_RW + else: + t = self.SPTE_PROXY_HOOK_R + elif write: + t = self.SPTE_PROXY_HOOK_W + else: + assert False + + assert self.p.hv_map(ipa, (index << 2) | flags | t, size, 0) >= 0 + + def readmem(self, va, size): + '''read from virtual memory''' + with io.BytesIO() as buffer: + while size > 0: + pa = self.p.hv_translate(va, False, False) + if pa == 0: + break + + size_in_page = 4096 - (va % 4096) + if size < size_in_page: + buffer.write(self.iface.readmem(pa, size)) + break + + buffer.write(self.iface.readmem(pa, size_in_page)) + va += size_in_page + size -= size_in_page + + return buffer.getvalue() + + def writemem(self, va, data): + '''write to virtual memory''' + written = 0 + while written < len(data): + pa = self.p.hv_translate(va, False, True) + if pa == 0: + break + + size_in_page = 4096 - (va % 4096) + if len(data) - written < size_in_page: + self.iface.writemem(pa, data[written:]) + written = len(data) + break + + self.iface.writemem(pa, data[written:written + size_in_page]) + va += size_in_page + written += size_in_page + + return written + + def trace_irq(self, device, num, count, flags): + for n in range(num, num + count): + if flags & self.IRQTRACE_IRQ: + self.interrupt_map[n] = device + 
def pt_update(self):
    """Re-map every address range marked dirty to match its tracers.

    Does nothing when ``self.dirty_maps`` is empty. Otherwise each dirty
    zone is walked against ``self.mmio_maps``: gaps without an entry are
    unmapped, and each covered sub-range is mapped according to the mode
    of the entry that sorts highest. Finally the guest TLB is invalidated
    and the dirty set is cleared.
    """
    if not self.dirty_maps:
        return

    self.dirty_maps.compact()
    self.mmio_maps.compact()

    # Highest address handled so far; lets overlapping zones be skipped.
    top = 0

    for zone in self.dirty_maps:
        if zone.stop <= top:
            continue
        top = max(top, zone.start)

        for mzone, maps in self.mmio_maps.overlaps(zone):
            if mzone.stop <= top:
                continue
            if top < mzone.start:
                # Hole between the previous range and this one.
                self.unmap(top, mzone.start - top)
                self.log(f"PT[{top:09x}:{mzone.start:09x}] -> *UNMAPPED*")

            top = mzone.stop
            if not maps:
                continue
            # Entries are (mode, ident, read, write, kwargs) tuples; the
            # one sorting highest decides how the range gets mapped.
            maps = sorted(maps.values(), reverse=True)
            mode, ident, read, write, kwargs = maps[0]

            # Any tracer with a read/write callback needs that direction
            # trapped, not only the winning one.
            need_read = any(m[2] for m in maps)
            need_write = any(m[3] for m in maps)

            if mode == TraceMode.RESERVED:
                self.log(f"PT[{mzone.start:09x}:{mzone.stop:09x}] -> RESERVED {ident}")
                continue
            elif mode in (TraceMode.HOOK, TraceMode.SYNC):
                self.map_hook_idx(mzone.start, mzone.stop - mzone.start, 0,
                                  need_read, need_write)
                if mode == TraceMode.HOOK:
                    for m2, i2, r2, w2, k2 in maps[1:]:
                        if m2 == TraceMode.HOOK:
                            self.log(f"!! Conflict: HOOK {i2}")
            elif mode == TraceMode.WSYNC:
                flags = self.SPTE_TRACE_READ if need_read else 0
                self.map_hook_idx(mzone.start, mzone.stop - mzone.start, 0,
                                  False, need_write, flags=flags)
            elif mode in (TraceMode.UNBUF, TraceMode.ASYNC, TraceMode.BYPASS):
                pa = mzone.start
                if mode == TraceMode.UNBUF:
                    pa |= self.SPTE_TRACE_UNBUF
                if need_read:
                    pa |= self.SPTE_TRACE_READ
                if need_write:
                    pa |= self.SPTE_TRACE_WRITE
                self.map_sw(mzone.start, pa, mzone.stop - mzone.start)
            elif mode == TraceMode.OFF:
                self.map_hw(mzone.start, mzone.start, mzone.stop - mzone.start)
                self.log(f"PT[{mzone.start:09x}:{mzone.stop:09x}] -> HW:{ident}")
                continue

            rest = [m[1] for m in maps[1:] if m[0] != TraceMode.OFF]
            if rest:
                rest = " (+ " + ", ".join(rest) + ")"
            else:
                rest = ""

            # The W flag reflects the write callback; it used to repeat
            # the read test, so write-only tracers showed no flags and
            # read-only tracers showed "RW".
            self.log(f"PT[{mzone.start:09x}:{mzone.stop:09x}] -> {mode.name}.{'R' if read else ''}{'W' if write else ''} {ident}{rest}")

        if top < zone.stop:
            # Tail of the dirty zone not covered by any entry.
            self.unmap(top, zone.stop - top)
            self.log(f"PT[{top:09x}:{zone.stop:09x}] -> *UNMAPPED*")

    self.u.inst(0xd50c83df) # tlbi vmalls12e1is
    self.dirty_maps.clear()
def shutdown_gdbserver(self):
    '''shutdown gdbserver

    Does nothing when no gdbserver is active, so it is safe to call
    repeatedly or before gdbserver() was ever used.
    '''
    server = self._gdbserver
    if server is None:
        # Previously this dereferenced None and raised AttributeError.
        return
    server.shutdown()
    self._gdbserver = None
write_, kwargs = m + read = read_ or read + write = write_ or write + + maps = sorted(self.mmio_maps[evt.addr].values(), reverse=True) + for mode, ident, read, write, kwargs in maps: + if mode > TraceMode.WSYNC or (evt.flags.WRITE and mode > TraceMode.UNBUF): + print(f"ERROR: mmiotrace event but expected {mode.name} mapping") + continue + if mode == TraceMode.OFF: + continue + if evt.flags.WRITE: + if write: + self.shellwrap(lambda: write(evt, **kwargs), + f"Tracer {ident}:write ({mode.name})", update=do_update) + else: + if read: + self.shellwrap(lambda: read(evt, **kwargs), + f"Tracer {ident}:read ({mode.name})", update=do_update) + + def handle_vm_hook_mapped(self, ctx, data): + maps = sorted(self.mmio_maps[data.addr].values(), reverse=True) + + if not maps: + raise Exception(f"VM hook without a mapping at {data.addr:#x}") + + def do_update(): + nonlocal mode, ident, read, write, kwargs + read = lambda *args, **kwargs: None + write = lambda *args, **kwargs: None + + m = self.mmio_maps[data.addr].get(ident, None) + if not m: + return + + mode, ident, read_, write_, kwargs = m + read = read_ or read + write = write_ or write + + mode, ident, read, write, kwargs = maps[0] + + first = 0 + + val = data.data + + if mode not in (TraceMode.HOOK, TraceMode.SYNC, TraceMode.WSYNC): + raise Exception(f"VM hook with unexpected mapping at {data.addr:#x}: {maps[0][0].name}") + + if not data.flags.WRITE: + if mode == TraceMode.HOOK: + val = self.shellwrap(lambda: read(data.addr, 8 << data.flags.WIDTH, **kwargs), + f"Tracer {ident}:read (HOOK)", update=do_update, needs_ret=True) + + if not isinstance(val, list) and not isinstance(val, tuple): + val = [val] + first += 1 + elif mode == TraceMode.SYNC: + try: + val = self.u.read(data.addr, 8 << data.flags.WIDTH) + except: + self.log(f"MMIO read failed: {data.addr:#x} (w={data.flags.WIDTH})") + raise + if not isinstance(val, list) and not isinstance(val, tuple): + val = [val] + elif mode == TraceMode.WSYNC: + raise Exception(f"VM 
hook with unexpected mapping at {data.addr:#x}: {maps[0][0].name}") + + for i in range(1 << max(0, data.flags.WIDTH - 3)): + self.p.write64(ctx.data + 16 + 8 * i, val[i]) + + elif mode == TraceMode.HOOK: + first += 1 + + flags = data.flags.copy() + flags.CPU = self.ctx.cpu_id + width = data.flags.WIDTH + + if width > 3: + flags.WIDTH = 3 + flags.MULTI = 1 + + for i in range(1 << max(0, width - 3)): + evt = Container( + flags = flags, + reserved = 0, + pc = ctx.elr, + addr = data.addr + 8 * i, + data = val[i] + ) + + for mode, ident, read, write, kwargs in maps[first:]: + if flags.WRITE: + if write: + self.shellwrap(lambda: write(evt, **kwargs), + f"Tracer {ident}:write ({mode.name})", update=do_update) + else: + if read: + self.shellwrap(lambda: read(evt, **kwargs), + f"Tracer {ident}:read ({mode.name})", update=do_update) + + if data.flags.WRITE: + mode, ident, read, write, kwargs = maps[0] + + if data.flags.WIDTH <= 3: + wval = val[0] + else: + wval = val + + if mode == TraceMode.HOOK: + self.shellwrap(lambda: write(data.addr, wval, 8 << data.flags.WIDTH, **kwargs), + f"Tracer {ident}:write (HOOK)", update=do_update) + elif mode in (TraceMode.SYNC, TraceMode.WSYNC): + try: + self.u.write(data.addr, wval, 8 << data.flags.WIDTH) + except: + if data.flags.WIDTH > 3: + wval = wval[0] + self.log(f"MMIO write failed: {data.addr:#x} = {wval} (w={data.flags.WIDTH})") + raise + + return True + + def handle_vm_hook(self, ctx): + data = self.iface.readstruct(ctx.data, VMProxyHookData) + + if data.id == 0: + return self.handle_vm_hook_mapped(ctx, data) + + rfunc, wfunc, base, kwargs = self.vm_hooks[data.id] + + d = data.data + if data.flags.WIDTH < 3: + d = d[0] + + if data.flags.WRITE: + wfunc(base, data.addr - base, d, 8 << data.flags.WIDTH, **kwargs) + else: + val = rfunc(base, data.addr - base, 8 << data.flags.WIDTH, **kwargs) + if not isinstance(val, list) and not isinstance(val, tuple): + val = [val] + for i in range(1 << max(0, data.flags.WIDTH - 3)): + 
self.p.write64(ctx.data + 16 + 8 * i, val[i]) + + return True + + def handle_irqtrace(self, data): + evt = EvtIRQTrace.parse(data) + + if evt.type == self.AIC_EVT_TYPE_HW and evt.flags & self.IRQTRACE_IRQ: + dev = self.interrupt_map[int(evt.num)] + print(f"IRQ: {dev}: {evt.num}") + + def addr(self, addr): + unslid_addr = addr + self.sym_offset + if self.xnu_mode and (addr < self.tba.virt_base or unslid_addr < self.macho.vmin): + return f"0x{addr:x}" + + saddr, name = self.sym(addr) + + if name is None: + return f"0x{addr:x} (0x{unslid_addr:x})" + + return f"0x{addr:x} ({name}+0x{unslid_addr - saddr:x})" + + def resolve_symbol(self, name): + return self.symbol_dict[name] - self.sym_offset + + def sym(self, addr): + unslid_addr = addr + self.sym_offset + + if self.xnu_mode and (addr < self.tba.virt_base or unslid_addr < self.macho.vmin): + return None, None + + idx = bisect.bisect_left(self.symbols, (unslid_addr + 1, "")) - 1 + if idx < 0 or idx >= len(self.symbols): + return None, None + + return self.symbols[idx] + + def get_sym(self, addr): + a, name = self.sym(addr) + if addr == a: + return name + else: + return None + + def handle_msr(self, ctx, iss=None): + if iss is None: + iss = ctx.esr.ISS + iss = ESR_ISS_MSR(iss) + enc = iss.Op0, iss.Op1, iss.CRn, iss.CRm, iss.Op2 + + name = sysreg_name(enc) + + skip = set() + shadow = { + #SPRR_CONFIG_EL1, + #SPRR_PERM_EL0, + #SPRR_PERM_EL1, + VMSA_LOCK_EL1, + #SPRR_UNK1_EL1, + #SPRR_UNK2_EL1, + MDSCR_EL1, + } + ro = { + ACC_CFG_EL1, + ACC_OVRD_EL1, + } + xlate = { + DC_CIVAC, + } + for i in range(len(self._bps)): + shadow.add(DBGBCRn_EL1(i)) + shadow.add(DBGBVRn_EL1(i)) + for i in range(len(self._wps)): + shadow.add(DBGWCRn_EL1(i)) + shadow.add(DBGWVRn_EL1(i)) + + value = 0 + if enc in shadow: + if iss.DIR == MSR_DIR.READ: + value = self.sysreg[self.ctx.cpu_id].setdefault(enc, 0) + self.log(f"Shadow: mrs x{iss.Rt}, {name} = {value:x}") + if iss.Rt != 31: + ctx.regs[iss.Rt] = value + else: + if iss.Rt != 31: + value = 
ctx.regs[iss.Rt] + self.log(f"Shadow: msr {name}, x{iss.Rt} = {value:x}") + self.sysreg[self.ctx.cpu_id][enc] = value + elif enc in skip or (enc in ro and iss.DIR == MSR_DIR.WRITE): + if iss.DIR == MSR_DIR.READ: + self.log(f"Skip: mrs x{iss.Rt}, {name} = 0") + if iss.Rt != 31: + ctx.regs[iss.Rt] = 0 + else: + if iss.Rt != 31: + value = ctx.regs[iss.Rt] + self.log(f"Skip: msr {name}, x{iss.Rt} = {value:x}") + else: + if iss.DIR == MSR_DIR.READ: + enc2 = self.MSR_REDIRECTS.get(enc, enc) + value = self.u.mrs(enc2) + self.log(f"Pass: mrs x{iss.Rt}, {name} = {value:x} ({sysreg_name(enc2)})") + if iss.Rt != 31: + ctx.regs[iss.Rt] = value + else: + if iss.Rt != 31: + value = ctx.regs[iss.Rt] + enc2 = self.MSR_REDIRECTS.get(enc, enc) + sys.stdout.flush() + if enc in xlate: + value = self.p.hv_translate(value, True, False) + self.u.msr(enc2, value, call=self.p.gl2_call) + self.log(f"Pass: msr {name}, x{iss.Rt} = {value:x} (OK) ({sysreg_name(enc2)})") + + ctx.elr += 4 + + if self.hook_exceptions: + self.patch_exception_handling() + + return True + + def handle_impdef(self, ctx): + if ctx.esr.ISS == 0x20: + return self.handle_msr(ctx, ctx.afsr1) + + code = struct.unpack("<I", self.iface.readmem(ctx.elr_phys, 4)) + c = ARMAsm(".inst " + ",".join(str(i) for i in code), ctx.elr_phys) + insn = "; ".join(c.disassemble()) + + self.log(f"IMPDEF exception on: {insn}") + + return False + + def handle_hvc(self, ctx): + idx = ctx.esr.ISS + if idx == 0: + return False + + vector, target = self.vectors[idx] + if target is None: + self.log(f"EL1: Exception #{vector} with no target") + target = 0 + ok = False + else: + ctx.elr = target + ctx.elr_phys = self.p.hv_translate(target, False, False) + ok = True + + if (vector & 3) == EXC.SYNC: + spsr = SPSR(self.u.mrs(SPSR_EL12)) + esr = ESR(self.u.mrs(ESR_EL12)) + elr = self.u.mrs(ELR_EL12) + elr_phys = self.p.hv_translate(elr, False, False) + sp_el1 = self.u.mrs(SP_EL1) + sp_el0 = self.u.mrs(SP_EL0) + far = None + if esr.EC == ESR_EC.DABORT or 
def handle_break(self, ctx):
    """Handle a hardware breakpoint exception from the guest.

    Clears every breakpoint control register and arms single-step, so
    that the instruction at the breakpoint can execute; handle_step()
    re-enables the breakpoints afterwards. If a hook is registered for
    the faulting address it is called with *ctx*.

    Returns:
        True when a hook exists and returned a truthy value, else False.
    """
    # disable all breakpoints so that we don't get stuck
    # (count taken from the breakpoint table instead of a literal 5, in
    # line with handle_watch() and handle_step())
    for i in range(len(self._bps)):
        self.u.msr(DBGBCRn_EL1(i), 0)

    # we'll need to single step to enable these breakpoints again
    self.u.msr(MDSCR_EL1, MDSCR(SS=1, MDE=1).value)
    self.ctx.spsr.SS = 1

    hook = self._bp_hooks.get(ctx.elr)
    if hook is not None and hook(ctx):
        return True

    # Made explicit; callers only test the result for truthiness, so this
    # matches the former implicit None.
    return False
def _commit_context(self):
    """Write the exception context back to the target if it was modified.

    ``self.ctx`` is serialized with ``ExcInfo.build`` and compared against
    the bytes cached in ``self._info_data``; the memory at ``self.exc_info``
    is only rewritten (and the cache refreshed) when the two differ.
    """
    serialized = ExcInfo.build(self.ctx)
    if serialized == self._info_data:
        # Nothing changed, skip the write
        return

    self.iface.writemem(self.exc_info, serialized)
    self._info_data = serialized
def handle_bark(self, reason, code, info):
    """Handle a watchdog bark: open a panic shell, then exit the proxy.

    The ``reason``, ``code`` and ``info`` arguments are accepted to match the
    handler signature but are not used. Any pending SIGINT flag is discarded
    before the shell is entered.
    """
    self._in_handler = True
    self._sigint_pending = False

    # Use the saved SIGINT handler while the interactive shell is running,
    # then put the hypervisor's own handler back.
    signal.signal(signal.SIGINT, self.default_sigint)
    self.run_shell("Entering panic shell", "Exiting")
    signal.signal(signal.SIGINT, self._handle_sigint)

    self.p.exit(0)
def lower(self, step=False):
    """Redirect the current exception to the guest and resume execution.

    Switches back to the CPU that took the exception, then calls
    ``self._lower()``. If that fails nothing else happens; otherwise the
    guest is resumed with ``self.step()`` when *step* is true, or with
    ``self.cont()`` when it is not.
    """
    self.cpu()  # Return to exception CPU

    if self._lower():
        resume = self.step if step else self.cont
        resume()
def clear_hw_bps(self):
    """Remove every hardware breakpoint that is currently installed.

    ``self._bps`` has one entry per breakpoint slot and uses ``None`` for
    free slots. Only occupied slots are passed to ``remove_hw_bp()``; calling
    it with ``None`` would pick a free slot and reprogram its debug registers
    on every CPU for no effect.
    """
    # Iterate over a snapshot: remove_hw_bp() rewrites entries of self._bps.
    for vaddr in list(self._bps):
        if vaddr is not None:
            self.remove_hw_bp(vaddr)
def get_wp_bas(self, vaddr):
    """Return the BAS field of the hardware watchpoint installed at *vaddr*.

    Returns ``None`` when no watchpoint slot holds *vaddr*.
    """
    for slot_vaddr, wcr_value in zip(self._wps, self._wpcs):
        if slot_vaddr == vaddr:
            # self._wpcs holds raw integers (DBGWCR(...).value, or 0 for a
            # cleared slot), so wrap the value in the register class again
            # before reading a field from it.
            return DBGWCR(wcr_value).BAS
    return None
def set_logfile(self, fd):
    """Set the ``log_file`` attribute of the print tracer to *fd*."""
    tracer = self.print_tracer
    tracer.log_file = fd
self.iodev: + print(f"Disable iodev {iodev!s}") + self.p.iodev_set_usage(iodev, 0) + + print("Initializing hypervisor over iodev %s" % self.iodev) + self.p.hv_init() + + self.iface.set_handler(START.EXCEPTION_LOWER, EXC.SYNC, self.handle_exception) + self.iface.set_handler(START.EXCEPTION_LOWER, EXC.IRQ, self.handle_exception) + self.iface.set_handler(START.EXCEPTION_LOWER, EXC.FIQ, self.handle_exception) + self.iface.set_handler(START.EXCEPTION_LOWER, EXC.SERROR, self.handle_exception) + self.iface.set_handler(START.EXCEPTION, EXC.FIQ, self.handle_exception) + self.iface.set_handler(START.HV, HV_EVENT.USER_INTERRUPT, self.handle_exception) + self.iface.set_handler(START.HV, HV_EVENT.HOOK_VM, self.handle_exception) + self.iface.set_handler(START.HV, HV_EVENT.VTIMER, self.handle_exception) + self.iface.set_handler(START.HV, HV_EVENT.WDT_BARK, self.handle_bark) + self.iface.set_handler(START.HV, HV_EVENT.CPU_SWITCH, self.handle_exception) + self.iface.set_handler(START.HV, HV_EVENT.VIRTIO, self.handle_virtio) + self.iface.set_event_handler(EVENT.MMIOTRACE, self.handle_mmiotrace) + self.iface.set_event_handler(EVENT.IRQTRACE, self.handle_irqtrace) + + # Map MMIO ranges as HW by default + for r in self.adt["/arm-io"].ranges: + print(f"Mapping MMIO range: {r.parent_addr:#x} .. 
def map_vuart(self):
    """Map the virtual UART on top of the ``/arm-io/uart0`` ADT node.

    The node's first register base and first interrupt are passed to
    ``hv_map_vuart`` together with the current iodev, and the 0x4000-byte
    range at that base is registered as a reserved "VUART" tracer zone.
    """
    uart = self.adt["/arm-io/uart0"]
    mmio_base = uart.get_reg(0)[0]
    uart_irq = uart.interrupts[0]

    self.p.hv_map_vuart(mmio_base, uart_irq, self.iodev)
    self.add_tracer(irange(mmio_base, 0x4000), "VUART", TraceMode.RESERVED)
def start_secondary(self, die, cluster, cpu):
    """Start the guest secondary CPU identified by die/cluster/core number.

    The ADT "cpus" children are searched for the node whose ``reg`` equals
    ``(die << 11) | (cluster << 8) | cpu``. If none matches, a message is
    logged and nothing is started. Otherwise the entry point is read from the
    address in the node's ``cpu_impl_reg[0]`` (masked to the low 44 bits), the
    CPU is recorded as started with an empty sysreg table, and the
    hypervisor is asked to start it.
    """
    self.log(f"Starting guest secondary {die}:{cluster}:{cpu}")

    wanted_reg = (die << 11) | (cluster << 8) | cpu
    cpu_node = next(
        (candidate for candidate in list(self.adt["cpus"]) if wanted_reg == candidate.reg),
        None,
    )
    if cpu_node is None:
        self.log("CPU not found!")
        return

    rvbar = self.p.read64(cpu_node.cpu_impl_reg[0]) & 0xfffffffffff
    cpu_index = cpu_node.cpu_id
    self.log(f" CPU #{cpu_index}: RVBAR = {rvbar:#x}")

    self.sysreg[cpu_index] = {}
    self.started_cpus.add(cpu_index)
    self.p.hv_start_secondary(cpu_index, rvbar)
carveout_p = self.p.mcc_get_carveouts() + while True: + base = self.p.read64(carveout_p) + size = self.p.read64(carveout_p + 8) + if not base: + break + print(f" Unmap [{base:#x}..{base + size - 1:#x}]") + self.del_tracer(irange(base, size), "RAM-LOW") + self.del_tracer(irange(base, size), "RAM-HIGH") + carveout_p += 16 + + def enable_time_stealing(self): + self.p.hv_set_time_stealing(True) + + def disable_time_stealing(self): + self.p.hv_set_time_stealing(False) + + + def load_raw(self, image, entryoffset=0x800, use_xnu_symbols=False, vmin=0): + sepfw_start, sepfw_length = self.u.adt["chosen"]["memory-map"].SEPFW + tc_start, tc_size = self.u.adt["chosen"]["memory-map"].TrustCache + if hasattr(self.u.adt["chosen"]["memory-map"], "preoslog"): + preoslog_start, preoslog_size = self.u.adt["chosen"]["memory-map"].preoslog + else: + preoslog_size = 0 + + image_size = align(len(image)) + sepfw_off = image_size + image_size += align(sepfw_length) + preoslog_off = image_size + image_size += preoslog_size + self.bootargs_off = image_size + bootargs_size = 0x4000 + image_size += bootargs_size + + print(f"Total region size: 0x{image_size:x} bytes") + + self.phys_base = phys_base = guest_base = self.u.heap_top + self.ram_base = self.phys_base & ~0xffffffff + self.ram_size = self.u.ba.mem_size_actual + guest_base += 16 << 20 # ensure guest starts within a 16MB aligned region of mapped RAM + self.adt_base = guest_base + guest_base += align(self.u.ba.devtree_size) + tc_base = guest_base + guest_base += align(tc_size) + self.guest_base = guest_base + mem_top = self.u.ba.phys_base + self.u.ba.mem_size + mem_size = mem_top - phys_base + + print(f"Physical memory: 0x{phys_base:x} .. 
0x{mem_top:x}") + print(f"Guest region start: 0x{guest_base:x}") + + self.entry = guest_base + entryoffset + + print(f"Mapping guest physical memory...") + self.add_tracer(irange(self.ram_base, self.u.ba.phys_base - self.ram_base), "RAM-LOW", TraceMode.OFF) + self.add_tracer(irange(phys_base, self.u.ba.mem_size_actual - phys_base + self.ram_base), "RAM-HIGH", TraceMode.OFF) + self.unmap_carveouts() + + print(f"Loading kernel image (0x{len(image):x} bytes)...") + self.u.compressed_writemem(guest_base, image, True) + self.p.dc_cvau(guest_base, len(image)) + self.p.ic_ivau(guest_base, len(image)) + + print(f"Copying SEPFW (0x{sepfw_length:x} bytes)...") + self.p.memcpy8(guest_base + sepfw_off, sepfw_start, sepfw_length) + + print(f"Copying TrustCache (0x{tc_size:x} bytes)...") + self.p.memcpy8(tc_base, tc_start, tc_size) + + if hasattr(self.u.adt["chosen"]["memory-map"], "preoslog"): + print(f"Copying preoslog (0x{preoslog_size:x} bytes)...") + self.p.memcpy8(guest_base + preoslog_off, preoslog_start, preoslog_size) + + print(f"Adjusting addresses in ADT...") + self.adt["chosen"]["memory-map"].SEPFW = (guest_base + sepfw_off, sepfw_length) + self.adt["chosen"]["memory-map"].TrustCache = (tc_base, tc_size) + self.adt["chosen"]["memory-map"].DeviceTree = (self.adt_base, align(self.u.ba.devtree_size)) + self.adt["chosen"]["memory-map"].BootArgs = (guest_base + self.bootargs_off, bootargs_size) + if hasattr(self.u.adt["chosen"]["memory-map"], "preoslog"): + self.adt["chosen"]["memory-map"].preoslog = (guest_base + preoslog_off, preoslog_size) + + print(f"Setting up bootargs at 0x{guest_base + self.bootargs_off:x}...") + + self.tba.mem_size = mem_size + self.tba.phys_base = phys_base + self.tba.virt_base = 0xfffffe0010000000 + (phys_base & (32 * 1024 * 1024 - 1)) + self.tba.devtree = self.adt_base - phys_base + self.tba.virt_base + self.tba.top_of_kernel_data = guest_base + image_size + + if use_xnu_symbols == True: + self.sym_offset = vmin - guest_base + 
def load_system_map(self, path):
    """Load symbols from a Linux ``System.map``-style text file.

    Each useful line has the form ``<hex address> <type> <name>``. Blank
    lines and lines with fewer than three fields are skipped, and any fields
    after the name are ignored, so a stray empty line or an extra column no
    longer aborts the load.

    On success this resets ``sym_offset`` to 0 and ``xnu_mode`` to False,
    and replaces ``symbols`` (a sorted list of ``(address, name)`` tuples)
    and ``symbol_dict`` (name -> address). The attributes are only updated
    once the whole file has been parsed.

    Raises ``OSError`` if the file cannot be opened and ``ValueError`` if an
    address field is not valid hexadecimal.
    """
    symbols = []
    symbol_dict = {}
    with open(path) as fd:
        # Iterate lazily instead of materializing the file with readlines()
        for line in fd:
            fields = line.split()
            if len(fields) < 3:
                continue
            addr = int(fields[0], 16)
            name = fields[2]
            symbols.append((addr, name))
            symbol_dict[name] = addr
    symbols.sort()

    # Assume Linux
    self.sym_offset = 0
    self.xnu_mode = False
    self.symbols = symbols
    self.symbol_dict = symbol_dict
def __add_wp(self, addr, kind, lsc):
    """Install a hardware watchpoint for *kind* bytes starting at *addr*.

    The watchpoint is programmed at the 8-byte-aligned address containing
    *addr*, with a byte-select mask of *kind* consecutive bits beginning at
    the offset of *addr* within that 8-byte unit. *lsc* is forwarded
    unchanged to ``add_hw_wp``.

    Returns ``b"E01"`` when the requested range does not fit inside one
    8-byte unit, ``b"OK"`` otherwise.
    """
    byte_offset = addr & 7
    if byte_offset + kind > 8:
        return b"E01"

    aligned_addr = addr & ~7
    byte_select = ((1 << kind) - 1) << byte_offset
    self.__hv.add_hw_wp(aligned_addr, byte_select, lsc)
    return b"OK"
def __eval(self, data):
    """Evaluate one GDB remote-protocol packet and return the reply payload.

    *data* is the packet body as bytes; dispatch is on its first byte.
    The return value is always a bytes object; ``b""`` is returned for
    empty or unrecognised packets.

    Register numbering used by the ``p``/``P`` packets, as implemented
    below: 0-30 are ``ctx.regs``, 31 is ``ctx.sp[1]``, 32 is ``ctx.elr``,
    33 is ``ctx.spsr``, 34-65 are the SIMD ``q`` registers, 66 is FPSR and
    67 is FPCR.
    """
    if self.log:
        self.log(f"eval: {data}")

    if len(data) < 1:
        return b""

    if data[0] in b"?":
        return self.__stop_reply()

    if data[0] in b"c":
        if len(data) != 1:
            # Optional resume address
            self.__cpu(self.__hc)
            self.__hv.ctx.elr = int(data[1:].decode(), 16)

        self.__hv.cont()
        self.__wait_shell()
        return self.__stop_reply()

    if data[0] in b"g":
        self.__cpu(self.__hg)
        g = Container()
        g.regs = self.__hv.ctx.regs.copy()
        g.regs[31] = self.__hv.ctx.sp[1]
        g.pc = self.__hv.ctx.elr
        g.spsr = self.__hv.ctx.spsr.value
        g.q = self.__hv.u.q
        g.fpsr = self.__hv.u.mrs(FPSR)
        g.fpcr = self.__hv.u.mrs(FPCR)

        return bytes(GDBServer.__g.build(g).hex(), "utf-8")

    if data[0] in b"G":
        g = GDBServer.__g.parse(bytes.fromhex(data[1:].decode()))
        self.__cpu(self.__hg)

        for index in range(31):
            self.__hv.ctx.regs[index] = g.regs[index]

        self.__hv.ctx.sp[1] = g.regs[31]
        self.__hv.ctx.elr = g.pc
        # g.spsr is a plain integer parsed by Int32ul: store it into the
        # existing register object (as the "P" packet does) instead of
        # calling .value on an int and replacing the object.
        self.__hv.ctx.spsr.value = g.spsr

        q = self.__hv.u.q
        for index, value in enumerate(g.q):
            q[index] = value
        self.__hv.u.push_simd()

        self.__hv.u.msr(FPSR, g.fpsr, silent=True)
        # FPCR takes the fpcr field (previously written with fpsr)
        self.__hv.u.msr(FPCR, g.fpcr, silent=True)

        return b"OK"

    if data[0] in b"H":
        if len(data) > 1:
            if data[1] in b"c":
                cpu_id = int(data[2:].decode(), 16)
                if cpu_id in self.__hv.started_cpus:
                    self.__hc = cpu_id
                    return b"OK"

                return b"E01"

            if data[1] in b"g":
                cpu_id = int(data[2:].decode(), 16)
                if cpu_id in self.__hv.started_cpus:
                    self.__hg = cpu_id
                    return b"OK"

                return b"E01"

        return b""

    if data[0] in b"krR":
        self.__hv.reboot()

    if data[0] in b"m":
        split = GDBServer.__seperator.split(data[1:].decode(), maxsplit=1)
        fields = [int(field, 16) for field in split]
        return bytes(self.__hv.readmem(fields[0], fields[1]).hex(), "utf-8")

    if data[0] in b"M":
        split = GDBServer.__seperator.split(data[1:].decode(), maxsplit=2)
        mem = bytes.fromhex(split[2])[:int(split[1], 16)]
        if self.__hv.writemem(int(split[0], 16), mem) < len(mem):
            # Replies are bytes everywhere else; this one used to be a str
            return b"E22"

        return b"OK"

    if data[0] in b"p":
        number = int(data[1:].decode(), 16)
        self.__cpu(self.__hg)
        if number < 31:
            reg = GDBServer.__g.regs.subcon.subcon.build(self.__hv.ctx.regs[number])
        elif number == 31:
            reg = GDBServer.__g.regs.subcon.subcon.build(self.__hv.ctx.sp[1])
        elif number == 32:
            reg = GDBServer.__g.pc.build(self.__hv.ctx.elr)
        elif number == 33:
            reg = GDBServer.__g.spsr.build(self.__hv.ctx.spsr.value)
        elif number < 66:
            reg = GDBServer.__g.q.subcon.subcon.build(self.__hv.u.q[number - 34])
        elif number == 66:
            reg = GDBServer.__g.fpsr.build(self.__hv.u.mrs(FPSR))
        elif number == 67:
            reg = GDBServer.__g.fpcr.build(self.__hv.u.mrs(FPCR))
        else:
            return b"E01"

        return bytes(reg.hex(), "utf-8")

    if data[0] in b"P":
        partition = data[1:].partition(b"=")
        number = int(partition[0].decode(), 16)
        reg = bytes.fromhex(partition[2].decode())
        self.__cpu(self.__hg)
        if number < 31:
            # Mirror of the "p" packet: values are built with .build(), so
            # they are decoded with .parse()
            self.__hv.ctx.regs[number] = GDBServer.__g.regs.subcon.subcon.parse(reg)
        elif number == 31:
            # Register 31 is the stack pointer (ctx.sp[1] in "p" and "G"),
            # not general-purpose register 1
            self.__hv.ctx.sp[1] = GDBServer.__g.regs.subcon.subcon.parse(reg)
        elif number == 32:
            self.__hv.ctx.elr = GDBServer.__g.pc.parse(reg)
        elif number == 33:
            self.__hv.ctx.spsr.value = GDBServer.__g.spsr.parse(reg)
        elif number < 66:
            self.__hv.u.q[number - 34] = GDBServer.__g.q.subcon.subcon.parse(reg)
            self.__hv.u.push_simd()
        elif number == 66:
            self.__hv.u.msr(FPSR, GDBServer.__g.fpsr.parse(reg), silent=True)
        elif number == 67:
            self.__hv.u.msr(FPCR, GDBServer.__g.fpcr.parse(reg), silent=True)
        else:
            return b"E01"

        return b"OK"

    if data[0] in b"q":
        split = GDBServer.__seperator.split(data[1:].decode(), maxsplit=1)
        if split[0] == "C":
            # Compare against None explicitly: CPU 0 is a valid selection
            # and must not fall back to the current CPU.
            cpu_id = self.__hv.ctx.cpu_id if self.__hg is None else self.__hg
            return b"QC" + bytes(format(cpu_id, "x"), "utf-8")

        if split[0] == "fThreadInfo":
            cpu_ids = b",".join(bytes(format(cpu.cpu_id, "x"), "utf-8") for cpu in self.__hv.adt["cpus"])
            return b"m" + cpu_ids

        if split[0] == "sThreadInfo":
            return b"l"

        if split[0] == "Rcmd":
            # NOTE(review): this executes text received from the debugger
            # connection as Python code in the hypervisor shell namespace.
            self.__cpu(self.__hg)
            self.__hv.run_code(split[1])
            return b"OK"

        if split[0] == "Supported":
            return b"PacketSize=65536;qXfer:features:read+;hwbreak+"

        if split[0] == "ThreadExtraInfo":
            thread_id = int(split[1], 16)
            for node in self.__hv.adt["cpus"]:
                if node.cpu_id == thread_id:
                    return bytes(bytes(str(node), "utf-8").hex(), "utf-8")

            return b""

        if split[0] == "Xfer":
            xfer = GDBServer.__seperator.split(split[1], maxsplit=4)
            if xfer[0] == "features" and xfer[1] == "read":
                resource = os.path.join("features", xfer[2])
                annex = pkgutil.get_data(__name__, resource)
                if annex is None:
                    return b"E00"

                request_offset = int(xfer[3], 16)
                request_len = int(xfer[4], 16)
                read = annex[request_offset:request_offset + request_len]
                # "l" marks the last chunk, "m" means more data follows
                return (b"l" if len(read) < request_len else b"m") + read

            return b""

        if split[0] == "HostInfo":
            addressing_bits = bytes(str(64 - self.__hv.pac_mask.bit_count()), "utf-8")
            return b"cputype:16777228;cpusubtype:2;endian:little;ptrsize:64;watchpoint_exceptions_received:before;addressing_bits:" + addressing_bits + b";"

        return b""

    if data[0] in b"s":
        self.__cpu(self.__hc)

        if len(data) != 1:
            # Optional resume address
            self.__hv.ctx.elr = int(data[1:].decode(), 16)

        self.__hv.step()
        return self.__stop_reply()

    if data[0] in b"T":
        if int(data[1:].decode(), 16) in self.__hv.started_cpus:
            return b"OK"

        return b"E01"

    if data[0] in b"X":
        partition = data[1:].partition(b":")
        split = GDBServer.__seperator.split(partition[0].decode(), maxsplit=1)
        mem = partition[2][:int(split[1], 16)]
        if self.__hv.writemem(int(split[0], 16), mem) < len(mem):
            return b"E22"

        return b"OK"

    if data[0] in b"z":
        split = GDBServer.__seperator.split(data[1:].decode(), maxsplit=2)
        if split[0] == "1":
            self.__hv.remove_hw_bp(int(split[1], 16))
            return b"OK"

        if split[0] == "2":
            return self.__remove_wp(int(split[1], 16))

        if split[0] == "3":
            return self.__remove_wp(int(split[1], 16))

        if split[0] == "4":
            return self.__remove_wp(int(split[1], 16))

        return b""

    if data[0] in b"Z":
        split = GDBServer.__seperator.split(data[1:].decode(), maxsplit=2)
        if split[0] == "1":
            self.__hv.add_hw_bp(int(split[1], 16))
            return b"OK"

        if split[0] == "2":
            # Write watchpoint
            addr = int(split[1], 16)
            kind = int(split[2], 16)
            return self.__add_wp(addr, kind, DBGWCR_LSC.S)

        if split[0] == "3":
            # Read watchpoint
            addr = int(split[1], 16)
            kind = int(split[2], 16)
            return self.__add_wp(addr, kind, DBGWCR_LSC.L)

        if split[0] == "4":
            # Access (read or write) watchpoint
            addr = int(split[1], 16)
            kind = int(split[2], 16)
            return self.__add_wp(addr, kind, DBGWCR_LSC.S | DBGWCR_LSC.L)

        return b""

    return b""
def __send(self, prefix, data):
    """Frame ``data`` as a GDB packet and send it to the client.

    ``prefix`` (normally ``b"$"``) is emitted verbatim and excluded from
    the checksum.  The bytes ``# $ } *`` in ``data`` are escaped as ``}``
    followed by the byte XOR 0x20; the modulo-256 checksum of the escaped
    payload is appended as ``#xx``.
    """
    with io.BytesIO() as buffer:
        buffer.write(prefix)

        last = 0
        for index, byte in enumerate(data):
            if byte not in b"#$}*":
                continue

            # Flush the unescaped run, then emit the escaped byte.
            buffer.write(data[last:index])
            buffer.write(b"}")
            buffer.write(bytes([byte ^ 0x20]))
            last = index + 1

        buffer.write(data[last:])
        checksum = (sum(buffer.getvalue()) - sum(prefix)) % 256

        buffer.write(b"#")
        buffer.write(bytes(format(checksum, "02x"), "utf-8"))

        value = buffer.getvalue()

    if self.log:
        self.log(f"send: {value}")

    # sendall: send() may transmit only part of a large reply.
    self.__request.sendall(value)

def __handle(self, request):
    """Serve one GDB client connection until it closes.

    Interrupts the guest if it is running, then repeatedly extracts
    ``$payload#xx`` packets from the socket, verifies the checksum
    (answering ``+`` or ``-``), undoes escaping and run-length encoding,
    evaluates the packet and sends the reply.
    """
    self.__request = request
    input_buffer = b""

    if not self.__hv.in_shell:
        self.__hv.interrupt()
        self.__wait_shell()

    self.__interrupt_selector.register(self.__request, selectors.EVENT_READ)
    try:
        while True:
            recv = self.__request.recv(65536)
            if not recv:
                break

            input_buffer += recv

            while True:
                dollar = input_buffer.find(b"$")
                if dollar < 0:
                    # No packet start: discard acks and other noise.
                    input_buffer = b""
                    break

                sharp = input_buffer.find(b"#", dollar)
                if sharp < 0 or len(input_buffer) < sharp + 3:
                    # Incomplete packet: keep it and wait for more data.
                    input_buffer = input_buffer[dollar:]
                    break

                input_data = input_buffer[dollar + 1:sharp]
                input_checksum = input_buffer[sharp + 1:sharp + 3]
                input_buffer = input_buffer[sharp + 3:]

                try:
                    parsed_input_checksum = int(input_checksum.decode(), 16)
                except ValueError as error:
                    print(error)
                    continue

                if (sum(input_data) % 256) != parsed_input_checksum:
                    self.__request.send(b"-")
                    continue

                self.__request.send(b"+")

                try:
                    # Indexing bytes yields ints, so compare against the
                    # byte values of '}' (escape) and '*' (run-length).
                    input_decoded = bytearray()
                    input_index = 0
                    while input_index < len(input_data):
                        byte = input_data[input_index]
                        if byte == 0x2a:  # '*': repeat the previous byte
                            input_index += 1
                            input_run_len = input_data[input_index] - 29
                            input_decoded.extend(bytes([input_decoded[-1]]) * input_run_len)
                        elif byte == 0x7d:  # '}': next byte is XORed with 0x20
                            input_index += 1
                            input_decoded.append(input_data[input_index] ^ 0x20)
                        else:
                            input_decoded.append(byte)
                        input_index += 1

                    output_decoded = self.__eval(bytes(input_decoded))
                except Exception:
                    # Report any failure (including a malformed packet)
                    # to the client instead of dropping the connection.
                    output_decoded = b"E." + bytes(traceback.format_exc(), "utf-8")

                self.__send(b"$", output_decoded)
    finally:
        self.__interrupt_selector.unregister(self.__request)

def notify_in_shell(self):
    """Wake up ``__wait_shell``: the hypervisor has returned to the shell."""
    os.eventfd_write(self.__interrupt_eventfd, 1)

def activate(self):
    """Bind the server socket and start the serving thread.

    If the address is already in use, the stale socket file is removed and
    the bind is retried once; any other ``OSError`` propagates.
    """
    try:
        self.__server.server_bind()
    except OSError as error:
        if error.errno != errno.EADDRINUSE:
            raise

        os.remove(self.__server.server_address)
        self.__server.server_bind()

    self.__server.server_activate()
    self.__thread.start()

def shutdown(self):
    """Release the interrupt eventfd/selector and stop the server thread."""
    os.close(self.__interrupt_eventfd)
    self.__interrupt_selector.close()
    self.__server.shutdown()
    self.__server.server_close()
    self.__thread.join()
--> + +<!DOCTYPE feature SYSTEM "gdb-target.dtd"> +<feature name="org.gnu.gdb.aarch64.core"> + <reg name="x0" bitsize="64"/> + <reg name="x1" bitsize="64"/> + <reg name="x2" bitsize="64"/> + <reg name="x3" bitsize="64"/> + <reg name="x4" bitsize="64"/> + <reg name="x5" bitsize="64"/> + <reg name="x6" bitsize="64"/> + <reg name="x7" bitsize="64"/> + <reg name="x8" bitsize="64"/> + <reg name="x9" bitsize="64"/> + <reg name="x10" bitsize="64"/> + <reg name="x11" bitsize="64"/> + <reg name="x12" bitsize="64"/> + <reg name="x13" bitsize="64"/> + <reg name="x14" bitsize="64"/> + <reg name="x15" bitsize="64"/> + <reg name="x16" bitsize="64"/> + <reg name="x17" bitsize="64"/> + <reg name="x18" bitsize="64"/> + <reg name="x19" bitsize="64"/> + <reg name="x20" bitsize="64"/> + <reg name="x21" bitsize="64"/> + <reg name="x22" bitsize="64"/> + <reg name="x23" bitsize="64"/> + <reg name="x24" bitsize="64"/> + <reg name="x25" bitsize="64"/> + <reg name="x26" bitsize="64"/> + <reg name="x27" bitsize="64"/> + <reg name="x28" bitsize="64"/> + <reg name="x29" bitsize="64"/> + <reg name="x30" bitsize="64"/> + <reg name="sp" bitsize="64" type="data_ptr"/> + + <reg name="pc" bitsize="64" type="code_ptr"/> + + <flags id="cpsr_flags" size="4"> + <!-- Stack Pointer. --> + <field name="SP" start="0" end="0"/> + + <!-- Exception Level. --> + <field name="EL" start="2" end="3"/> + <!-- Execution state. --> + <field name="nRW" start="4" end="4"/> + + <!-- FIQ interrupt mask. --> + <field name="F" start="6" end="6"/> + <!-- IRQ interrupt mask. --> + <field name="I" start="7" end="7"/> + <!-- SError interrupt mask. --> + <field name="A" start="8" end="8"/> + <!-- Debug exception mask. --> + <field name="D" start="9" end="9"/> + + <!-- ARMv8.0-A: Speculative Store Bypass. --> + <field name="SSBS" start="12" end="12"/> + + <!-- Illegal Execution state. --> + <field name="IL" start="20" end="20"/> + <!-- Software Step. 
--> + <field name="SS" start="21" end="21"/> + <!-- ARMv8.1-A: Privileged Access Never. --> + <field name="PAN" start="22" end="22"/> + <!-- ARMv8.2-A: User Access Override. --> + <field name="UAO" start="23" end="23"/> + <!-- ARMv8.4-A: Data Independent Timing. --> + <field name="DIT" start="24" end="24"/> + <!-- ARMv8.5-A: Tag Check Override. --> + <field name="TCO" start="25" end="25"/> + + <!-- Overflow Condition flag. --> + <field name="V" start="28" end="28"/> + <!-- Carry Condition flag. --> + <field name="C" start="29" end="29"/> + <!-- Zero Condition flag. --> + <field name="Z" start="30" end="30"/> + <!-- Negative Condition flag. --> + <field name="N" start="31" end="31"/> + </flags> + <reg name="cpsr" bitsize="32" type="cpsr_flags"/> + +</feature> diff --git a/tools/proxyclient/m1n1/hv/gdbserver/features/aarch64-fpu.xml b/tools/proxyclient/m1n1/hv/gdbserver/features/aarch64-fpu.xml new file mode 100644 index 0000000..4db5c50 --- /dev/null +++ b/tools/proxyclient/m1n1/hv/gdbserver/features/aarch64-fpu.xml @@ -0,0 +1,160 @@ +<?xml version="1.0"?> +<!-- Copyright (C) 2009-2022 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + Copying and distribution of this file, with or without modification, + are permitted in any medium without royalty provided the copyright + notice and this notice are preserved. 
--> + +<!DOCTYPE feature SYSTEM "gdb-target.dtd"> +<feature name="org.gnu.gdb.aarch64.fpu"> + <vector id="v2d" type="ieee_double" count="2"/> + <vector id="v2u" type="uint64" count="2"/> + <vector id="v2i" type="int64" count="2"/> + <vector id="v4f" type="ieee_single" count="4"/> + <vector id="v4u" type="uint32" count="4"/> + <vector id="v4i" type="int32" count="4"/> + <vector id="v8f" type="ieee_half" count="8"/> + <vector id="v8u" type="uint16" count="8"/> + <vector id="v8i" type="int16" count="8"/> + <vector id="v8bf16" type="bfloat16" count="8"/> + <vector id="v16u" type="uint8" count="16"/> + <vector id="v16i" type="int8" count="16"/> + <vector id="v1u" type="uint128" count="1"/> + <vector id="v1i" type="int128" count="1"/> + <union id="vnd"> + <field name="f" type="v2d"/> + <field name="u" type="v2u"/> + <field name="s" type="v2i"/> + </union> + <union id="vns"> + <field name="f" type="v4f"/> + <field name="u" type="v4u"/> + <field name="s" type="v4i"/> + </union> + <union id="vnh"> + <field name="bf" type="v8bf16"/> + <field name="f" type="v8f"/> + <field name="u" type="v8u"/> + <field name="s" type="v8i"/> + </union> + <union id="vnb"> + <field name="u" type="v16u"/> + <field name="s" type="v16i"/> + </union> + <union id="vnq"> + <field name="u" type="v1u"/> + <field name="s" type="v1i"/> + </union> + <union id="aarch64v"> + <field name="d" type="vnd"/> + <field name="s" type="vns"/> + <field name="h" type="vnh"/> + <field name="b" type="vnb"/> + <field name="q" type="vnq"/> + </union> + <reg name="v0" bitsize="128" type="aarch64v" regnum="34"/> + <reg name="v1" bitsize="128" type="aarch64v" /> + <reg name="v2" bitsize="128" type="aarch64v" /> + <reg name="v3" bitsize="128" type="aarch64v" /> + <reg name="v4" bitsize="128" type="aarch64v" /> + <reg name="v5" bitsize="128" type="aarch64v" /> + <reg name="v6" bitsize="128" type="aarch64v" /> + <reg name="v7" bitsize="128" type="aarch64v" /> + <reg name="v8" bitsize="128" type="aarch64v" /> + <reg name="v9" 
bitsize="128" type="aarch64v" /> + <reg name="v10" bitsize="128" type="aarch64v"/> + <reg name="v11" bitsize="128" type="aarch64v"/> + <reg name="v12" bitsize="128" type="aarch64v"/> + <reg name="v13" bitsize="128" type="aarch64v"/> + <reg name="v14" bitsize="128" type="aarch64v"/> + <reg name="v15" bitsize="128" type="aarch64v"/> + <reg name="v16" bitsize="128" type="aarch64v"/> + <reg name="v17" bitsize="128" type="aarch64v"/> + <reg name="v18" bitsize="128" type="aarch64v"/> + <reg name="v19" bitsize="128" type="aarch64v"/> + <reg name="v20" bitsize="128" type="aarch64v"/> + <reg name="v21" bitsize="128" type="aarch64v"/> + <reg name="v22" bitsize="128" type="aarch64v"/> + <reg name="v23" bitsize="128" type="aarch64v"/> + <reg name="v24" bitsize="128" type="aarch64v"/> + <reg name="v25" bitsize="128" type="aarch64v"/> + <reg name="v26" bitsize="128" type="aarch64v"/> + <reg name="v27" bitsize="128" type="aarch64v"/> + <reg name="v28" bitsize="128" type="aarch64v"/> + <reg name="v29" bitsize="128" type="aarch64v"/> + <reg name="v30" bitsize="128" type="aarch64v"/> + <reg name="v31" bitsize="128" type="aarch64v"/> + + <flags id="fpsr_flags" size="4"> + <!-- Invalid Operation cumulative floating-point exception bit. --> + <field name="IOC" start="0" end="0"/> + <!-- Divide by Zero cumulative floating-point exception bit. --> + <field name="DZC" start="1" end="1"/> + <!-- Overflow cumulative floating-point exception bit. --> + <field name="OFC" start="2" end="2"/> + <!-- Underflow cumulative floating-point exception bit. --> + <field name="UFC" start="3" end="3"/> + <!-- Inexact cumulative floating-point exception bit.. --> + <field name="IXC" start="4" end="4"/> + <!-- Input Denormal cumulative floating-point exception bit. --> + <field name="IDC" start="7" end="7"/> + <!-- Cumulative saturation bit, Advanced SIMD only. 
--> + <field name="QC" start="27" end="27"/> + <!-- When AArch32 is supported at any Exception level and AArch32 + floating-point is implemented: Overflow condition flag for AArch32 + floating-point comparison operations. --> + <field name="V" start="28" end="28"/> + <!-- When AArch32 is supported at any Exception level and AArch32 + floating-point is implemented: + Carry condition flag for AArch32 floating-point comparison operations. + --> + <field name="C" start="29" end="29"/> + <!-- When AArch32 is supported at any Exception level and AArch32 + floating-point is implemented: + Zero condition flag for AArch32 floating-point comparison operations. + --> + <field name="Z" start="30" end="30"/> + <!-- When AArch32 is supported at any Exception level and AArch32 + floating-point is implemented: + Negative condition flag for AArch32 floating-point comparison + operations. --> + <field name="N" start="31" end="31"/> + </flags> + <reg name="fpsr" bitsize="32" type="fpsr_flags"/> + + <flags id="fpcr_flags" size="4"> + <!-- Flush Inputs to Zero (part of Armv8.7). --> + <field name="FIZ" start="0" end="0"/> + <!-- Alternate Handling (part of Armv8.7). --> + <field name="AH" start="1" end="1"/> + <!-- Controls how the output elements other than the lowest element of the + vector are determined for Advanced SIMD scalar instructions (part of + Armv8.7). --> + <field name="NEP" start="2" end="2"/> + <!-- Invalid Operation floating-point exception trap enable. --> + <field name="IOE" start="8" end="8"/> + <!-- Divide by Zero floating-point exception trap enable. --> + <field name="DZE" start="9" end="9"/> + <!-- Overflow floating-point exception trap enable. --> + <field name="OFE" start="10" end="10"/> + <!-- Underflow floating-point exception trap enable. --> + <field name="UFE" start="11" end="11"/> + <!-- Inexact floating-point exception trap enable. --> + <field name="IXE" start="12" end="12"/> + <!-- Input Denormal floating-point exception trap enable. 
--> + <field name="IDE" start="15" end="15"/> + <!-- Flush-to-zero mode control bit on half-precision data-processing + instructions. --> + <field name="FZ16" start="19" end="19"/> + <!-- Rounding Mode control field. --> + <field name="RMode" start="22" end="23"/> + <!-- Flush-to-zero mode control bit. --> + <field name="FZ" start="24" end="24"/> + <!-- Default NaN mode control bit. --> + <field name="DN" start="25" end="25"/> + <!-- Alternative half-precision control bit. --> + <field name="AHP" start="26" end="26"/> + </flags> + <reg name="fpcr" bitsize="32" type="fpcr_flags"/> +</feature> diff --git a/tools/proxyclient/m1n1/hv/gdbserver/features/target.xml b/tools/proxyclient/m1n1/hv/gdbserver/features/target.xml new file mode 100644 index 0000000..ca0454a --- /dev/null +++ b/tools/proxyclient/m1n1/hv/gdbserver/features/target.xml @@ -0,0 +1,8 @@ +<?xml version="1.0"?> +<!-- SPDX-License-Identifier: MIT --> +<!DOCTYPE target SYSTEM "gdb-target.dtd"> +<target version="1.0"> + <architecture>aarch64</architecture> + <xi:include href="aarch64-core.xml" /> + <xi:include href="aarch64-fpu.xml" /> +</target> diff --git a/tools/proxyclient/m1n1/hv/types.py b/tools/proxyclient/m1n1/hv/types.py new file mode 100644 index 0000000..0c3142e --- /dev/null +++ b/tools/proxyclient/m1n1/hv/types.py @@ -0,0 +1,60 @@ +# SPDX-License-Identifier: MIT +from construct import * +from enum import IntEnum + +from ..utils import * + +__all__ = [ + "MMIOTraceFlags", "EvtMMIOTrace", "EvtIRQTrace", "HV_EVENT", + "VMProxyHookData", "TraceMode", +] + +class MMIOTraceFlags(Register32): + ATTR = 31, 24 + CPU = 23, 16 + SH = 15, 14 + WIDTH = 4, 0 + WRITE = 5 + MULTI = 6 + +EvtMMIOTrace = Struct( + "flags" / RegAdapter(MMIOTraceFlags), + "reserved" / Int32ul, + "pc" / Hex(Int64ul), + "addr" / Hex(Int64ul), + "data" / Hex(Int64ul), +) + +EvtIRQTrace = Struct( + "flags" / Int32ul, + "type" / Hex(Int16ul), + "num" / Int16ul, +) + +class HV_EVENT(IntEnum): + HOOK_VM = 1 + VTIMER = 2 + USER_INTERRUPT = 
class VirtioDev:
    """Base class for virtio devices emulated by the hypervisor host side."""

    def __init__(self):
        self.base, self.hv = None, None # assigned by HV object

    def read_buf(self, desc):
        """Read the guest memory buffer a descriptor points at."""
        return self.hv.iface.readmem(desc.addr, desc.len)

    def read_desc(self, ctx, idx):
        """Read descriptor number ``idx`` from the table at ``ctx.descbase``."""
        off = VirtioDesc.sizeof() * idx
        return self.hv.iface.readstruct(ctx.descbase + off, VirtioDesc)

    @property
    def config_data(self):
        return b""

    @property
    def devid(self):
        return 0

    @property
    def num_qus(self):
        return 1

    @property
    def feats(self):
        return 0

class Virtio9PTransport(VirtioDev):
    """virtio 9P transport backed by a ``u9fs`` child process over pipes."""

    def __init__(self, tag="m1n1", root=None):
        """Spawn ``u9fs`` exporting ``root``.

        ``tag`` is the mount tag (str or bytes); ``root`` defaults to the
        directory three levels above this file.
        """
        p_stdin, self.fin = os.pipe()
        self.fout, p_stdout = os.pipe()
        if root is None:
            root = str(pathlib.Path(__file__).resolve().parents[3])
        if isinstance(tag, str):
            self.tag = tag.encode("ascii")
        else:
            self.tag = tag
        self.p = Popen([
            "u9fs",
            "-a", "none", # no auth
            "-n", # not a network conn
            "-u", os.getlogin(), # single user
            root,
        ], stdin=p_stdin, stdout=p_stdout, stderr=sys.stderr)

    @property
    def config_data(self):
        # 16-bit tag length followed by the tag itself
        return struct.pack("=H", len(self.tag)) + self.tag

    @property
    def devid(self):
        return 9

    @property
    def num_qus(self):
        return 1

    @property
    def feats(self):
        return 1

    def _read_exact(self, fd, count):
        """Read exactly ``count`` bytes from ``fd``.

        A single ``os.read`` on a pipe may return fewer bytes than asked
        for, so keep reading.  Raises ``EOFError`` if the pipe is closed
        before ``count`` bytes arrive.
        """
        chunks = []
        remaining = count
        while remaining > 0:
            chunk = os.read(fd, remaining)
            if not chunk:
                raise EOFError("9P server closed the pipe mid-message")
            chunks.append(chunk)
            remaining -= len(chunk)
        return b"".join(chunks)

    def call(self, req):
        """Send one 9P request and return the complete response message.

        The response starts with a 4-byte little-endian length that counts
        the whole message, including the length field itself.
        """
        os.write(self.fin, req)
        resp = self._read_exact(self.fout, 4)
        length = int.from_bytes(resp, byteorder="little")
        resp += self._read_exact(self.fout, length - 4)
        return resp

    def handle_exc(self, ctx):
        """Service one virtqueue request described by ``ctx``.

        Gathers the request from the readable descriptors of the chain,
        forwards it to the 9P server, scatters the response into the
        following descriptors and returns the buffer to the guest.
        """
        head = self.read_desc(ctx, ctx.idx)
        assert not head.flags.WRITE

        req = bytearray()

        # Device-readable descriptors carry the request.
        while not head.flags.WRITE:
            req += self.read_buf(head)

            if not head.flags.NEXT:
                break
            head = self.read_desc(ctx, head.next)

        resp = self.call(bytes(req))
        resplen = len(resp)

        # Remaining descriptors receive the response, one buffer at a time.
        while len(resp):
            self.hv.iface.writemem(head.addr, resp[:head.len])
            resp = resp[head.len:]
            if not head.flags.NEXT:
                break
            head = self.read_desc(ctx, head.next)

        self.hv.p.virtio_put_buffer(ctx.devbase, ctx.qu, ctx.idx, resplen)

        return True
def collect_aic_irqs_in_use(adt):
    """Return the set of interrupt numbers claimed by nodes wired to the AIC.

    A node counts when its ``interrupt_parent`` equals the phandle of
    ``/arm-io/aic``.
    """
    used = set()
    aic_phandle = getattr(adt["/arm-io/aic"], "AAPL,phandle")
    for node in adt.walk_tree():
        if not hasattr(node, "interrupt_parent") or \
           node.interrupt_parent != aic_phandle:
            continue
        used.update(node.interrupts)
    return used

def usable_aic_irq_range(adt):
    """Return the IRQ number range for the AIC version, or None if unknown."""
    # These are too optimistic but since we allocate
    # from the bottom of the range it doesn't matter much.
    return {
        "aic,1": range(0, 0x400),
        "aic,2": range(0, 0x1000),
    }.get(adt["/arm-io/aic"].compatible[0])

def alloc_aic_irq(adt):
    """Return the lowest unused AIC IRQ number, or None if none is available.

    None is also returned when the AIC's ``compatible`` string is not one
    of the known versions.
    """
    candidates = usable_aic_irq_range(adt)
    if candidates is None:
        return None
    used = collect_aic_irqs_in_use(adt)
    for no in candidates:
        if no not in used:
            return no
    return None

def usable_mmio_range(adt):
    """Return the parent address range covered by the first arm-io range."""
    arm_io_range = adt["arm-io"].ranges[0]
    return range(arm_io_range.parent_addr, arm_io_range.parent_addr + arm_io_range.size)

def alloc_mmio_base(adt, size, alignment=0x4000):
    """Find an aligned base address for ``size`` bytes in an unused MMIO zone.

    Returns the base address, or None if no device-free zone is large
    enough.
    """
    from m1n1.utils import align_up

    span = usable_mmio_range(adt)
    la = adt.build_addr_lookup()
    for zone, devs in la.populate(span):
        if len(devs) != 0:
            continue
        base = align_up(zone.start, alignment)
        if zone.stop > base + size:
            return base
    return None
+ F_2_WORDS = 1 + F_4_WORDS = 2 + +class R_BUSWIDTH(Register32): + WORD = 2, 0, E_BUSWIDTH + FRAME = 6, 4, E_FRAME + +class R_CARVEOUT(Register32): + SIZE = 31, 16 + BASE = 15, 0 + +class ADMACRegs(RegMap): + TX_EN = 0x0, Register32 # one bit per channel + TX_EN_CLR = 0x4, Register32 + + RX_EN = 0x8, Register32 + RX_EN_CLR = 0xc, Register32 + + UNK_CTL = 0x10, Register32 + + # each of the four registers represents an internal interrupt line, + # bits represent DMA channels which at the moment raise that particular line + # + # the irq-destination-index prop in ADT maybe selects the line which + # is actually wired out + # + TX_INTSTATE = irange(0x30, 4, 0x4), Register32 + RX_INTSTATE = irange(0x40, 4, 0x4), Register32 + + # a 24 MHz always-running counter, top bit is always set + COUNTER = 0x70, Register64 + + TX_SRAM_SIZE = 0x94, Register32 + RX_SRAM_SIZE = 0x98, Register32 + + # -- per-channel registers -- + + CHAN_CTL = (irange(0x8000, 32, 0x200)), R_CHAN_CONTROL + + CHAN_BUSWIDTH = (irange(0x8040, 32, 0x200)), R_BUSWIDTH + CHAN_SRAM_CARVEOUT = (irange(0x8050, 32, 0x200)), R_CARVEOUT + CHAN_BURSTSIZE = (irange(0x8054, 32, 0x200)), Register32 + + CHAN_RESIDUE = irange(0x8064, 32, 0x200), Register32 + + CHAN_DESC_RING = irange(0x8070, 32, 0x200), R_RING + CHAN_REPORT_RING = irange(0x8074, 32, 0x200), R_RING + + TX_DESC_WRITE = irange(0x10000, 16, 4), Register32 + TX_REPORT_READ = irange(0x10100, 16, 4), Register32 + + RX_DESC_WRITE = irange(0x14000, 16, 4), Register32 + RX_REPORT_READ = irange(0x14100, 16, 4), Register32 + + # per-channel, per-internal-line + CHAN_STATUS = (irange(0x8010, 32, 0x200), irange(0x0, 4, 0x4)), R_CHAN_STATUS + CHAN_INTMASK = (irange(0x8020, 32, 0x200), irange(0x0, 4, 0x4)), R_CHAN_STATUS + + +class ADMACDescriptorFlags(Register32): + # whether to raise DESC_DONE in CHAN_STATUS + NOTIFY = 16 + + # whether to repeat this descriptor ad infinitum + # + # once a descriptor with this flag is loaded, any descriptors loaded + # afterwards are 
class ADMACDescriptorFlags(Register32):
    # whether to raise DESC_DONE in CHAN_STATUS
    NOTIFY = 16

    # whether to repeat this descriptor ad infinitum
    #
    # once a descriptor with this flag is loaded, any descriptors loaded
    # afterwards are also repeated and nothing short of full power domain reset
    # seems to revoke that behaviour. this looks like a HW bug.
    REPEAT = 17

    # arbitrary ID propagated into reports
    DESC_ID = 7, 0

class ADMACDescriptor(Reloadable):
    """A DMA descriptor: buffer address, length in bytes and flags."""

    def __init__(self, addr, length, **flags):
        self.addr = addr
        self.length = length
        self.flags = ADMACDescriptorFlags(**flags)

    def __repr__(self):
        return f"<descriptor: addr=0x{self.addr:x} len=0x{self.length:x} flags={self.flags}>"

    def ser(self):
        """Serialize into the four 32-bit words written to DESC_WRITE."""
        return [
            self.addr & (1<<32)-1,
            self.addr>>32 & (1<<32)-1,
            self.length & (1<<32)-1,
            int(self.flags)
        ]

    @classmethod
    def deser(cls, seq):
        """Build a descriptor from four 32-bit words; ValueError otherwise."""
        if not len(seq) == 4:
            raise ValueError
        return ADMACDescriptor(
            seq[0] | seq[1] << 32, # addr
            seq[2], # length (in bytes)
            **ADMACDescriptorFlags(seq[3]).fields
        )


class ADMACReportFlags(Register32):
    UNK1 = 24
    UNK2 = 25
    UNK4 = 26 # memory access fault?
    UNK3 = 27
    DESC_ID = 7, 0

class ADMACReport(Reloadable):
    """A completion report: counter value, an unknown word and flags."""

    def __init__(self, countval, unk1, flags):
        self.countval, self.unk1, self.flags = countval, unk1, ADMACReportFlags(flags)

    def __repr__(self):
        return f"<report: countval=0x{self.countval:x} unk1=0x{self.unk1:x} flags={self.flags}>"

    def ser(self):
        """Serialize into four 32-bit words (the REPORT_READ layout)."""
        return [
            self.countval & (1<<32)-1,
            self.countval>>32 & (1<<32)-1,
            self.unk1 & (1<<32)-1,
            int(self.flags)
        ]

    @classmethod
    def deser(cls, seq):
        """Build a report from four 32-bit words; ValueError otherwise."""
        if not len(seq) == 4:
            raise ValueError
        return ADMACReport(
            seq[0] | seq[1] << 32, # countval
            seq[2], # unk1
            seq[3] # flags
        )


class ADMACChannel(Reloadable):
    """One ADMAC channel; even channel numbers are TX, odd ones RX."""

    def __init__(self, parent, channo):
        self.p = parent
        self.iface = parent.p.iface
        self.dart = parent.dart
        self.regs = parent.regs
        self.tx = (channo % 2) == 0
        self.rx = not self.tx
        self.ch = channo

        self._desc_id = 0
        self._submitted = {}
        self._last_report = None
        self._est_byte_rate = None

    def reset(self):
        """Reset both rings and counters, then apply default bus settings."""
        self.regs.CHAN_CTL[self.ch].set(RESET_RINGS=1, CLEAR_OF_UF_COUNTERS=1)
        self.regs.CHAN_CTL[self.ch].set(RESET_RINGS=0, CLEAR_OF_UF_COUNTERS=0)

        self.burstsize = 0xc0_0060
        self.buswidth = E_BUSWIDTH.W_32BIT
        self.framesize = E_FRAME.F_1_WORD

    def enable(self):
        """Unmask the channel's status bits on line 0 and enable it."""
        self.regs.CHAN_INTMASK[self.ch, 0].reg = \
            R_CHAN_STATUS(DESC_DONE=1, DESC_RING_EMPTY=1,
                          REPORT_RING_FULL=1, RING_ERR=1)

        # TX_EN/RX_EN hold one bit per channel of that direction.
        if self.tx:
            self.regs.TX_EN.val = 1 << (self.ch//2)
        else:
            self.regs.RX_EN.val = 1 << (self.ch//2)

    def disable(self):
        if self.tx:
            self.regs.TX_EN_CLR.val = 1 << (self.ch//2)
        else:
            self.regs.RX_EN_CLR.val = 1 << (self.ch//2)

    @property
    def buswidth(self):
        return self.regs.CHAN_BUSWIDTH[self.ch].reg.WORD

    @buswidth.setter
    def buswidth(self, wordsize):
        self.regs.CHAN_BUSWIDTH[self.ch].set(WORD=wordsize)

    @property
    def framesize(self):
        return self.regs.CHAN_BUSWIDTH[self.ch].reg.FRAME

    @framesize.setter
    def framesize(self, framesize):
        self.regs.CHAN_BUSWIDTH[self.ch].set(FRAME=framesize)

    @property
    def burstsize(self):
        return self.regs.CHAN_BURSTSIZE[self.ch].val

    @burstsize.setter
    def burstsize(self, size):
        self.regs.CHAN_BURSTSIZE[self.ch].val = size

    @property
    def sram_carveout(self):
        reg = self.regs.CHAN_SRAM_CARVEOUT[self.ch].reg
        return (reg.BASE, reg.SIZE)

    @sram_carveout.setter
    def sram_carveout(self, carveout):
        base, size = carveout
        self.regs.CHAN_SRAM_CARVEOUT[self.ch].reg = \
            R_CARVEOUT(BASE=base, SIZE=size)

    @property
    def DESC_WRITE(self):
        if self.tx:
            return self.regs.TX_DESC_WRITE[self.ch//2]
        else:
            return self.regs.RX_DESC_WRITE[self.ch//2]

    @property
    def REPORT_READ(self):
        if self.tx:
            return self.regs.TX_REPORT_READ[self.ch//2]
        else:
            return self.regs.RX_REPORT_READ[self.ch//2]

    def can_submit(self):
        return not self.regs.CHAN_DESC_RING[self.ch].reg.FULL

    def submit_desc(self, desc):
        """Write ``desc`` into the descriptor ring; raises if the ring is full."""
        if self.regs.CHAN_DESC_RING[self.ch].reg.FULL:
            raise Exception(f"ch{self.ch} descriptor ring full")

        if self.p.debug:
            print(f"admac: submitting (ch{self.ch}): {desc}", file=sys.stderr)

        for piece in desc.ser():
            self.DESC_WRITE.val = piece

        # Remember the descriptor so its report can be matched by DESC_ID.
        self._submitted[desc.flags.DESC_ID] = desc

    def submit(self, data=None, buflen=None, **kwargs):
        """Queue a transfer: ``data`` for TX channels, ``buflen`` bytes for RX."""
        if self.tx:
            assert data is not None
            buflen = len(data)
        else:
            assert buflen is not None

        iova = self.p.get_buffer(buflen)
        if self.tx:
            self.p.iowrite(iova, data)
        self.submit_desc(ADMACDescriptor(
            iova, buflen, DESC_ID=self._desc_id, NOTIFY=1, **kwargs
        ))
        # DESC_ID is an 8-bit field
        self._desc_id = (self._desc_id + 1) % 256

    def read_reports(self):
        """Drain the report ring.

        Returns the received bytes for RX channels and None for TX ones.
        """
        data = bytearray()

        while not self.regs.CHAN_REPORT_RING[self.ch].reg.EMPTY:
            pieces = []
            for _ in range(4):
                pieces.append(self.REPORT_READ.val)
            report = ADMACReport.deser(pieces)

            if report.flags.DESC_ID in self._submitted:
                desc = self._submitted[report.flags.DESC_ID]
            else:
                print(f"admac: stray report (ch{self.ch}): {report}", file=sys.stderr)
                desc = None

            if self.rx and desc and self.p.dart:
                data.extend(self.p.ioread(desc.addr, desc.length))

            if self.p.debug:
                est = ""
                if self._last_report and desc:
                    countval_delta = report.countval - self._last_report.countval
                    # Skip the estimate rather than divide by zero when two
                    # reports carry the same counter value.
                    if countval_delta:
                        est_rate = 24e6*desc.length/countval_delta/4
                        est = f"(estimated rate: {est_rate:.2f} dwords/s)"

                print(f"admac: picked up (ch{self.ch}): {report} {est}", file=sys.stderr)

            self._last_report = report

        return data if self.rx else None

    @property
    def status(self):
        return self.regs.CHAN_STATUS[self.ch, 0].reg

    def poll(self, wait=True):
        """Wait for DESC_DONE or RING_ERR, acknowledge, and read the reports.

        With ``wait=False`` the status is checked once, after a single
        1 ms sleep if nothing was pending.
        """
        while not (self.status.DESC_DONE or self.status.RING_ERR):
            time.sleep(0.001)

            if not wait:
                break

        self.regs.CHAN_STATUS[self.ch,0].reg = R_CHAN_STATUS(DESC_DONE=1)

        if self.status.RING_ERR:
            if self.p.debug:
                print(f"STATUS={self.regs.CHAN_STATUS[self.ch,1].reg} " + \
                      f"REPORT_RING={self.regs.CHAN_REPORT_RING[self.ch]} " + \
                      f"DESC_RING={self.regs.CHAN_DESC_RING[self.ch]}",
                      file=sys.stderr)
            # Writing ERR=1 to the rings clears the error condition.
            self.regs.CHAN_DESC_RING[self.ch].set(ERR=1)
            self.regs.CHAN_REPORT_RING[self.ch].set(ERR=1)

        return self.read_reports()
self.regs.CHAN_REPORT_RING[self.ch].set(ERR=1) + + return self.read_reports() + + +class ADMAC(Reloadable): + def __init__(self, u, devpath, dart=None, dart_stream=2, + reserved_size=4*1024*1024, debug=False): + self.u = u + self.p = u.proxy + self.debug = debug + + if type(devpath) is str: + adt_node = u.adt[devpath] + # ADT's #dma-channels counts pairs of RX/TX channel, so multiply by two + self.nchans = adt_node._properties["#dma-channels"] * 2 + self.base, _ = adt_node.get_reg(0) + else: + self.base = devpath + self.nchans = 26 + + self.regs = ADMACRegs(u, self.base) + self.dart, self.dart_stream = dart, dart_stream + + if dart is not None: + resmem_phys = u.heap.memalign(128*1024, reserved_size) + self.resmem_iova = self.dart.iomap(dart_stream, resmem_phys, reserved_size) + self.resmem_size = reserved_size + self.resmem_pos = 0 + self.dart.invalidate_streams(1 << dart_stream) + + self.chans = [ADMACChannel(self, no) for no in range(self.nchans)] + + def ioread(self, base, size): + assert self.dart is not None + return self.dart.ioread(self.dart_stream, base, size) + + def iowrite(self, base, data): + assert self.dart is not None + self.dart.iowrite(self.dart_stream, base, data) + + def fill_canary(self): + ranges = self.dart.iotranslate(self.dart_stream, + self.resmem_iova, self.resmem_size) + assert len(ranges) == 1 + start, size = ranges[0] + self.p.memset8(start, 0xba, size) + + def get_buffer(self, size): + assert size < self.resmem_size + + if self.resmem_pos + size > self.resmem_size: + self.resmem_pos = 0 + + bufptr = self.resmem_iova + self.resmem_pos + self.resmem_pos += size + return bufptr diff --git a/tools/proxyclient/m1n1/hw/aes.py b/tools/proxyclient/m1n1/hw/aes.py new file mode 100644 index 0000000..7e09335 --- /dev/null +++ b/tools/proxyclient/m1n1/hw/aes.py @@ -0,0 +1,110 @@ +# SPDX-License-Identifier: MIT +from ..utils import * +from enum import IntEnum +from .dart import DART, DARTRegs +import struct +from enum import IntEnum + + +class 
class AESControlReg(Register32):
    # Single-bit fields of the control word; AESRegs maps this layout as
    # R_CONTROL at offset 0x08.
    START = 0, 0
    STOP = 1, 1
    CLEAR_FIFO = 2, 2
    # TODO: not convinced about RESET anymore, I remember this un-broke the engine once but I can't reproduce that anymore
    RESET = 3, 3
class ASC:
    """Small mailbox driver built on top of ASCRegs.

    Starts/stops the CPU through CPU_CONTROL.RUN, exchanges two-word
    messages through the INBOX*/OUTBOX* registers and dispatches received
    messages to endpoints registered with add_ep(), keyed by the EP field
    of the second message word.
    """

    def __init__(self, u, asc_base):
        """Bind to the register block at *asc_base* using utility object *u*."""
        self.u = u
        self.p = u.proxy
        self.iface = u.iface
        self.asc = ASCRegs(u, asc_base)
        # Messages are traced on stdout when verbose >= 3.
        self.verbose = 0
        # Endpoint index -> endpoint object, filled by add_ep().
        self.epmap = {}

    def recv(self):
        """Read one message from the outbox.

        Returns ``(None, None)`` when OUTBOX_CTRL reports EMPTY, otherwise
        ``(msg0, msg1)`` where msg0 is the raw OUTBOX0 value and msg1 is the
        OUTBOX1 word decoded as R_OUTBOX1.
        """
        if self.asc.OUTBOX_CTRL.reg.EMPTY:
            return None, None

        msg0 = self.asc.OUTBOX0.val
        # Decode with the outbox layout (the one ASCRegs declares for
        # OUTBOX1); it carries EP in bits 7:0 like R_INBOX1, plus the
        # outbox-only count/pointer fields.
        msg1 = R_OUTBOX1(self.asc.OUTBOX1.val)
        if self.verbose >= 3:
            print(f"< {msg1.EP:02x}:{msg0:#x}")
        return msg0, msg1

    def send(self, msg0, msg1):
        """Write a two-word message to the inbox and wait for room.

        msg0 goes to INBOX0, msg1 to INBOX1; afterwards this busy-waits
        until INBOX_CTRL no longer reports FULL.  When tracing is enabled
        msg1 must expose an ``EP`` attribute.
        """
        self.asc.INBOX0.val = msg0
        self.asc.INBOX1.val = msg1

        if self.verbose >= 3:
            if isinstance(msg0, Register):
                print(f"> {msg1.EP:02x}:{msg0}")
            else:
                print(f"> {msg1.EP:02x}:{msg0:#x}")

        while self.asc.INBOX_CTRL.reg.FULL:
            pass

    def is_running(self):
        """Return True unless CPU_STATUS reports STOPPED."""
        return not self.asc.CPU_STATUS.reg.STOPPED

    def boot(self):
        """Set CPU_CONTROL.RUN."""
        self.asc.CPU_CONTROL.set(RUN=1)

    def shutdown(self):
        """Clear CPU_CONTROL.RUN."""
        self.asc.CPU_CONTROL.set(RUN=0)

    def add_ep(self, idx, ep):
        """Register endpoint *ep* for index *idx*.

        The endpoint is also exposed as an attribute named ``ep.SHORT``.
        """
        self.epmap[idx] = ep
        setattr(self, ep.SHORT, ep)

    def has_messages(self):
        """Return True when the outbox is not empty."""
        return not self.asc.OUTBOX_CTRL.reg.EMPTY

    def work_pending(self):
        """Process messages until the outbox is empty."""
        while self.has_messages():
            self.work()

    def work(self):
        """Process at most one outbox message.

        Returns True when there was nothing to read; otherwise returns the
        endpoint handler's result (False when no endpoint is registered for
        the message's EP).  Unhandled messages are printed.
        """
        # recv() performs the EMPTY check itself, so one status read is
        # enough and there is no window between check and read.
        msg0, msg1 = self.recv()
        if msg1 is None:
            return True

        ep = self.epmap.get(msg1.EP, None)
        handled = ep.handle_msg(msg0, msg1) if ep else False

        if not handled:
            print(f"unknown message: {msg0:#16x} / {msg1}")

        return handled

    def work_forever(self):
        """Call work() until it returns a false value (unhandled message)."""
        while self.work():
            pass

    def work_for(self, timeout):
        """Call work() repeatedly for *timeout* seconds."""
        # Monotonic clock: the deadline must not move when the wall clock
        # is adjusted.
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            self.work()
class E_ACIOPHY_CROSSBAR_PROTOCOL(IntEnum):
    # Encodings used for the PROTOCOL field (bits 4:1) of R_ACIOPHY_CROSSBAR.
    # Only these four codes are named; any other value has no member.
    USB4 = 0x0
    USB3 = 0x5
    USB3_DP = 0x8
    DP = 0xA
class R_AUSPMA_TX_SHM_TXA_IMP_REG3(Register32):
    # Bit layout that AtcPhyRegs maps at 0xD014 (LN0_..._IMP_REG3) and
    # 0x14014 (LN1_..._IMP_REG3).
    # Every value field is preceded by a one-bit *_OV flag at the next
    # lower bit position.
    # NOTE(review): *_OV presumably is an "override enable" for the field
    # that follows it -- confirm.
    PMA_TXA_MARGIN_POST_OV = 0
    PMA_TXA_MARGIN_POST = 10, 1
    PMA_TXA_MARGIN_POST_2R_OV = 11
    PMA_TXA_MARGIN_POST_2R = 12
    PMA_TXA_MARGIN_POST_4R_OV = 13
    PMA_TXA_MARGIN_POST_4R = 14
    PMA_TXA_MARGIN_PRE_OV = 15
    PMA_TXA_MARGIN_PRE = 21, 16
    PMA_TXA_MARGIN_PRE_2R_OV = 22
    PMA_TXA_MARGIN_PRE_2R = 23
    PMA_TXA_MARGIN_PRE_4R_OV = 24
    PMA_TXA_MARGIN_PRE_4R = 25
class R_AUSPMA_RX_SHM_TJ_RXA_TX_CTRL18(Register32):
    # Bit layout that AtcPhyRegs maps at 0xB050 (LN0_..._TX_CTRL18) and
    # 0x12050 (LN1_..._TX_CTRL18).
    # Here each value field is followed by its one-bit *_OV flag at the
    # next higher bit position; bits 31:23 are not described.
    # NOTE(review): *_OV presumably is an "override enable" for the field
    # before it -- confirm.
    PMA_RXA_TX_P1_CODE = 3, 0
    PMA_RXA_TX_P1_CODE_OV = 4
    PMA_RXA_TX_P1_LSB_CODE = 6, 5
    PMA_RXA_TX_P1_LSB_CODE_OV = 7
    PMA_RXA_TX_MARGIN_PRE = 10, 8
    PMA_RXA_TX_MARGIN_PRE_OV = 11
    PMA_RXA_TX_MARGIN_PRE_LSB = 13, 12
    PMA_RXA_TX_MARGIN_PRE_LSB_OV = 14
    PMA_RXA_TX_PRE_LSB_CODE = 16, 15
    PMA_RXA_TX_PRE_LSB_CODE_OV = 17
    PMA_RXA_TX_PRE_CODE = 21, 18
    PMA_RXA_TX_PRE_CODE_OV = 22
class R_ACIOPHY_LANE_DP_CFG_BLK_TX_DP_CTRL0(Register32):
    # Bit layout that AtcPhyRegs maps at offset 0x7000
    # (ACIOPHY_LANE_DP_CFG_BLK_TX_DP_CTRL0).  Bits 31:16 are not described.
    DP_PMA_BYTECLK_RESET = 0
    DP_MAC_DIV20_CLK_SEL = 1
    DPTXPHY_PMA_LANE_RESET_N = 2
    DPTXPHY_PMA_LANE_RESET_N_OV = 3
    # Three 3-bit clock select fields ...
    DPTX_PCLK1_SELECT = 6, 4
    DPTX_PCLK2_SELECT = 9, 7
    DPRX_PCLK_SELECT = 12, 10
    # ... followed by one enable bit for each of them.
    DPTX_PCLK1_ENABLE = 13
    DPTX_PCLK2_ENABLE = 14
    DPRX_PCLK_ENABLE = 15
4), Register32 + LPDPTX_AUX_CFG_BLK_AUX_CTRL = 0x50000, Register32 + LPDPTX_AUX_CFG_BLK_AUX_LDO_CTRL = 0x50008, Register32 + LPDPTX_AUX_CFG_BLK_AUX_MARGIN = 0x5000c, Register32 + LPDPTX_AUX_SHM_CFG_BLK_AUX_CTRL_REG0 = 0x50204, Register32 + LPDPTX_AUX_SHM_CFG_BLK_AUX_CTRL_REG1 = 0x50208, Register32 + + LN0_AUSPMA_RX_TOP = irange(0x9000, 0x1000 // 4, 4), Register32 + LN0_AUSPMA_RX_TOP_TJ_CFG_RX_TXMODE = 0x90F0, R_AUSPMA_RX_TOP_TJ_CFG_RX_TXMODE + + LN0_AUSPMA_RX_EQ = irange(0xA000, 0x1000 // 4, 4), Register32 + + LN0_AUSPMA_RX_SHM = irange(0xB000, 0x1000 // 4, 4), Register32 + LN0_AUSPMA_RX_SHM_TJ_RXA_CTLE_CTRL0 = 0xB000, R_AUSPMA_RX_SHM_TJ_RXA_CTLE_CTRL0 + LN0_AUSPMA_RX_SHM_TJ_RXA_AFE_CTRL1 = 0xB004, R_AUSPMA_RX_SHM_TJ_RXA_AFE_CTRL1 + LN0_AUSPMA_RX_SHM_TJ_RXA_UNK_CTRL2 = 0xB008, Register32 + LN0_AUSPMA_RX_SHM_TJ_RXA_UNK_CTRL3 = 0xB00C, Register32 + LN0_AUSPMA_RX_SHM_TJ_RXA_UNK_CTRL4 = 0xB010, Register32 + LN0_AUSPMA_RX_SHM_TJ_RXA_UNK_CTRL5 = 0xB014, Register32 + LN0_AUSPMA_RX_SHM_TJ_RXA_UNK_CTRL6 = 0xB018, Register32 + LN0_AUSPMA_RX_SHM_TJ_RXA_UNK_CTRL7 = 0xB01C, Register32 + LN0_AUSPMA_RX_SHM_TJ_RXA_UNK_CTRL8 = 0xB020, Register32 + LN0_AUSPMA_RX_SHM_TJ_RXA_UNK_CTRL9 = 0xB024, Register32 + LN0_AUSPMA_RX_SHM_TJ_RXA_DFE_CTRL10 = 0xB028, R_AUSPMA_RX_SHM_TJ_RXA_DFE_CTRL10 + LN0_AUSPMA_RX_SHM_TJ_RXA_DFE_CTRL11 = 0xB02C, R_AUSPMA_RX_SHM_TJ_RXA_DFE_CTRL11 + LN0_AUSPMA_RX_SHM_TJ_RXA_DFE_CTRL12 = 0xB030, R_AUSPMA_RX_SHM_TJ_RXA_DFE_CTRL12 + LN0_AUSPMA_RX_SHM_TJ_RXA_DFE_CTRL13 = 0xB034, R_AUSPMA_RX_SHM_TJ_RXA_DFE_CTRL13 + LN0_AUSPMA_RX_SHM_TJ_UNK_CTRL14A = 0xB038, Register32 + LN0_AUSPMA_RX_SHM_TJ_UNK_CTRL14B = 0xB03C, Register32 + LN0_AUSPMA_RX_SHM_TJ_UNK_CTRL15A = 0xB040, Register32 + LN0_AUSPMA_RX_SHM_TJ_UNK_CTRL15B = 0xB044, Register32 + LN0_AUSPMA_RX_SHM_TJ_RXA_SAVOS_CTRL16 = 0xB048, R_AUSPMA_RX_SHM_TJ_RXA_SAVOS_CTRL16 + LN0_AUSPMA_RX_SHM_TJ_RXA_TX_CTRL17 = 0xB04C, R_AUSPMA_RX_SHM_TJ_RXA_TX_CTRL17 + LN0_AUSPMA_RX_SHM_TJ_RXA_TX_CTRL18 = 0xB050, 
R_AUSPMA_RX_SHM_TJ_RXA_TX_CTRL18 + LN0_AUSPMA_RX_SHM_TJ_RXA_TERM_CTRL19 = 0xB054, R_AUSPMA_RX_SHM_TJ_RXA_TERM_CTRL19 + LN0_AUSPMA_RX_SHM_TJ_RXA_UNK_CTRL20 = 0xB058, Register32 + LN0_AUSPMA_RX_SHM_TJ_RXA_UNK_CTRL21 = 0xB05C, Register32 + LN0_AUSPMA_RX_SHM_TJ_RXA_VREF_CTRL22 = 0xB060, R_AUSPMA_RX_SHM_TJ_RXA_VREF_CTRL22 + + LN0_AUSPMA_TX_TOP = irange(0xC000, 0x1000 // 4, 4), Register32 + + LN0_AUSPMA_TX_SHM = irange(0xD000, 0x1000 // 4, 4), Register32 + LN0_AUSPMA_TX_SHM_TXA_CFG_MAIN_REG0 = 0xD000, R_AUSPMA_TX_SHM_TXA_CFG_MAIN_REG0 + LN0_AUSPMA_TX_SHM_TXA_CFG_MAIN_REG1 = 0xD004, R_AUSPMA_TX_SHM_TXA_CFG_MAIN_REG1 + LN0_AUSPMA_TX_SHM_TXA_IMP_REG0 = 0xD008, R_AUSPMA_TX_SHM_TXA_IMP_REG0 + LN0_AUSPMA_TX_SHM_TXA_IMP_REG1 = 0xD00C, Register32 + LN0_AUSPMA_TX_SHM_TXA_IMP_REG2 = 0xD010, R_AUSPMA_TX_SHM_TXA_IMP_REG2 + LN0_AUSPMA_TX_SHM_TXA_IMP_REG3 = 0xD014, R_AUSPMA_TX_SHM_TXA_IMP_REG3 + LN0_AUSPMA_TX_SHM_TXA_UNK_REG0 = 0xD018, Register32 + LN0_AUSPMA_TX_SHM_TXA_UNK_REG1 = 0xD01C, Register32 + LN0_AUSPMA_TX_SHM_TXA_UNK_REG2 = 0xD020, Register32 + LN0_AUSPMA_TX_SHM_TXA_LDOCLK = 0xD024, R_AUSPMA_TX_SHM_TXA_LDOCLK + + LN1_AUSPMA_RX_TOP = irange(0x10000, 0x1000 // 4, 4), Register32 + LN1_AUSPMA_RX_TOP_TJ_CFG_RX_TXMODE = 0x100F0, R_AUSPMA_RX_TOP_TJ_CFG_RX_TXMODE + + LN1_AUSPMA_RX_EQ = irange(0x11000, 0x1000 // 4, 4), Register32 + + LN1_AUSPMA_RX_SHM = irange(0x12000, 0x1000 // 4, 4), Register32 + LN1_AUSPMA_RX_SHM_TJ_RXA_CTLE_CTRL0 = 0x12000, R_AUSPMA_RX_SHM_TJ_RXA_CTLE_CTRL0 + LN1_AUSPMA_RX_SHM_TJ_RXA_AFE_CTRL1 = 0x12004, R_AUSPMA_RX_SHM_TJ_RXA_AFE_CTRL1 + LN1_AUSPMA_RX_SHM_TJ_RXA_UNK_CTRL2 = 0x12008, Register32 + LN1_AUSPMA_RX_SHM_TJ_RXA_UNK_CTRL3 = 0x1200C, Register32 + LN1_AUSPMA_RX_SHM_TJ_RXA_UNK_CTRL4 = 0x12010, Register32 + LN1_AUSPMA_RX_SHM_TJ_RXA_UNK_CTRL5 = 0x12014, Register32 + LN1_AUSPMA_RX_SHM_TJ_RXA_UNK_CTRL6 = 0x12018, Register32 + LN1_AUSPMA_RX_SHM_TJ_RXA_UNK_CTRL7 = 0x1201C, Register32 + LN1_AUSPMA_RX_SHM_TJ_RXA_UNK_CTRL8 = 0x12020, Register32 + 
LN1_AUSPMA_RX_SHM_TJ_RXA_UNK_CTRL9 = 0x12024, Register32 + LN1_AUSPMA_RX_SHM_TJ_RXA_DFE_CTRL10 = 0x12028, R_AUSPMA_RX_SHM_TJ_RXA_DFE_CTRL10 + LN1_AUSPMA_RX_SHM_TJ_RXA_DFE_CTRL11 = 0x1202C, R_AUSPMA_RX_SHM_TJ_RXA_DFE_CTRL11 + LN1_AUSPMA_RX_SHM_TJ_RXA_DFE_CTRL12 = 0x12030, R_AUSPMA_RX_SHM_TJ_RXA_DFE_CTRL12 + LN1_AUSPMA_RX_SHM_TJ_RXA_DFE_CTRL13 = 0x12034, R_AUSPMA_RX_SHM_TJ_RXA_DFE_CTRL13 + LN1_AUSPMA_RX_SHM_TJ_UNK_CTRL14A = 0x12038, Register32 + LN1_AUSPMA_RX_SHM_TJ_UNK_CTRL14B = 0x1203C, Register32 + LN1_AUSPMA_RX_SHM_TJ_UNK_CTRL15A = 0x12040, Register32 + LN1_AUSPMA_RX_SHM_TJ_UNK_CTRL15B = 0x12044, Register32 + LN1_AUSPMA_RX_SHM_TJ_RXA_SAVOS_CTRL16 = 0x12048, R_AUSPMA_RX_SHM_TJ_RXA_SAVOS_CTRL16 + LN1_AUSPMA_RX_SHM_TJ_RXA_TX_CTRL17 = 0x1204C, R_AUSPMA_RX_SHM_TJ_RXA_TX_CTRL17 + LN1_AUSPMA_RX_SHM_TJ_RXA_TX_CTRL18 = 0x12050, R_AUSPMA_RX_SHM_TJ_RXA_TX_CTRL18 + LN1_AUSPMA_RX_SHM_TJ_RXA_TERM_CTRL19 = 0x12054, R_AUSPMA_RX_SHM_TJ_RXA_TERM_CTRL19 + LN1_AUSPMA_RX_SHM_TJ_RXA_UNK_CTRL20 = 0x12058, Register32 + LN1_AUSPMA_RX_SHM_TJ_RXA_UNK_CTRL21 = 0x1205C, Register32 + LN1_AUSPMA_RX_SHM_TJ_RXA_VREF_CTRL22 = 0x12060, R_AUSPMA_RX_SHM_TJ_RXA_VREF_CTRL22 + + LN1_AUSPMA_TX_TOP = irange(0x13000, 0x1000 // 4, 4), Register32 + + LN1_AUSPMA_TX_SHM = irange(0x14000, 0x1000 // 4, 4), Register32 + LN1_AUSPMA_TX_SHM_TXA_CFG_MAIN_REG0 = 0x14000, R_AUSPMA_TX_SHM_TXA_CFG_MAIN_REG0 + LN1_AUSPMA_TX_SHM_TXA_CFG_MAIN_REG1 = 0x14004, R_AUSPMA_TX_SHM_TXA_CFG_MAIN_REG1 + LN1_AUSPMA_TX_SHM_TXA_IMP_REG0 = 0x14008, R_AUSPMA_TX_SHM_TXA_IMP_REG0 + LN1_AUSPMA_TX_SHM_TXA_IMP_REG1 = 0x1400C, Register32 + LN1_AUSPMA_TX_SHM_TXA_IMP_REG2 = 0x14010, R_AUSPMA_TX_SHM_TXA_IMP_REG2 + LN1_AUSPMA_TX_SHM_TXA_IMP_REG3 = 0x14014, R_AUSPMA_TX_SHM_TXA_IMP_REG3 + LN1_AUSPMA_TX_SHM_TXA_UNK_REG0 = 0x14018, Register32 + LN1_AUSPMA_TX_SHM_TXA_UNK_REG1 = 0x1401C, Register32 + LN1_AUSPMA_TX_SHM_TXA_UNK_REG2 = 0x14020, Register32 + LN1_AUSPMA_TX_SHM_TXA_LDOCLK = 0x14024, R_AUSPMA_TX_SHM_TXA_LDOCLK + + ACIOPHY_TOP_TUNABLE_118 = 
class E_RX_SCFG(IntEnum):
    # All four encodings of the two-bit RX_SCFG field (bits 5:4) of
    # R_TDM_CFG2.
    I2C_OFFSET = 0
    LEFT = 1
    RIGHT = 2
    DOWNMIX = 3
class SN012776Regs(RegMap):
    # Register map: each entry is (register address, layout class).
    MODE_CTRL = 0x002, R_MODE_CTRL
    CHNL_0 = 0x003, R_CHNL_0
    DVC = 0x01a, R_DVC

    # Interrupt mask registers.  Note that INT_MASK4 is listed last but
    # lives at 0x03d, directly after INT_MASK1 (0x03c) and before
    # INT_MASK2 (0x040).
    INT_MASK0 = 0x03b, R_INT_MASK0
    INT_MASK1 = 0x03c, Register8
    INT_MASK2 = 0x040, Register8
    INT_MASK3 = 0x041, Register8
    INT_MASK4 = 0x03d, Register8

    # Latched interrupt registers; INT_LTCH0 reuses the bit layout of
    # INT_MASK0 (R_INT_MASK0).
    INT_LTCH0 = 0x049, R_INT_MASK0
    INT_LTCH1 = 0x04a, Register8
    INT_LTCH1_0 = 0x04b, Register8
    INT_LTCH2 = 0x04f, Register8
    INT_LTCH3 = 0x050, Register8
    INT_LTCH4 = 0x051, Register8

    INT_CLK_CFG = 0x05c, R_INT_CLK_CFG
class R_CCM_CTRL3(Register8):
    # Layout of the CCM_CTRL3 register (0x602 in CS42L84Regs).
    REFCLK_DIV = 2, 1, E_REFCLK_DIV
    REFCLK_IS_MCLK = 0 # BCLK otherwise
class E_HSBIAS_SENSE_TRIP(IntEnum):
    # All eight encodings of the three-bit SENSE_TRIP field (bits 2:0) of
    # R_HSBIAS_SC_AUTOCTL.
    C_12UA = 0
    C_23UA = 1
    C_41UA = 2
    C_52UA = 3
    C_64UA = 4
    C_75UA = 5
    C_93UA = 6
    C_104UA = 7
+ BCLK_EN = 1 + +class R_ASP_FSYNC_CTRL23(Register16): + BCLK_PERIOD = 12, 1 + +class R_ASP_TX_HIZ_DLY_CTRL(Register8): + DRV_Z = 5, 4 + HIZ_DELAY = 3, 2 + FS = 1 + UNK1 = 0 + +class R_ASP_RX_EN(Register8): + CH2_EN = 1 + CH1_EN = 0 + +class R_ASP_CH_CTRL(Register32): + WIDTH = 23, 16 + SLOT_START = 10, 1 + EDGE = 0 # set for rising edge + +class CS42L84Regs(RegMap): + DEVID = irange(0x0, 5), Register8 + FREEZE = 0x6, Register8 + + SW_RESET = 0x203, Register8 + + IRQ_STATUS1 = 0x400, R_IRQ_MASK1 + IRQ_STATUS2 = 0x401, Register8 + IRQ_STATUS3 = 0x402, R_IRQ_MASK3 + PLL_LOCK_STATUS = 0x40e, Register8 # bit 0x10 + + IRQ_MASK1 = 0x418, R_IRQ_MASK1 + IRQ_MASK2 = 0x419, Register8 + IRQ_MASK3 = 0x41a, R_IRQ_MASK3 + + CCM_CTRL1 = 0x600, R_CCM_CTRL1 + CCM_SAMP_RATE = 0x601, R_CCM_SAMP_RATE + CCM_CTRL3 = 0x602, R_CCM_CTRL3 + CCM_CTRL4 = 0x603, R_CCM_CTRL4 + CCM_ASP_CLK_CTRL = 0x608, Register8 + + PLL_CTRL = 0x800, R_PLL_CTRL + PLL_DIV_FRAC = irange(0x804, 3), Register8 + PLL_DIV_INT = 0x807, Register8 + PLL_DIVOUT = 0x808, Register8 + + DCID_CTRL1 = 0x1200, R_DCID_CTRL1 + DCID_CTRL2 = 0x1201, R_DCID_CTRL2 + DCID_CTRL3 = 0x1202, R_DCID_CTRL3 + DCID_TRIM_OFFSET = 0x1207, Register8 + DCID_TRIM_SLOPE = 0x120a, Register8 + + # R_pull = 1100 - (regval - 128)*2 + DCID_PULLDOWN_TRIM = 0x120b, Register8 + DCID_STATUS = 0x120c, R_DCID_STATUS + + # tip/ring sense + TR_SENSE_CTRL1 = 0x1280, Register8 + TR_SENSE_CTRL2 = 0x1281, Register8 + RING_SENSE_CTRL = 0x1282, R_TR_SENSE_CTRL + TIP_SENSE_CTRL = 0x1283, R_TR_SENSE_CTRL + TR_SENSE_STATUS = 0x1288, R_TR_SENSE_STATUS + + HSBIAS_SC_AUTOCTL = 0x1470, R_HSBIAS_SC_AUTOCTL + WAKE_CTRL = 0x1471, Register8 + TIP_SENSE_CTRL2 = 0x1473, R_TIP_SENSE_CTRL2 + MISC_DET_CTRL = 0x1474, R_MISC_DET_CTRL + MIC_DET_CTRL2 = 0x1478, R_MIC_DET_CTRL2 + MIC_DET_CTRL4 = 0x1477, R_MIC_DET_CTRL4 + + HS_DET_STATUS1 = 0x147c, Register8 + HS_DET_STATUS2 = 0x147d, R_HS_DET_STATUS2 + HS_DET_IRQ_MASK = irange(0x1480, 2), Register8 + HS_DET_IRQ_STATUS = irange(0x1484, 
class DART(Reloadable):
    """Generation-independent DART front end.

    Picks a ``DART8020`` or ``DART8110`` backend from the compatible string
    and forwards mapping/translation/dump calls to it. It also owns one
    IOVA ``Heap`` per stream index (16 of them) used by ``iomap``.
    """
    PAGE_BITS = 14
    PAGE_SIZE = 1 << PAGE_BITS

    def __init__(self, iface, regs, util=None, compat="dart,t8020", iova_range=(0x80000000, 0x90000000)):
        """Wrap an already constructed register map.

        ``regs`` must be the RegMap type matching ``compat``; ``iova_range``
        is the (start, end) pair handed to every per-stream ``Heap``.
        Raises ``TypeError`` for an unsupported ``compat``.
        """
        self.iface = iface
        self.iova_allocator = [Heap(iova_range[0], iova_range[1], self.PAGE_SIZE)
                               for i in range(16)]
        if compat in ["dart,t8020", "dart,t6000"]:
            self.dart = DART8020(iface, regs, util, compat)
        elif compat in ["dart,t8110"]:
            self.dart = DART8110(iface, regs, util)
        else:
            raise TypeError(compat)

    @classmethod
    def from_adt(cls, u, path, instance=0, **kwargs):
        """Build a DART from the ADT node at ``path``.

        The register base comes from ``get_reg(instance)[0]`` and the backend
        is chosen from the node's first compatible string. Extra keyword
        arguments are forwarded to ``__init__``.
        Raises ``TypeError`` for an unsupported compatible string.
        """
        node = u.adt[path]
        dart_addr = node.get_reg(instance)[0]
        compat = node.compatible[0]
        if compat in ["dart,t8020", "dart,t6000"]:
            regs = DART8020Regs(u, dart_addr)
        elif compat in ["dart,t8110"]:
            regs = DART8110Regs(u, dart_addr)
        else:
            # Previously fell through with `regs` unbound (UnboundLocalError);
            # fail the same way __init__ does for an unknown variant.
            raise TypeError(compat)
        return cls(u.iface, regs, u, compat, **kwargs)

    def ioread(self, stream, base, size):
        """Read ``size`` bytes at IOVA ``base`` of ``stream`` and return them as bytes.

        Raises ``Exception`` if any page in the range is unmapped.
        """
        if size == 0:
            return b""

        ranges = self.iotranslate(stream, base, size)

        iova = base
        data = []
        # Use a distinct name for the per-range length so the `size`
        # parameter is not shadowed.
        for addr, chunk in ranges:
            if addr is None:
                raise Exception(f"Unmapped page at iova {iova:#x}")
            data.append(self.iface.readmem(addr, chunk))
            iova += chunk

        return b"".join(data)

    def iowrite(self, stream, base, data):
        """Write ``data`` to IOVA ``base`` of ``stream``.

        Raises ``Exception`` if any page in the range is unmapped.
        """
        if len(data) == 0:
            return

        ranges = self.iotranslate(stream, base, len(data))

        iova = base
        p = 0
        for addr, chunk in ranges:
            if addr is None:
                raise Exception(f"Unmapped page at iova {iova:#x}")
            self.iface.writemem(addr, data[p:p + chunk])
            p += chunk
            iova += chunk

    def iomap(self, stream, addr, size):
        """Allocate an IOVA from the stream's heap, map ``addr`` there and return the IOVA."""
        iova = self.iova_allocator[stream].malloc(size)

        self.iomap_at(stream, iova, addr, size)
        return iova

    def iomap_at(self, stream, iova, addr, size):
        """Map ``size`` bytes of physical ``addr`` at a caller-chosen ``iova`` (backend call)."""
        self.dart.iomap_at(stream, iova, addr, size)

    def iotranslate(self, stream, start, size):
        """Return the backend's list of ``(addr, size)`` ranges; ``addr`` is None when unmapped."""
        return self.dart.iotranslate(stream, start, size)

    def initialize(self):
        """Forward to the backend's ``initialize``."""
        self.dart.initialize()

    def show_error(self):
        """Forward to the backend's ``show_error``."""
        self.dart.show_error()

    def invalidate_streams(self, streams=0xffffffff):
        """Forward a stream bitmask to the backend's ``invalidate_streams``."""
        self.dart.invalidate_streams(streams)

    def invalidate_cache(self):
        """Forward to the backend's ``invalidate_cache``."""
        self.dart.invalidate_cache()

    def dump_device(self, idx):
        """Forward to the backend's ``dump_device`` for stream ``idx``."""
        self.dart.dump_device(idx)

    def dump_all(self):
        """Dump streams 0..15 via ``dump_device``."""
        for i in range(16):
            self.dump_device(i)

    def dump_params(self):
        """Forward to the backend's ``dump_params``."""
        self.dart.dump_params()
class DART8020(Reloadable):
    """Page-table manager for the "dart,t8020" / "dart,t6000" register layout.

    Tables hold ``Lx_SIZE`` 64-bit entries each. Every table read from or
    written to target memory is mirrored in ``self.pt_cache`` (address ->
    list of raw entry values).
    """
    PAGE_BITS = 14
    PAGE_SIZE = 1 << PAGE_BITS

    L0_SIZE = 4 # TTBR count
    L0_OFF = 36
    L1_OFF = 25
    L2_OFF = 14

    IDX_BITS = 11
    Lx_SIZE = (1 << IDX_BITS)
    IDX_MASK = Lx_SIZE - 1

    def __init__(self, iface, regs, util=None, compat="dart,t8020"):
        """``regs`` is a DART8020Regs-style map; ``compat`` selects the PTE layout from PTE_TYPES."""
        self.iface = iface
        self.regs = regs
        self.u = util
        self.pt_cache = {}
        self.enabled_streams = regs.ENABLED_STREAMS.val
        self.ptecls = PTE_TYPES[compat]

    @classmethod
    def from_adt(cls, u, path, instance=0, **kwargs):
        """Build an instance from the ADT node at ``path``.

        ``kwargs`` is accepted for signature compatibility and ignored, as before.
        """
        node = u.adt[path]
        dart_addr = node.get_reg(instance)[0]
        # __init__ reads regs.ENABLED_STREAMS, so it needs a register map,
        # not the bare MMIO address that used to be passed here.
        regs = DART8020Regs(u, dart_addr)
        return cls(u.iface, regs, u, node.compatible[0])

    def iomap_at(self, stream, iova, addr, size):
        """Map ``size`` bytes of physical ``addr`` at ``iova`` for ``stream``.

        Missing L1/L2 tables are allocated with ``self.u.memalign``. All
        touched tables are written back to target memory at the end.
        Raises ``Exception`` if the stream is not in translate mode or if
        ``addr``/``iova`` are not page aligned.
        """
        if size == 0:
            return

        if not (self.enabled_streams & (1 << stream)):
            self.enabled_streams |= (1 << stream)
            self.regs.ENABLED_STREAMS.val |= self.enabled_streams

        tcr = self.regs.TCR[stream].reg

        if tcr.BYPASS_DART and not tcr.TRANSLATE_ENABLE:
            raise Exception("Stream is bypassed in DART")

        if tcr.BYPASS_DART or not tcr.TRANSLATE_ENABLE:
            raise Exception(f"Unknown DART mode {tcr}")

        if addr & (self.PAGE_SIZE - 1):
            raise Exception(f"Unaligned PA {addr:#x}")

        if iova & (self.PAGE_SIZE - 1):
            raise Exception(f"Unaligned IOVA {iova:#x}")

        start_page = align_down(iova, self.PAGE_SIZE)
        end = iova + size
        end_page = align_up(end, self.PAGE_SIZE)

        # Addresses of tables modified in the cache that must be flushed.
        dirty = set()

        for page in range(start_page, end_page, self.PAGE_SIZE):
            paddr = addr + page - start_page

            l0 = page >> self.L0_OFF
            assert l0 < self.L0_SIZE
            ttbr = self.regs.TTBR[stream, l0].reg
            if not ttbr.VALID:
                l1addr = self.u.memalign(self.PAGE_SIZE, self.PAGE_SIZE)
                self.pt_cache[l1addr] = [0] * self.Lx_SIZE
                ttbr.VALID = 1
                ttbr.ADDR = l1addr >> 12
                self.regs.TTBR[stream, l0].reg = ttbr

            cached, l1 = self.get_pt(ttbr.ADDR << 12)
            l1idx = (page >> self.L1_OFF) & self.IDX_MASK
            l1pte = self.ptecls(l1[l1idx])
            if not l1pte.VALID:
                l2addr = self.u.memalign(self.PAGE_SIZE, self.PAGE_SIZE)
                self.pt_cache[l2addr] = [0] * self.Lx_SIZE
                l1pte = self.ptecls(
                    OFFSET=l2addr >> self.PAGE_BITS, VALID=1, SP_PROT_DIS=1)
                l1[l1idx] = l1pte.value
                dirty.add(ttbr.ADDR << 12)
            else:
                l2addr = l1pte.OFFSET << self.PAGE_BITS

            dirty.add(l1pte.OFFSET << self.PAGE_BITS)
            cached, l2 = self.get_pt(l2addr)
            l2idx = (page >> self.L2_OFF) & self.IDX_MASK
            self.pt_cache[l2addr][l2idx] = self.ptecls(
                SP_START=0, SP_END=0xfff,
                OFFSET=paddr >> self.PAGE_BITS, VALID=1, SP_PROT_DIS=1).value

        for page in dirty:
            self.flush_pt(page)

    def iotranslate(self, stream, start, size):
        """Translate ``size`` bytes at IOVA ``start`` into ``(addr, size)`` ranges.

        Physically contiguous pages are merged. Unmapped stretches are
        reported with ``addr`` set to None. A bypassed stream yields
        ``[(start, size)]``. Raises ``Exception`` for any other non-translate
        TCR mode.
        """
        if size == 0:
            return []

        tcr = self.regs.TCR[stream].reg

        if tcr.BYPASS_DART and not tcr.TRANSLATE_ENABLE:
            return [(start, size)]

        if tcr.BYPASS_DART or not tcr.TRANSLATE_ENABLE:
            raise Exception(f"Unknown DART mode {tcr}")

        start = start & 0xffffffff

        start_page = align_down(start, self.PAGE_SIZE)
        start_off = start - start_page
        end = start + size
        end_page = align_up(end, self.PAGE_SIZE)
        # Number of bytes used in the final page.
        end_size = end - (end_page - self.PAGE_SIZE)

        pages = []

        for page in range(start_page, end_page, self.PAGE_SIZE):
            l0 = page >> self.L0_OFF
            assert l0 < self.L0_SIZE
            ttbr = self.regs.TTBR[stream, l0].reg
            if not ttbr.VALID:
                pages.append(None)
                continue

            cached, l1 = self.get_pt(ttbr.ADDR << 12)
            l1pte = self.ptecls(l1[(page >> self.L1_OFF) & self.IDX_MASK])
            if not l1pte.VALID and cached:
                # The cached copy may be stale; re-read before declaring it unmapped.
                cached, l1 = self.get_pt(ttbr.ADDR << 12, uncached=True)
                l1pte = self.ptecls(l1[(page >> self.L1_OFF) & self.IDX_MASK])
            if not l1pte.VALID:
                pages.append(None)
                continue

            cached, l2 = self.get_pt(l1pte.OFFSET << self.PAGE_BITS)
            l2pte = self.ptecls(l2[(page >> self.L2_OFF) & self.IDX_MASK])
            if not l2pte.VALID and cached:
                cached, l2 = self.get_pt(l1pte.OFFSET << self.PAGE_BITS, uncached=True)
                l2pte = self.ptecls(l2[(page >> self.L2_OFF) & self.IDX_MASK])
            if not l2pte.VALID:
                pages.append(None)
                continue

            pages.append(l2pte.OFFSET << self.PAGE_BITS)

        ranges = []

        for page in pages:
            if not ranges:
                ranges.append((page, self.PAGE_SIZE))
                continue
            laddr, lsize = ranges[-1]
            if ((page is None and laddr is None) or
                (page is not None and laddr == (page - lsize))):
                ranges[-1] = laddr, lsize + self.PAGE_SIZE
            else:
                ranges.append((page, self.PAGE_SIZE))

        ranges[-1] = (ranges[-1][0], ranges[-1][1] - self.PAGE_SIZE + end_size)

        if start_off:
            # Compare against None explicitly: physical address 0 is a valid
            # mapping and must not be reported as unmapped.
            first_addr = ranges[0][0]
            ranges[0] = (first_addr + start_off if first_addr is not None else None,
                         ranges[0][1] - start_off)

        return ranges

    def get_pt(self, addr, uncached=False):
        """Return ``(cached, table)`` for the table at ``addr``.

        ``cached`` is False when the table was just read from target memory,
        either because it was not in the cache or because ``uncached`` is set.
        """
        cached = True
        if addr not in self.pt_cache or uncached:
            cached = False
            self.pt_cache[addr] = list(
                struct.unpack(f"<{self.Lx_SIZE}Q", self.iface.readmem(addr, self.PAGE_SIZE)))

        return cached, self.pt_cache[addr]

    def flush_pt(self, addr):
        """Write the cached table at ``addr`` back to target memory."""
        assert addr in self.pt_cache
        self.iface.writemem(addr, struct.pack(f"<{self.Lx_SIZE}Q", *self.pt_cache[addr]))

    def initialize(self):
        """Reset TCR/TTBR for all 16 streams, clear errors and enabled streams, then invalidate."""
        for i in range(15):
            self.regs.TCR[i].reg = R_TCR(TRANSLATE_ENABLE=1)
        self.regs.TCR[15].reg = R_TCR(BYPASS_DART=1)

        for i in range(16):
            for j in range(4):
                self.regs.TTBR[i, j].reg = R_TTBR(VALID = 0)

        self.regs.ERROR.val = 0xffffffff
        self.regs.UNK1.val = 0
        self.regs.ENABLED_STREAMS.val = 0
        self.enabled_streams = 0

        self.invalidate_streams()

    def show_error(self):
        """Print the ERROR register and fault address if FLAG is set, then write 0xffffffff to ERROR."""
        if self.regs.ERROR.reg.FLAG:
            print(f"ERROR: {self.regs.ERROR.reg!s}")
            print(f"ADDR: {self.regs.ERROR_ADDR_HI.val:#x}:{self.regs.ERROR_ADDR_LO.val:#x}")
            self.regs.ERROR.val = 0xffffffff

    def invalidate_streams(self, streams=0xffffffff):
        """Issue an INVALIDATE command for the ``streams`` bitmask and spin until BUSY clears."""
        self.regs.STREAM_SELECT.val = streams
        self.regs.STREAM_COMMAND.val = R_STREAM_COMMAND(INVALIDATE=1)
        while self.regs.STREAM_COMMAND.reg.BUSY:
            pass

    def invalidate_cache(self):
        """Drop every cached page table."""
        self.pt_cache = {}

    def dump_table2(self, base, l1_addr):
        """Print the leaf table at ``l1_addr``, coalescing runs of consecutive physical pages."""

        def print_block(base, pte, start, last):
            pgcount = last - start
            # `pte` is the predicted next entry; step back to the first page of the run.
            pte.OFFSET -= pgcount
            print(" page (%4d): %08x ... %08x -> %016x [%d%d]" % (
                start, base + start*0x4000, base + (start+1)*0x4000,
                pte.OFFSET << self.PAGE_BITS, pte.SP_PROT_DIS, pte.VALID))
            if start < last:
                print(" ==> (%4d): ... %08x -> %016x size: %08x" % (
                    last, base + (last+1)*0x4000,
                    (pte.OFFSET + pgcount - 1) << self.PAGE_BITS, pgcount << self.PAGE_BITS))

        cached, tbl = self.get_pt(l1_addr)

        unmapped = False
        start = 0
        next_pte = self.ptecls(VALID=0)

        for i, pte in enumerate(tbl):
            pte = self.ptecls(pte)
            if not pte.VALID:
                if not unmapped:
                    if next_pte.VALID:
                        print_block(base, next_pte, start, i)
                    print(" ...")
                    unmapped = True
                next_pte = pte
                continue

            unmapped = False

            # A run continues only while each entry equals the previous one with OFFSET + 1.
            if int(pte) != int(next_pte):
                if next_pte.VALID:
                    print_block(base, next_pte, start, i)
                start = i

            next_pte = pte
            next_pte.OFFSET += 1

        if next_pte.VALID:
            print_block(base, next_pte, start, self.Lx_SIZE)

    def dump_table(self, base, l1_addr):
        """Print each valid entry of the table at ``l1_addr`` and recurse into its leaf table."""
        cached, tbl = self.get_pt(l1_addr)

        unmapped = False
        for i, pte in enumerate(tbl):
            pte = self.ptecls(pte)
            if not pte.VALID:
                if not unmapped:
                    print(" ...")
                    unmapped = True
                continue

            unmapped = False

            print(" table (%d): %08x ... %08x -> %016x [%d%d]" % (
                i, base + i*0x2000000, base + (i+1)*0x2000000,
                pte.OFFSET << self.PAGE_BITS, pte.SP_PROT_DIS, pte.VALID))
            self.dump_table2(base + i*0x2000000, pte.OFFSET << self.PAGE_BITS)

    def dump_ttbr(self, idx, ttbr):
        """Print the tables rooted at ``ttbr`` when it is valid."""
        if not ttbr.VALID:
            return

        l1_addr = (ttbr.ADDR) << 12
        print(" TTBR%d: %09x" % (idx, l1_addr))

        self.dump_table(0, l1_addr)

    def dump_device(self, idx):
        """Print the TCR, TTBRs and translation mode of stream ``idx``, with tables if translating."""
        tcr = self.regs.TCR[idx].reg
        ttbrs = self.regs.TTBR[idx, :]
        print(f"dev {idx:02x}: TCR={tcr!s} TTBRs = [{', '.join(map(str, ttbrs))}]")

        if tcr.TRANSLATE_ENABLE and tcr.BYPASS_DART:
            print(" mode: INVALID")
        elif tcr.TRANSLATE_ENABLE:
            print(" mode: TRANSLATE")

            for idx, ttbr in enumerate(ttbrs):
                self.dump_ttbr(idx, ttbr.reg)
        elif tcr.BYPASS_DART:
            print(" mode: BYPASS")
        else:
            print(" mode: UNKNOWN")

    def dump_params(self):
        """No parameter registers are dumped for this variant."""
        pass
0000000..1655182 --- /dev/null +++ b/tools/proxyclient/m1n1/hw/dart8110.py @@ -0,0 +1,541 @@ +# SPDX-License-Identifier: MIT + +import struct + +from enum import IntEnum +from ..utils import * +from ..malloc import Heap + +__all__ = ["DART8110Regs", "DART8110"] + +class R_PARAMS_0(Register32): + CLIENT_PARTITIONS_SUPPORTED = 29 + LOG2_PGSZ = 27, 24 + LOG2_TE_COUNT = 22, 20 + TLB_SET_COUNT = 11, 0 + +class R_PARAMS_4(Register32): + LOG2_NUM_WAYS = 30, 28 + NUM_ASCS = 25, 24 + NUM_W_PORTS = 22, 20 + NUM_R_PORTS = 18, 16 + NUM_APFS = 15, 8 + SUPPORT_STT_PREFETCH = 6 + SUPPORT_TLB_PREFETCH = 5 + SUPPORT_CTC_PREFETCH = 4 + SUPPORT_HW_FLUSH = 3 + SUPPORT_TZ_TAGGER = 2 + SUPPORT_REG_LOCK = 1 + SUPPORT_FULL_BYPASS = 0 + +class R_PARAMS_8(Register32): + PA_WIDTH = 29, 24 + VA_WIDTH = 21, 16 + VERS_MAJ = 15, 8 + VERS_MIN = 7, 0 + +class R_PARAMS_C(Register32): + NUM_CLIENTS = 24, 16 + NUM_SIDS = 8, 0 + +class R_ERROR(Register32): + FLAG = 31 + SMMU = 30 + REGION_PROTECT = 29 + WRITE_nREAD = 28 + SID = 27, 20 + SECONDARY = 19 + FILL_REGION = 18 + BPF_REJECT = 14 + EXTERNAL = 13 + STT_FLUSH = 12 + STT_MISMATCH = 11 + APF_REJECT = 10 + DROP_PROTECT = 9 + CTRR_WRITE_PROTECT = 8 + AXI_ERROR = 7 + AXI_DECODE = 6 + READ_FAULT = 5 + WRITE_FAULT = 4 + NO_PTE = 3 + NO_PMD = 2 # "STE" + NO_PGD = 1 # "CTE" + NO_TTBR = 0 + +class R_TLB_OP(Register32): + BUSY = 31 + + # None of these bits are supported on hwrev 1 + HARDWARE_FLUSH = 30 + FLUSH_VA_RANGE = 14 + ENABLE_STT_FLUSH = 13 + DISABLE_STC_FLUSH = 12 + + # 0 = flush all + # 1 = flush SID + # 2 = TLB read + # 3 = TLB write???? + # 4 = flush unlock, definitely not supported on hwrev 1 + OP = 10, 8 + STREAM = 7, 0 + +class R_TLB_OP_IDX(Register32): + SET = 13, 8 + WAY = 6, 4 + TE = 2, 0 + +class R_PROTECT(Register32): + LOCK_TZ_SELECT = 4 + LOCK_TZ_CONFIG = 3 + # This bit can be set, but unknown what it protects + _BIT2 = 2 + LOCK_REG_4xx = 1 + LOCK_TCR_TTBR = 0 + +class R_DIAG_LOCK(Register32): + # FIXME: how does this work exactly? 
+ LOCK_ON_ERR = 1 + LOCK = 0 + +class R_TCR(Register32): + REMAP = 11, 8 + REMAP_EN = 7 + FOUR_LEVELS = 3 # not supported on hwrev 1 + BYPASS_DAPF = 2 + BYPASS_DART = 1 + TRANSLATE_ENABLE = 0 + +class R_TTBR(Register32): + ADDR = 29, 2 + VALID = 0 + +class PTE(Register64): + SP_START = 63, 52 + SP_END = 51, 40 + OFFSET = 37, 10 + RDPROT = 3 + WRPROT = 2 + UNCACHABLE = 1 + VALID = 0 + +class DART8110Regs(RegMap): + PARAMS_0 = 0x000, R_PARAMS_0 + PARAMS_4 = 0x004, R_PARAMS_4 + PARAMS_8 = 0x008, R_PARAMS_8 + PARAMS_C = 0x00C, R_PARAMS_C + # Unknown RO + REG_0x10 = 0x010, Register32 + REG_0x14 = 0x014, Register32 # hwrev 2 only + + TLB_OP = 0x080, R_TLB_OP + TLP_OP_IDX = 0x084, R_TLB_OP_IDX + TLB_TAG_LO = 0x088, Register32 + TLB_TAG_HI = 0x08c, Register32 # hwrev 2 only + TLB_PA_LO = 0x090, Register32 + TLB_PA_HI = 0x094, Register32 + TLB_START_DVA_PAGE = 0x098, Register32 # hwrev 2 only + TLB_END_DVA_PAGE = 0x0a0, Register32 # hwrev 2 only + + ERROR = 0x100, R_ERROR + ERROR_DISABLE = 0x104, R_ERROR + + # Found via register bruteforcing + STREAM_UNK_SET = irange(0x120, 8, 4), Register32 + STREAM_UNK_CLR = irange(0x140, 8, 4), Register32 + + # these are all accessed by error interrupt handler + REG_0x160 = 0x160, Register32 + REG_0x164 = 0x164, Register32 + ERROR_ADDR_LO = 0x170, Register32 + ERROR_ADDR_HI = 0x174, Register32 + REG_0x178 = 0x178, Register32 # hwrev 2 only + REG_0x180 = irange(0x180, 4, 4), Register32 + REG_0x1a0 = irange(0x1a0, 8, 4), Register32 + ERR_SECONDARY = irange(0x1c0, 8, 4), Register32 + + # Write bits to _PROTECT to protect them. + # They can be unprotected by writing to _UNPROTECT unless _LOCK is written. + # If _LOCK is written, protection can be enabled but not disabled. 
+ REG_PROTECT = 0x200, R_PROTECT + REG_UNPROTECT = 0x204, R_PROTECT + REG_PROTECT_LOCK = 0x208, R_PROTECT + + # Tunables touch this, can set bits FF00001F, RW + REG_0x20c = 0x20c, Register32 + + DIAG_LOCK = 0x210, R_DIAG_LOCK + + # All unknown, related to transaction queueing??? + + # can set bits 3FFFFFFC, RW + REG_0x218 = 0x218, Register32 + # Tunables touch this, can set bits 000F0F0F, RW + REG_0x220 = 0x220, Register32 + # Tunables touch this, can set bits 00FFFFFF, RW + REG_0x224 = 0x224, Register32 + # can set bits 3F3F3F3F + TLIMIT = 0x228, Register32 + # can set bits 07070707 + TEQRESERVE = 0x22c, Register32 + # RO, outstanding transaction count??? + TRANS = irange(0x230, 4, 4), Register32 + + # hwrev 2 only for all of these + REG_0x300 = 0x300, Register32 + REG_0x308 = 0x308, Register32 + REG_0x310 = 0x310, Register32 + REG_0x318 = 0x318, Register32 + REG_0x320 = 0x320, Register32 + REG_0x328 = 0x328, Register32 + REG_0x330 = 0x330, Register32 + REG_0x338 = 0x338, Register32 + REG_0x340 = 0x340, Register32 + REG_0x348 = 0x348, Register32 + REG_0x350 = 0x350, Register32 + REG_0x358 = 0x358, Register32 + + # Unknown + REG_0x400 = 0x400, Register32 # can set 00000003 + REG_0x404 = 0x404, Register32 # can set 001FFFFF + REG_0x408 = 0x408, Register32 # can set 00FFFFFC + REG_0x410 = 0x410, Register32 # can set 3FFFFFFC + + # These registers exist even though it's "not supported" + TZ_CONFIG = 0x500, Register32 # 3 bits + TZ_SELECT = 0x504, Register32 # 1 bit + TZ_REGION0_START = 0x508, Register32 + TZ_REGION0_END = 0x510, Register32 + TZ_REGION0_OFFSET = 0x518, Register32 + TZ_REGION1_START = 0x520, Register32 + TZ_REGION1_END = 0x528, Register32 + TZ_REGION1_OFFSET = 0x530, Register32 + TZ_REGION2_START = 0x538, Register32 + TZ_REGION2_END = 0x540, Register32 + TZ_REGION2_OFFSET = 0x548, Register32 + + # completely guessed, unverified, can set bits 0F077077 + PERF_INTR_ENABLE = 0x700, Register32 + PERF_INTR_STATUS = 0x704, Register32 + + PERF_UNK1 = 
class DART8110(Reloadable):
    """Page-table manager for the "dart,t8110" register layout.

    Each stream has a single TTBR. Tables hold ``Lx_SIZE`` 64-bit entries
    and are mirrored in ``self.pt_cache`` (address -> list of raw values).
    """
    PAGE_BITS = 14
    PAGE_SIZE = 1 << PAGE_BITS

    L1_OFF = 25
    L2_OFF = 14

    IDX_BITS = 11
    Lx_SIZE = (1 << IDX_BITS)
    IDX_MASK = Lx_SIZE - 1

    def __init__(self, iface, regs, util=None):
        """``regs`` is a DART8110Regs-style map; the enabled-stream bitmap is read from hardware."""
        self.iface = iface
        self.regs = regs
        self.u = util
        self.pt_cache = {}

        # Assemble one wide bitmap from the eight 32-bit ENABLE_STREAMS registers.
        enabled_streams = 0
        for i in range(8):
            enabled_streams |= regs.ENABLE_STREAMS[i].val << 32*i
        self.enabled_streams = enabled_streams

    @classmethod
    def from_adt(cls, u, path, instance=0, **kwargs):
        """Build an instance from the ADT node at ``path``; ``kwargs`` go to ``__init__``."""
        dart_addr = u.adt[path].get_reg(instance)[0]
        regs = DART8110Regs(u, dart_addr)
        dart = cls(u.iface, regs, u, **kwargs)
        return dart

    def iomap_at(self, stream, iova, addr, size):
        """Map ``size`` bytes of physical ``addr`` at ``iova`` for ``stream``.

        Missing L1/L2 tables are allocated with ``self.u.memalign``. All
        touched tables are written back to target memory at the end.
        Raises ``Exception`` if the stream is not in translate mode or if
        ``addr``/``iova`` are not page aligned.
        """
        if size == 0:
            return

        if not (self.enabled_streams & (1 << stream)):
            self.enabled_streams |= (1 << stream)
            self.regs.ENABLE_STREAMS[stream // 32].val |= (1 << (stream % 32))

        tcr = self.regs.TCR[stream].reg

        if tcr.BYPASS_DART and not tcr.TRANSLATE_ENABLE:
            raise Exception("Stream is bypassed in DART")

        if tcr.BYPASS_DART or not tcr.TRANSLATE_ENABLE:
            raise Exception(f"Unknown DART mode {tcr}")

        if addr & (self.PAGE_SIZE - 1):
            raise Exception(f"Unaligned PA {addr:#x}")

        if iova & (self.PAGE_SIZE - 1):
            raise Exception(f"Unaligned IOVA {iova:#x}")

        start_page = align_down(iova, self.PAGE_SIZE)
        end = iova + size
        end_page = align_up(end, self.PAGE_SIZE)

        # Addresses of tables modified in the cache that must be flushed.
        dirty = set()

        for page in range(start_page, end_page, self.PAGE_SIZE):
            paddr = addr + page - start_page

            ttbr = self.regs.TTBR[stream].reg
            if not ttbr.VALID:
                l1addr = self.u.memalign(self.PAGE_SIZE, self.PAGE_SIZE)
                self.pt_cache[l1addr] = [0] * self.Lx_SIZE
                ttbr.VALID = 1
                ttbr.ADDR = l1addr >> self.PAGE_BITS
                self.regs.TTBR[stream].reg = ttbr

            cached, l1 = self.get_pt(ttbr.ADDR << self.PAGE_BITS)
            l1idx = (page >> self.L1_OFF) & self.IDX_MASK
            l1pte = PTE(l1[l1idx])
            if not l1pte.VALID:
                l2addr = self.u.memalign(self.PAGE_SIZE, self.PAGE_SIZE)
                self.pt_cache[l2addr] = [0] * self.Lx_SIZE
                l1pte = PTE(
                    OFFSET=l2addr >> self.PAGE_BITS, VALID=1)
                l1[l1idx] = l1pte.value
                dirty.add(ttbr.ADDR << self.PAGE_BITS)
            else:
                l2addr = l1pte.OFFSET << self.PAGE_BITS

            dirty.add(l1pte.OFFSET << self.PAGE_BITS)
            cached, l2 = self.get_pt(l2addr)
            l2idx = (page >> self.L2_OFF) & self.IDX_MASK
            self.pt_cache[l2addr][l2idx] = PTE(
                SP_START=0, SP_END=0xfff,
                OFFSET=paddr >> self.PAGE_BITS, VALID=1).value

        for page in dirty:
            self.flush_pt(page)

    def iotranslate(self, stream, start, size):
        """Translate ``size`` bytes at IOVA ``start`` into ``(addr, size)`` ranges.

        Physically contiguous pages are merged. Unmapped stretches are
        reported with ``addr`` set to None. A bypassed stream yields
        ``[(start, size)]``. Raises ``Exception`` for any other non-translate
        TCR mode.
        """
        if size == 0:
            return []

        tcr = self.regs.TCR[stream].reg

        if tcr.BYPASS_DART and not tcr.TRANSLATE_ENABLE:
            # FIXME this may not be correct
            return [(start, size)]

        if tcr.BYPASS_DART or not tcr.TRANSLATE_ENABLE:
            raise Exception(f"Unknown DART mode {tcr}")

        start = start & 0xfffffffff

        start_page = align_down(start, self.PAGE_SIZE)
        start_off = start - start_page
        end = start + size
        end_page = align_up(end, self.PAGE_SIZE)
        # Number of bytes used in the final page.
        end_size = end - (end_page - self.PAGE_SIZE)

        pages = []

        for page in range(start_page, end_page, self.PAGE_SIZE):
            ttbr = self.regs.TTBR[stream].reg
            if not ttbr.VALID:
                pages.append(None)
                continue

            cached, l1 = self.get_pt(ttbr.ADDR << self.PAGE_BITS)
            l1pte = PTE(l1[(page >> self.L1_OFF) & self.IDX_MASK])
            if not l1pte.VALID and cached:
                # The cached copy may be stale; re-read before declaring it unmapped.
                cached, l1 = self.get_pt(ttbr.ADDR << self.PAGE_BITS, uncached=True)
                l1pte = PTE(l1[(page >> self.L1_OFF) & self.IDX_MASK])
            if not l1pte.VALID:
                pages.append(None)
                continue

            cached, l2 = self.get_pt(l1pte.OFFSET << self.PAGE_BITS)
            l2pte = PTE(l2[(page >> self.L2_OFF) & self.IDX_MASK])
            if not l2pte.VALID and cached:
                cached, l2 = self.get_pt(l1pte.OFFSET << self.PAGE_BITS, uncached=True)
                l2pte = PTE(l2[(page >> self.L2_OFF) & self.IDX_MASK])
            if not l2pte.VALID:
                pages.append(None)
                continue

            pages.append(l2pte.OFFSET << self.PAGE_BITS)

        ranges = []

        for page in pages:
            if not ranges:
                ranges.append((page, self.PAGE_SIZE))
                continue
            laddr, lsize = ranges[-1]
            if ((page is None and laddr is None) or
                (page is not None and laddr == (page - lsize))):
                ranges[-1] = laddr, lsize + self.PAGE_SIZE
            else:
                ranges.append((page, self.PAGE_SIZE))

        ranges[-1] = (ranges[-1][0], ranges[-1][1] - self.PAGE_SIZE + end_size)

        if start_off:
            # Compare against None explicitly: physical address 0 is a valid
            # mapping and must not be reported as unmapped.
            first_addr = ranges[0][0]
            ranges[0] = (first_addr + start_off if first_addr is not None else None,
                         ranges[0][1] - start_off)

        return ranges

    def get_pt(self, addr, uncached=False):
        """Return ``(cached, table)`` for the table at ``addr``.

        ``cached`` is False when the table was just read from target memory,
        either because it was not in the cache or because ``uncached`` is set.
        """
        cached = True
        if addr not in self.pt_cache or uncached:
            cached = False
            self.pt_cache[addr] = list(
                struct.unpack(f"<{self.Lx_SIZE}Q", self.iface.readmem(addr, self.PAGE_SIZE)))

        return cached, self.pt_cache[addr]

    def flush_pt(self, addr):
        """Write the cached table at ``addr`` back to target memory."""
        assert addr in self.pt_cache
        self.iface.writemem(addr, struct.pack(f"<{self.Lx_SIZE}Q", *self.pt_cache[addr]))

    def initialize(self):
        """Reset TCR/TTBR for streams 0..15, disable those streams, then invalidate."""
        for i in range(15):
            self.regs.TCR[i].reg = R_TCR(TRANSLATE_ENABLE=1)
        self.regs.TCR[15].reg = R_TCR(BYPASS_DART=1)

        for i in range(16):
            self.regs.TTBR[i].reg = R_TTBR(VALID = 0)

        # self.regs.ERROR.val = 0xffffffff
        # self.regs.UNK1.val = 0
        self.regs.DISABLE_STREAMS[0].val = 0xffff
        self.enabled_streams = 0

        self.invalidate_streams()

    def show_error(self):
        """Print the ERROR register and fault address if FLAG is set, then write 0x80000004 to ERROR."""
        if self.regs.ERROR.reg.FLAG:
            print(f"ERROR: {self.regs.ERROR.reg!s}")
            print(f"ADDR: {self.regs.ERROR_ADDR_HI.val:#x}:{self.regs.ERROR_ADDR_LO.val:#x}")
            self.regs.ERROR.val = 0x80000004

    def invalidate_streams(self, streams=0xffff):
        """Issue TLB_OP with OP=1 for every stream set in the ``streams`` bitmask (bits 0..255)."""
        for sid in range(256):
            if streams & (1 << sid):
                self.regs.TLB_OP.val = R_TLB_OP(STREAM=sid, OP=1)
                while self.regs.TLB_OP.reg.BUSY:
                    pass

    def invalidate_cache(self):
        """Drop every cached page table."""
        self.pt_cache = {}

    def dump_table2(self, base, l1_addr):
        """Print the leaf table at ``l1_addr``, coalescing runs of consecutive physical pages."""

        def print_block(base, pte, start, last):
            pgcount = last - start
            # `pte` is the predicted next entry; step back to the first page of the run.
            pte.OFFSET -= pgcount
            print(" page (%4d): %09x ... %09x -> %016x [%d%d%d%d]" % (
                start, base + start*0x4000, base + (start+1)*0x4000,
                pte.OFFSET << self.PAGE_BITS,
                pte.RDPROT, pte.WRPROT, pte.UNCACHABLE, pte.VALID))
            if start < last:
                print(" ==> (%4d): ... %09x -> %016x size: %08x" % (
                    last, base + (last+1)*0x4000,
                    (pte.OFFSET + pgcount - 1) << self.PAGE_BITS, pgcount << self.PAGE_BITS))

        cached, tbl = self.get_pt(l1_addr)

        unmapped = False
        start = 0
        next_pte = PTE(VALID=0)

        for i, pte in enumerate(tbl):
            pte = PTE(pte)
            if not pte.VALID:
                if not unmapped:
                    if next_pte.VALID:
                        print_block(base, next_pte, start, i)
                    print(" ...")
                    unmapped = True
                next_pte = pte
                continue

            unmapped = False

            # A run continues only while each entry equals the previous one with OFFSET + 1.
            if int(pte) != int(next_pte):
                if next_pte.VALID:
                    print_block(base, next_pte, start, i)
                start = i

            next_pte = pte
            next_pte.OFFSET += 1

        if next_pte.VALID:
            print_block(base, next_pte, start, self.Lx_SIZE)

    def dump_table(self, base, l1_addr):
        """Print each valid entry of the table at ``l1_addr`` and recurse into its leaf table."""
        cached, tbl = self.get_pt(l1_addr)

        unmapped = False
        for i, pte in enumerate(tbl):
            pte = PTE(pte)
            if not pte.VALID:
                if not unmapped:
                    print(" ...")
                    unmapped = True
                continue

            unmapped = False

            print(" table (%d): %09x ... %09x -> %016x [%d%d%d%d]" % (
                i, base + i*0x2000000, base + (i+1)*0x2000000,
                pte.OFFSET << self.PAGE_BITS,
                pte.RDPROT, pte.WRPROT, pte.UNCACHABLE, pte.VALID))
            self.dump_table2(base + i*0x2000000, pte.OFFSET << self.PAGE_BITS)

    def dump_ttbr(self, ttbr):
        """Print the tables rooted at ``ttbr`` when it is valid."""
        if not ttbr.VALID:
            return

        l1_addr = (ttbr.ADDR) << self.PAGE_BITS
        print(" TTBR: %011x" % (l1_addr))

        self.dump_table(0, l1_addr)

    def dump_device(self, idx):
        """Print the TCR, TTBR and translation mode of stream ``idx``, with tables if translating."""
        tcr = self.regs.TCR[idx].reg
        ttbr = self.regs.TTBR[idx]
        print(f"dev {idx:02x}: TCR={tcr!s} TTBR = {ttbr!s}")

        if tcr.TRANSLATE_ENABLE and tcr.BYPASS_DART:
            print(" mode: INVALID")
        elif tcr.TRANSLATE_ENABLE:
            print(" mode: TRANSLATE")

            self.dump_ttbr(ttbr.reg)
        elif tcr.BYPASS_DART:
            print(" mode: BYPASS")
        else:
            print(" mode: UNKNOWN")

    def dump_params(self):
        """Print the four PARAMS registers."""
        print(self.regs.PARAMS_0.reg)
        print(self.regs.PARAMS_4.reg)
        print(self.regs.PARAMS_8.reg)
        print(self.regs.PARAMS_C.reg)
class DockChannel:
    """Byte-stream access to one DockChannel FIFO pair.

    The config registers live at ``fifo_base``, the data registers at
    ``fifo_base + 0x4000`` and the IRQ registers at ``irq_base``. Each
    channel owns two adjacent IRQ bits selected by ``irq_idx``.
    """

    def __init__(self, u, irq_base, fifo_base, irq_idx):
        self.u = u
        self.p = u.proxy
        self.iface = u.iface
        self.config = DockChannelConfigRegs(u, fifo_base)
        self.data = DockChannelDataRegs(u, fifo_base + 0x4000)
        self.irq = DockChannelIRQRegs(u, irq_base)
        self.irq_idx = irq_idx
        # Write both of this channel's bits to the mask register.
        self.irq.IRQ_MASK.val = 3 << (irq_idx * 2)

    @property
    def tx_irq(self):
        """Write the channel's low IRQ bit to IRQ_FLAG, then return that bit as read back."""
        bit = 1 << (self.irq_idx * 2)
        self.irq.IRQ_FLAG.val = bit
        return self.irq.IRQ_FLAG.val & bit

    @property
    def rx_irq(self):
        """Write the channel's high IRQ bit to IRQ_FLAG, then return that bit as read back."""
        bit = 2 << (self.irq_idx * 2)
        self.irq.IRQ_FLAG.val = bit
        return self.irq.IRQ_FLAG.val & bit

    @property
    def rx_count(self):
        """Current value of the RX_COUNT register."""
        return self.data.RX_COUNT.val

    @property
    def tx_free(self):
        """Current value of the TX_FREE register."""
        return self.data.TX_FREE.val

    def set_tx_thresh(self, v):
        """Write ``v`` to TX_THRESH."""
        self.config.TX_THRESH.val = v

    def set_rx_thresh(self, v):
        """Write ``v`` to RX_THRESH."""
        self.config.RX_THRESH.val = v

    def write(self, data):
        """Send ``data``: little-endian 32-bit words first, then any tail bytes one by one.

        Spins on ``tx_free`` before every register write.
        """
        pos = 0
        remaining = len(data)
        while remaining > 3:
            while self.tx_free < 4:
                pass
            (word,) = struct.unpack("<I", data[pos:pos + 4])
            self.data.TX_32.val = word
            pos += 4
            remaining -= 4
        while remaining > 0:
            while self.tx_free < 1:
                pass
            self.data.TX_8.val = data[pos]
            pos += 1
            remaining -= 1

    def read(self, count):
        """Receive ``count`` bytes: 32-bit words first, then single bytes for the tail.

        Spins on ``rx_count`` before every register read.
        """
        chunks = []
        remaining = count
        while remaining > 3:
            while self.rx_count < 4:
                pass
            chunks.append(struct.pack("<I", self.data.RX_32.val))
            remaining -= 4
        while remaining > 0:
            while self.rx_count < 1:
                pass
            # NOTE(review): reads the DATA field straight off the accessor
            # rather than through `.reg` -- confirm the accessor supports this.
            chunks.append(bytes([self.data.RX_8.DATA]))
            remaining -= 1
        return b"".join(chunks)

    def read_all(self):
        """Read however many bytes ``rx_count`` currently reports."""
        return self.read(self.rx_count)
class XhciRegs(RegMap):
    """Register map for the xHCI register block (offsets from the map's base)."""
    HCSPARAMS1 = 0x04, Register32
    HCSPARAMS2 = 0x08, Register32
    HCSPARAMS3 = 0x0C, Register32
    HCCPARAMS1 = 0x10, Register32
    DBOFF = 0x14, Register32
    RTSOFF = 0x18, Register32
    HCCPARAMS2 = 0x1C, Register32
    USBCMD = 0x20, R_XHCI_USBCMD
    USBSTS = 0x24, R_XHCI_USBSTS
    DNCTRL = 0x34, R_XHCI_DNCTRL
    CRCR_LO = 0x38, R_XHCI_CRCR_LO
    CRCR_HI = 0x3C, Register32
    DCBAAP_LO = 0x50, Register32
    DCBAAP_HI = 0x54, Register32

    PORTSC0 = 0x420, R_XHCI_PORTSC
    PORTPMSC0 = 0x424, Register32
    PORTLI0 = 0x428, R_XHCI_PORTLI
    PORTHLPMC0 = 0x42C, Register32

    PORTSC1 = 0x430, R_XHCI_PORTSC
    PORTPMSC1 = 0x434, Register32
    PORTLI1 = 0x438, R_XHCI_PORTLI
    PORTHLPMC1 = 0x43C, Register32

    MFINDEX = 0x440, Register32

    # Four identically laid out register groups, 0x20 apart, numbered 0..3.
    IMAN0 = 0x460 + 0x00, R_XHCI_IMAN
    IMOD0 = 0x460 + 0x04, Register32
    ERSTSZ0 = 0x460 + 0x08, Register32
    RSVD0 = 0x460 + 0x0C, Register32
    ERSTBA0 = 0x460 + 0x10, Register64
    ERDP0 = 0x460 + 0x18, Register64

    IMAN1 = 0x480 + 0x00, R_XHCI_IMAN
    IMOD1 = 0x480 + 0x04, Register32
    ERSTSZ1 = 0x480 + 0x08, Register32
    RSVD1 = 0x480 + 0x0C, Register32
    ERSTBA1 = 0x480 + 0x10, Register64
    ERDP1 = 0x480 + 0x18, Register64

    IMAN2 = 0x4A0 + 0x00, R_XHCI_IMAN
    IMOD2 = 0x4A0 + 0x04, Register32
    ERSTSZ2 = 0x4A0 + 0x08, Register32
    # Was a second `RSVD0`, which silently replaced the group-0 definition.
    RSVD2 = 0x4A0 + 0x0C, Register32
    ERSTBA2 = 0x4A0 + 0x10, Register64
    ERDP2 = 0x4A0 + 0x18, Register64

    IMAN3 = 0x4C0 + 0x00, R_XHCI_IMAN
    IMOD3 = 0x4C0 + 0x04, Register32
    ERSTSZ3 = 0x4C0 + 0x08, Register32
    # Was a third `RSVD0`; named after its group like every other field.
    RSVD3 = 0x4C0 + 0x0C, Register32
    ERSTBA3 = 0x4C0 + 0x10, Register64
    ERDP3 = 0x4C0 + 0x18, Register64

    DOORBELL = irange(0x4E0, 256, 4), R_XHCI_DOORBELL
GHWPARAMS5 = 0x154, Register32 + GHWPARAMS6 = 0x158, Register32 + GHWPARAMS7 = 0x15C, Register32 + GDBGFIFOSPACE = 0x160, Register32 + GDBGLTSSM = 0x164, Register32 + GDBGBMU = 0x16C, Register32 + GDBGLSPMUX = 0x170, Register32 + GDBGLSP = 0x174, Register32 + GDBGEPINFO0 = 0x178, Register32 + GDBGEPINFO1 = 0x17C, Register32 + GPRTBIMAP_HS0 = 0x180, Register32 + GPRTBIMAP_HS1 = 0x184, Register32 + GPRTBIMAP_FS0 = 0x188, Register32 + GPRTBIMAP_FS1 = 0x18C, Register32 + GUCTL2 = 0x19C, Register32 + GUSB2PHYCFG = 0x200, R_GUSB2PHYCFG + GUSB2I2CCTL = 0x240, Register32 + GUSB2PHYACC = 0x280, Register32 + GUSB3PIPECTL = 0x2C0, R_GUSB3PIPECTL + DWC3_GHWPARAMS8 = 0x600, Register32 + DWC3_GUCTL3 = 0x60C, Register32 + DWC3_GFLADJ = 0x630, Register32 + DWC3_GHWPARAMS9 = 0x680, Register32 + + +class R_PIPEHANDLER_OVERRIDE(Register32): + RXVALID = 0 + RXDETECT = 2 + + +class E_PIPEHANDLER_MUX_MODE(IntEnum): + USB3_PHY = 0 + DUMMY_PHY = 1 + UNK2 = 2 + + +class E_PIPEHANDLER_CLK_SELECT(IntEnum): + UNK0 = 0 + USB3_PHY = 1 + DUMMY_PHY = 2 + UNK4 = 4 + + +class R_PIPEHANDLER_MUX_CTRL(Register32): + MUX_MODE = 1, 0, E_PIPEHANDLER_MUX_MODE + CLK_SELECT = 5, 3, E_PIPEHANDLER_CLK_SELECT + + +class R_PIPEHANDLER_LOCK(Register32): + LOCK_EN = 0 + + +class R_PIPEHANDLER_AON_GEN(Register32): + DWC3_FORCE_CLAMP_EN = 4 + DWC3_RESET_N = 0 + + +class R_PIPEHANDLER_NONSELECTED_OVERRIDE(Register32): + NATIVE_POWER_DOWN = 3, 0 + NATIVE_RESET = 12 + DUMMY_PHY_EN = 15 + + +class PipehandlerRegs(RegMap): + PIPEHANDLER_OVERRIDE = 0x00, R_PIPEHANDLER_OVERRIDE + PIPEHANDLER_OVERRIDE_VALUES = 0x04, R_PIPEHANDLER_OVERRIDE + PIPEHANDLER_MUX_CTRL = 0x0C, R_PIPEHANDLER_MUX_CTRL + PIPEHANDLER_LOCK_REQ = 0x10, R_PIPEHANDLER_LOCK + PIPEHANDLER_LOCK_ACK = 0x14, R_PIPEHANDLER_LOCK + PIPEHANDLER_AON_GEN = 0x1C, R_PIPEHANDLER_AON_GEN + PIPEHANDLER_NONSELECTED_OVERRIDE = 0x20, R_PIPEHANDLER_NONSELECTED_OVERRIDE diff --git a/tools/proxyclient/m1n1/hw/i2c.py b/tools/proxyclient/m1n1/hw/i2c.py new file mode 100644 index 
# File: tools/proxyclient/m1n1/hw/i2c.py (new file in this diff, index 0000000..e2bda7a)
# SPDX-License-Identifier: MIT
from ..utils import *
from enum import IntEnum

__all__ = ["I2C", "I2CRegs"]


class R_MTXFIFO(Register32):
    READ = 10       # Read (DATA=count)
    STOP = 9        # Issue STOP after
    START = 8       # Issue START before
    DATA = 7, 0     # Byte to send or count

class R_MRXFIFO(Register32):
    EMPTY = 8       # FIFO empty
    DATA = 7, 0     # FIFO data

class R_MCNT(Register32):
    S_RXCNT = 31, 24    # Slave RX count
    S_TXCNT = 23, 16    # Slave TX count
    M_RXCNT = 15, 8     # Master RX count
    M_TXCNT = 7, 0      # Master TX count

class E_MST(IntEnum):
    IDLE = 0
    FRD1 = 1
    FRD2 = 2
    COMMAND = 3
    START = 4
    WRITE = 5
    READ = 6
    ACK = 7
    STOP = 8
    BAD = 15

class E_SST(IntEnum):
    IDLE = 0
    START = 1
    ST_ACK = 2
    DATA = 3
    ACK = 4

class R_XFSTA(Register32):
    MST = 27, 24, E_MST     # Master controller state
    SRD = 20                # Slave read in progress
    SWR = 19                # Slave write in progress
    SST = 18, 16, E_SST     # Slave controller state
    XFIFO = 9, 8            # FIFO number for error
    XFCNT = 7, 0            # Number of bytes in current xfer

class R_SADDR(Register32):
    DEB = 31            # Enable SDA/SCL read debug
    DIR = 30            # Direct (bitbang) mode
    ENS = 29            # Enable slave interface
    RST_STX = 28        # Reset slave TX FIFO
    # NOTE(review): the original comment said "master RX fifo"; the field
    # name (SRX, next to STX) suggests the slave RX FIFO -- confirm.
    RST_SRX = 27        # Reset slave RX fifo (if ^ both, controller too)
    PEN = 26            # Promiscuous mode (slave)
    AAE = 25            # SALT/ALTMASK enable
    SAE = 24            # SADDR enable
    ALTMASK = 23, 16    # MASK for SALT bits
    SALT = 15, 8        # Alt slave address
    SADDR = 7, 0        # Slave address

class R_SMSTA(Register32):
    XIP = 28    # Xaction in progress
    XEN = 27    # Xaction ended
    UJF = 26    # UnJam failure
    JMD = 25    # Jam occurred
    JAM = 24    # Currently jammed
    MTO = 23    # Master timeout
    MTA = 22    # Master arb lost
    MTN = 21    # Master received NACK
    MRF = 20    # Master RX fifo full
    MRNE = 19   # Master RX fifo not empty
    MTF = 17    # Master TX fifo full
    MTE = 16    # Master TX fifo empty
    STO = 15    # Slave timeout
    STA = 14    # Slave arb lost
    STN = 13    # Slave received NACK
    SRF = 12    # Slave RX fifo full
    SRNE = 11   # Slave RX fifo not empty
    STR = 10    # Slave transmit required
    STF = 9     # Slave TX fifo full
    STE = 8     # Slave TX fifo empty
    TOS = 7     # Timeout due to slave FIFO
    TOM = 6     # Timeout due to master FIFO
    TOE = 5     # Slave timeout due to ext clock stretch
    DCI = 4     # Direct clock in
    DDI = 3     # Direct data in
    DCO = 2     # Direct clock out
    DDO = 1     # Direct data out
    NN = 0      # NACK next (slave)

class R_CTL(Register32):
    MSW = 26, 16    # Maximum slave write size
    ENABLE = 11     # Unknown enable bit (clock sel? Apple thing)
    MRR = 10        # Master receive FIFO reset
    MTR = 9         # Master transmit FIFO reset
    UJM = 8         # Enable auto unjam machine
    CLK = 7, 0      # Clock divider

class R_STXFIFO(Register32):
    DATA = 7, 0     # Data

class R_SRXFIFO(Register32):
    N = 12          # NACK received after this byte
    P = 11          # Stop received, data not valid
    S = 10          # Start received before
    O = 9           # Overflow (promisc only)
    E = 8           # Empty (data not valid)
    DATA = 7, 0     # Data

# Apple reg
class R_FIFOCTL(Register32):
    HALT = 0        # Halt machinery

class I2CRegs(RegMap):
    MTXFIFO = 0x00, R_MTXFIFO
    MRXFIFO = 0x04, R_MRXFIFO
    MCNT = 0x08, R_MCNT
    XFSTA = 0x0c, R_XFSTA
    SADDR = 0x10, R_SADDR
    SMSTA = 0x14, R_SMSTA
    IMASK = 0x18, R_SMSTA
    CTL = 0x1c, R_CTL
    # NOTE(review): STXFIFO and SRXFIFO are both declared at 0x20 in the
    # original; kept as-is -- confirm against hardware documentation.
    STXFIFO = 0x20, R_STXFIFO
    SRXFIFO = 0x20, R_SRXFIFO
    FIFOCTL = 0x44, R_FIFOCTL


class I2C:
    """Polled master-mode driver for the I2C controller described by I2CRegs.

    `u` supplies `proxy`, `iface` and `adt`; `adt_path` names the ADT node
    whose first `reg` entry gives the controller base address.
    """

    # Number of register polls before a FIFO/transaction wait gives up.
    POLL_LIMIT = 10000

    def __init__(self, u, adt_path):
        self.u = u
        self.p = u.proxy
        self.iface = u.iface
        self.base = u.adt[adt_path].get_reg(0)[0]
        self.regs = I2CRegs(u, self.base)
        self.devs = []

    def clear_fifos(self):
        """Reset the master TX and RX FIFOs."""
        self.regs.CTL.set(MTR=1, MRR=1)

    def clear_status(self):
        """Write all-ones to SMSTA to clear the status bits."""
        self.regs.SMSTA.val = 0xffffffff

    def _fifo_read(self, nbytes):
        """Pop `nbytes` bytes from the master RX FIFO.

        Raises Exception("timeout") if a byte does not arrive within
        POLL_LIMIT extra polls.
        """
        read = []
        for _ in range(nbytes):
            val = self.regs.MRXFIFO.reg
            timeout = self.POLL_LIMIT
            while val.EMPTY and timeout > 0:
                val = self.regs.MRXFIFO.reg
                timeout -= 1
            # Decide on the FIFO state itself: checking `timeout == 0` would
            # discard a byte that arrived on the very last poll.
            if val.EMPTY:
                raise Exception("timeout")
            read.append(int(val) & 0xff)
        return bytes(read)

    def _fifo_write(self, buf, stop=False):
        """Queue `buf` in the master TX FIFO.

        With `stop=True` the last byte carries the STOP flag and the call
        waits for SMSTA.XEN; Exception("timeout") is raised if it never sets.
        """
        for no, byte in enumerate(buf):
            sending_stop = stop and no == len(buf) - 1
            self.regs.MTXFIFO.set(DATA=byte, STOP=int(sending_stop))

        if not stop:
            return

        # Success is decided by XEN, not by the poll counter, so a transfer
        # that completes on the final poll is not reported as a timeout.
        for _ in range(self.POLL_LIMIT):
            if self.regs.SMSTA.reg.XEN:
                return
        raise Exception("timeout")

    def write_reg(self, addr, reg, data, regaddrlen=1):
        """Write `data` to register `reg` of the device at 7-bit `addr`.

        `regaddrlen` is the register address width in bytes (big-endian).
        """
        self.clear_fifos()
        self.clear_status()

        self.regs.CTL.set(ENABLE=1, CLK=0x4)
        self.regs.MTXFIFO.set(DATA=addr << 1, START=1)
        regbytes = int.to_bytes(reg, regaddrlen, byteorder="big")
        self._fifo_write(regbytes + bytes(data), stop=True)
        self.regs.CTL.set(ENABLE=0, CLK=0x4)

    def read_reg(self, addr, reg, nbytes, regaddrlen=1):
        """Read `nbytes` from register `reg` of the device at 7-bit `addr`.

        Sends the register address, then a repeated START with the read bit
        and a READ command for `nbytes`; returns the bytes received.
        """
        self.clear_fifos()
        self.clear_status()

        self.regs.CTL.set(ENABLE=1, CLK=0x4)
        self.regs.MTXFIFO.set(DATA=addr << 1, START=1)
        regbytes = int.to_bytes(reg, regaddrlen, byteorder="big")
        self._fifo_write(regbytes, stop=False)
        self.regs.MTXFIFO.set(DATA=(addr << 1) | 1, START=1)
        self.regs.MTXFIFO.set(DATA=nbytes, STOP=1, READ=1)
        data = self._fifo_read(nbytes)
        self.regs.CTL.set(ENABLE=0, CLK=0x4)
        return data

class I2CRegMapDev:
    """Base class for an I2C device exposed through a register map.

    Subclasses set REGMAP (a register-map class instantiated with this
    device as its backend) and ADDRESSING = (paged, immediate_bytes): whether
    the device uses a page register at address 0, and how many register
    address bytes are sent on the bus.
    """
    REGMAP = None
    ADDRESSING = (0, 1)

    def __init__(self, bus, addr, name=None):
        self.bus = bus
        self.addr = addr
        self.curr_page = None
        self.name = name

        self.paged, self.regimmbytes = self.ADDRESSING
        if self.REGMAP is not None:
            self.regs = self.REGMAP(self, 0)

    @classmethod
    def from_adt(cls, bus, path):
        """Build a device from the ADT node at `path` (low byte of reg[0])."""
        node = bus.u.adt[path]
        addr = node.reg[0] & 0xff
        return cls(bus, addr, node.name)

    def _switch_page(self, page):
        assert self.paged
        self.bus.write_reg(self.addr, 0, bytes([page]),
                           regaddrlen=self.regimmbytes)
        self.curr_page = page

    def _snip_regaddr(self, addr):
        """Split `addr` into (page, immediate) at the immediate-byte width."""
        pageshift = self.regimmbytes * 8
        page = addr >> pageshift
        immediate = addr & ~(~0 << pageshift)
        return (page, immediate)

    def write(self, reg, val, width=8):
        """Write `val` (`width` bits, little-endian) to register `reg`."""
        page, imm = self._snip_regaddr(reg)

        if self.paged and page != self.curr_page:
            self._switch_page(page)

        valbytes = val.to_bytes(width//8, byteorder="little")
        self.bus.write_reg(self.addr, imm, valbytes,
                           regaddrlen=self.regimmbytes)

    def read(self, reg, width=8):
        """Read a `width`-bit little-endian value from register `reg`."""
        page, imm = self._snip_regaddr(reg)

        if self.paged and page != self.curr_page:
            self._switch_page(page)

        data = self.bus.read_reg(self.addr, imm, width//8,
                                 regaddrlen=self.regimmbytes)
        return int.from_bytes(data, byteorder='little')

    def __repr__(self):
        label = self.name or f"@ {self.addr:02x}"
        return f"<{type(self).__name__} {label}>"


# File: tools/proxyclient/m1n1/hw/isp.py (new file mode 100644, index 0000000..4d8d0fd)
import struct
from enum import IntEnum
from ..utils import *

class ISPCommandDirection(IntEnum):
    RX = 0
    TX = 1

class ISPCommand:
    """ Represents a command in any IPC channel """

    def __init__(self, channel, message, direction):
        # Only the first three qwords of the 64-byte entry are decoded.
        value, u0, u1 = struct.unpack('<3q40x', message.data)
        self.message = message
        self.channel = channel
        self.direction = direction
        self.tracer = channel.tracer
        self.raw_value = value
        # The two low bits are masked off the first qword.
        self.value = value & 0xFFFFFFFFFFFFFFFC
        self.arg0 = u0
        self.arg1 = u1

    def dump(self):
        self.log(f"[CMD Value: {hex(self.value)}, U0: {hex(self.arg0)}, U1: {hex(self.arg1)}]")

    def read_iova(self, address, length):
        return self.tracer.dart.ioread(0, address, length)

    def valid(self):
        return True

    def log(self, message):
        if self.direction is ISPCommandDirection.RX:
            self.tracer.log(f"<== [{self.channel.name}]({self.message.index}): {message}")
        else:
            self.tracer.log(f"==> [{self.channel.name}]({self.message.index}): {message}")

class ISPTerminalCommand(ISPCommand):
    """ Represents a command in TERMINAL channel

    A command arguments include a pointer to a buffer that contains log line
    and the length of the buffer. Buffers are 0x80 bytes wide.
    """
    # ISP sends buffer address at beginning
    BUFFER_ADDRESS = None
    # It seems messages are capped to 0x80 (128) bytes
    MAX_BUFFER_SIZE = 0x80

    @staticmethod
    def set_address(address):
        if address != 0:
            ISPTerminalCommand.BUFFER_ADDRESS = address

    @staticmethod
    def move_cursor():
        # Advance to the next log buffer once an address is known.
        if ISPTerminalCommand.BUFFER_ADDRESS:
            ISPTerminalCommand.BUFFER_ADDRESS += ISPTerminalCommand.MAX_BUFFER_SIZE

    def __init__(self, channel, message, direction):
        super().__init__(channel, message, direction)

        ## Set buffer address
        ISPTerminalCommand.set_address(self.value)

        ## Read contents
        self.buffer_message = self.read_iova(ISPTerminalCommand.BUFFER_ADDRESS, self.arg0)

        ## Move cursor
        ISPTerminalCommand.move_cursor()

    def dump(self):
        self.log(f"ISPCPU: {self.buffer_message}]")

    def log(self, message):
        self.tracer.log(f"[{self.channel.name}]({str(self.message.index).ljust(3)}): {message}")

class ISPIOCommand(ISPCommand):
    """ Represents a command in IO channel

    An IO command is used to request ISP to perform some operations. The command
    contains a pointer to a command struct which contains an OPCODE. The OPCODE
    is used to differentiate commands.
    """

    def __init__(self, channel, message, direction):
        super().__init__(channel, message, direction)
        self.iova = self.value
        if self.iova != 0:
            contents = self.read_iova(self.iova, 0x8)
            self.contents = int.from_bytes(contents, byteorder="little")
        else:
            self.contents = None

    def dump(self):
        if self.iova != 0:
            self.log(f"[IO Addr: {hex(self.iova)}, Size: {hex(self.arg0)}, U1: {hex(self.arg1)} -> Opcode: {hex(self.contents >> 32)}]")

class ISPT2HBufferCommand(ISPCommand):
    """ Represents a command in BUF_T2H channel """
    def __init__(self, channel, message, direction):
        super().__init__(channel, message, direction)
        self.iova = self.value
        if self.iova != 0:
            self.contents = self.read_iova(self.iova, 0x280)

    def dump(self):
        super().dump()
        if self.iova != 0:
            chexdump(self.contents)

class ISPH2TBufferCommand(ISPCommand):
    """ Represents a command in BUF_H2T channel """
    def __init__(self, channel, message, direction):
        super().__init__(channel, message, direction)
        self.iova = self.value
        if self.iova != 0:
            # Dumping first 0x20 bytes after iova translation; the internal
            # struct layout is unknown
            self.contents = self.read_iova(self.iova, 0x20)

    def dump(self):
        super().dump()
        if self.iova != 0:
            chexdump(self.contents)

class ISPT2HIOCommand(ISPCommand):
    """ Represents a command in IO_T2H channel """
    def __init__(self, channel, message, direction):
        super().__init__(channel, message, direction)
        self.iova = self.value
        if self.iova != 0:
            # Dumping first 0x20 bytes after iova translation; the internal
            # struct layout is unknown
            self.contents = self.read_iova(self.iova, 0x20)

    def dump(self):
        super().dump()
        if self.iova != 0:
            chexdump(self.contents)

class ISPSharedMallocCommand(ISPCommand):
    """ Represents a command in SHAREDMALLOC channel

    A command of this type can either request memory allocation or memory free
    depending on the arguments. When ISP needs to allocate memory, it puts a
    message in the SHAREDMALLOC channel, message arguments are length of buffer
    and type of allocation.

    CPU detects the new message, performs the memory allocation and mutates the
    original message to indicate the address of the allocated memory block.
    """

    def __init__(self, channel, message, direction):
        super().__init__(channel, message, direction)
        self.address = self.value
        self.size = self.arg0
        self.type = self.arg1 #.to_bytes(8, byteorder="little")

    def dump(self):
        # Compare by value: `is 0` tests object identity, which only works by
        # accident of CPython's small-int cache and warns on Python >= 3.8.
        if self.direction == ISPCommandDirection.RX:
            if self.address == 0:
                self.log(f"[FW Malloc, Length: {hex(self.size)}, Type: {hex(self.type)}]")
            else:
                self.log(f"[FW Free, Address: {hex(self.value)}, Length: {hex(self.size)}, Type: {hex(self.type)})]")
        else:
            if self.address == 0:
                self.log("[FW Free]")
            else:
                self.log(f"[FW Malloc, Address: {hex(self.value)}, Type: {hex(self.type)})]")

class ISPChannelTable:
    """ A class used to present IPC table.

    The Channel Table describes the IPC channels available to communicate with
    the ISP.

    In the M1 processor (tonga), the list of channels exposed by ISP are:
        [CH - TERMINAL] (src = 0, type = 2, entries = 768, iova = 0x1804700)
        [CH - IO] (src = 1, type = 0, entries = 8, iova = 0x1810700)
        [CH - BUF_H2T] (src = 2, type = 0, entries = 64, iova = 0x1810b00)
        [CH - BUF_T2H] (src = 3, type = 1, entries = 64, iova = 0x1811b00)
        [CH - SHAREDMALLOC] (src = 3, type = 1, entries = 8, iova = 0x1812b00)
        [CH - IO_T2H] (src = 3, type = 1, entries = 8, iova = 0x1812d00)

    Each entry in the table is 256 bytes wide. Here is the layout of each entry:
        0x00 - 0x1F = Name (NULL terminated string)
        0x20 - 0x3F = Padding
        0x40 - 0x43 = Type (DWORD)
        0x44 - 0x47 = Source (DWORD)
        0x48 - 0x4F = Entries (QWORD)
        0x50 - 0x57 = Address (QWORD)
    """

    ENTRY_LENGTH = 256

    def __init__(self, tracer, number_of_channels, table_address):
        self.tracer = tracer
        self.address = table_address
        self.count = number_of_channels
        self.size = number_of_channels * self.ENTRY_LENGTH
        self.channels = []

        _table = self.ioread(self.address & 0xFFFFFFFF, self.size)
        for offset in range(0, self.size, self.ENTRY_LENGTH):
            _entry = _table[offset: offset + self.ENTRY_LENGTH]
            _name, _type, _source, _entries, _address = struct.unpack('<32s32x2I2q168x', _entry)
            _channel = ISPChannel(self, _name, _type, _source, _entries, _address)
            # We want to process terminal logs as fast as possible before they are processed by CPU
            # So we use a special implementation for TERMINAL channel that fetches all logs
            if _channel.name == "TERMINAL":
                _channel = ISPTerminalChannel(self, _name, _type, _source, _entries, _address)
            self.channels.append(_channel)

    def get_last_write_command(self, doorbell_value):
        """ Gets last written message given a Doorbell value """
        if not self.channels:
            return

        names = []
        channel_cmds = []
        for channel in self.channels:
            # We want to process terminal logs as fast as possible before they are processed by CPU
            if (channel.doorbell == doorbell_value) or channel.name == "TERMINAL":
                names.append(channel.name)
                channel_cmds.extend(channel.get_commands(ISPCommandDirection.TX))

        self.log(f"CHs: [{(','.join(names))}]")
        for cmd in channel_cmds:
            cmd.dump()

    def get_last_read_command(self, pending_irq):
        """ Gets last read message given a IRQ value """
        cmds = []
        scanned_channels = []
        for channel in self.channels:
            # A channel is scanned when the IRQ bit of its source is set.
            if (pending_irq >> channel.source & 1) != 0:
                scanned_channels.append(channel.name)
                cmds.extend(channel.get_commands(ISPCommandDirection.RX))

        if scanned_channels:
            self.log(f"CHs: [{(','.join(scanned_channels))}]")
            for cmd in cmds:
                cmd.dump()

    def dump(self):
        """ Dumps the content of each channel """
        for channel in self.channels:
            channel.dump()

    def ioread(self, address, size):
        return self.tracer.ioread(address, size)

    def log(self, message):
        self.tracer.log(message)

    def __str__(self):
        s = "======== CHANNEL TABLE ========\n"
        for channel in self.channels:
            s += f"\t{str(channel)}\n"
        return s

class ISPChannel:
    """ A class used to represent IPC channel

    ISP channels are ring buffers used for communication between CPU and ISP.
    Channel length is measured in number of entries, each entry is 64 bytes,
    so channel size is '(entries * 64)' bytes.

    Channel Source is used to filter out channels when processing interrupts
    and doorbell. Each time CPU wants to notify ISP about a new message it
    writes doorbell register. On the other hand, when ISP wants to notify CPU
    about a new message it triggers a hardware interrupt.

    Channel Type is a mystery, but it seems to have a connection with cmd bit
    mask.
    """

    ENTRY_LENGTH = 64

    def __init__(self, table, name, _type, source, number_of_entries, address):
        self.table = table
        self.tracer = table.tracer
        self.name = str(name, "ascii").rstrip('\x00')
        self.source = source
        self.type = _type
        self.number_of_entries = number_of_entries
        self.entry_size = self.ENTRY_LENGTH
        self.size = self.number_of_entries * self.entry_size
        self.address = address
        self.doorbell = 1 << source
        self.last_message_sent = None
        self.last_message_received = None

    def get_commands(self, direction):
        """ Gets a command from the channel"""
        commands = []
        message = self.get_message(direction)
        if message:
            command = self.__convert2command__(message, direction)
            if command:
                commands.append(command)
        return commands

    def get_message(self, direction):
        """ Gets a message from the channel and increase the associated index

        Returns None when the slot is invalid or holds the same data as the
        last message seen for `direction`.
        """
        last_message = self.last_message_sent if direction is ISPCommandDirection.TX else self.last_message_received
        if last_message:
            # The channel is a ring: wrap to slot 0 after the last entry
            # instead of reading past the end of the channel memory.
            index = last_message.index + 1
            if index >= self.number_of_entries:
                index = 0
        else:
            index = 0
        _, message = self.__read_message__(index)
        if message:
            if last_message and last_message == message:
                return

            if direction is ISPCommandDirection.TX:
                self.last_message_sent = message
            else:
                self.last_message_received = message
        return message

    def dump(self):
        """ Dumps the content of the channel """
        s = f"[{self.name}] Channel messages: \n"
        for index in range(self.number_of_entries):
            _, message = self.__read_message__(index)
            s = s + "\t" + str(message) + "\n"
        self.table.log(s)

    def __convert2command__(self, message, direction):
        """ Converts a channel message into a command """
        if self.name == "TERMINAL":
            return ISPTerminalCommand(self, message, direction)
        elif self.name == "IO" or self.name == "DEBUG":
            return ISPIOCommand(self, message, direction)
        elif self.name == "SHAREDMALLOC":
            return ISPSharedMallocCommand(self, message, direction)
        elif self.name == "BUF_T2H":
            return ISPT2HBufferCommand(self, message, direction)
        elif self.name == "BUF_H2T":
            return ISPH2TBufferCommand(self, message, direction)
        elif self.name == "IO_T2H":
            return ISPT2HIOCommand(self, message, direction)
        else:
            return ISPCommand(self, message, direction)

    def __read_message__(self, index):
        """ Reads slot `index`; returns (next_index, message) or (0, None) """
        message_data = self.__read_by_index__(index)
        message = ISPChannelMessage(index, message_data)
        if message.valid():
            index += 1
            if index >= self.number_of_entries:
                index = 0
            return index, message
        return 0, None

    def __read_by_index__(self, index):
        return self.table.ioread(self.address + (self.entry_size * index), self.entry_size)

    def __str__(self):
        return f"[CH - {str(self.name)}] (src = {self.source!s}, type = {self.type!s}, size = {self.number_of_entries!s}, iova = {hex(self.address)!s})"

class ISPTerminalChannel(ISPChannel):
    """ Special channel implementation for TERMINAL channel
    Addresses of log buffers are removed from memory after MacOS processes them,
    hence we want to be a little bit ahead of MacOS and fetch all entries if
    possible.
    """

    def __init__(self, table, name, _type, source, number_of_entries, address):
        super().__init__(table, name, _type, source, number_of_entries, address)
        self.last_index = 0

    def get_commands(self, direction):
        """ Gets all pending commands from the channel

        Scans from the last stop position until the first invalid slot.
        `direction` is ignored; commands are always created as RX.
        """
        commands = []
        for i in range(self.number_of_entries):
            index = (self.last_index + i) % self.number_of_entries
            _, message = self.__read_message__(index)
            if message and message.valid():
                command = self.__convert2command__(message, ISPCommandDirection.RX)
                if command:
                    commands.append(command)
            else:
                # Remember where the scan stopped for the next call.
                self.last_index = index
                break
        return commands

class ISPChannelMessage:
    """ A class used to represent IPC channel message or entry

    Each entry is 64 bytes, however only 24 bytes seems to be used. These 24
    bytes are divided in three qwords (8-bytes).
    """

    def __init__(self, index, data):
        self.index = index
        self.data = data
        # Expose the eight qwords of the entry as arg0..arg7.
        for idx, arg in enumerate(struct.unpack('<8q', self.data)):
            setattr(self, f"arg{idx}", arg)

    def valid(self):
        """ Checks if a message seems to be valid

        So far I have observed that invalid messages or empty slots
        are usually marked as 0x1 (or 0x3 in case of TERMINAL msgs)
        """
        # Value comparison; `is not 0x1` compared object identity.
        return self.arg0 != 0x1 and self.arg0 != 0x3

    def __str__(self):
        s = "ISP Message: {"
        for idx, arg in enumerate(struct.unpack('<8q', self.data)):
            s = s + f"Arg{idx}: {hex(arg)}, "
        s = s + "}"
        return s

    def __eq__(self, other):
        # Messages are equal when their raw 64-byte payloads match.
        if not isinstance(other, ISPChannelMessage):
            return NotImplemented
        return self.data == other.data

class ISP_REVISION(Register32):
    REVISION = 15, 0

# NOTE(review): OTHER is declared as bits 63:8 on a Register32, and the PSReg
# entries using this layout are spaced 8 bytes apart -- these may be meant to
# be 64-bit registers. Left unchanged; confirm before relying on OTHER.
class ISP_PMU(Register32):
    STATUS = 7, 0
    OTHER = 63, 8

class ISP_PMU_SPECIAL_STATUS(Register32):
    STATUS = 7, 0
    OTHER = 63, 8

class ISPRegs(RegMap):
    ISP_CPU_CONTROL = 0x0000, Register32
    ISP_CPU_STATUS = 0x0004, Register32
    ISP_REVISION = 0x1800000, ISP_REVISION
    ISP_POWER_UNKNOWN = 0x20e0080, Register32
    ISP_IRQ_INTERRUPT = 0x2104000, Register32
    ISP_IRQ_INTERRUPT_2 = 0x2104004, Register32
    ISP_SENSOR_REF_CLOCK = irange(0x2104190, 3, 4), Register32
    ISP_GPR0 = 0x2104170, Register32
    ISP_GPR1 = 0x2104174, Register32
    ISP_GPR2 = 0x2104178, Register32
    ISP_GPR3 = 0x210417c, Register32
    ISP_GPR4 = 0x2104180, Register32
    ISP_GPR5 = 0x2104184, Register32
    ISP_GPR6 = 0x2104188, Register32
    ISP_GPR7 = 0x210418c, Register32

    ISP_DOORBELL_RING0 = 0x21043f0, Register32
    ISP_IRQ_INTERRUPT_ACK = 0x21043fc, Register32

    ISP_SMBUS_REG_MTXFIFO = irange(0x2110000, 4, 0x1000), Register32
    ISP_SMBUS_REG_MRXFIFO = irange(0x2110004, 4, 0x1000), Register32
    ISP_SMBUS_REG_UNK_1 = irange(0x2110008, 4, 0x1000), Register32
    ISP_SMBUS_REG_UNK_2 = irange(0x211000c, 4, 0x1000), Register32
    ISP_SMBUS_REG_UNK_3 = irange(0x2110010, 4, 0x1000), Register32
    ISP_SMBUS_REG_SMSTA = irange(0x2110014, 4, 0x1000), Register32
    ISP_SMBUS_REG_UNK_4 = irange(0x2110018, 4, 0x1000), Register32
    ISP_SMBUS_REG_CTL = irange(0x211001c, 4, 0x1000), Register32
    ISP_SMBUS_REG_UNK_5 = irange(0x2110020, 4, 0x1000), Register32
    ISP_SMBUS_REG_UNK_6 = irange(0x2110024, 4, 0x1000), Register32
    ISP_SMBUS_REG_REV = irange(0x2110028, 4, 0x1000), Register32
    ISP_SMBUS_REG_UNK_7 = irange(0x211002c, 4, 0x1000), Register32
    ISP_SMBUS_REG_UNK_8 = irange(0x2110030, 4, 0x1000), Register32
    ISP_SMBUS_REG_UNK_9 = irange(0x2110034, 4, 0x1000), Register32
    ISP_SMBUS_REG_UNK_A = irange(0x2110038, 4, 0x1000), Register32
    ISP_SMBUS_REG_UNK_B = irange(0x211003c, 4, 0x1000), Register32

    ISP_DPE_REG_UNK1 = 0x2504000, Register32
    ISP_DPE_REG_UNK2 = 0x2508000, Register32

    ISP_CPU_BUFFER = 0x1050000, Register32

    ISP_SPMI0_REGISTER_BASE = 0x2900000, Register32
    ISP_SPMI1_REGISTER_BASE = 0x2920000, Register32
    ISP_SPMI2_REGISTER_BASE = 0x2940000, Register32

class PSReg(RegMap):
    PMU_UNKNOWN0 = 0x4000, ISP_PMU
    PMU_UNKNOWN1 = 0x4008, ISP_PMU
    PMU_UNKNOWN2 = 0x4010, ISP_PMU
    PMU_UNKNOWN3 = 0x4018, ISP_PMU
    PMU_UNKNOWN4 = 0x4020, ISP_PMU
    PMU_UNKNOWN5 = 0x4028, ISP_PMU
    PMU_UNKNOWN6 = 0x4030, ISP_PMU
    PMU_UNKNOWN7 = 0x4038, ISP_PMU
    PMU_UNKNOWN8 = 0x4040, ISP_PMU
    PMU_UNKNOWN9 = 0x4048, ISP_PMU
    PMU_UNKNOWNA = 0x4050, ISP_PMU
    PMU_UNKNOWNB = 0x4058, ISP_PMU
    PMU_SPECIAL_STATUS = 0x4060, ISP_PMU_SPECIAL_STATUS
    CLOCK_TICK_LOW = 0x34004, Register32
    CLOCK_TICK_HIGH = 0x34008, Register32
    RT_BANDWIDTH_SCRATCH1 = 0x38014, Register32
    RT_BANDWIDTH_SCRATCH2 = 0x38018, Register32

class SPMIReg(RegMap):
    SPMI_UNKNOWN0 = 0x28, Register32
    SPMI_UNKNOWN1 = 0x40, Register32
    SPMI_UNKNOWN2 = 0x90, Register32
    SPMI_UNKNOWN3 = 0x80a0, Register32
    SPMI_UNKNOWN4 = 0x80a4, Register32
\ No newline at end of file diff --git a/tools/proxyclient/m1n1/hw/jpeg.py b/tools/proxyclient/m1n1/hw/jpeg.py new file mode 100644 index 0000000..c92bfe0 --- /dev/null +++ b/tools/proxyclient/m1n1/hw/jpeg.py @@ -0,0 +1,334 @@ +# SPDX-License-Identifier: MIT +from ..utils import * +from enum import IntEnum + + +class R_STATUS(Register32): + DONE = 0 + TIMEOUT = 1 + RD_BUF_OVERFLOW = 2 + WR_BUF_OVERFLOW = 3 + CODEC_BUF_OVERFLOW = 4 + SOME_KIND_OF_MACROBLOCK_SIZE_ERROR = 5 + AXI_ERROR = 6 + UNKNOWN_FLAG = 7 + + +class E_CODEC(IntEnum): + _444 = 0 + _422 = 1 + _411 = 2 + _420 = 3 + _400 = 4 + + +class R_CODEC(Register32): + CODEC = 2, 0, E_CODEC + + +class E_ENCODE_PIXEL_FORMAT(IntEnum): + RGB101010 = 0 + YUV10_linear = 1 + RGB888 = 2 + RGB565 = 3 + YUV_planar = 4 # partially tested, details not understood + YUV_linear = 5 # partially tested, details not understood + + +class R_ENCODE_PIXEL_FORMAT(Register32): + FORMAT = 4, 0, E_ENCODE_PIXEL_FORMAT + + +class E_SCALE(IntEnum): + DIV1 = 0 + DIV2 = 1 + DIV4 = 2 + DIV8 = 3 + + +class R_SCALE_FACTOR(Register32): + SCALE = 1, 0, E_SCALE + + +class E_DECODE_PIXEL_FORMAT(IntEnum): + YUV444_planar = 0 + YUV422_planar = 1 + YUV420_planar = 2 + YUV422_linear = 3 + _YUV10_broken_doesnt_work = 4 + RGBA8888 = 5 + RGB565 = 6 + _RGB101010_broken_doesnt_work = 7 + + +class R_DECODE_PIXEL_FORMAT(Register32): + FORMAT = 3, 0, E_DECODE_PIXEL_FORMAT + + +class E_JPEG_IO_FLAGS_SUBSAMPLING(IntEnum): + _444 = 0 + _422 = 1 + _420 = 2 + _400 = 3 + FOUR_COMPONENTS_MODE = 4 + _411_BROKEN = 6 + + +class R_JPEG_IO_FLAGS(Register32): + SUBSAMPLING_MODE = 2, 0, E_JPEG_IO_FLAGS_SUBSAMPLING + # not sure what this is supposed to do + MAKE_DECODE_WORK_BREAK_ENCODE = 3 + OUTPUT_MACROBLOCKS_UNFLIPPED_H = 4 + OUTPUT_8BYTE_CHUNKS_CORRECTLY = 5 + + +class R_JPEG_OUTPUT_FLAGS(Register32): + # bit0 doesn't seem to do anything + SKIP_HEADERS = 1 # output only SOS/EOI, no SOI/DQT/SOF0/DHT + OUTPUT_SOF0_AFTER_DHT = 2 # output SOF0 after DHT instead of before it 
+ # bit3 doesn't seem to do anything + COMPRESS_WORSE = 4 # not sure exactly what this does + + +class R_QTBL_SEL(Register32): + COMPONENT0 = 1, 0 + COMPONENT1 = 3, 2 + COMPONENT2 = 5, 4 + COMPONENT3 = 7, 6 # guessed + + +class JPEGRegs(RegMap): + REG_0x0 = 0x0, Register32 + REG_0x4 = 0x4, Register32 + MODE = 0x8, Register32 + REG_0xc = 0xc, Register32 + + REG_0x10 = 0x10, Register32 + REG_0x14 = 0x14, Register32 + REG_0x18 = 0x18, Register32 + # REG_0x1c = 0x1c, Register32 + + REG_0x20 = 0x20, Register32 + STATUS = 0x24, R_STATUS + + CODEC = 0x28, R_CODEC + + REG_0x2c = 0x2c, Register32 + REG_0x30 = 0x30, Register32 + REG_0x34 = 0x34, Register32 + # this changes the output drastically if set to 1 for decode + # breaks encode if not set to 1 + REG_0x38 = 0x38, Register32 + + # not sure what the difference is. siting? type2 seems to win over type1 + CHROMA_HALVE_H_TYPE1 = 0x3c, Register32 + CHROMA_HALVE_H_TYPE2 = 0x40, Register32 + CHROMA_HALVE_V_TYPE1 = 0x44, Register32 + CHROMA_HALVE_V_TYPE2 = 0x48, Register32 + + # if double and quadruple both set --> double + CHROMA_DOUBLE_H = 0x4c, Register32 + CHROMA_QUADRUPLE_H = 0x50, Register32 + CHROMA_DOUBLE_V = 0x54, Register32 + + # details not fully understood yet + PX_USE_PLANE1 = 0x58, Register32 + PX_TILES_W = 0x5c, Register32 + PX_TILES_H = 0x60, Register32 + PX_PLANE0_WIDTH = 0x64, Register32 + PX_PLANE0_HEIGHT = 0x68, Register32 + PX_PLANE0_TILING_H = 0x6c, Register32 + PX_PLANE0_TILING_V = 0x70, Register32 + PX_PLANE0_STRIDE = 0x74, Register32 + PX_PLANE1_WIDTH = 0x78, Register32 + PX_PLANE1_HEIGHT = 0x7c, Register32 + PX_PLANE1_TILING_H = 0x80, Register32 + PX_PLANE1_TILING_V = 0x84, Register32 + PX_PLANE1_STRIDE = 0x88, Register32 + + INPUT_START1 = 0x8c, Register32 + INPUT_START2 = 0x90, Register32 + REG_0x94 = 0x94, Register32 + REG_0x98 = 0x98, Register32 + INPUT_END = 0x9c, Register32 + + OUTPUT_START1 = 0xa0, Register32 + OUTPUT_START2 = 0xa4, Register32 + OUTPUT_END = 0xa8, Register32 + + MATRIX_MULT = 
irange(0xAC, 11, 4), Register32 + DITHER = irange(0xD8, 10, 4), Register32 + + ENCODE_PIXEL_FORMAT = 0x100, R_ENCODE_PIXEL_FORMAT + # RGB888: R, G, B = byte pos + # RGB101010: R, G, B = 0/1/2 = low/mid/high bits + # RGB565: R, G, B = 0/1/2 = low/mid/high bits + # YUV10: Y, U, V = 0/1/2 = low/mid/high bits + # YUV linear: Y0 Cb Cr Y1 = byte pos + # YUV planar: Y U V = 0 for Y, 0/1 for U/V indicating position somehow + ENCODE_COMPONENT0_POS = 0x104, Register32 + ENCODE_COMPONENT1_POS = 0x108, Register32 + ENCODE_COMPONENT2_POS = 0x10c, Register32 + ENCODE_COMPONENT3_POS = 0x110, Register32 + + CONVERT_COLOR_SPACE = 0x114, Register32 + + REG_0x118 = 0x118, Register32 + REG_0x11c = 0x11c, Register32 + + REG_0x120 = 0x120, Register32 + + # details not understood yet + TILING_ENABLE = 0x124, Register32 + TILING_PLANE0 = 0x128, Register32 + TILING_PLANE1 = 0x12c, Register32 + + DECODE_MACROBLOCKS_W = 0x130, Register32 + DECODE_MACROBLOCKS_H = 0x134, Register32 + RIGHT_EDGE_PIXELS = 0x138, Register32 + BOTTOM_EDGE_PIXELS = 0x13c, Register32 + RIGHT_EDGE_SAMPLES = 0x140, Register32 + BOTTOM_EDGE_SAMPLES = 0x144, Register32 + + SCALE_FACTOR = 0x148, R_SCALE_FACTOR + + DECODE_PIXEL_FORMAT = 0x14c, R_DECODE_PIXEL_FORMAT + # 0 = Cb Y'0 Cr Y'1 1 = Y'0 Cb Y'1 Cr + YUV422_ORDER = 0x150, Register32 + # 0 = BGRA 1 = RGBA + RGBA_ORDER = 0x154, Register32 + RGBA_ALPHA = 0x158, Register32 + + PLANAR_CHROMA_HALVING = 0x15c, Register32 + + REG_0x160 = 0x160, Register32 + REG_0x164 = 0x164, Register32 + # REG_0x168 = 0x168, Register32 + REG_0x16c = 0x16c, Register32 + + REG_0x170 = 0x170, Register32 + # REG_0x174 = 0x174, Register32 + PERFCOUNTER = 0x178, Register32 + # REG_0x17c = 0x17c, Register32 + + # REG_0x180 = 0x180, Register32 + TIMEOUT = 0x184, Register32 + HWREV = 0x188, Register32 + + REG_0x18c = 0x18c, Register32 + REG_0x190 = 0x190, Register32 + REG_0x194 = 0x194, Register32 + REG_0x198 = 0x198, Register32 + REG_0x19c = 0x19c, Register32 + + ENABLE_RST_LOGGING = 0x1a0, 
Register32 + RST_LOG_ENTRIES = 0x1a4, Register32 + + REG_0x1a8 = 0x1a8, Register32 + REG_0x1ac = 0x1ac, Register32 + REG_0x1b0 = 0x1b0, Register32 + + REG_0x1b4 = 0x1b4, Register32 + REG_0x1b8 = 0x1b8, Register32 + REG_0x1bc = 0x1bc, Register32 + + REG_0x1c0 = 0x1c0, Register32 + REG_0x1c4 = 0x1c4, Register32 + + REG_0x1c8 = 0x1c8, Register32 + + REG_0x1cc = 0x1cc, Register32 + REG_0x1d0 = 0x1d0, Register32 + REG_0x1d4 = 0x1d4, Register32 + REG_0x1d8 = 0x1d8, Register32 + + REG_0x1dc = 0x1dc, Register32 + REG_0x1e0 = 0x1e0, Register32 + REG_0x1e4 = 0x1e4, Register32 + REG_0x1e8 = 0x1e8, Register32 + + REG_0x1ec = 0x1ec, Register32 + REG_0x1f0 = 0x1f0, Register32 + REG_0x1f4 = 0x1f4, Register32 + REG_0x1f8 = 0x1f8, Register32 + + REG_0x1fc = 0x1fc, Register32 + REG_0x200 = 0x200, Register32 + + REG_0x204 = 0x204, Register32 + REG_0x208 = 0x208, Register32 + + REG_0x20c = 0x20c, Register32 + REG_0x210 = 0x210, Register32 + REG_0x214 = 0x214, Register32 + REG_0x218 = 0x218, Register32 + + REG_0x21c = 0x21c, Register32 + REG_0x220 = 0x220, Register32 + + REG_0x224 = 0x224, Register32 + REG_0x228 = 0x228, Register32 + + REG_0x22c = 0x22c, Register32 + REG_0x230 = 0x230, Register32 + REG_0x234 = 0x234, Register32 + + REG_0x238 = 0x238, Register32 + REG_0x23c = 0x23c, Register32 + REG_0x240 = 0x240, Register32 + REG_0x244 = 0x244, Register32 + REG_0x248 = 0x248, Register32 + + REG_0x24c = 0x24c, Register32 + REG_0x250 = 0x250, Register32 + REG_0x254 = 0x254, Register32 + REG_0x258 = 0x258, Register32 + REG_0x25c = 0x25c, Register32 + + REG_0x260 = 0x260, Register32 + REG_0x264 = 0x264, Register32 + REG_0x268 = 0x268, Register32 + REG_0x26c = 0x26c, Register32 + + REG_0x280 = 0x280, Register32 + + JPEG_IO_FLAGS = 0x1000, R_JPEG_IO_FLAGS + REG_0x1004 = 0x1004, Register32 + REG_0x1008 = 0x1008, Register32 + QTBL_SEL = 0x100c, R_QTBL_SEL + + # fixme what _exactly_ does this control + HUFFMAN_TABLE = 0x1010, Register32 + RST_INTERVAL = 0x1014, Register32 # 16 bits effective + 
class E_SLOT_WIDTH(IntEnum):
    """Encodings used by the SLOT_WIDTH bitfield of R_SERDES_CONF.

    The member names carry the slot width in bits; the numeric values are
    the raw field encodings.
    """
    # No slot width selected.
    NONE = 0

    W_16BIT = 0x4
    W_20BIT = 0x8
    W_24BIT = 0xc
    W_32BIT = 0x10
__all__ = ["NCO"]


def galois_lfsr(init, poly):
    """Yield successive states of a right-shifting Galois LFSR.

    ``poly`` is the feedback polynomial including its constant term, so the
    state is ``poly.bit_length() - 1`` bits wide.  Starting from ``init`` the
    generator advances the register ``2**width - 1`` times and yields the
    state after every step (``init`` itself is not yielded first).
    """
    taps = poly >> 1
    steps = (1 << (poly.bit_length() - 1)) - 1
    state = init
    for _ in range(steps):
        lsb = state & 1
        state >>= 1
        if lsb:
            state ^= taps
        yield state


def gen_lookup_tables():
    """Build the divider lookup tables used by :class:`NCO`.

    Returns a ``(fwd, inv)`` pair of dicts.  ``fwd`` maps a table index
    (starting at 2) to a register encoding: index 2 maps to 0, the following
    indices map to the LFSR sequence in reverse order.  ``inv`` is the
    reverse mapping.
    """
    lfsr_states = [0] + list(reversed(list(galois_lfsr(0x7ff, 0xa01))))
    fwd = {cycle + 2: sr_state for cycle, sr_state in enumerate(lfsr_states)}
    inv = {sr_state: index for index, sr_state in fwd.items()}
    return fwd, inv


class NCOChannel:
    """One NCO channel: four consecutive 32-bit registers starting at ``base``."""

    def __init__(self, parent, base):
        self.parent = parent
        self.base = base
        self.p = parent.u.proxy

    def enabled(self):
        """Return True if bit 31 of the first channel register is set."""
        return bool(self.p.read32(self.base) & (1<<31))

    def enable(self):
        """Set bit 31 of the first channel register."""
        self.p.set32(self.base, 1<<31)

    def disable(self):
        """Clear bit 31 of the first channel register."""
        self.p.clear32(self.base, 1<<31)

    def set_rate(self, target):
        """Program the channel for output rate ``target``.

        Raises ValueError if ``target`` cannot be generated from the parent's
        input clock.  The previous enable state is restored afterwards.
        """
        was_enabled = self.enabled()
        # The first value produced by calc_regvals() is 0, so writing it to
        # the first register also clears the enable bit while the remaining
        # registers are updated.
        for off, val in enumerate(NCO.calc_regvals(self.parent.fin, target)):
            self.p.write32(self.base + off*4, val)
        if was_enabled:
            self.enable()

    def get_rate(self):
        """Read back the four channel registers and return the configured rate."""
        return NCO.calc_rate(self.parent.fin,
            [self.p.read32(self.base + off*4) for off in range(4)]
        )

    def __repr__(self):
        return f"<NCO channel @ 0x{self.base:x}>"


class NCO:
    """Access to the channels of an NCO block described by an ADT node."""

    # Divider index -> register encoding, and the reverse mapping.
    TBL, TBL_INV = gen_lookup_tables()

    @classmethod
    def calc_rate(cls, fin, regvals):
        """Return the output rate encoded by ``regvals`` for input clock ``fin``.

        ``regvals`` is the list of the four channel register values.
        Raises ValueError if the divider field holds an unknown encoding.
        """
        try:
            # Upper bits go through the lookup table, low two bits are stored
            # verbatim.
            div = cls.TBL_INV[regvals[1] >> 2] << 2 | regvals[1] & 3
        except KeyError:
            raise ValueError("bad configuration") from None
        inc1 = regvals[2]
        # Stored as a 32-bit two's complement number; undo the bias that
        # calc_regvals() applies.
        inc2 = regvals[3] - 0x1_0000_0000
        return 2 * fin * (inc1 - inc2) // (div * (inc1 - inc2) + inc1)

    @classmethod
    def calc_regvals(cls, fin, fout):
        """Return the four register values producing ``fout`` from ``fin``.

        Raises ValueError if ``fout`` is not positive or if the required
        divider is not representable.
        """
        if fout <= 0:
            # Previously fout == 0 escaped as ZeroDivisionError.
            raise ValueError("target rate out of range")
        div = 2 * fin // fout
        inc1 = (2 * fin - div * fout)
        inc2 = inc1 - fout
        try:
            encoded_div = cls.TBL[div >> 2] << 2 | div & 3
        except KeyError:
            raise ValueError("target rate out of range") from None
        return [0, encoded_div, inc1, inc2 + 0x1_0000_0000]

    def __init__(self, u, devpath, stride=0x4000):
        self.u = u
        node = u.adt[devpath]
        # Input clock: looked up in /arm-io's clock_frequencies using the
        # node's first clock id minus 256.
        self.fin = u.adt["/arm-io"].clock_frequencies[node.clock_ids[0] - 256]

        reg = node.get_reg(0)
        # One channel every `stride` bytes across the node's register range.
        self.chans = [
            NCOChannel(self, base)
            for base in range(reg[0], reg[0] + reg[1], stride)
        ]

    def __getitem__(self, idx):
        return self.chans[idx]
class PMGR:
    """Bundle of the three PMGR register maps found under /arm-io/pmgr."""

    def __init__(self, u):
        self.u = u
        self.p = u.proxy
        self.iface = u.iface
        self.node = u.adt["/arm-io/pmgr"]
        # The n-th register map class is based at the n-th reg entry of the node.
        regmap_types = (PMGRRegs0, PMGRRegs1, PMGRRegs2)
        self.regs = [
            regmap_type(u, self.node.get_reg(index)[0])
            for index, regmap_type in enumerate(regmap_types)
        ]

    def dump_all(self):
        """Dump every register map in order."""
        for regmap in self.regs:
            regmap.dump_regs()
class PMU:
    """Access to a PMU that sits behind an SPMI bus, located through the ADT."""

    def __init__(self, u, adt_path=None):
        """Bind to the PMU node at ``adt_path``.

        When ``adt_path`` is None the primary PMU is searched for with
        find_primary_pmu(), which raises KeyError if there is none.
        """
        self.u = u
        if adt_path is None:
            adt_path = PMU.find_primary_pmu(u.adt)

        self.node = u.adt[adt_path]
        # The SPMI controller is the parent of the PMU node.
        self.spmi = SPMI(u, adt_path.rpartition('/')[0])
        self.adt_path = adt_path
        # The property is optional (find_primary_pmu() treats it that way
        # too), so a node without it is simply not primary instead of
        # raising AttributeError.
        self.primary = getattr(self.node, "is_primary", 0) == 1
        self.reg = self.node.reg[0]

    def reset_panic_counter(self):
        """Zero the byte at legacy scratchpad offset + 2 on a primary PMU.

        Does nothing when this PMU is not the primary one.
        """
        if self.primary:
            leg_scrpad = self.node.info_leg__scrpad[0]
            self.spmi.write8(self.reg, leg_scrpad + 2, 0) # error counts

    @staticmethod
    def find_primary_pmu(adt):
        """Return the ADT path of the primary 'pmu,spmi' node.

        Looks at the children of every /arm-io node whose name starts with
        "nub-spmi".  The returned path has the '/device-tree' prefix removed.
        Raises KeyError if no matching node exists.
        """
        for child in adt["/arm-io"]:
            if not child.name.startswith("nub-spmi"):
                continue
            for pmu in child:
                # Both properties may be absent on a given node.
                compat = getattr(pmu, "compatible", ["unset"])[0]
                primary = getattr(pmu, "is-primary", 0) == 1
                if compat == "pmu,spmi" and primary:
                    return pmu._path.removeprefix('/device-tree')
        raise KeyError("primary 'pmu,spmi' node not found")
+ 'flags2', # +0x004 + 'output_iova', # +0x008 + 'max_out_sz', # +0x010 + 'offset_x', # +0x014 + 'offset_y', # +0x016 + 'pix_surface_w_2_', # +0x018 + 'pix_surface_h_2_', # +0x01a + 'pix_surface_w', # +0x01c + 'pix_surface_h', # +0x01e + 'luma_stride', # +0x020 + 'chroma_stride', # +0x022 + 'alpha_stride', # +0x024 + 'unk_pad_0x26_', # +0x026 + 'luma_iova', # +0x028 + 'pix_plane0_tileheader_thing_', # +0x030 + 'chroma_iova', # +0x038 + 'pix_plane1_tileheader_thing_', # +0x040 + 'alpha_iova', # +0x048 + 'pix_plane2_tileheader_thing_', # +0x050 + 'frame_header_sz', # +0x058 + 'unk_pad_0x5a_', # +0x05a + 'bitstream_version', # +0x05b + 'encoder_identifier', # +0x05c + 'pix_surface_w_byteswap_', # +0x060 + 'pix_surface_h_byteswap_', # +0x062 + 'chroma_format_interlace_mode', # +0x064 + 'aspect_ratio_frame_rate', # +0x065 + 'color_primaries', # +0x066 + 'transfer_characteristic', # +0x067 + 'matrix_coefficients', # +0x068 + 'alpha_channel_type', # +0x069 + 'frame_hdr_reserved14', # +0x06a + 'unk_pad_0x6c_', # +0x06c + 'deprecated_number_of_slices', # +0x0ec + 'log2_desired_slice_size_in_mb', # +0x0ee + 'quantization_index', # +0x0ef + 'unk_0xf0_', # +0x0f0 + 'unk_0xf2_', # +0x0f2 + 'unk_0xf4_', # +0x0f4 + 'unk_0xfc_', # +0x0fc + 'unk_0x100_0_', # +0x100 + 'unk_0x100_1_', # +0x104 + 'unk_0x100_2_', # +0x108 + 'unk_0x100_3_', # +0x10c + 'unk_0x110_0_', # +0x110 + 'unk_0x110_1_', # +0x114 + 'unk_0x110_2_', # +0x118 + 'unk_0x110_3_', # +0x11c + 'unk_0x110_4_', # +0x120 + 'unk_0x110_5_', # +0x124 + 'unk_0x110_6_', # +0x128 + 'unk_0x110_7_', # +0x12c + 'unk_0x110_8_', # +0x130 + 'unk_0x110_9_', # +0x134 + 'unk_0x110_10_', # +0x138 + 'unk_0x110_11_', # +0x13c + 'unk_0x110_12_', # +0x140 + 'unk_0x110_13_', # +0x144 + 'unk_0x110_14_', # +0x148 + 'unk_0x110_15_', # +0x14c + 'quant_table_sel', # +0x150 + # upper nibble: quality / table index + # lower nibble UNKNOWN! 
+ 'unk_pad_0x154_', # +0x154 +]) +ENCODE_NOT_RAW_STRUCT = "<IIQIHHHHHHHHH2sQQQQQQH1sBIHHBBBBBB2s128sHBBHHQIIIIIIIIIIIIIIIIIIIIII44s" + + +class ProResRegs(RegMap): + # something reads + REG_0x0 = 0x000, Register32 + MODE = 0x008, Register32 # 4 bits + IRQ_ENABLE = 0x00c, Register32 # 2 bits + IRQ_STATUS = 0x010, Register32 + + ST0 = 0x014, Register32 # interrupt handler reads + ST1 = 0x018, Register32 # interrupt handler reads + REG_0x1c = 0x01c, Register32 # interrupt handler reads + REG_0x38 = 0x038, Register32 # exists, maybe RO + REG_0x3c = 0x03c, Register32 # interrupt handler reads + REG_0x40 = 0x040, Register32 # exists, maybe RO, looks like 0x44 + REG_0x44 = 0x044, Register32 # interrupt handler reads + REG_0x48 = 0x048, Register32 # exists, maybe RO, looks like 0x44 + REG_0x4c = 0x04c, Register32 # exists, maybe RO, looks like 0x44 + REG_0x50 = 0x050, Register32 # exists, maybe RO, looks like 0x44 + REG_0x54 = 0x054, Register32 # exists, maybe RO, looks like 0x44 + + DR_SIZE = 0x100, Register32 + DR_ADDR_LO = 0x104, Register32 + DR_ADDR_HI = 0x108, Register32 + DR_HEAD = 0x10c, Register32 # bit24 is special, something about wrapping around? 
+ DR_TAIL = 0x110, Register32 + + # This giant block may or may not be touched by tunables + # Function is all unknown + REG_0x114 = 0x114, Register32 # can set bits 0000FFFF + REG_0x118 = 0x118, Register32 # can set bits 07FF07FF + + REG_0x134 = 0x134, Register32 # can set bits 00000003 + + REG_0x144 = 0x144, Register32 # can set bits 00000001 + REG_0x148 = 0x148, Register32 # can set bits 00000001 + + REG_0x160 = 0x160, Register32 # can set bits BFFF3FFF + REG_0x164 = 0x164, Register32 # can set bits 07FF07FF + + REG_0x170 = 0x170, Register32 # can set bits BFFF3FFF + REG_0x174 = 0x174, Register32 # can set bits 07FF07FF + + REG_0x180 = 0x180, Register32 # can set bits BFFF3FFF + REG_0x184 = 0x184, Register32 # can set bits 07FF07FF + + REG_0x190 = 0x190, Register32 # can set bits BFFF3FFF + REG_0x194 = 0x194, Register32 # can set bits 000000FF + REG_0x198 = 0x198, Register32 # RO? init value 07FB066F + + REG_0x1a0 = 0x1a0, Register32 # can set bits BFFF3FFF + REG_0x1a4 = 0x1a4, Register32 # can set bits 000000FF + REG_0x1a8 = 0x1a8, Register32 # RO? init value 037C03EE + + REG_0x1b0 = 0x1b0, Register32 # can set bits BFFF3FFF + REG_0x1b4 = 0x1b4, Register32 # can set bits 000000FF + REG_0x1b8 = 0x1b8, Register32 # RO? init value 04E00377 + + REG_0x1c0 = 0x1c0, Register32 # can set bits BFFF3FFF + REG_0x1c4 = 0x1c4, Register32 # can set bits 000000FF + REG_0x1c8 = 0x1c8, Register32 # RO? 
init value 051C00DA + + REG_0x1d0 = 0x1d0, Register32 # can set bits BFFF3FFF + REG_0x1d4 = 0x1d4, Register32 # can set bits 000000FF + REG_0x1d8 = 0x1d8, Register32 # can set bits 000000FF + REG_0x1dc = 0x1dc, Register32 # can set bits 00FFFFFF + + REG_0x1ec = 0x1ec, Register32 # can set bits FFFFFFFF + + REG_0x270 = 0x270, Register32 # can set bits BFFF3FFF + REG_0x274 = 0x274, Register32 # can set bits 07FF07FF + REG_0x278 = 0x278, Register32 # can set bits FFFFFFC0 + REG_0x27c = 0x27c, Register32 # can set bits 000003FF + REG_0x280 = 0x280, Register32 # can set bits FFFFFFC0 + REG_0x284 = 0x284, Register32 # can set bits FFFFFFC0 + REG_0x28c = 0x28c, Register32 # can set bits FFFFFFC0 + + REG_0x290 = 0x290, Register32 # can set bits BFFF3FFF + REG_0x294 = 0x294, Register32 # can set bits 000000FF + REG_0x298 = 0x298, Register32 # RO? init value 07FB066F + + REG_0x2a0 = 0x2a0, Register32 # can set bits BFFF3FFF + REG_0x2a4 = 0x2a4, Register32 # can set bits 000000FF + REG_0x2a8 = 0x2a8, Register32 # RO? init value 037C03EE + + REG_0x2b0 = 0x2b0, Register32 # can set bits BFFF3FFF + REG_0x2b4 = 0x2b4, Register32 # can set bits 000000FF + REG_0x2b8 = 0x2b8, Register32 # RO? init value 04E00377 + + REG_0x2c0 = 0x2c0, Register32 # can set bits BFFF3FFF + REG_0x2c4 = 0x2c4, Register32 # can set bits 000000FF + REG_0x2c8 = 0x2c8, Register32 # RO? init value 051C00DA + + REG_0x2d0 = 0x2d0, Register32 # can set bits FFFFFFFD, CANNOT clear 00000011 + REG_0x2d4 = 0x2d4, Register32 # can set bits 00000001 + REG_0x2d8 = 0x2d8, Register32 # can set bits FFFF0007 + REG_0x2dc = 0x2dc, Register32 # RO? init value 07FB066F + REG_0x2e0 = 0x2e0, Register32 # can set bits 07FF07FF + + REG_0x2f0 = 0x2f0, Register32 # can set bits FFFFFFFF + + REG_0x2f8 = 0x2f8, Register32 # can set bits FFFFFFFD, CANNOT clear 00000011 + REG_0x2fc = 0x2fc, Register32 # can set bits 00000001 + REG_0x300 = 0x300, Register32 # can set bits FFFF0007 + REG_0x304 = 0x304, Register32 # RO? 
init value 037C03EE + REG_0x308 = 0x308, Register32 # can set bits 07FF07FF + + REG_0x318 = 0x318, Register32 # can set bits FFFFFFFF + + REG_0x320 = 0x320, Register32 # can set bits FFFFFFFD, CANNOT clear 00000011 + REG_0x324 = 0x324, Register32 # can set bits 00000001 + REG_0x328 = 0x328, Register32 # can set bits FFFF0007 + REG_0x32c = 0x32c, Register32 # RO? init value 04E00377 + REG_0x330 = 0x330, Register32 # can set bits 07FF07FF + + REG_0x340 = 0x340, Register32 # can set bits FFFFFFFF + + REG_0x350 = 0x350, Register32 # can set bits BFFF3FFF + REG_0x354 = 0x354, Register32 # can set bits 07FF07FF + REG_0x358 = 0x358, Register32 # can set bits FFFFFFC0 + REG_0x35c = 0x35c, Register32 # can set bits 000003FF + REG_0x360 = 0x360, Register32 # can set bits FFFFFFC0 + REG_0x364 = 0x364, Register32 # can set bits FFFFFFC0 + REG_0x368 = 0x368, Register32 # can set bits FFFFFFC0 + + REG_0x370 = 0x370, Register32 # can set bits BFFF3FFF + REG_0x374 = 0x374, Register32 # can set bits 07FF07FF + + + QUANT_LUMA_EHQ = irange(0x0800, 32, 4), Register32 + QUANT_LUMA_HQ = irange(0x0880, 32, 4), Register32 + QUANT_LUMA_NQ = irange(0x0900, 32, 4), Register32 + QUANT_LUMA_LT = irange(0x0980, 32, 4), Register32 + QUANT_LUMA_PROXY = irange(0x0A00, 32, 4), Register32 + QUANT_CHROMA_EHQ = irange(0x1000, 32, 4), Register32 + QUANT_CHROMA_HQ = irange(0x1080, 32, 4), Register32 + QUANT_CHROMA_NQ = irange(0x1100, 32, 4), Register32 + QUANT_CHROMA_LT = irange(0x1180, 32, 4), Register32 + QUANT_CHROMA_PROXY = irange(0x1200, 32, 4), Register32 + + # wtf, writing to this doesn't actually work? do we have to enable it? + DC_QUANT_SCALE = irange(0x1800, 112, 4), Register32 + + REG_0x19c0 = 0x19c0, Register32 # unknown, all 1s, RO? + REG_0x19c4 = 0x19c4, Register32 # unknown, all 1s, RO? + REG_0x19c8 = 0x19c8, Register32 # unknown, all 1s, RO? + REG_0x19cc = 0x19cc, Register32 # unknown, all 1s, RO? + REG_0x19d0 = 0x19d0, Register32 # unknown, all 1s, RO? 
class E_ROTATE(IntEnum):
    """Encodings used by the ROTATE bitfield (bits 1:0) of R_FLIP_ROTATE."""
    # clockwise rotations
    ROT_0 = 0
    ROT_90 = 1
    ROT_180 = 2
    ROT_270 = 3
0x00074, Register32 + + # 1, or 3 if readonly?? + START = 0x00080, Register32 + # can set all bits + REG_0x84 = 0x00084, Register32 + # can set all bits + REG_0x88 = 0x00088, Register32 + # can set 0x8000ffff + REG_0x8c = 0x0008c, Register32 + + # can set all bits + REG_0x98 = 0x00098, Register32 + # 0x3f3d? + MSR_CTRL_DBGSTS = 0x0009c, Register32 + # can set 3 + REG_0xa0 = 0x000a0, Register32 + PROFILING_RELATED = 0x000a4, Register32 + + # Can set bits 0/1/2 + # Does something breaking horizontal scaling + # bit2 seems to affect alpha output + PIXEL_AVERAGING = 0x000e4, Register32 + + TRANSFORM_ID = 0x00110, Register32 + + RDMA_THING0 = 0x00180, Register32 + RDMA_THING1 = 0x00184, Register32 + RDMA_THING2 = 0x00188, Register32 + RDMA_THING3 = 0x0018c, Register32 + RDMA_THING4 = 0x00190, Register32 + + # there's probably another source plane existing? + SRC_PLANE0_LO = 0x00198, Register32 + SRC_PLANE0_HI = 0x0019c, Register32 + SRC_PLANE1_LO = 0x001a0, Register32 + SRC_PLANE1_HI = 0x001a4, Register32 + SRC_PLANE2_LO = 0x001a8, Register32 + SRC_PLANE2_HI = 0x001ac, Register32 + + SRC_PLANE0_COMPRESSEDTHING_LO = 0x001b8, Register32 + SRC_PLANE0_COMPRESSEDTHING_HI = 0x001bc, Register32 + SRC_PLANE1_COMPRESSEDTHING_LO = 0x001c0, Register32 + SRC_PLANE1_COMPRESSEDTHING_HI = 0x001c4, Register32 + SRC_PLANE2_COMPRESSEDTHING_LO = 0x001c8, Register32 + SRC_PLANE2_COMPRESSEDTHING_HI = 0x001cc, Register32 + + SRC_PLANE0_STRIDE = 0x001d8, Register32 + SRC_PLANE1_STRIDE = 0x001dc, Register32 + SRC_PLANE2_STRIDE = 0x001e0, Register32 + + # seems to be in "pixels" + SRC_PLANE0_OFFSET = 0x001e8, Register32 + SRC_PLANE1_OFFSET = 0x001ec, Register32 + SRC_PLANE2_OFFSET = 0x001f0, Register32 + + SRC_SWIZZLE = 0x001f8, Register32 + SRC_W = 0x001fc, Register32 + SRC_H = 0x00200, Register32 + CACHE_HINTS_THING0 = irange(0x00204, 4, 4), Register32 + CACHE_HINTS_THING1 = irange(0x00214, 4, 4), Register32 + TUNABLES_THING0 = irange(0x00224, 4, 4), Register32 + SRC_SIZE_THING2 = 0x00234, 
Register32 + SRC_SIZE_THING3 = 0x00238, Register32 + SRC_SIZE_THING4 = 0x0023c, Register32 + + SRC_SIZE_THING5 = 0x00244, Register32 + SRC_SIZE_THING6 = 0x00248, Register32 + SRC_SIZE_THING7 = 0x0024c, Register32 + + WDMA_THING0 = 0x00280, Register32 + WDMA_THING1 = 0x00284, Register32 + WDMA_THING2 = 0x00288, Register32 + WDMA_THING3 = 0x0028c, Register32 + DST_PLANE0_LO = 0x00290, Register32 + DST_PLANE0_HI = 0x00294, Register32 + DST_PLANE1_LO = 0x00298, Register32 + DST_PLANE1_HI = 0x0029c, Register32 + DST_PLANE2_LO = 0x002a0, Register32 + DST_PLANE2_HI = 0x002a4, Register32 + DST_PLANE0_COMPRESSEDTHING_LO = 0x002a8, Register32 + DST_PLANE0_COMPRESSEDTHING_HI = 0x002ac, Register32 + DST_PLANE1_COMPRESSEDTHING_LO = 0x002b0, Register32 + DST_PLANE1_COMPRESSEDTHING_HI = 0x002b4, Register32 + DST_PLANE2_COMPRESSEDTHING_LO = 0x002b8, Register32 + DST_PLANE2_COMPRESSEDTHING_HI = 0x002bc, Register32 + DST_PLANE0_STRIDE = 0x002c0, Register32 + DST_PLANE1_STRIDE = 0x002c4, Register32 + DST_PLANE2_STRIDE = 0x002c8, Register32 + DST_PLANE0_OFFSET = 0x002cc, Register32 + DST_PLANE1_OFFSET = 0x002d0, Register32 + DST_PLANE2_OFFSET = 0x002d4, Register32 + DST_SWIZZLE = 0x002d8, Register32 + DST_W = 0x002dc, Register32 + DST_H = 0x002e0, Register32 + # uhh is there a macos bug with these? last val always overwritten + CACHE_HINTS_THING2 = irange(0x002e4, 3, 4), Register32 + CACHE_HINTS_THING3 = irange(0x002f0, 3, 4), Register32 + TUNABLES_THING1 = irange(0x002fc, 3, 4), Register32 + DST_SIZE_THING2 = 0x00308, Register32 + DST_SIZE_THING3 = 0x0030c, Register32 + DST_SIZE_THING4 = 0x00310, Register32 + DST_SIZE_THING5 = 0x00314, Register32 + DST_SIZE_THING6 = 0x00318, Register32 + DST_SIZE_THING7 = 0x0031c, Register32 + + FLIP_ROTATE = 0x00380, R_FLIP_ROTATE + + # can set bit 0/1 + # the output obviously changes when this is set + PSEUDO_LINEAR_SCALING = 0x00480, Register32 + + SCALE_V_FLAGS = 0x01000, R_SCALE_FLAGS + # No idea what a DDA is? 
Q1.22 or U1.22 (23 bits total) + # Also macOS doesn't touch a bunch of the V ones (uses H instead???) + SCALE_V_DDA_THING0 = 0x01004, Register32 + SCALE_V_DDA_THING1 = 0x01008, Register32 + # Q4.22 or U4.22 (26 bits total) + SCALE_V_RATIO_0 = 0x0100c, Register32 + SCALE_V_RATIO_1 = 0x01010, Register32 + SCALE_V_RATIO_2 = 0x01014, Register32 + SCALE_V_RATIO_3 = 0x01018, Register32 + SCALE_V_DDA_THING2 = 0x0101c, Register32 + SCALE_V_RATIO_4 = 0x01020, Register32 + SCALE_V_RATIO_5 = 0x01024, Register32 + + # 9 taps, 32 phases polyphase resampling filter + # Q4.12 (16-bit total) fixed point filter coeffs + # packed into 32-bit registers, 3 sets of filters total (chroma/luma/alpha??) + # exact ordering and arithmetic performed not yet clear + SCALE_FILTER_V_BLOCK0 = irange(0x01400, 9 * 32, 4), Register32 + SCALE_FILTER_V_BLOCK1 = irange(0x01c00, 9 * 32 // 2, 4), Register32 + + SCALE_H_FLAGS = 0x02000, R_SCALE_FLAGS + # No idea what a DDA is? Q1.22 or U1.22 (23 bits total) + SCALE_H_DDA_THING0 = 0x02004, Register32 + SCALE_H_DDA_THING1 = 0x02008, Register32 + # Q4.22 or U4.22 (26 bits total) + SCALE_H_RATIO_0 = 0x0200c, Register32 + SCALE_H_RATIO_1 = 0x02010, Register32 + SCALE_H_RATIO_2 = 0x02014, Register32 + SCALE_H_RATIO_3 = 0x02018, Register32 + SCALE_H_DDA_THING2 = 0x0201c, Register32 + SCALE_H_RATIO_4 = 0x02020, Register32 + SCALE_H_RATIO_5 = 0x02024, Register32 + + # 15 taps, 32 phases polyphase resampling filter + SCALE_FILTER_H_BLOCK0 = irange(0x02400, 15 * 32, 4), Register32 + SCALE_FILTER_H_BLOCK1 = irange(0x02c00, 15 * 32 // 2, 4), Register32 diff --git a/tools/proxyclient/m1n1/hw/sep.py b/tools/proxyclient/m1n1/hw/sep.py new file mode 100644 index 0000000..f206a71 --- /dev/null +++ b/tools/proxyclient/m1n1/hw/sep.py @@ -0,0 +1,169 @@ +# SPDX-License-Identifier: MIT +import struct +from collections import defaultdict, deque +from enum import IntEnum + +from ..trace.asc import ASCRegs +from ..utils import * + + +class BootRomMsg(IntEnum): + GET_STATUS = 2 + 
BOOT_TZ0 = 5 + BOOT_IMG4 = 6 + SET_SHMEM = 0x18 + + +class BootRomStatus(IntEnum): + STATUS_OK = 0x66 + STATUS_BOOT_TZ0_DONE = 0x69 + STATUS_BOOT_IMG4_DONE = 0x6A + STATUS_BOOT_UNK_DONE = 0xD2 + + +class SEPMessage(Register64): + EP = 7, 0 + TAG = 15, 8 + TYPE = 23, 16 + PARAM = 31, 24 + DATA = 63, 32 + + +# TODO: make this class actually own the shared memory instead of just +# generating a static buffer if we actually need to read/write to +# individual items inside the shmem buffer +class SEPShMem: + def __init__(self): + self.items = [] + self.offset = 0x4000 + + def add_item(self, name, data, min_size=0): + sz = align_up(len(data) + 4, 0x4000) + sz = max(sz, min_size) + self.items.append((name, self.offset, sz, struct.pack("<I", len(data)) + data)) + self.offset += sz + + def finalize(self): + bfr = bytearray(b"\x00" * self.offset) + for i, (name, offset, sz, data) in enumerate(self.items): + bfr[i * 16 : i * 16 + 12] = struct.pack("<4sII", name, sz, offset) + bfr[offset : offset + len(data)] = data + + cnt = len(self.items) + bfr[cnt * 16 : cnt * 16 + 4] = b"llun" # null + + return bfr + + +class SEP: + SHMEM_IOVA = 0xBEEF0000 + FW_IOVA = 0xDEAD0000 + + def __init__(self, proxy, iface, utils): + self.i = iface + self.p = proxy + self.u = utils + + self.sep_base = self.u.adt["/arm-io/sep"].get_reg(0)[0] + self.dart_base = self.u.adt["/arm-io/dart-sep"].get_reg(0)[0] + + self.asc = ASCRegs(self.u, self.sep_base) + + self.dart_handle = self.p.dart_init(self.dart_base, 0) + + self.epnum2name = {} + self.epname2num = {} + self.msgs = defaultdict(deque) + + def map_sepfw(self): + sepfw_addr, sepfw_size = self.u.adt["/chosen/memory-map"].SEPFW + self.p.dart_map(self.dart_handle, self.FW_IOVA, sepfw_addr, sepfw_size) + + def unmap_sepfw(self): + _, sepfw_size = self.u.adt["/chosen/memory-map"].SEPFW + self.p.dart_unmap(self.dart_handle, self.FW_IOVA, sepfw_size) + + def create_shmem(self): + shmem = SEPShMem() + + # PNIC - panic buffer + shmem.add_item(b"CINP", 
b"\x00", 0x8000) + + # ALPO / SIPS - unknown img4-like blobs from the ADT + addr, sz = self.u.adt["/chosen/boot-object-manifests"].lpol + shmem.add_item(b"OPLA", self.i.readmem(addr, sz)) + addr, sz = self.u.adt["/chosen/boot-object-manifests"].ibot + shmem.add_item(b"IPIS", self.i.readmem(addr, sz)) + + bfr = shmem.finalize() + sz = align_up(len(bfr), 0x4000) + self.shmem = self.u.heap.memalign(0x4000, 0x30000) + self.i.writemem(self.shmem, bfr) + self.p.dart_map(self.dart_handle, self.SHMEM_IOVA, self.shmem, 0x30000) + + def boot(self): + self.create_shmem() + self.map_sepfw() + + self.send_msg(SEPMessage(EP=0xFF, TYPE=BootRomMsg.GET_STATUS)) + self.expect_msg(0xFF, BootRomStatus.STATUS_OK) + + self.send_msg(SEPMessage(EP=0xFF, TYPE=BootRomMsg.BOOT_TZ0)) + self.expect_msg(0xFF, BootRomStatus.STATUS_BOOT_TZ0_DONE) + self.expect_msg(0xFF, BootRomStatus.STATUS_BOOT_UNK_DONE) + + self.send_msg(SEPMessage(EP=0xFF, TYPE=BootRomMsg.GET_STATUS)) + self.expect_msg(0xFF, BootRomStatus.STATUS_OK) + + self.send_msg( + SEPMessage(EP=0xFF, TYPE=BootRomMsg.BOOT_IMG4, DATA=self.FW_IOVA >> 0xC) + ) + self.send_msg( + SEPMessage(EP=0xFE, TYPE=BootRomMsg.SET_SHMEM, DATA=self.SHMEM_IOVA >> 0xC) + ) + + self.expect_msg(0xFF, BootRomStatus.STATUS_BOOT_IMG4_DONE) + + self.unmap_sepfw() + + def expect_msg(self, ep, type): + msg = self.recv_msg(ep, block=True) + if msg.TYPE != type: + raise ValueError( + f"Expected type 0x{type:x} but got message with type 0x{msg.TYPE:x}" + ) + + def send_msg(self, msg): + self.asc.INBOX0 = msg.value + self.asc.INBOX1 = 0 + + def _recv_single_msg(self): + msg = SEPMessage(self.asc.OUTBOX0.val) + _ = self.asc.OUTBOX1.val + return msg + + def _try_recv_msgs(self): + while not self.asc.OUTBOX_CTRL.reg.EMPTY: + msg = self._recv_single_msg() + self.msgs[msg.EP].append(msg) + self._handle_ep_discovery() + + def _handle_ep_discovery(self): + while len(self.msgs[0xFD]): + msg = self.msgs[0xFD].popleft() + if msg.TYPE == 0: + cs = "".join( + [chr((msg.DATA >> (i 
def recv_msg(self, ep, block=False):
    """Return the oldest queued message for endpoint ``ep``.

    The outbox is always polled once first. With ``block`` set, polling
    repeats until at least one message for ``ep`` is queued; otherwise
    ``None`` is returned when the queue is empty.
    """
    self._try_recv_msgs()
    if block:
        # Busy-poll the mailbox until something shows up for this endpoint.
        while not self.msgs[ep]:
            self._try_recv_msgs()

    pending = self.msgs[ep]
    return pending.popleft() if pending else None
# Register map of the SPI block. Each attribute pairs a byte offset with the
# register class used to decode the value at that offset; plain Register32 is
# used where no bitfield layout is defined for the register.
# NOTE(review): offsets are presumably relative to the controller's MMIO base
# handed to RegMap -- confirm against RegMap in ..utils.
class SPIRegs(RegMap):
    # Core control/status, data FIFOs and transfer counters.
    CTRL = 0x00, R_CTRL
    CFG = 0x04, R_CFG
    STATUS = 0x08, R_STATUS
    PIN = 0x0C, R_PIN
    TXDATA = 0x10, Register32
    RXDATA = 0x20, Register32
    CLKDIV = 0x30, R_CLKDIV
    RXCNT = 0x34, Register32
    INTER_DELAY = 0x38, R_INTER_DELAY
    TXCNT = 0x4C, Register32
    FIFOSTAT = 0x10C, R_FIFOSTAT

    # Two register pairs sharing a bit layout each (R_IRQ_XFER, R_IRQ_FIFO).
    # NOTE(review): IE_/IF_ presumably mean interrupt enable / interrupt flag.
    IE_XFER = 0x130, R_IRQ_XFER
    IF_XFER = 0x134, R_IRQ_XFER
    IE_FIFO = 0x138, R_IRQ_FIFO
    IF_FIFO = 0x13c, R_IRQ_FIFO

    SHIFTCFG = 0x150, R_SHIFTCFG
    PINCFG = 0x154, R_PINCFG

    # DELAY_PRE and DELAY_POST share the R_DELAY layout.
    DELAY_PRE = 0x160, R_DELAY
    SCKCFG = 0x164, R_SCKCFG
    DELAY_POST = 0x168, R_DELAY

    SCKPHASES = 0x180, R_SCKPHASES

    UNK_PHASE = 0x18c, Register32 # probably MISO sample point

    XFSTATUS = 0x1c0, R_XFSTATUS
    DIVSTATUS = 0x1e0, R_DIVSTATUS
class SPMI:
    """Driver for an SPMI controller located through an ADT node.

    Transfers are issued by writing an ``R_CMD`` word to the CMD register and
    collecting 32-bit words from the REPLY register.
    """

    def __init__(self, u, adt_path):
        """Bind to the controller whose first ADT ``reg`` entry gives its base."""
        self.u = u
        self.p = u.proxy
        self.iface = u.iface
        self.base = u.adt[adt_path].get_reg(0)[0]
        self.regs = SPMIRegs(u, self.base)

    def read(self, slave, reg, size):
        """Read ``size`` bytes starting at register ``reg`` of ``slave``.

        Returns the payload as ``bytes``. The first reply word is discarded
        (NOTE(review): presumably a status/header word -- confirm).
        """
        regs = self.regs

        # Drain (and show) any stale replies left in the RX FIFO.
        while not regs.STATUS.reg.RX_EMPTY:
            print(">", regs.REPLY.val)

        command = R_CMD(REG = reg, ACTIVE=1, SLAVE_ID = slave,
                        CMD = CMD_EXT_READL | (size - 1))
        regs.CMD.reg = command

        # One leading word plus enough 32-bit words to cover the payload.
        words = []
        for _ in range(0, size + 4, 4):
            while regs.STATUS.reg.RX_EMPTY:
                pass
            words.append(struct.pack("<I", regs.REPLY.val))

        reply = b"".join(words)
        return reply[4:4+size]

    def write(self, slave, reg, data):
        """Write ``data`` starting at register ``reg`` of ``slave``.

        Returns the raw reply word read back after the transfer.
        """
        regs = self.regs

        # Silently discard stale replies.
        while not regs.STATUS.reg.RX_EMPTY:
            regs.REPLY.val

        command = R_CMD(REG = reg, ACTIVE=1, SLAVE_ID = slave,
                        CMD = CMD_EXT_WRITEL | (len(data) - 1))
        regs.CMD.reg = command

        # The payload is pushed through the CMD register, 4 bytes at a time,
        # with the final word zero-padded.
        for start in range(0, len(data), 4):
            padded = data[start:start + 4] + b"\0\0\0"
            regs.CMD.val = struct.unpack_from("<I", padded)[0]

        while regs.STATUS.reg.RX_EMPTY:
            pass
        return regs.REPLY.val

    def read8(self, slave, reg):
        """Read one byte as an unsigned integer."""
        (value,) = struct.unpack("<B", self.read(slave, reg, 1))
        return value

    def read16(self, slave, reg):
        """Read a little-endian 16-bit unsigned integer."""
        (value,) = struct.unpack("<H", self.read(slave, reg, 2))
        return value

    def read32(self, slave, reg):
        """Read a little-endian 32-bit unsigned integer."""
        (value,) = struct.unpack("<I", self.read(slave, reg, 4))
        return value

    def read64(self, slave, reg):
        """Read a little-endian 64-bit unsigned integer."""
        (value,) = struct.unpack("<Q", self.read(slave, reg, 8))
        return value

    def write8(self, slave, reg, val):
        """Write ``val`` as one byte."""
        payload = struct.pack("<B", val)
        return self.write(slave, reg, payload)

    def write16(self, slave, reg, val):
        """Write ``val`` as a little-endian 16-bit value."""
        payload = struct.pack("<H", val)
        return self.write(slave, reg, payload)

    def write32(self, slave, reg, val):
        """Write ``val`` as a little-endian 32-bit value."""
        payload = struct.pack("<I", val)
        return self.write(slave, reg, payload)

    def write64(self, slave, reg, val):
        """Write ``val`` as a little-endian 64-bit value."""
        payload = struct.pack("<Q", val)
        return self.write(slave, reg, payload)
# Descriptor used for the intermediate UAT page-table levels: it points at
# the next-level table. Bits 47:14 hold the target address shifted right by 14.
class PTE(Register64):
    OFFSET = 47, 14
    UNK0 = 10 # probably an ownership flag, seems to be 1 for FW created PTEs and 0 for OS PTEs
    TYPE = 1
    VALID = 0

    def valid(self):
        """Return True when both the VALID and TYPE bits are set."""
        return self.VALID == 1 and self.TYPE == 1

    def offset(self):
        """Return the address this entry points at."""
        return self.OFFSET << 14

    def set_offset(self, offset):
        """Store ``offset`` in the entry; the low 14 bits are dropped."""
        self.OFFSET = offset >> 14

    def describe(self):
        """Return a short human-readable description of this entry."""
        if not self.valid():
            # The format spec belongs inside the braces so the raw value is
            # rendered in hex rather than as decimal followed by ":x".
            return f"<invalid> [{int(self):x}]"
        return f"{self.offset():x}, UNK={self.UNK0}"
class UatAccessor(Reloadable):
    """Read/write accessor that addresses memory through a UAT context.

    Every access translates the given address with ``uat.iotranslate`` for
    context ``ctx`` and forwards to the matching proxy/utils primitive.
    """

    def __init__(self, uat, ctx=0):
        self.uat = uat
        self.ctx = ctx

    def translate(self, addr, width):
        """Translate ``addr`` and return the resulting physical address.

        Only the first range returned by ``iotranslate`` is used, so an access
        is assumed not to cross into a physically discontiguous page.

        Raises:
            Exception: if the address is not mapped.
        """
        paddr, _ = self.uat.iotranslate(self.ctx, addr, width)[0]
        if paddr is None:
            raise Exception(f"UAT Failed to translate {addr:#x}")
        return paddr

    def read(self, addr, width):
        """Read a ``width``-bit value at ``addr``."""
        return self.uat.u.read(self.translate(addr, width), width)

    def read8(self, addr):
        return self.uat.p.read8(self.translate(addr, 1))

    def read16(self, addr):
        return self.uat.p.read16(self.translate(addr, 2))

    def read32(self, addr):
        return self.uat.p.read32(self.translate(addr, 4))

    def read64(self, addr):
        return self.uat.p.read64(self.translate(addr, 8))

    def write(self, addr, data, width):
        """Write ``data`` as a ``width``-bit value at ``addr``."""
        self.uat.u.write(self.translate(addr, width), data, width)

    def write8(self, addr, data):
        # Previously passed the undefined name `daat` (NameError on every call).
        self.uat.p.write8(self.translate(addr, 1), data)

    def write16(self, addr, data):
        # Previously called the non-existent proxy method `write6`.
        self.uat.p.write16(self.translate(addr, 2), data)

    def write32(self, addr, data):
        self.uat.p.write32(self.translate(addr, 4), data)

    def write64(self, addr, data):
        self.uat.p.write64(self.translate(addr, 8), data)
def ioread(self, ctx, base, size):
    """Read ``size`` bytes at IOVA ``base`` in context ``ctx``.

    The range is translated into physical chunks, each chunk is read through
    ``iface.readmem`` and the pieces are concatenated.

    Raises:
        Exception: if any part of the range is unmapped.
    """
    if size == 0:
        return b""

    chunks = []
    cursor = base  # IOVA of the chunk being read, for error reporting
    for paddr, length in self.iotranslate(ctx, base, size):
        if paddr is None:
            raise Exception(f"Unmapped page at iova {ctx}:{cursor:#x}")
        chunks.append(self.iface.readmem(paddr, length))
        cursor += length

    return b"".join(chunks)
def iomap(self, ctx, addr, size, **flags):
    """Map ``size`` bytes of physical memory at ``addr`` into context ``ctx``.

    An IOVA is taken from ``self.allocator``, the mapping is installed with
    ``iomap_at`` (extra ``flags`` are forwarded unchanged) and the modified
    page tables are flushed before the chosen IOVA is returned.
    """
    mapped_at = self.allocator.malloc(size)
    self.iomap_at(ctx, mapped_at, addr, size, **flags)
    self.flush_dirty()
    return mapped_at
def get_pt(self, addr, size=None, uncached=False):
    """Return ``(cached, table)`` for the page table located at ``addr``.

    ``table`` is a list of ``size`` 64-bit little-endian entries (``Lx_SIZE``
    when ``size`` is None). It is read from the target and stored in
    ``pt_cache`` when missing there or when ``uncached`` is set; ``cached``
    reports whether the existing cache entry was used.
    """
    entries = self.Lx_SIZE if size is None else size
    hit = not uncached and addr in self.pt_cache
    if not hit:
        raw = self.iface.readmem(addr, entries * 8)
        self.pt_cache[addr] = list(struct.unpack(f"<{entries}Q", raw))

    return hit, self.pt_cache[addr]
def flush_dirty(self):
    """Write back all dirty page tables, then invalidate TLBs by ASID.

    Every table address recorded in ``self.dirty`` is written to the target
    with ``flush_pt`` and the set is emptied. Afterwards one
    ``tlbi aside1os`` is issued for each context present in
    ``self.dirty_ranges``, with the context number placed in bits 63:48.
    """
    for table_addr in self.dirty:
        self.flush_pt(table_addr)
    self.dirty.clear()

    # NOTE(review): dirty_ranges is not emptied here, so every context that
    # was ever mapped gets invalidated again on each call -- confirm intent.
    for ctx in self.dirty_ranges:
        self.u.inst("tlbi aside1os, x0", ctx << 48)
def _load_base_symbols(self):
    """Load the symbols of the base m1n1 ELF and verify it matches the target.

    When no base object was given, ``build/m1n1.elf`` or
    ``build/m1n1-raw.elf`` is chosen depending on whether the running image
    is a Mach-O. Symbols are loaded relative to the proxy's base address.

    Raises:
        Exception: if the ELF's ``.rela.dyn`` contents differ from the
            relocation table read back from the target.
    """
    if self.base_object is None:
        suffix = "-raw" if not self.m1n1_is_macho() else ""
        self.base_object = f"build/m1n1{suffix}.elf"

    addrs = self._load_elf_symbols(self.base_object, self.u.proxy.get_base())

    # sanity check: compare the .rela table between ELF and m1n1 image on target
    rela_base = addrs["_rela_start"]
    rela_length = addrs["_rela_end"] - rela_base
    rela_target = self.u.iface.readmem(rela_base, rela_length)

    path = os.path.join(self.SOURCE_ROOT, self.base_object)
    # Use a self-cleaning temporary directory and close the file explicitly;
    # the scratch directory and file handle were previously leaked.
    with tempfile.TemporaryDirectory() as tmpdir:
        tmp = os.path.join(tmpdir, "bin")
        run_tool(OBJCOPY, "-O", "binary", path, tmp, "--only-section=.rela.dyn")
        with open(tmp, "rb") as f:
            rela_objfile = f.read()

    if rela_objfile[:len(rela_target)] != rela_target:
        raise Exception(f"Mismatch between {self.base_object} and image on target")
def lookup(self, addr):
    """Return the symbol entry with the highest address not above ``addr``.

    ``self.symbols`` is kept sorted, so the entry is found by bisection.
    Returns the stored ``(addr, name, objname)`` tuple, or ``(None, None)``
    when ``addr`` lies below every known symbol.
    """
    symbols = self.symbols
    # Insertion point of the first entry strictly above ``addr``.
    pos = bisect.bisect_left(symbols, (addr + 1, "", ""))
    if 0 < pos <= len(symbols):
        return symbols[pos - 1]
    return None, None
self.SOURCE_ROOT, "invoke_cc", + f"OBJFILE={objfile}", f"CFILE={cfile}", silent=True) + self.load_obj(objfile) + + +if __name__ == "__main__": + from m1n1.setup import * + lp = LinkedProgram(u) + lp.debug_printf("hello from the other side! (%d)\n", 42) + lp.load_inline_c(''' + #include "utils.h" + int add(int a, int b) { + debug_printf("adding %d and %d\\n", a, b); + return a + b; + } + ''') + print(f"1 + 2 = {lp.add(1, 2)}") diff --git a/tools/proxyclient/m1n1/macho.py b/tools/proxyclient/m1n1/macho.py new file mode 100644 index 0000000..32c7639 --- /dev/null +++ b/tools/proxyclient/m1n1/macho.py @@ -0,0 +1,270 @@ +# SPDX-License-Identifier: MIT +from io import BytesIO, SEEK_END, SEEK_SET +import bisect +from construct import * +import subprocess + +from .utils import * + +__all__ = ["MachO"] + +MachOLoadCmdType = "LoadCmdType" / Enum(Int32ul, + SYMTAB = 0x02, + UNIXTHREAD = 0x05, + SEGMENT_64 = 0x19, + UUID = 0x1b, + BUILD_VERSION = 0x32, + DYLD_CHAINED_FIXUPS = 0x80000034, + FILESET_ENTRY = 0x80000035, +) + +MachOArmThreadStateFlavor = "ThreadStateFlavor" / Enum(Int32ul, + THREAD64 = 6, +) + +MachOHeader = Struct( + "magic" / Hex(Int32ul), + "cputype" / Hex(Int32ul), + "cpusubtype" / Hex(Int32ul), + "filetype" / Hex(Int32ul), + "ncmds" / Hex(Int32ul), + "sizeofcmds" / Hex(Int32ul), + "flags" / Hex(Int32ul), + "reserved" / Hex(Int32ul), +) + +MachOVmProt = FlagsEnum(Int32sl, + PROT_READ = 0x01, + PROT_WRITE = 0x02, + PROT_EXECUTE = 0x04, +) + +MachOCmdSymTab = Struct( + "symoff" / Hex(Int32ul), + "nsyms" / Int32ul, + "stroff" / Hex(Int32ul), + "strsize" / Hex(Int32ul), +) + +MachOCmdUnixThread = GreedyRange(Struct( + "flavor" / MachOArmThreadStateFlavor, + "data" / Prefixed(ExprAdapter(Int32ul, obj_ * 4, obj_ / 4), Switch(this.flavor, { + MachOArmThreadStateFlavor.THREAD64: Struct( + "x" / Array(29, Hex(Int64ul)), + "fp" / Hex(Int64ul), + "lr" / Hex(Int64ul), + "sp" / Hex(Int64ul), + "pc" / Hex(Int64ul), + "cpsr" / Hex(Int32ul), + "flags" / Hex(Int32ul), + ) + 
})), +)) + +NList = Struct( + "n_strx" / Hex(Int32ul), + "n_type" / Hex(Int8ul), + "n_sect" / Hex(Int8ul), + "n_desc" / Hex(Int16sl), + "n_value" / Hex(Int64ul), +) + +MachOCmdSegment64 = Struct( + "segname" / PaddedString(16, "ascii"), + "vmaddr" / Hex(Int64ul), + "vmsize" / Hex(Int64ul), + "fileoff" / Hex(Int64ul), + "filesize" / Hex(Int64ul), + "maxprot" / MachOVmProt, + "initprot" / MachOVmProt, + "nsects" / Int32ul, + "flags" / Hex(Int32ul), + "sections" / GreedyRange(Struct( + "sectname" / PaddedString(16, "ascii"), + "segname" / PaddedString(16, "ascii"), + "addr" / Hex(Int64ul), + "size" / Hex(Int64ul), + "offset" / Hex(Int32ul), + "align" / Hex(Int32ul), + "reloff" / Hex(Int32ul), + "nreloc" / Hex(Int32ul), + "flags" / Hex(Int32ul), + "reserved1" / Hex(Int32ul), + "reserved2" / Hex(Int32ul), + "reserved3" / Hex(Int32ul), + )), +) + +MachOFilesetEntry = Struct( + "addr" / Hex(Int64ul), + "offset" / Hex(Int64ul), + "entryid" / Hex(Int32ul), + "reserved" / Hex(Int32ul), + "name" / CString("ascii"), +) + +MachOCmd = Struct( + "cmd" / Hex(MachOLoadCmdType), + "args" / Prefixed(ExprAdapter(Int32ul, obj_ - 8, obj_ + 8), Switch(this.cmd, { + MachOLoadCmdType.SYMTAB: MachOCmdSymTab, + MachOLoadCmdType.UNIXTHREAD: MachOCmdUnixThread, + MachOLoadCmdType.SEGMENT_64: MachOCmdSegment64, + MachOLoadCmdType.UUID: Hex(Bytes(16)), + MachOLoadCmdType.FILESET_ENTRY: MachOFilesetEntry, + }, default=GreedyBytes)), +) + +MachOFile = Struct( + "header" / MachOHeader, + "cmds" / Array(this.header.ncmds, MachOCmd), +) + +class MachO: + def __init__(self, data): + if isinstance(data, bytes): + self.io = BytesIO(data) + else: + self.io = data + + self.off = self.io.tell() + self.io.seek(0, SEEK_END) + self.end = self.io.tell() + self.size = self.end - self.off + self.io.seek(self.off, SEEK_SET) + self.obj = MachOFile.parse_stream(self.io) + self.symbols = {} + self.load_info() + self.load_fileset() + + def load_info(self): + self.vmin, self.vmax = (1 << 64), 0 + self.entry = None + 
def get_cmd(self, cmdtype):
    """Return the single load command of type ``cmdtype``.

    Raises:
        Exception: if the file contains no such command, or more than one.
    """
    cmds = list(self.get_cmds(cmdtype))
    if not cmds:
        raise Exception(f"No commands of type {cmdtype}")
    if len(cmds) > 1:
        # The count must come from `cmds`; the previous `len(cmd)` referenced
        # an undefined name and raised NameError instead of this message.
        raise Exception(f"More than one commands of type {cmdtype} (found {len(cmds)})")
    return cmds[0]
self.symbols[f"{fe.args.name}:{seg.args.segname}"] = seg.args.vmaddr + + def add_symbols(self, filename, syms, demangle=False): + try: + subfile = self.subfiles[filename] + except KeyError: + raise Exception(f"No fileset entry for {filename}") + + sym_segs = {} + for sym_seg in syms.get_cmds(MachOLoadCmdType.SEGMENT_64): + sym_segs[sym_seg.args.segname] = sym_seg + + syms.load_symbols(demangle=demangle) + symtab = [(v, k) for (k, v) in syms.symbols.items()] + symtab.sort() + + for seg in subfile.get_cmds(MachOLoadCmdType.SEGMENT_64): + if seg.args.segname not in sym_segs: + continue + + sym_seg = sym_segs[seg.args.segname] + + start = bisect.bisect_left(symtab, (sym_seg.args.vmaddr, "")) + end = bisect.bisect_left(symtab, (sym_seg.args.vmaddr + sym_seg.args.vmsize, "")) + + for addr, sym in symtab[start:end]: + sname = f"{filename}:{sym}" + self.symbols[sname] = addr - sym_seg.args.vmaddr + seg.args.vmaddr + + def load_symbols(self, demangle=False): + self.symbols = {} + + cmd = self.get_cmd(MachOLoadCmdType.SYMTAB) + + nsyms = cmd.args.nsyms + length = NList.sizeof() * nsyms + self.io.seek(self.off + cmd.args.symoff) + symdata = self.io.read(length) + + symbols = Array(nsyms, NList).parse(symdata) + + symbols_dict = {} + for i in symbols: + off = cmd.args.stroff + i.n_strx + self.io.seek(self.off + off) + name = self.io.read(1024).split(b"\x00")[0].decode("ascii") + symbols_dict[name] = i.n_value + + if demangle: + names = list(symbols_dict.keys()) + argv = ["c++filt"] + argv += names + + with subprocess.Popen(argv, stdin=subprocess.PIPE, stdout=subprocess.PIPE) as proc: + demangled, _ = proc.communicate() + + demangled = demangled.decode("ascii").split("\n")[:-1] + for name_mangled, name_demangled in zip(names, demangled): + self.symbols[name_demangled] = symbols_dict[name_mangled] + else: + self.symbols = symbols_dict + +if __name__ == "__main__": + import sys + macho = MachO(open(sys.argv[1], "rb").read()) + + if len(sys.argv) > 2: + syms = 
class Heap(object):
    """First-fit allocator that hands out addresses from [start, end).

    The range is divided into units of ``block`` bytes. ``self.blocks``
    is an ordered list of ``(length_in_units, in_use)`` runs covering the
    whole range; allocation splits a free run and freeing merges a run
    with its free neighbours.
    """

    def __init__(self, start, end, block=64):
        """Create a heap over [start, end); both must be multiples of *block*."""
        if start % block:
            raise ValueError("heap start not aligned")
        if end % block:
            raise ValueError("heap end not aligned")
        self.offset = start
        self.count = (end - start) // block
        self.blocks = [(self.count, False)]
        self.block = block

    def malloc(self, size):
        """Return the address of the first free run that can hold *size* bytes."""
        need = (size + self.block - 1) // self.block
        cursor = 0  # unit index of the run being examined, relative to the heap start
        for idx, (length, used) in enumerate(self.blocks):
            if used or length < need:
                cursor += length
                continue
            self.blocks[idx] = (need, True)
            leftover = length - need
            if leftover > 0:
                self.blocks.insert(idx + 1, (leftover, False))
            return self.offset + self.block * cursor
        raise Exception("Out of memory")

    def memalign(self, align, size):
        """Like malloc(), but the returned address is a multiple of *align*."""
        assert (align & (align - 1)) == 0
        stride = max(align, self.block) // self.block
        need = (size + self.block - 1) // self.block
        cursor = self.offset // self.block  # absolute unit index, so alignment is on real addresses
        for idx, (length, used) in enumerate(self.blocks):
            if not used:
                misalign = cursor % stride
                pad = stride - misalign if misalign else 0
                total = need + pad
                if length >= total:
                    if pad:
                        # Leading units skipped for alignment remain a free run.
                        self.blocks.insert(idx, (pad, False))
                        idx += 1
                    self.blocks[idx] = (need, True)
                    if length > total:
                        self.blocks.insert(idx + 1, (length - need - pad, False))
                    return self.block * (cursor + pad)
            cursor += length
        raise Exception("Out of memory")

    def free(self, addr):
        """Release the allocation that starts at *addr*.

        Raises ValueError if *addr* is unaligned, outside the heap, not
        the start of a run, or the run is already free.
        """
        if addr % self.block:
            raise ValueError("free address not aligned")
        if addr < self.offset:
            raise ValueError("free address before heap")
        target = (addr - self.offset) // self.block
        if target >= self.count:
            raise ValueError("free address after heap")

        cursor = 0
        for idx, (length, used) in enumerate(self.blocks):
            if cursor > target:
                raise ValueError("bad free address")
            if cursor < target:
                cursor += length
                continue
            if not used:
                raise ValueError("block already free")
            # Absorb a free predecessor, then a free successor.
            if idx != 0 and not self.blocks[idx - 1][1]:
                length += self.blocks[idx - 1][0]
                del self.blocks[idx]
                idx -= 1
            if idx != len(self.blocks) - 1 and not self.blocks[idx + 1][1]:
                length += self.blocks[idx + 1][0]
                del self.blocks[idx]
            self.blocks[idx] = (length, False)
            return
        raise ValueError("bad free address")

    def check(self):
        """Verify that the runs add up to the heap size and print usage."""
        inuse = sum(length for length, used in self.blocks if used)
        free = sum(length for length, used in self.blocks if not used)
        if free + inuse != self.count:
            raise Exception("Total block size is inconsistent")
        print("Heap stats:")
        print(" In use: %8dkB"%(inuse * self.block // 1024))
        print(" Free: %8dkB"%(free * self.block // 1024))

    @contextmanager
    def guarded_malloc(self, size):
        """Context manager yielding a malloc()ed address that is freed on exit."""
        addr = self.malloc(size)
        try:
            yield addr
        finally:
            self.free(addr)
transfers don't use checksums + + @classmethod + def get_all(cls): + return cls.DISABLE_DATA_CSUMS + + def __str__(self): + return ", ".join(feature.name for feature in self.__class__ + if feature & self) or "<none>" + + +class START(IntEnum): + BOOT = 0 + EXCEPTION = 1 + EXCEPTION_LOWER = 2 + HV = 3 + +class EXC(IntEnum): + SYNC = 0 + IRQ = 1 + FIQ = 2 + SERROR = 3 + +class EVENT(IntEnum): + MMIOTRACE = 1 + IRQTRACE = 2 + +class EXC_RET(IntEnum): + UNHANDLED = 1 + HANDLED = 2 + EXIT_GUEST = 3 + STEP = 4 + +class DCP_SHUTDOWN_MODE(IntEnum): + QUIESCED = 0 + SLEEP_IF_EXTERNAL = 1 + SLEEP = 2 + +class PIX_FMT(IntEnum): + XRGB = 0 + XBGR = 1 + +class DART(IntEnum): + T8020 = 0 + T8110 = 1 + T6000 = 2 + +ExcInfo = Struct( + "regs" / Array(32, Int64ul), + "spsr" / RegAdapter(SPSR), + "elr" / Int64ul, + "esr" / RegAdapter(ESR), + "far" / Int64ul, + "afsr1" / Int64ul, + "sp" / Array(3, Int64ul), + "cpu_id" / Int64ul, + "mpidr" / Int64ul, + "elr_phys" / Int64ul, + "far_phys" / Int64ul, + "sp_phys" / Int64ul, + "data" / Int64ul, +) +# Sends 56+ byte Commands and Expects 36 Byte Responses +# Commands are format <I48sI +# 4 byte command, 48 byte null padded data + 4 byte checksum +# Responses are of the format: struct format <Ii24sI +# 4byte Response , 4 byte status, 24 byte string, 4 byte Checksum +# Response must start 0xff55aaXX where XX distinguishes between them +# In little endian mode these numbers are listed as REQ_* constants +# defined under UartInterface +# +# Event Response REQ_EVENT passed to registered Event Handler +# Boot Response REQ_BOOT passed to handle_boot() which may +# pass to a matching registered handler based on reason, code values +# If the status is ST_OK returns the data field to caller +# Otherwise reports a remote Error + +class UartInterface(Reloadable): + REQ_NOP = 0x00AA55FF + REQ_PROXY = 0x01AA55FF + REQ_MEMREAD = 0x02AA55FF + REQ_MEMWRITE = 0x03AA55FF + REQ_BOOT = 0x04AA55FF + REQ_EVENT = 0x05AA55FF + + CHECKSUM_SENTINEL = 0xD0DECADE + 
def checksum(self, data):
    """Return the 32-bit checksum of *data* used on command/reply frames.

    Starting from 0xDEADBEEF, each byte is XORed with 0x5a and folded in
    as ``acc = acc * 31337 + byte`` modulo 2**32; the final accumulator
    is XORed with 0xADDEDBAD.
    """
    mask = 0xFFFFFFFF
    acc = 0xDEADBEEF
    for byte in data:
        acc = (acc * 31337 + (byte ^ 0x5a)) & mask
    return (acc ^ 0xADDEDBAD) & mask
def ttymode(self, dev=None):
    """Run an interactive Miniterm session on a serial device.

    dev: device to attach the terminal to; defaults to ``self.dev``.

    The device timeout is saved, cleared for the duration of the
    session, and restored afterwards. ``self.tty_enable`` is set to True
    on entry and left at False on exit, whatever its previous value was.
    """
    if dev is None:
        dev = self.dev

    # Remember the caller's timeout so it can be put back on exit.
    tout = dev.timeout
    self.tty_enable = True
    dev.timeout = None

    term = Miniterm(dev, eol='cr')
    term.exit_character = chr(0x1d) # GS/CTRL+]
    term.menu_character = chr(0x14) # Menu: CTRL+T
    term.raw = True
    term.set_rx_encoding('UTF-8')
    term.set_tx_encoding('UTF-8')

    print('--- TTY mode | Quit: CTRL+] | Menu: CTRL+T ---')
    term.start()
    try:
        # NOTE(review): presumably waits only for the transmit side to
        # finish (i.e. the user hitting the exit key) - confirm against
        # the Miniterm.join() signature.
        term.join(True)
    except KeyboardInterrupt:
        # Ctrl-C simply ends the session; cleanup continues below.
        pass

    print('--- Exit TTY mode ---')
    term.join()
    term.close()

    dev.timeout = tout
    self.tty_enable = False
def wait_boot(self):
    """Wait for the target's boot message, reconnecting if the link drops.

    Returns the data field of the REQ_BOOT reply when it arrives
    normally. If reading the reply fails, the device is closed and
    re-opened (retrying up to 100 times, 0.1 s apart) and ``None`` is
    returned, since no boot message was received on that path.

    Raises:
        UartTimeout: if the device cannot be re-opened within the
            retry budget.
    """
    try:
        return self.reply(self.REQ_BOOT)
    except Exception:
        # Only ordinary errors are treated as a dropped link.
        # KeyboardInterrupt and SystemExit derive from BaseException and
        # must propagate, so Ctrl-C aborts the wait instead of starting
        # the reconnect loop.
        # Over USB, reboots cause a reconnect
        self.dev.close()
        print("Waiting for reconnection... ", end="")
        sys.stdout.flush()
        for i in range(100):
            print(".", end="")
            sys.stdout.flush()
            try:
                self.dev.open()
            except serial.serialutil.SerialException:
                time.sleep(0.1)
            else:
                break
        else:
            raise UartTimeout("Reconnection timed out")
        print(" Connected")
struct.unpack("<I",reply[:4])[0] + data = self.readfull(size) + if self.debug: + print(">> DATA:") + chexdump(data) + ccsum = self.data_checksum(data) + if checksum != ccsum: + raise UartChecksumError("Reply data checksum error: Expected 0x%08x, got 0x%08x"%(checksum, ccsum)) + + if self.enabled_features & Feature.DISABLE_DATA_CSUMS: + # Extra sentinel after the data to make sure no data was lost + sentinel = struct.unpack("<I", self.readfull(4))[0] + if sentinel != self.DATA_END_SENTINEL: + raise UartChecksumError(f"Reply data sentinel error: Expected " + f"{self.DATA_END_SENTINEL:#x}, got {sentinel:#x}") + + return data + + def readstruct(self, addr, stype): + return stype.parse(self.readmem(addr, stype.sizeof())) + +class ProxyError(RuntimeError): + pass + +class ProxyReplyError(ProxyError): + pass + +class ProxyRemoteError(ProxyError): + pass + +class ProxyCommandError(ProxyRemoteError): + pass + +class AlignmentError(Exception): + pass + +class IODEV(IntEnum): + UART = 0 + FB = 1 + USB_VUART = 2 + USB0 = 3 + USB1 = 4 + USB2 = 5 + USB3 = 6 + USB4 = 7 + USB5 = 8 + USB6 = 9 + USB7 = 10 + +class USAGE(IntFlag): + CONSOLE = (1 << 0) + UARTPROXY = (1 << 1) + +class GUARD(IntFlag): + OFF = 0 + SKIP = 1 + MARK = 2 + RETURN = 3 + SILENT = 0x100 + +REGION_RWX_EL0 = 0x80000000000 +REGION_RW_EL0 = 0xa0000000000 +REGION_RX_EL1 = 0xc0000000000 + +# Uses UartInterface.proxyreq() to send requests to M1N1 and process +# responses sent back. 
class M1N1Proxy(Reloadable):
    """Client-side wrapper around the proxy command set.

    Each ``P_*`` constant is a request opcode. Every public method packs
    its arguments into a request, sends it through
    ``iface.proxyreq()`` and returns the value carried in the reply
    (or nothing, for commands whose result is not used).
    """

    # Status codes carried in a proxy reply.
    S_OK = 0
    S_BADCMD = -1

    # Control / call opcodes.
    P_NOP = 0x000
    P_EXIT = 0x001
    P_CALL = 0x002
    P_GET_BOOTARGS = 0x003
    P_GET_BASE = 0x004
    P_SET_BAUD = 0x005
    P_UDELAY = 0x006
    P_SET_EXC_GUARD = 0x007
    P_GET_EXC_COUNT = 0x008
    P_EL0_CALL = 0x009
    P_EL1_CALL = 0x00a
    P_VECTOR = 0x00b
    P_GL1_CALL = 0x00c
    P_GL2_CALL = 0x00d
    P_GET_SIMD_STATE = 0x00e
    P_PUT_SIMD_STATE = 0x00f
    P_REBOOT = 0x010

    # Single-word memory access opcodes.
    P_WRITE64 = 0x100
    P_WRITE32 = 0x101
    P_WRITE16 = 0x102
    P_WRITE8 = 0x103
    P_READ64 = 0x104
    P_READ32 = 0x105
    P_READ16 = 0x106
    P_READ8 = 0x107
    P_SET64 = 0x108
    P_SET32 = 0x109
    P_SET16 = 0x10a
    P_SET8 = 0x10b
    P_CLEAR64 = 0x10c
    P_CLEAR32 = 0x10d
    P_CLEAR16 = 0x10e
    P_CLEAR8 = 0x10f
    P_MASK64 = 0x110
    P_MASK32 = 0x111
    P_MASK16 = 0x112
    P_MASK8 = 0x113
    P_WRITEREAD64 = 0x114
    P_WRITEREAD32 = 0x115
    P_WRITEREAD16 = 0x116
    P_WRITEREAD8 = 0x117

    # Bulk memory opcodes.
    P_MEMCPY64 = 0x200
    P_MEMCPY32 = 0x201
    P_MEMCPY16 = 0x202
    P_MEMCPY8 = 0x203
    P_MEMSET64 = 0x204
    P_MEMSET32 = 0x205
    P_MEMSET16 = 0x206
    P_MEMSET8 = 0x207

    # Cache and MMU opcodes.
    P_IC_IALLUIS = 0x300
    P_IC_IALLU = 0x301
    P_IC_IVAU = 0x302
    P_DC_IVAC = 0x303
    P_DC_ISW = 0x304
    P_DC_CSW = 0x305
    P_DC_CISW = 0x306
    P_DC_ZVA = 0x307
    P_DC_CVAC = 0x308
    P_DC_CVAU = 0x309
    P_DC_CIVAC = 0x30a
    P_MMU_SHUTDOWN = 0x30b
    P_MMU_INIT = 0x30c
    P_MMU_DISABLE = 0x30d
    P_MMU_RESTORE = 0x30e
    P_MMU_INIT_SECONDARY = 0x30f

    P_XZDEC = 0x400
    P_GZDEC = 0x401

    P_SMP_START_SECONDARIES = 0x500
    P_SMP_CALL = 0x501
    P_SMP_CALL_SYNC = 0x502
    P_SMP_WAIT = 0x503
    P_SMP_SET_WFE_MODE = 0x504

    P_HEAPBLOCK_ALLOC = 0x600
    P_MALLOC = 0x601
    P_MEMALIGN = 0x602
    # Must differ from P_MEMALIGN, otherwise free() is sent as a
    # memalign request; 0x603 is the next sequential opcode.
    P_FREE = 0x603

    P_KBOOT_BOOT = 0x700
    P_KBOOT_SET_CHOSEN = 0x701
    P_KBOOT_SET_INITRD = 0x702
    P_KBOOT_PREPARE_DT = 0x703

    P_PMGR_CLOCK_ENABLE = 0x800
    P_PMGR_CLOCK_DISABLE = 0x801
    P_PMGR_ADT_CLOCKS_ENABLE = 0x802
    P_PMGR_ADT_CLOCKS_DISABLE = 0x803
    P_PMGR_RESET = 0x804

    P_IODEV_SET_USAGE = 0x900
    P_IODEV_CAN_READ = 0x901
    P_IODEV_CAN_WRITE = 0x902
    P_IODEV_READ = 0x903
    P_IODEV_WRITE = 0x904
    P_IODEV_WHOAMI = 0x905
    P_USB_IODEV_VUART_SETUP = 0x906

    P_TUNABLES_APPLY_GLOBAL = 0xa00
    P_TUNABLES_APPLY_LOCAL = 0xa01

    P_DART_INIT = 0xb00
    P_DART_SHUTDOWN = 0xb01
    P_DART_MAP = 0xb02
    P_DART_UNMAP = 0xb03

    P_HV_INIT = 0xc00
    P_HV_MAP = 0xc01
    P_HV_START = 0xc02
    P_HV_TRANSLATE = 0xc03
    P_HV_PT_WALK = 0xc04
    P_HV_MAP_VUART = 0xc05
    P_HV_TRACE_IRQ = 0xc06
    P_HV_WDT_START = 0xc07
    P_HV_START_SECONDARY = 0xc08
    P_HV_SWITCH_CPU = 0xc09
    P_HV_SET_TIME_STEALING = 0xc0a
    P_HV_PIN_CPU = 0xc0b
    P_HV_WRITE_HCR = 0xc0c
    P_HV_MAP_VIRTIO = 0xc0d
    P_VIRTIO_PUT_BUFFER = 0xc0e

    P_FB_INIT = 0xd00
    P_FB_SHUTDOWN = 0xd01
    P_FB_BLIT = 0xd02
    P_FB_UNBLIT = 0xd03
    P_FB_FILL = 0xd04
    P_FB_CLEAR = 0xd05
    P_FB_DISPLAY_LOGO = 0xd06
    P_FB_RESTORE_LOGO = 0xd07
    P_FB_IMPROVE_LOGO = 0xd08

    P_PCIE_INIT = 0xe00
    P_PCIE_SHUTDOWN = 0xe01

    P_NVME_INIT = 0xf00
    P_NVME_SHUTDOWN = 0xf01
    P_NVME_READ = 0xf02
    P_NVME_FLUSH = 0xf03

    P_MCC_GET_CARVEOUTS = 0x1000

    P_DISPLAY_INIT = 0x1100
    P_DISPLAY_CONFIGURE = 0x1101
    P_DISPLAY_SHUTDOWN = 0x1102
    P_DISPLAY_START_DCP = 0x1103
    P_DISPLAY_IS_EXTERNAL = 0x1104

    P_DAPF_INIT_ALL = 0x1200
    P_DAPF_INIT = 0x1201

    def __init__(self, iface, debug=False):
        """Bind the proxy to *iface*, the object providing proxyreq()/writemem().

        ``self.heap`` starts as None; request() only uploads str/bytes
        arguments once a heap has been assigned.
        """
        self.debug = debug
        self.iface = iface
        self.heap = None

    def _request(self, opcode, *args, reboot=False, signed=False, no_reply=False, pre_reply=None):
        """Send one raw proxy request and decode the reply.

        opcode: one of the P_* constants.
        args: up to six integer arguments; missing ones are sent as 0.
        reboot: wait for a boot message instead of a proxy reply; the
            method then returns None.
        signed: decode the return value as a signed 64-bit integer.
        no_reply: return right after sending, without reading a reply.
        pre_reply: optional callable forwarded to ``iface.proxyreq()``,
            which invokes it after sending the request and before
            reading the reply.

        Raises ValueError for more than six arguments, ProxyReplyError
        when the reply opcode does not match, ProxyCommandError when the
        status is S_BADCMD and ProxyRemoteError for any other non-OK
        status.
        """
        if len(args) > 6:
            raise ValueError("Too many arguments")
        args = list(args) + [0] * (6 - len(args))
        req = struct.pack("<7Q", opcode, *args)
        if self.debug:
            print("<<<< %08x: %08x %08x %08x %08x %08x %08x"%tuple([opcode] + args))
        # Forward the caller's hook; passing None here would silently
        # drop it (e.g. the baud-rate switch in set_baud()).
        reply = self.iface.proxyreq(req, reboot=reboot, no_reply=no_reply, pre_reply=pre_reply)
        if no_reply or reboot and reply is None:
            return
        ret_fmt = "q" if signed else "Q"
        rop, status, retval = struct.unpack("<Qq" + ret_fmt, reply)
        if self.debug:
            print(">>>> %08x: %d %08x"%(rop, status, retval))
        if reboot:
            return
        if rop != opcode:
            raise ProxyReplyError("Reply opcode mismatch: Expected 0x%08x, got 0x%08x"%(opcode,rop))
        if status != self.S_OK:
            if status == self.S_BADCMD:
                raise ProxyCommandError("Reply error: Bad Command")
            else:
                raise ProxyRemoteError("Reply error: Unknown error (%d)"%status)
        return retval

    def request(self, opcode, *args, **kwargs):
        """Send a request, marshalling str/bytes arguments through the heap.

        A str argument is encoded as UTF-8 with a trailing NUL. When
        ``self.heap`` is set, a bytes argument is written into a
        temporary heap allocation and replaced by its address; if the
        argument following it is None, that argument becomes the byte
        length. Temporary allocations are freed once the request
        completes, whether or not it raised.
        """
        free = []
        args = list(args)
        args2 = []
        for i, arg in enumerate(args):
            if isinstance(arg, str):
                arg = arg.encode("utf-8") + b"\0"
            if isinstance(arg, bytes) and self.heap:
                p = self.heap.malloc(len(arg))
                free.append(p)
                self.iface.writemem(p, arg)
                if (i < (len(args) - 1)) and args[i + 1] is None:
                    args[i + 1] = len(arg)
                arg = p
            args2.append(arg)
        try:
            return self._request(opcode, *args2, **kwargs)
        finally:
            for i in free:
                self.heap.free(i)

    def nop(self):
        self.request(self.P_NOP)
    def exit(self, retval=0):
        self.request(self.P_EXIT, retval)
    def call(self, addr, *args, reboot=False):
        if len(args) > 5:
            raise ValueError("Too many arguments")
        return self.request(self.P_CALL, addr, *args, reboot=reboot)
    def reload(self, addr, *args, el1=False):
        if len(args) > 4:
            raise ValueError("Too many arguments")
        if el1:
            self.request(self.P_EL1_CALL, addr, *args, no_reply=True)
        else:
            try:
                self.request(self.P_VECTOR, addr, *args)
                self.iface.wait_boot()
            except ProxyCommandError: # old m1n1 does not support P_VECTOR
                try:
                    self.mmu_shutdown()
                except ProxyCommandError: # older m1n1 does not support MMU
                    pass
                self.request(self.P_CALL, addr, *args, reboot=True)
    def get_bootargs(self):
        return self.request(self.P_GET_BOOTARGS)
    def get_base(self):
        return self.request(self.P_GET_BASE)
    def set_baud(self, baudrate):
        # TTY passthrough is switched off while the rate changes and
        # switched back on afterwards.
        self.iface.tty_enable = False
        def change():
            self.iface.dev.baudrate = baudrate
        try:
            self.request(self.P_SET_BAUD, baudrate, 16, 0x005aa5f0, pre_reply=change)
        finally:
            self.iface.tty_enable = True
    def udelay(self, usec):
        self.request(self.P_UDELAY, usec)
    def set_exc_guard(self, mode):
        self.request(self.P_SET_EXC_GUARD, mode)
    def get_exc_count(self):
        return self.request(self.P_GET_EXC_COUNT)
    def el0_call(self, addr, *args):
        if len(args) > 4:
            raise ValueError("Too many arguments")
        return self.request(self.P_EL0_CALL, addr, *args)
    def el1_call(self, addr, *args):
        if len(args) > 4:
            raise ValueError("Too many arguments")
        return self.request(self.P_EL1_CALL, addr, *args)
    def gl1_call(self, addr, *args):
        if len(args) > 4:
            raise ValueError("Too many arguments")
        return self.request(self.P_GL1_CALL, addr, *args)
    def gl2_call(self, addr, *args):
        if len(args) > 4:
            raise ValueError("Too many arguments")
        return self.request(self.P_GL2_CALL, addr, *args)
    def get_simd_state(self, buf):
        self.request(self.P_GET_SIMD_STATE, buf)
    def put_simd_state(self, buf):
        self.request(self.P_PUT_SIMD_STATE, buf)
    def reboot(self):
        self.request(self.P_REBOOT, no_reply=True)

    def write64(self, addr, data):
        '''write 8 byte value to given address'''
        if addr & 7:
            raise AlignmentError()
        self.request(self.P_WRITE64, addr, data)
    def write32(self, addr, data):
        '''write 4 byte value to given address'''
        if addr & 3:
            raise AlignmentError()
        self.request(self.P_WRITE32, addr, data)
    def write16(self, addr, data):
        '''write 2 byte value to given address'''
        if addr & 1:
            raise AlignmentError()
        self.request(self.P_WRITE16, addr, data)
    def write8(self, addr, data):
        '''write 1 byte value to given address'''
        self.request(self.P_WRITE8, addr, data)

    def read64(self, addr):
        '''return 8 byte value from given address'''
        if addr & 7:
            raise AlignmentError()
        return self.request(self.P_READ64, addr)
    def read32(self, addr):
        '''return 4 byte value from given address'''
        if addr & 3:
            raise AlignmentError()
        return self.request(self.P_READ32, addr)
    def read16(self, addr):
        '''return 2 byte value from given address'''
        if addr & 1:
            raise AlignmentError()
        return self.request(self.P_READ16, addr)
    def read8(self, addr):
        '''return 1 byte value from given address'''
        return self.request(self.P_READ8, addr)

    def set64(self, addr, data):
        '''Or 64 bit value of data into memory at addr and return result'''
        if addr & 7:
            raise AlignmentError()
        return self.request(self.P_SET64, addr, data)
    def set32(self, addr, data):
        '''Or 32 bit value of data into memory at addr and return result'''
        if addr & 3:
            raise AlignmentError()
        return self.request(self.P_SET32, addr, data)
    def set16(self, addr, data):
        '''Or 16 bit value of data into memory at addr and return result'''
        if addr & 1:
            raise AlignmentError()
        return self.request(self.P_SET16, addr, data)
    def set8(self, addr, data):
        '''Or byte value of data into memory at addr and return result'''
        return self.request(self.P_SET8, addr, data)

    def clear64(self, addr, data):
        '''Clear bits in 64 bit memory at address addr that are set
        in parameter data and return result'''
        if addr & 7:
            raise AlignmentError()
        return self.request(self.P_CLEAR64, addr, data)
    def clear32(self, addr, data):
        '''Clear bits in 32 bit memory at address addr that are set
        in parameter data and return result'''
        if addr & 3:
            raise AlignmentError()
        return self.request(self.P_CLEAR32, addr, data)
    def clear16(self, addr, data):
        '''Clear bits in 16 bit memory at address addr that are set
        in parameter data and return result'''
        if addr & 1:
            raise AlignmentError()
        return self.request(self.P_CLEAR16, addr, data)
    def clear8(self, addr, data):
        '''Clear bits in 8 bit memory at addr that are set in data
        and return result'''
        return self.request(self.P_CLEAR8, addr, data)

    def mask64(self, addr, clear, set):
        '''Clear bits in 64 bit memory at address addr that are
        set in clear, then set the bits in set and return result'''
        if addr & 7:
            raise AlignmentError()
        return self.request(self.P_MASK64, addr, clear, set)
    def mask32(self, addr, clear, set):
        '''Clear bits in 32 bit memory at address addr that are
        set in clear, then set the bits in set and return result'''
        if addr & 3:
            raise AlignmentError()
        return self.request(self.P_MASK32, addr, clear, set)
    def mask16(self, addr, clear, set):
        '''Clear select bits in 16 bit memory addr that are set
        in clear parameter, then set the bits in set parameter and return result'''
        if addr & 1:
            raise AlignmentError()
        return self.request(self.P_MASK16, addr, clear, set)
    def mask8(self, addr, clear, set):
        '''Clear bits in 1 byte memory at addr that are set
        in clear parameter, then set the bits in set parameter
        and return the result'''
        return self.request(self.P_MASK8, addr, clear, set)

    def writeread64(self, addr, data):
        return self.request(self.P_WRITEREAD64, addr, data)
    def writeread32(self, addr, data):
        return self.request(self.P_WRITEREAD32, addr, data)
    def writeread16(self, addr, data):
        return self.request(self.P_WRITEREAD16, addr, data)
    def writeread8(self, addr, data):
        return self.request(self.P_WRITEREAD8, addr, data)

    def memcpy64(self, dst, src, size):
        if src & 7 or dst & 7:
            raise AlignmentError()
        self.request(self.P_MEMCPY64, dst, src, size)
    def memcpy32(self, dst, src, size):
        if src & 3 or dst & 3:
            raise AlignmentError()
        self.request(self.P_MEMCPY32, dst, src, size)
    def memcpy16(self, dst, src, size):
        if src & 1 or dst & 1:
            raise AlignmentError()
        self.request(self.P_MEMCPY16, dst, src, size)
    def memcpy8(self, dst, src, size):
        self.request(self.P_MEMCPY8, dst, src, size)

    def memset64(self, dst, src, size):
        if dst & 7:
            raise AlignmentError()
        self.request(self.P_MEMSET64, dst, src, size)
    def memset32(self, dst, src, size):
        if dst & 3:
            raise AlignmentError()
        self.request(self.P_MEMSET32, dst, src, size)
    def memset16(self, dst, src, size):
        if dst & 1:
            raise AlignmentError()
        self.request(self.P_MEMSET16, dst, src, size)
    def memset8(self, dst, src, size):
        self.request(self.P_MEMSET8, dst, src, size)

    def ic_ialluis(self):
        self.request(self.P_IC_IALLUIS)
    def ic_iallu(self):
        self.request(self.P_IC_IALLU)
    def ic_ivau(self, addr, size):
        self.request(self.P_IC_IVAU, addr, size)
    def dc_ivac(self, addr, size):
        self.request(self.P_DC_IVAC, addr, size)
    def dc_isw(self, sw):
        self.request(self.P_DC_ISW, sw)
    def dc_csw(self, sw):
        self.request(self.P_DC_CSW, sw)
    def dc_cisw(self, sw):
        self.request(self.P_DC_CISW, sw)
    def dc_zva(self, addr, size):
        self.request(self.P_DC_ZVA, addr, size)
    def dc_cvac(self, addr, size):
        self.request(self.P_DC_CVAC, addr, size)
    def dc_cvau(self, addr, size):
        self.request(self.P_DC_CVAU, addr, size)
    def dc_civac(self, addr, size):
        self.request(self.P_DC_CIVAC, addr, size)
    def mmu_shutdown(self):
        self.request(self.P_MMU_SHUTDOWN)
    def mmu_init(self):
        self.request(self.P_MMU_INIT)
    def mmu_disable(self):
        return self.request(self.P_MMU_DISABLE)
    def mmu_restore(self, flags):
        self.request(self.P_MMU_RESTORE, flags)
    def mmu_init_secondary(self, cpu):
        self.request(self.P_MMU_INIT_SECONDARY, cpu)


    def xzdec(self, inbuf, insize, outbuf=0, outsize=0):
        return self.request(self.P_XZDEC, inbuf, insize, outbuf,
                            outsize, signed=True)

    def gzdec(self, inbuf, insize, outbuf, outsize):
        return self.request(self.P_GZDEC, inbuf, insize, outbuf,
                            outsize, signed=True)

    def smp_start_secondaries(self):
        self.request(self.P_SMP_START_SECONDARIES)
    def smp_call(self, cpu, addr, *args):
        if len(args) > 4:
            raise ValueError("Too many arguments")
        self.request(self.P_SMP_CALL, cpu, addr, *args)
    def smp_call_sync(self, cpu, addr, *args):
        if len(args) > 4:
            raise ValueError("Too many arguments")
        return self.request(self.P_SMP_CALL_SYNC, cpu, addr, *args)
    def smp_wait(self, cpu):
        return self.request(self.P_SMP_WAIT, cpu)
    def smp_set_wfe_mode(self, mode):
        return self.request(self.P_SMP_SET_WFE_MODE, mode)

    def heapblock_alloc(self, size):
        return self.request(self.P_HEAPBLOCK_ALLOC, size)
    def malloc(self, size):
        return self.request(self.P_MALLOC, size)
    def memalign(self, align, size):
        return self.request(self.P_MEMALIGN, align, size)
    def free(self, ptr):
        self.request(self.P_FREE, ptr)

    def kboot_boot(self, kernel):
        self.request(self.P_KBOOT_BOOT, kernel)
    def kboot_set_chosen(self, name, value):
        self.request(self.P_KBOOT_SET_CHOSEN, name, value)
    def kboot_set_initrd(self, base, size):
        self.request(self.P_KBOOT_SET_INITRD, base, size)
    def kboot_prepare_dt(self, dt_addr):
        return self.request(self.P_KBOOT_PREPARE_DT, dt_addr)

    def pmgr_clock_enable(self, clkid):
        return self.request(self.P_PMGR_CLOCK_ENABLE, clkid)
    def pmgr_clock_disable(self, clkid):
        return self.request(self.P_PMGR_CLOCK_DISABLE, clkid)
    def pmgr_adt_clocks_enable(self, path):
        return self.request(self.P_PMGR_ADT_CLOCKS_ENABLE, path)
    def pmgr_adt_clocks_disable(self, path):
        return self.request(self.P_PMGR_ADT_CLOCKS_DISABLE, path)
    def pmgr_reset(self, die, name):
        return self.request(self.P_PMGR_RESET, die, name)

    def iodev_set_usage(self, iodev, usage):
        return self.request(self.P_IODEV_SET_USAGE, iodev, usage)
    def iodev_can_read(self, iodev):
        return self.request(self.P_IODEV_CAN_READ, iodev)
    def iodev_can_write(self, iodev):
        return self.request(self.P_IODEV_CAN_WRITE, iodev)
    def iodev_read(self, iodev, buf, size=None):
        return self.request(self.P_IODEV_READ, iodev, buf, size)
    def iodev_write(self, iodev, buf, size=None):
        return self.request(self.P_IODEV_WRITE, iodev, buf, size)
    def iodev_whoami(self):
        return IODEV(self.request(self.P_IODEV_WHOAMI))
    def usb_iodev_vuart_setup(self, iodev):
        return self.request(self.P_USB_IODEV_VUART_SETUP, iodev)

    def tunables_apply_global(self, path, prop):
        return self.request(self.P_TUNABLES_APPLY_GLOBAL, path, prop)
    def tunables_apply_local(self, path, prop, reg_offset):
        return self.request(self.P_TUNABLES_APPLY_LOCAL, path, prop, reg_offset)
    def tunables_apply_local_addr(self, path, prop, base):
        # NOTE(review): sends the same opcode as tunables_apply_local();
        # confirm the target has no dedicated opcode for the address form.
        return self.request(self.P_TUNABLES_APPLY_LOCAL, path, prop, base)

    def dart_init(self, base, sid, dart_type=DART.T8020):
        return self.request(self.P_DART_INIT, base, sid, dart_type)
    def dart_shutdown(self, dart):
        return self.request(self.P_DART_SHUTDOWN, dart)
    def dart_map(self, dart, iova, bfr, len):
        return self.request(self.P_DART_MAP, dart, iova, bfr, len)
    def dart_unmap(self, dart, iova, len):
        return self.request(self.P_DART_UNMAP, dart, iova, len)

    def hv_init(self):
        return self.request(self.P_HV_INIT)
    def hv_map(self, from_, to, size, incr):
        return self.request(self.P_HV_MAP, from_, to, size, incr)
    def hv_start(self, entry, *args):
        return self.request(self.P_HV_START, entry, *args)
    def hv_translate(self, addr, s1=False, w=False):
        '''Translate virtual address
        stage 1 only if s1, for write if w'''
        return self.request(self.P_HV_TRANSLATE, addr, s1, w)
    def hv_pt_walk(self, addr):
        return self.request(self.P_HV_PT_WALK, addr)
    def hv_map_vuart(self, base, irq, iodev):
        return self.request(self.P_HV_MAP_VUART, base, irq, iodev)
    def hv_trace_irq(self, evt_type, num, count, flags):
        return self.request(self.P_HV_TRACE_IRQ, evt_type, num, count, flags)
    def hv_wdt_start(self, cpu):
        return self.request(self.P_HV_WDT_START, cpu)
    def hv_start_secondary(self, cpu, entry, *args):
        return self.request(self.P_HV_START_SECONDARY, cpu, entry, *args)
    def hv_switch_cpu(self, cpu):
        return self.request(self.P_HV_SWITCH_CPU, cpu)
    def hv_set_time_stealing(self, enabled, reset):
        return self.request(self.P_HV_SET_TIME_STEALING, int(bool(enabled)), int(bool(reset)))
    def hv_pin_cpu(self, cpu):
        return self.request(self.P_HV_PIN_CPU, cpu)
    def hv_write_hcr(self, hcr):
        return self.request(self.P_HV_WRITE_HCR, hcr)
    def hv_map_virtio(self, base, config):
        return self.request(self.P_HV_MAP_VIRTIO, base, config)
    def virtio_put_buffer(self, base, qu, idx, length):
        return self.request(self.P_VIRTIO_PUT_BUFFER, base, qu, idx, length)

    def fb_init(self):
        return self.request(self.P_FB_INIT)
    def fb_shutdown(self, restore_logo=True):
        return self.request(self.P_FB_SHUTDOWN, restore_logo)
    def fb_blit(self, x, y, w, h, ptr, stride, pix_fmt=PIX_FMT.XRGB):
        # The pixel format travels in the upper 32 bits of the stride argument.
        return self.request(self.P_FB_BLIT, x, y, w, h, ptr, stride | pix_fmt << 32)
    def fb_unblit(self, x, y, w, h, ptr, stride):
        return self.request(self.P_FB_UNBLIT, x, y, w, h, ptr, stride)
    def fb_fill(self, x, y, w, h, color):
        return self.request(self.P_FB_FILL, x, y, w, h, color)
    def fb_clear(self, color):
        return self.request(self.P_FB_CLEAR, color)
    def fb_display_logo(self):
        return self.request(self.P_FB_DISPLAY_LOGO)
    def fb_restore_logo(self):
        return self.request(self.P_FB_RESTORE_LOGO)
    def fb_improve_logo(self):
        return self.request(self.P_FB_IMPROVE_LOGO)

    def pcie_init(self):
        return self.request(self.P_PCIE_INIT)
    def pcie_shutdown(self):
        return self.request(self.P_PCIE_SHUTDOWN)

    def nvme_init(self):
        return self.request(self.P_NVME_INIT)
    def nvme_shutdown(self):
        return self.request(self.P_NVME_SHUTDOWN)
    def nvme_read(self, nsid, lba, bfr):
        return self.request(self.P_NVME_READ, nsid, lba, bfr)
    def nvme_flush(self, nsid):
        return self.request(self.P_NVME_FLUSH, nsid)

    def mcc_get_carveouts(self):
        return self.request(self.P_MCC_GET_CARVEOUTS)

    def display_init(self):
        return self.request(self.P_DISPLAY_INIT)
    def display_configure(self, cfg):
        return self.request(self.P_DISPLAY_CONFIGURE, cfg)
    def display_shutdown(self, mode):
        return self.request(self.P_DISPLAY_SHUTDOWN, mode)
    def display_start_dcp(self):
        return self.request(self.P_DISPLAY_START_DCP)
    def display_is_external(self):
        return self.request(self.P_DISPLAY_IS_EXTERNAL)

    def dapf_init_all(self):
        return self.request(self.P_DAPF_INIT_ALL)
    def dapf_init(self, path):
        return self.request(self.P_DAPF_INIT, path)
import adt + +__all__ = ["ProxyUtils", "RegMonitor", "GuardedHeap", "bootstrap_port"] + +SIMD_B = Array(32, Array(16, Int8ul)) +SIMD_H = Array(32, Array(8, Int16ul)) +SIMD_S = Array(32, Array(4, Int32ul)) +SIMD_D = Array(32, Array(2, Int64ul)) +SIMD_Q = Array(32, BytesInteger(16, swapped=True)) + +# This isn't perfect, since multiple versions could have the same +# iBoot version, but it's good enough +VERSION_MAP = { + "iBoot-7429.61.2": "V12_1", + "iBoot-7459.101.2": "V12_3", + "iBoot-7459.121.3": "V12_4", + "iBoot-8419.0.151.0.1": "V13_0B4", +} + +class ProxyUtils(Reloadable): + CODE_BUFFER_SIZE = 0x10000 + def __init__(self, p, heap_size=1024 * 1024 * 1024): + self.iface = p.iface + self.proxy = p + self.base = p.get_base() + self.ba_addr = p.get_bootargs() + + self.ba = self.iface.readstruct(self.ba_addr, BootArgs) + + # We allocate a heap of heap_size bytes (1GB by default), 128MB after the m1n1 heap, without telling m1n1 about it. + # This frees us from having to coordinate memory management or free stuff after a Python + # script runs, at the expense that if m1n1 ever uses more than 128MB of heap it will + # clash with Python (m1n1 will normally not use *any* heap when running proxy ops though, + # except when running very high-level operations like booting a kernel, so this should be + # OK). 
+ self.heap_size = heap_size + try: + self.heap_base = p.heapblock_alloc(0) + except ProxyRemoteError: + # Compat with versions that don't have heapblock yet + self.heap_base = (self.base + ((self.ba.top_of_kernel_data + 0xffff) & ~0xffff) - + self.ba.phys_base) + + if os.environ.get("M1N1HEAP", ""): + self.heap_base = int(os.environ.get("M1N1HEAP", ""), 16) + + self.heap_base += 128 * 1024 * 1024 # We leave 128MB for m1n1 heap + self.heap_top = self.heap_base + self.heap_size + self.heap = Heap(self.heap_base, self.heap_top) + self.proxy.heap = self.heap + + self.malloc = self.heap.malloc + self.memalign = self.heap.memalign + self.free = self.heap.free + + self.code_buffer = self.malloc(self.CODE_BUFFER_SIZE) + + self.adt_data = None + self.adt = LazyADT(self) + + self.simd_buf = self.malloc(32 * 16) + self.simd_type = None + self.simd = None + + self.mmu_off = False + + self.inst_cache = {} + + self.exec_modes = { + None: (self.proxy.call, REGION_RX_EL1), + "el2": (self.proxy.call, REGION_RX_EL1), + "el1": (self.proxy.el1_call, 0), + "el0": (self.proxy.el0_call, REGION_RWX_EL0), + "gl2": (self.proxy.gl2_call, REGION_RX_EL1), + "gl1": (self.proxy.gl1_call, 0), + } + self._read = { + 8: lambda addr: self.proxy.read8(addr), + 16: lambda addr: self.proxy.read16(addr), + 32: lambda addr: self.proxy.read32(addr), + 64: lambda addr: self.uread64(addr), + 128: lambda addr: [self.uread64(addr), + self.uread64(addr + 8)], + 256: lambda addr: [self.uread64(addr), + self.uread64(addr + 8), + self.uread64(addr + 16), + self.uread64(addr + 24)], + 512: lambda addr: [self.uread64(addr + i) for i in range(0, 64, 8)], + } + self._write = { + 8: lambda addr, data: self.proxy.write8(addr, data), + 16: lambda addr, data: self.proxy.write16(addr, data), + 32: lambda addr, data: self.proxy.write32(addr, data), + 64: lambda addr, data: self.uwrite64(addr, data), + 128: lambda addr, data: (self.uwrite64(addr, data[0]), + self.uwrite64(addr + 8, data[1])), + 256: lambda addr, data: 
(self.uwrite64(addr, data[0]), + self.uwrite64(addr + 8, data[1]), + self.uwrite64(addr + 16, data[2]), + self.uwrite64(addr + 24, data[3])), + 512: lambda addr, data: [self.uwrite64(addr + 8 * i, data[i]) + for i in range(8)], + } + + def uwrite64(self, addr, data): + '''write 8 byte value to given address, supporting split 4-byte halves''' + if addr & 3: + raise AlignmentError() + if addr & 4: + self.proxy.write32(addr, data & 0xffffffff) + self.proxy.write32(addr + 4, data >> 32) + else: + self.proxy.write64(addr, data) + + def uread64(self, addr): + '''read 8 byte value from given address, supporting split 4-byte halves''' + if addr & 3: + raise AlignmentError() + if addr & 4: + return self.proxy.read32(addr) | (self.proxy.read32(addr + 4) << 32) + else: + return self.proxy.read64(addr) + + def read(self, addr, width): + '''do a width read from addr and return it + width can be 8, 16, 32, 64, 128, 256 or 512''' + val = self._read[width](addr) + if self.proxy.get_exc_count(): + raise ProxyError("Exception occurred") + return val + + def write(self, addr, data, width): + '''do a width write of data to addr + width can be 8, 16, 32, 64, 128, 256 or 512''' + self._write[width](addr, data) + if self.proxy.get_exc_count(): + raise ProxyError("Exception occurred") + + def mrs(self, reg, *, silent=False, call=None): + '''read system register reg''' + op0, op1, CRn, CRm, op2 = sysreg_parse(reg) + + op = ((op0 << 19) | (op1 << 16) | (CRn << 12) | + (CRm << 8) | (op2 << 5) | 0xd5200000) + + return self.exec(op, call=call, silent=silent) + + def msr(self, reg, val, *, silent=False, call=None): + '''Write val to system register reg''' + op0, op1, CRn, CRm, op2 = sysreg_parse(reg) + + op = ((op0 << 19) | (op1 << 16) | (CRn << 12) | + (CRm << 8) | (op2 << 5) | 0xd5000000) + + self.exec(op, val, call=call, silent=silent) + + sys = msr + sysl = mrs + + def exec(self, op, r0=0, r1=0, r2=0, r3=0, *, silent=False, call=None, ignore_exceptions=False): + if callable(call): + region = 
REGION_RX_EL1 + elif isinstance(call, tuple): + call, region = call + else: + call, region = self.exec_modes[call] + + if isinstance(op, list): + op = tuple(op) + + if op in self.inst_cache: + func = self.inst_cache[op] + elif isinstance(op, tuple) or isinstance(op, list): + func = struct.pack(f"<{len(op)}II", *op, 0xd65f03c0) # ret + elif isinstance(op, int): + func = struct.pack("<II", op, 0xd65f03c0) # ret + elif isinstance(op, str): + c = ARMAsm(op + "; ret", self.code_buffer) + func = c.data + elif isinstance(op, bytes): + func = op + else: + raise ValueError() + + if self.mmu_off: + region = 0 + + self.inst_cache[op] = func + + assert len(func) < self.CODE_BUFFER_SIZE + self.iface.writemem(self.code_buffer, func) + self.proxy.dc_cvau(self.code_buffer, len(func)) + self.proxy.ic_ivau(self.code_buffer, len(func)) + + self.proxy.set_exc_guard(GUARD.SKIP | (GUARD.SILENT if silent else 0)) + ret = call(self.code_buffer | region, r0, r1, r2, r3) + if not ignore_exceptions: + cnt = self.proxy.get_exc_count() + self.proxy.set_exc_guard(GUARD.OFF) + if cnt: + raise ProxyError("Exception occurred") + else: + self.proxy.set_exc_guard(GUARD.OFF) + + return ret + + inst = exec + + def compressed_writemem(self, dest, data, progress=None): + if not len(data): + return + + payload = gzip.compress(data, compresslevel=1) + compressed_size = len(payload) + + with self.heap.guarded_malloc(compressed_size) as compressed_addr: + self.iface.writemem(compressed_addr, payload, progress) + timeout = self.iface.dev.timeout + self.iface.dev.timeout = None + try: + decompressed_size = self.proxy.gzdec(compressed_addr, compressed_size, dest, len(data)) + finally: + self.iface.dev.timeout = timeout + + assert decompressed_size == len(data) + + def get_adt(self): + if self.adt_data is not None: + return self.adt_data + adt_base = (self.ba.devtree - self.ba.virt_base + self.ba.phys_base) & 0xffffffffffffffff + adt_size = self.ba.devtree_size + print(f"Fetching ADT ({adt_size} bytes)...") + 
self.adt_data = self.iface.readmem(adt_base, self.ba.devtree_size) + return self.adt_data + + def push_adt(self): + self.adt_data = self.adt.build() + adt_base = (self.ba.devtree - self.ba.virt_base + self.ba.phys_base) & 0xffffffffffffffff + adt_size = len(self.adt_data) + print(f"Pushing ADT ({adt_size} bytes)...") + self.iface.writemem(adt_base, self.adt_data) + + def disassemble_at(self, start, size, pc=None, vstart=None, sym=None): + '''disassemble size bytes of memory starting at start + optional pc address will mark that line with a '*' ''' + code = struct.unpack(f"<{size // 4}I", self.iface.readmem(start, size)) + if vstart is None: + vstart = start + + c = ARMAsm(".inst " + ",".join(str(i) for i in code), vstart) + lines = list() + for line in c.disassemble(): + sl = line.split() + try: + addr = int(sl[0].rstrip(":"), 16) + except: + addr = None + if pc == addr: + line = " *" + line + else: + line = " " + line + if sym: + if s := sym(addr): + print() + print(f"{' '*len(sl[0])} {s}:") + print(line) + + def print_l2c_regs(self): + print() + print(" == L2C Registers ==") + l2c_err_sts = self.mrs(L2C_ERR_STS_EL1) + + print(f" L2C_ERR_STS: {l2c_err_sts:#x}") + print(f" L2C_ERR_ADR: {self.mrs(L2C_ERR_ADR_EL1):#x}"); + print(f" L2C_ERR_INF: {self.mrs(L2C_ERR_INF_EL1):#x}"); + + self.msr(L2C_ERR_STS_EL1, l2c_err_sts) # Clear the flag bits + self.msr(DAIF, self.mrs(DAIF) | 0x100) # Re-enable SError exceptions + + def print_context(self, ctx, is_fault=True, addr=lambda a: f"0x{a:x}", sym=None, num_ctx=9): + print(f" == Exception taken from {ctx.spsr.M.name} ==") + el = ctx.spsr.M >> 2 + print(f" SPSR = {ctx.spsr}") + print(f" ELR = {addr(ctx.elr)}" + (f" (0x{ctx.elr_phys:x})" if ctx.elr_phys else "")) + print(f" SP_EL{el} = 0x{ctx.sp[el]:x}" + (f" (0x{ctx.sp_phys:x})" if ctx.sp_phys else "")) + if is_fault: + print(f" ESR = {ctx.esr}") + print(f" FAR = {addr(ctx.far)}" + (f" (0x{ctx.far_phys:x})" if ctx.far_phys else "")) + + for i in range(0, 31, 4): + j = min(30, i + 3) + 
print(f" {f'x{i}-x{j}':>7} = {' '.join(f'{r:016x}' for r in ctx.regs[i:j + 1])}") + + if ctx.elr_phys: + print() + print(" == Code context ==") + + off = -(num_ctx // 2) + + self.disassemble_at(ctx.elr_phys + 4 * off, num_ctx * 4, ctx.elr, ctx.elr + 4 * off, sym=sym) + + if is_fault: + if ctx.esr.EC == ESR_EC.MSR or ctx.esr.EC == ESR_EC.IMPDEF and ctx.esr.ISS == 0x20: + print() + print(" == MRS/MSR fault decoding ==") + if ctx.esr.EC == ESR_EC.MSR: + iss = ESR_ISS_MSR(ctx.esr.ISS) + else: + iss = ESR_ISS_MSR(self.mrs(AFSR1_EL2)) + enc = iss.Op0, iss.Op1, iss.CRn, iss.CRm, iss.Op2 + if enc in sysreg_rev: + name = sysreg_rev[enc] + else: + name = f"s{iss.Op0}_{iss.Op1}_c{iss.CRn}_c{iss.CRm}_{iss.Op2}" + if iss.DIR == MSR_DIR.READ: + print(f" Instruction: mrs x{iss.Rt}, {name}") + else: + print(f" Instruction: msr {name}, x{iss.Rt}") + + if ctx.esr.EC in (ESR_EC.DABORT, ESR_EC.DABORT_LOWER): + print() + print(" == Data abort decoding ==") + iss = ESR_ISS_DABORT(ctx.esr.ISS) + if iss.ISV: + print(f" ISS: {iss!s}") + else: + print(" No instruction syndrome available") + + if iss.DFSC == DABORT_DFSC.ECC_ERROR: + self.print_l2c_regs() + + if ctx.esr.EC == ESR_EC.SERROR and ctx.esr.ISS == 0: + self.print_l2c_regs() + + print() + + @contextmanager + def mmu_disabled(self): + flags = self.proxy.mmu_disable() + try: + yield + finally: + self.proxy.mmu_restore(flags) + + def push_simd(self): + if self.simd is not None: + data = self.simd_type.build(self.simd) + self.iface.writemem(self.simd_buf, data) + self.proxy.put_simd_state(self.simd_buf) + self.simd = self.simd_type = None + + def get_simd(self, simd_type): + if self.simd is not None and self.simd_type is not simd_type: + data = self.simd_type.build(self.simd) + self.simd = simd_type.parse(data) + self.simd_type = simd_type + elif self.simd is None: + self.proxy.get_simd_state(self.simd_buf) + data = self.iface.readmem(self.simd_buf, 32 * 16) + self.simd = simd_type.parse(data) + self.simd_type = simd_type + + return 
self.simd + + @property + def b(self): + return self.get_simd(SIMD_B) + @property + def h(self): + return self.get_simd(SIMD_H) + @property + def s(self): + return self.get_simd(SIMD_S) + @property + def d(self): + return self.get_simd(SIMD_D) + @property + def q(self): + return self.get_simd(SIMD_Q) + + def get_version(self, v): + if isinstance(v, bytes): + v = v.split(b"\0")[0].decode("ascii") + return VERSION_MAP.get(v, None) + + @property + def version(self): + return self.get_version(self.adt["/chosen"].firmware_version) + + @property + def sfr_version(self): + return self.get_version(self.adt["/chosen"].system_firmware_version) + +class LazyADT: + def __init__(self, utils): + self.__dict__["_utils"] = utils + + @functools.cached_property + def _adt(self): + return adt.load_adt(self._utils.get_adt()) + def __getitem__(self, item): + return self._adt[item] + def __setitem__(self, item, value): + self._adt[item] = value + def __delitem__(self, item): + del self._adt[item] + def __contains__(self, item): + return item in self._adt + def __getattr__(self, attr): + return getattr(self._adt, attr) + def __setattr__(self, attr, value): + return setattr(self._adt, attr, value) + def __delattr__(self, attr): + return delattr(self._adt, attr) + def __str__(self, t=""): + return str(self._adt) + def __iter__(self): + return iter(self._adt) + +class RegMonitor(Reloadable): + def __init__(self, utils, bufsize=0x100000, ascii=False, log=None): + self.utils = utils + self.proxy = utils.proxy + self.iface = self.proxy.iface + self.ranges = [] + self.last = [] + self.bufsize = bufsize + self.ascii = ascii + self.log = log or print + + if bufsize: + self.scratch = utils.malloc(bufsize) + else: + self.scratch = None + + def readmem(self, start, size, readfn): + if readfn: + return readfn(start, size) + if self.scratch: + assert size < self.bufsize + self.proxy.memcpy32(self.scratch, start, size) + start = self.scratch + return self.proxy.iface.readmem(start, size) + + def 
add(self, start, size, name=None, offset=None, readfn=None): + if offset is None: + offset = start + self.ranges.append((start, size, name, offset, readfn)) + self.last.append(None) + + def show_regions(self, log=print): + for start, size, name, offset, readfn in sorted(self.ranges): + end = start + size - 1 + log(f"{start:#x}..{end:#x} ({size:#x})\t{name}") + + def poll(self): + if not self.ranges: + return + cur = [] + for (start, size, name, offset, readfn), last in zip(self.ranges, self.last): + count = size // 4 + block = self.readmem(start, size, readfn) + if block is None: + if last is not None: + self.log(f"# Lost: {name} ({start:#x}..{start + size - 1:#x})") + cur.append(None) + continue + + words = struct.unpack("<%dI" % count, block) + cur.append(block) + if last == block: + continue + if name: + header = f"# {name} ({start:#x}..{start + size - 1:#x})\n" + else: + header = f"# ({start:#x}..{start + size - 1:#x})\n" + + self.log(header + chexdiff32(last, block, offset=offset)) + self.last = cur + +class GuardedHeap: + def __init__(self, malloc, memalign=None, free=None): + if isinstance(malloc, Heap): + malloc, memalign, free = malloc.malloc, malloc.memalign, malloc.free + + self.ptrs = set() + self._malloc = malloc + self._memalign = memalign + self._free = free + + def __enter__(self): + return self + + def __exit__(self, *exc): + self.free_all() + return False + + def malloc(self, sz): + ptr = self._malloc(sz) + self.ptrs.add(ptr) + return ptr + + def memalign(self, align, sz): + ptr = self._memalign(align, sz) + self.ptrs.add(ptr) + return ptr + + def free(self, ptr): + self.ptrs.remove(ptr) + self._free(ptr) + + def free_all(self): + for ptr in self.ptrs: + self._free(ptr) + self.ptrs = set() + +def bootstrap_port(iface, proxy): + to = iface.dev.timeout + iface.dev.timeout = 0.15 + try: + do_baud = proxy.iodev_whoami() == IODEV.UART + except ProxyCommandError: + # Old m1n1 version -- assume non-USB serial link, force baudrate adjust + do_baud = True 
+ except UartTimeout: + # Assume the receiving end is already at 1500000 + iface.dev.baudrate = 1500000 + do_baud = False + + if do_baud: + try: + iface.nop() + proxy.set_baud(1500000) + except UartTimeout: + # May fail even if the setting did get applied; checked by the .nop next + iface.dev.baudrate = 1500000 + + iface.nop() + iface.dev.timeout = to diff --git a/tools/proxyclient/m1n1/setup.py b/tools/proxyclient/m1n1/setup.py new file mode 100644 index 0000000..8a66287 --- /dev/null +++ b/tools/proxyclient/m1n1/setup.py @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: MIT +import os, struct, sys, time + +from .hv import HV +from .proxy import * +from .proxyutils import * +from .sysreg import * +from .tgtypes import * +from .utils import * +from .hw.pmu import PMU + +# Create serial connection +iface = UartInterface() +# Construct m1n1 proxy layer over serial connection +p = M1N1Proxy(iface, debug=False) +# Customise parameters of proxy and serial port +# based on information sent over the connection +bootstrap_port(iface, p) + +# Initialise the Proxy interface from values fetched from +# the remote end +u = ProxyUtils(p) +# Build a Register Monitoring object on Proxy Interface +mon = RegMonitor(u) +hv = HV(iface, p, u) + +fb = u.ba.video.base + +PMU(u).reset_panic_counter() + +print(f"m1n1 base: 0x{u.base:x}") + +PMU(u).reset_panic_counter() diff --git a/tools/proxyclient/m1n1/shell.py b/tools/proxyclient/m1n1/shell.py new file mode 100644 index 0000000..06e0605 --- /dev/null +++ b/tools/proxyclient/m1n1/shell.py @@ -0,0 +1,213 @@ +# SPDX-License-Identifier: MIT +import atexit, serial, os, struct, code, traceback, readline, rlcompleter, sys +import __main__ +import builtins +import re + +from .proxy import * +from .proxyutils import * +from .utils import * +from . 
import sysreg +from inspect import isfunction, signature + +__all__ = ["ExitConsole", "run_shell"] + +class HistoryConsole(code.InteractiveConsole): + def __init__(self, locals=None, filename="<console>", + histfile=os.path.expanduser("~/.m1n1-history")): + code.InteractiveConsole.__init__(self, locals, filename) + self.histfile = histfile + self.init_history(histfile) + self.poll_func = None + + def init_history(self, histfile): + readline.parse_and_bind("tab: complete") + if hasattr(readline, "read_history_file"): + try: + readline.read_history_file(histfile) + except FileNotFoundError: + pass + + def save_history(self): + readline.set_history_length(10000) + readline.write_history_file(self.histfile) + + def showtraceback(self): + type, value, tb = sys.exc_info() + traceback.print_exception(type, value, tb) + + def runcode(self, code): + super().runcode(code) + if self.poll_func: + self.poll_func() + if "mon" in self.locals: + try: + self.locals["mon"].poll() + except Exception as e: + print(f"mon.poll() failed: {e!r}") + if "u" in self.locals: + self.locals["u"].push_simd() + + +class ExitConsole(SystemExit): + pass +cmd_list = {} +subcmd_list = {} +# Debug levels +DBL_NONE = 0 +DBL_INFO = 1 +DBL_TRACE = 2 +DBL_DEBUG = 3 +DBL_EDEBUG = 4 + +db_level = DBL_NONE + +def debug_cmd(db=None): + '''Set debug level to integer %d(none)...%d(extreme debug)''' % (DBL_NONE, DBL_EDEBUG) + global db_level + if db: + db_level = db + print("debug level=%d" % db_level) + +def help_cmd(arg=None): + if db_level >= DBL_DEBUG: + print("arg=%s" % repr(arg)) + if arg: + #cmd = arg.__qualname__ + if callable(arg): + cmd = arg.__name__ + elif isinstance(arg, str): + cmd = arg + else: + print("Unknown command: %s" % repr(arg)) + return + if db_level >= DBL_DEBUG: + print("cmd=%s" % repr(cmd)) + if cmd not in cmd_list: + print("Undocumented command %s" % cmd) + return + hinfo = cmd_list[cmd] + if isinstance(hinfo, str): + print("%-10s : %s" % (cmd, hinfo)) + return + if cmd in 
subcmd_list: + clist = subcmd_list[cmd] + aname = cmd + if db_level >= DBL_DEBUG: + print("subcmd_list[%s] = %s" % + (repr(cmd), repr(clist))) + else: + print("command %s is not documented" % cmd) + return + else: + clist = cmd_list + aname = 'top level' + print("Note: To display a category's commands quote the name e.g. help('HV')") + print("List of %s commands:" % aname) + for cmd in clist.keys(): + hinfo = clist[cmd] + if isinstance(hinfo, str): + msg = hinfo.strip().split('\n', 1)[0] + elif isinstance(hinfo, int): + msg = "%s category - %d subcommands" % (cmd, hinfo) + else: + print("%s ?" % cmd) + continue + if len(cmd) <= 10: + print("%-10s : %s" % (cmd, msg)) + else: + print("%s:\n %s" % (cmd, msg)) + +#locals is a dictionary for constructing the +# InteractiveConsole with. It adds in the callables +# in proxy utils iface and sysreg into locals +def run_shell(locals, msg=None, exitmsg=None, poll_func=None): + saved_display = sys.displayhook + try: + def display(val): + if isinstance(val, int) and not isinstance(val, bool): + builtins._ = val + print(hex(val)) + elif callable(val): + val() + else: + saved_display(val) + + sys.displayhook = display + + # convenience + locals["h"] = hex + locals["sysreg"] = sysreg + + if "proxy" in locals and "p" not in locals: + locals["p"] = locals["proxy"] + if "utils" in locals and "u" not in locals: + locals["u"] = locals["utils"] + + for obj_name in ("iface", "p", "u"): + obj = locals.get(obj_name) + obj_class = type(obj) + if obj is None: + continue + + for attr in dir(obj_class): + if attr in locals or attr.startswith('_'): + continue + + member = getattr(obj_class, attr) + if callable(member) and not isinstance(member, property): + cmd = getattr(obj, attr) + locals[attr] = cmd + + for attr in dir(sysreg): + locals[attr] = getattr(sysreg, attr) + + locals['help'] = help_cmd + locals['debug'] = debug_cmd + for obj_name in locals.keys(): + obj = locals.get(obj_name) + if obj is None or obj_name.startswith('_'): + continue 
+ if callable(obj) and not isinstance(obj, property): + try: + desc = obj_name + str(signature(obj)) + except: + continue + qn = obj.__qualname__ + if qn.find('.') > 0: + a = qn.split('.') + if a[0] not in subcmd_list: + subcmd_list[a[0]] = {} + if a[0] not in cmd_list: + cmd_list[a[0]] = 1 + else: + cmd_list[a[0]] += 1 + clist = subcmd_list[a[0]] + else: + clist = None + if locals[obj_name].__doc__: + desc += " - " + locals[obj_name].__doc__ + cmd_list[obj_name] = desc + if isinstance(clist, dict): + clist[obj_name] = desc + + con = HistoryConsole(locals) + con.poll_func = poll_func + try: + con.interact(msg, exitmsg) + except ExitConsole as e: + if len(e.args): + return e.args[0] + else: + return + finally: + con.save_history() + + finally: + sys.displayhook = saved_display + +if __name__ == "__main__": + from .setup import * + locals = dict(__main__.__dict__) + + run_shell(locals, msg="Have fun!") diff --git a/tools/proxyclient/m1n1/sysreg.py b/tools/proxyclient/m1n1/sysreg.py new file mode 100644 index 0000000..3c08238 --- /dev/null +++ b/tools/proxyclient/m1n1/sysreg.py @@ -0,0 +1,383 @@ +# SPDX-License-Identifier: MIT +import json, os, re +from enum import Enum, IntEnum, IntFlag +from .utils import Register, Register64, Register32 + +__all__ = ["sysreg_fwd", "sysreg_rev"] + +def _load_registers(): + global sysreg_fwd, sysop_fwd + + sysreg_fwd = {} + sysop_fwd = {} + for fname in ["arm_regs.json", "apple_regs.json"]: + data = json.load(open(os.path.join(os.path.dirname(__file__), "..", "..", "tools", fname))) + for reg in data: + if "accessors" in reg: + for acc in reg["accessors"]: + if acc in ("MRS", "MSR"): + sysreg_fwd[reg["name"]] = tuple(reg["enc"]) + else: + sysop_fwd[acc + " " + reg["name"]] = tuple(reg["enc"]) + else: + sysreg_fwd[reg["name"]] = tuple(reg["enc"]) + +_load_registers() +sysreg_rev = {v: k for k, v in sysreg_fwd.items()} +sysop_rev = {v: k for k, v in sysop_fwd.items()} +sysop_fwd_id = {k.replace(" ", "_"): v for k,v in 
sysop_fwd.items()} + +globals().update(sysreg_fwd) +__all__.extend(sysreg_fwd.keys()) +globals().update(sysop_fwd_id) +__all__.extend(sysop_fwd_id.keys()) + +def sysreg_name(enc): + if enc in sysreg_rev: + return sysreg_rev[enc] + if enc in sysop_rev: + return sysop_rev[enc] + return f"s{enc[0]}_{enc[1]}_c{enc[2]}_c{enc[3]}_{enc[4]}" + +def sysreg_parse(s): + if isinstance(s, tuple) or isinstance(s, list): + return tuple(s) + s = s.strip() + for r in (r"s(\d+)_(\d+)_c(\d+)_c(\d+)_(\d+)", r"(\d+), *(\d+), *(\d+), *(\d+), *(\d+)"): + if m := re.match(r, s): + enc = tuple(map(int, m.groups())) + break + else: + for i in sysreg_fwd, sysop_fwd, sysop_fwd_id: + try: + enc = i[s] + except KeyError: + continue + break + else: + raise Exception(f"Unknown sysreg name {s}") + return enc + +def DBGBCRn_EL1(n): + return (2,0,0,n,5) + +def DBGBVRn_EL1(n): + return (2,0,0,n,4) + +def DBGWCRn_EL1(n): + return (2,0,0,n,7) + +def DBGWVRn_EL1(n): + return (2,0,0,n,6) + +class ESR_EC(IntEnum): + UNKNOWN = 0b000000 + WFI = 0b000001 + FP_TRAP = 0b000111 + PAUTH_TRAP = 0b001000 + LS64 = 0b001010 + BTI = 0b001101 + ILLEGAL = 0b001110 + SVC = 0b010101 + HVC = 0b010110 + SMC = 0b010111 + MSR = 0b011000 + SVE = 0b011001 + PAUTH_FAIL = 0b011100 + IABORT_LOWER = 0b100000 + IABORT = 0b100001 + PC_ALIGN = 0b100010 + DABORT_LOWER = 0b100100 + DABORT = 0b100101 + SP_ALIGN = 0b100110 + FP_EXC = 0b101100 + SERROR = 0b101111 + BKPT_LOWER = 0b110000 + BKPT = 0b110001 + SSTEP_LOWER = 0b110010 + SSTEP = 0b110011 + WATCH_LOWER = 0b110100 + WATCH = 0b110101 + BRK = 0b111100 + IMPDEF = 0b111111 + +class MSR_DIR(IntEnum): + WRITE = 0 + READ = 1 + +class ESR_ISS_MSR(Register32): + Op0 = 21, 20 + Op2 = 19, 17 + Op1 = 16, 14 + CRn = 13, 10 + Rt = 9, 5 + CRm = 4, 1 + DIR = 0, 0, MSR_DIR + +class DABORT_DFSC(IntEnum): + ASIZE_L0 = 0b000000 + ASIZE_L1 = 0b000001 + ASIZE_L2 = 0b000010 + ASIZE_L3 = 0b000011 + XLAT_L0 = 0b000100 + XLAT_L1 = 0b000101 + XLAT_L2 = 0b000110 + XLAT_L3 = 0b000111 + AF_L0 = 0b001000 + 
AF_L1 = 0b001001 + AF_L2 = 0b001010 + AF_L3 = 0b001011 + PERM_L0 = 0b001100 + PERM_L1 = 0b001101 + PERM_L2 = 0b001110 + PERM_L3 = 0b001111 + EABORT = 0b010000 + TAG_CHECK = 0b010001 + PT_EABORT_Lm1 = 0b010011 + PT_EABORT_L0 = 0b010100 + PT_EABORT_L1 = 0b010101 + PT_EABORT_L2 = 0b010110 + PT_EABORT_L3 = 0b010111 + ECC_ERROR = 0b011000 + PT_ECC_ERROR_Lm1 = 0b011011 + PT_ECC_ERROR_L0 = 0b011100 + PT_ECC_ERROR_L1 = 0b011101 + PT_ECC_ERROR_L2 = 0b011110 + PT_ECC_ERROR_L3 = 0b011111 + ALIGN = 0b100001 + ASIZE_Lm1 = 0b101001 + XLAT_Lm1 = 0b101011 + TLB_CONFLICT = 0b110000 + UNSUPP_ATOMIC = 0b110001 + IMPDEF_LOCKDOWN = 0b110100 + IMPDEF_ATOMIC = 0b110101 + +class ESR_ISS_DABORT(Register32): + ISV = 24 + SAS = 23, 22 + SSE = 21 + SRT = 20, 16 + SF = 15 + AR = 14 + VNCR = 13 + SET = 12, 11 + LSR = 12, 11 + FnV = 10 + EA = 9 + CM = 8 + S1PTR = 7 + WnR = 6 + DFSC = 5, 0, DABORT_DFSC + +class ESR(Register64): + ISS2 = 36, 32 + EC = 31, 26, ESR_EC + IL = 25 + ISS = 24, 0 + +class SPSR_M(IntEnum): + EL0t = 0 + EL1t = 4 + EL1h = 5 + EL2t = 8 + EL2h = 9 + +class SPSR(Register64): + N = 31 + Z = 30 + C = 29 + V = 28 + TCO = 25 + DIT = 24 + UAO = 23 + PAN = 22 + SS = 21 + IL = 20 + SSBS = 12 + BTYPE = 11, 10 + D = 9 + A = 8 + I = 7 + F = 6 + M = 4, 0, SPSR_M + +class ACTLR(Register64): + EnMDSB = 12 + EnPRSV = 6 + EnAFP = 5 + EnAPFLG = 4 + DisHWP = 3 + EnTSO = 1 + +class HCR(Register64): + TWEDEL = 63, 60 + TWEDEn = 59 + TID5 = 58 + DCT = 57 + ATA = 56 + TTLBOS = 55 + TTLBIS = 54 + EnSCXT = 53 + TOCU = 52 + AMVOFFEN = 51 + TICAB = 50 + TID4 = 49 + FIEN = 47 + FWB = 46 + NV2 = 45 + AT = 44 + NV1 = 43 + NV = 42 + API = 41 + APK = 40 + MIOCNCE = 38 + TEA = 37 + TERR = 36 + TLOR = 35 + E2H = 34 + ID = 33 + CD = 32 + RW = 31 + TRVM = 30 + HCD = 29 + TDZ = 28 + TGE = 27 + TVM = 26 + TTLB = 25 + TPU = 24 + TPCP = 23 + TPC = 23 + TSW = 22 + TACR = 21 + TIDCP = 20 + TSC = 19 + TID3 = 18 + TID2 = 17 + TID1 = 16 + TID0 = 15 + TWE = 14 + TWI = 13 + DC = 12 + BSU = 11, 10 + FB = 9 + VSE = 8 + VI 
= 7 + VF = 6 + AMO = 5 + IMO = 4 + FMO = 3 + PTW = 2 + SWIO = 1 + VM = 0 + +class HACR(Register64): + TRAP_CPU_EXT = 0 + TRAP_AIDR = 4 + TRAP_AMX = 10 + TRAP_SPRR = 11 + TRAP_GXF = 13 + TRAP_CTRR = 14 + TRAP_IPI = 16 + TRAP_s3_4_c15_c5z6_x = 18 + TRAP_s3_4_c15_c0z12_5 = 19 + GIC_CNTV = 20 + TRAP_s3_4_c15_c10_4 = 25 + TRAP_SERROR_INFO = 48 + TRAP_EHID = 49 + TRAP_HID = 50 + TRAP_s3_0_c15_c12_1z2 = 51 + TRAP_ACC = 52 + TRAP_PM = 57 + TRAP_UPM = 58 + TRAP_s3_1z7_c15_cx_3 = 59 + +class AMX_CTL(Register64): + EN = 63 + EN_EL1 = 62 + +class MDCR(Register64): + TDE = 8 + TDA = 9 + TDOSA = 10 + TDRA = 11 + +class MDSCR(Register64): + SS = 0 + MDE = 15 + +class DBGBCR(Register32): + BT = 23, 20 + LBN = 16, 16 + SSC = 15, 14 + HMC = 13 + BAS = 8,5 + PMC = 2,1 + E = 0 + +class DBGWCR_LSC(IntFlag): + L = 1 + S = 2 + +class DBGWCR(Register32): + SSCE = 29 + MASK = 28, 24 + WT = 20 + LBN = 19, 16 + SSC = 15, 14 + HMC = 13 + BAS = 12, 5 + LSC = 4, 3 + PAC = 2, 1 + E = 0 + +# TCR_EL1 +class TCR(Register64): + DS = 59 + TCMA1 = 58 + TCMA0 = 57 + E0PD1 = 56 + E0PD0 = 55 + NFD1 = 54 + NFD0 = 53 + TBID1 = 52 + TBID0 = 51 + HWU162 = 50 + HWU161 = 49 + HWU160 = 48 + HWU159 = 47 + HWU062 = 46 + HWU061 = 45 + HWU060 = 44 + HWU059 = 43 + HPD1 = 42 + HPD0 = 41 + HD = 40 + HA = 39 + TBI1 = 38 + TBI0 = 37 + AS = 36 + IPS = 34, 32 + TG1 = 31, 30 + SH1 = 29, 28 + ORGN1 = 27, 26 + IRGN1 = 25, 24 + EPD1 = 23 + A1 = 22 + T1SZ = 21, 16 + TG0 = 15, 14 + SH0 = 13, 12 + ORGN0 = 11, 10 + IRGN0 = 9, 8 + EPD0 = 7 + T0SZ = 5, 0 + +class TLBI_RVA(Register64): + ASID = 63, 48 + TG = 47, 46 + SCALE = 45, 44 + NUM = 43, 39 + TTL = 38, 37 + BaseADDR = 36, 0 + +__all__.extend(k for k, v in globals().items() + if (callable(v) or isinstance(v, type)) and v.__module__ == __name__) diff --git a/tools/proxyclient/m1n1/tgtypes.py b/tools/proxyclient/m1n1/tgtypes.py new file mode 100644 index 0000000..9081c8d --- /dev/null +++ b/tools/proxyclient/m1n1/tgtypes.py @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: MIT +from 
class RegCache(Reloadable):
    """Register accessor that remembers the last value seen per address.

    ``read`` and ``write`` go to the live target through ``hv.u`` when that
    is allowed and mirror the value into ``self.cache``; ``self.cached`` is a
    companion accessor that only ever consults the cache.
    """

    def __init__(self, hv):
        self.hv = hv
        self.u = hv.u
        self.cache = {}
        self.cached = RegCacheAlwaysCached(self)

    def update(self, addr, data):
        """Record *data* as the most recent value observed at *addr*."""
        self.cache[addr] = data

    def read(self, addr, width):
        """Read *addr* live when possible, otherwise fall back to the cache."""
        live_access = self.hv.ctx or not self.hv.started
        if not live_access:
            return self.read_cached(addr, width)
        value = self.u.read(addr, width)
        self.cache[addr] = value
        return value

    def read_cached(self, addr, width):
        """Return the cached value for *addr*, or None (with a notice) if absent."""
        value = self.cache.get(addr)
        if value is None:
            print(f"RegCache: no cache for {addr:#x}")
        return value

    def write(self, addr, data, width):
        """Write *data* to *addr* on the target and mirror it into the cache.

        Raises Exception when there is no hypervisor context (``hv.ctx``).
        """
        if not self.hv.ctx:
            raise Exception("Cannot write register in asynchronous context")
        self.u.write(addr, data, width)
        self.cache[addr] = data
class PrintTracer(Tracer):
    """Tracer that prints every MMIO event and can record a replay script.

    When ``log_file`` is set to a writable text file, each event is written
    as a commented log line followed by the equivalent ``p.readN`` /
    ``p.writeN`` statement.
    """

    def __init__(self, hv, device_addr_tbl):
        super().__init__(hv)
        self.device_addr_tbl = device_addr_tbl
        self.log_file = None

    def event_mmio(self, evt, name=None, start=None):
        """Print (and optionally record) one MMIO event.

        name:  device name to show; looked up from ``device_addr_tbl`` by
               event address when omitted.
        start: base address used to compute the displayed offset. As before,
               it is taken from the lookup whenever *name* is omitted.
        """
        # Only consult the address table when something is actually missing.
        # Previously a caller passing ``name`` without ``start`` crashed on
        # ``evt.addr - None``; now the zone start fills the gap.
        if name is None or start is None:
            dev, zone = self.device_addr_tbl.lookup(evt.addr)
            if name is None:
                name = dev
            start = zone.start

        offset = evt.addr - start
        t = "W" if evt.flags.WRITE else "R"
        m = "+" if evt.flags.MULTI else " "
        logline = (f"[cpu{evt.flags.CPU}] [0x{evt.pc:016x}] MMIO: {t}.{1<<evt.flags.WIDTH:<2}{m} " +
                   f"0x{evt.addr:x} ({name}, offset {offset:#04x}) = 0x{evt.data:x}")
        print(logline)

        if not self.log_file:
            return

        self.log_file.write(f"# {logline}\n")
        width = 8 << evt.flags.WIDTH
        if evt.flags.WRITE:
            stmt = f"p.write{width}({start:#x} + {offset:#x}, {evt.data:#x})\n"
        else:
            stmt = f"p.read{width}({start:#x} + {offset:#x})\n"
        self.log_file.write(stmt)
class PongEp(EP):
    # This endpoint receives pongs. The cpu code reads some status registers
    # after receiving one. Might be a "work done" message.
    BASE_MESSAGE = GpuMsg

    @msg(0x42, DIR.RX, PongMsg)
    def pong_rx(self, msg):
        """Log an incoming pong (while tracing is active) and notify the tracer."""
        if self.tracer.state.active:
            self.log(f" Pong {msg!s}")
            if msg.UNK != 0:
                # The value used to be glued directly onto the word "value";
                # separate it and mark it as hex so the log line is readable.
                self.log(f" Pong had unexpected value {msg.UNK:#x}")
                self.hv.run_shell()

        self.tracer.pong()
        return True

    @msg(0x81, DIR.TX, PongMsg)
    def init_ep(self, msg):
        """Log the init message and hand its payload to the tracer."""
        self.log(f" Init {msg.UNK:x}")

        self.tracer.pong_init(msg.UNK)
        return True
def poll_ring(self, ring):
    """Deliver every message between our saved tail and the ring's write pointer.

    Each message is fetched from the channel, logged, and passed to
    ``tracer.handle_ringmsg``; the saved tail in ``self.state.tail[ring]``
    advances after each one. Raises Exception if the write pointer is not a
    valid index into the ring.
    """
    _msgcls, _size, slots = self.channel.ring_defs[ring]

    pos = self.state.tail[ring]
    head = self.channel.state[ring].WRITE_PTR.val
    if head >= slots:
        raise Exception(f"Message index {head:#x} >= {slots:#x}")

    # An empty range (pos == head) simply skips the loop.
    while pos != head:
        message = self.channel.get_message(ring, pos, self.tracer.meta_gpuvm)
        self.log(f"Message @{ring}.{pos}:\n{message!s}")
        self.tracer.handle_ringmsg(message)
        pos = (pos + 1) % slots
        self.state.tail[ring] = pos
class CommandQueueTracer(Reloadable):
    """Follows one GPU command queue, identified by the address of its info struct.

    The read pointer and active flag live in ``tracer.state.queues`` keyed by
    ``info_addr``, so a new tracer object for the same queue picks up where
    the previous one left off.
    """

    def __init__(self, tracer, info_addr, new_queue):
        self.tracer = tracer
        self.uat = tracer.uat
        self.hv = tracer.hv
        self.u = self.hv.u
        self.verbose = False
        self.info_addr = info_addr

        if info_addr not in tracer.state.queues:
            self.state = CommandQueueState()
            self.state.rptr = None
            self.state.active = True
            tracer.state.queues[info_addr] = self.state
        else:
            self.state = tracer.state.queues[info_addr]

        # A queue announced as new always starts reading from slot 0.
        if new_queue:
            self.state.rptr = 0

        self.update_info()

    def update_info(self):
        """Re-parse the queue info structure from GPU memory (context 0)."""
        self.info = CommandQueueInfo.parse_stream(self.tracer.get_stream(0, self.info_addr))

    def log(self, msg):
        self.tracer.log(f"[CQ@{self.info_addr:#x}] {msg}")

    @property
    def rb_size(self):
        return self.info.pointers.rb_size

    def get_workitems(self, workmsg):
        """Yield the work items queued between our read pointer and ``workmsg.head``.

        This is a generator: the saved read pointer is only advanced once the
        caller has consumed it to the end.
        """
        self.tracer.uat.invalidate_cache()
        self.update_info()

        if self.state.rptr is None:
            self.state.rptr = int(self.info.pointers.gpu_doneptr)
            # Report the value actually adopted. The message used to print
            # info.gpu_rptr1, a different field from the one assigned above.
            self.log(f"Initializing rptr to {self.state.rptr:#x}")

        self.log(f"Got workmsg: wptr={workmsg.head:#x} rptr={self.state.rptr:#x}")
        self.log(f"Queue info: {self.info}")

        assert self.state.rptr < self.rb_size
        assert workmsg.head < self.rb_size

        stream = self.tracer.get_stream(0, self.info.rb_addr)

        count = 0
        orig_rptr = rptr = self.state.rptr
        while rptr != workmsg.head:
            count += 1
            # Ring buffer entries are 8-byte pointers to the work items.
            stream.seek(self.info.rb_addr + rptr * 8, 0)
            pointer = Int64ul.parse_stream(stream)
            self.log(f"WI item @{rptr:#x}: {pointer:#x}")
            if pointer:
                stream.seek(pointer, 0)
                yield CmdBufWork.parse_stream(stream)
            rptr = (rptr + 1) % self.rb_size

        self.state.rptr = rptr

        self.log(f"Parsed {count} items from {orig_rptr:#x} to {workmsg.head:#x}")

    def set_active(self, active=True):
        """Set the active flag; deactivating also forgets the read pointer."""
        if not active:
            self.state.rptr = None
        self.state.active = active
def get_cmdqueue(self, info_addr, new_queue):
    """Return the queue tracer for *info_addr*, creating one when needed.

    A fresh CommandQueueTracer is built (and replaces any existing entry in
    ``self.cmdqueues``) when the address is unknown or *new_queue* is truthy.
    """
    if new_queue or info_addr not in self.cmdqueues:
        self.cmdqueues[info_addr] = CommandQueueTracer(self, info_addr, new_queue)
    return self.cmdqueues[info_addr]
def add_gpuvm_tracers(self, ctx=None):
    """Report every page mapped in *ctx* to ``uat_page_mapped``.

    With no *ctx*, context 0 is processed, followed by contexts
    1..UAT.NUM_CONTEXTS-1 when ``self.trace_userva`` is set.
    """
    self.log(f"add_gpuvm_tracers({ctx})")

    if ctx is not None:
        def visit_page(start, end, idx, pte, level, sparse):
            self.uat_page_mapped(start, pte, ctx)

        self.uat.foreach_page(ctx, visit_page)
        return

    self.add_gpuvm_tracers(0)
    user_contexts = range(1, UAT.NUM_CONTEXTS) if self.trace_userva else ()
    for user_ctx in user_contexts:
        self.add_gpuvm_tracers(user_ctx)
f"{evt.flags.ATTR:02x}:{sh[evt.flags.SH]}" + self.log(f"UAT <{a}> write L{level} at {ctx}:{iova:#x} (#{off:#x}) -> {evt.data}") + + if level == 3: + ctx = off // 2 + is_kernel = off & 1 + if ctx != 0 and is_kernel: + return + + if is_kernel: + iova += 0xf8000000000 + pte = TTBR(evt.data) + if not pte.valid(): + self.log(f"Context {ctx} invalidated") + self.uat.invalidate_cache() + self.clear_uatmap_tracers(ctx) + self.clear_gpuvm_tracers(ctx) + return + self.log(f"Dumping UAT for context {ctx}") + self.uat.invalidate_cache() + _, pt = self.uat.get_pt(self.uat.gpu_region + ctx * 16, 2) + pt[off & 1] = evt.data + self.uat.dump(ctx, log=self.log) + self.add_uatmap_tracers(ctx) + self.add_gpuvm_tracers(ctx) + else: + is_kernel = iova >= 0xf8000000000 + iova += off << (level * 11 + 14) + if level == 0: + pte = Page_PTE(evt.data) + self.uat_page_mapped(iova, pte, ctx) + return + else: + pte = PTE(evt.data) + + if not pte.valid(): + try: + paddr = self.va_to_pa[(ctx, level, iova)] + except KeyError: + return + self.hv.del_tracer(irange(paddr, 0x4000), + f"UATMapTracer/{ctx}") + del self.va_to_pa[(ctx, level, iova)] + return + + if ctx != 0 and not self.trace_usermap: + return + if ctx == 0 and not self.trace_kernmap: + return + + self.va_to_pa[(ctx, level, iova)] = pte.offset() + level -= 1 + self.hv.add_tracer(irange(pte.offset(), 0x4000), + f"UATMapTracer/{ctx}", + mode=TraceMode.WSYNC, + write=self.uat_write, + iova=iova, + base=pte.offset(), + level=level, + ctx=ctx) + + def uat_page_mapped(self, iova, pte, ctx=0): + if iova >= 0xf8000000000 and ctx != 0: + return + if not pte.valid(): + self.log(f"UAT unmap {ctx}:{iova:#x} ({pte})") + try: + paddr = self.va_to_pa[(ctx, iova)] + except KeyError: + return + self.hv.del_tracer(irange(paddr, 0x4000), f"GPUVM/{ctx}") + del self.va_to_pa[(ctx, iova)] + return + + paddr = pte.offset() + self.log(f"UAT map {ctx}:{iova:#x} -> {paddr:#x} ({pte})") + if paddr < 0x800000000: + return # MMIO, ignore + + if not self.trace_userva 
def meta_gpuvm(self, iova, size):
    """Summarise logged accesses to the *size* bytes starting at *iova*.

    For every byte offset with an entry in ``self.readlog`` and/or
    ``self.writelog`` a tag ``[R|W.<bytes><+ if multi> @<counter> +<offset>]``
    is emitted, read before write. Returns the concatenated tags, or None
    when nothing in the range was logged.
    """
    # The logs are keyed by the address masked to its low 44 bits.
    iova &= 0xfffffffffff

    tags = []
    for off in range(size):
        offva = iova + off
        # Same formatting for both logs; only the leading letter differs.
        for kind, accesses in (("R", self.readlog), ("W", self.writelog)):
            entry = accesses.get(offva)
            if entry is None:
                continue
            ctr, evt = entry
            m = "+" if evt.flags.MULTI else " "
            tags.append(f"[{kind}.{1<<evt.flags.WIDTH:<2}{m} @{ctr} +{off:#x}]")

    # Collect-and-join avoids rebuilding the string for every tag.
    return "".join(tags) or None
self.handoff_tracer.stop() + self.clear_ttbr_tracers() + self.clear_uatmap_tracers() + self.clear_gpuvm_tracers() + super().stop() + + def mon_addva(self, ctx, va, size, name=""): + self.mon.add(va, size, name, readfn= lambda a, s: self.uat.ioread(ctx, a, s)) + + def handle_ringmsg(self, msg): + if isinstance(msg, FlagMsg): + self.log(f"== Event flag notification ==") + self.handle_event(msg) + return + elif isinstance(msg, RunCmdQueueMsg): + self.log(f"== Work notification (type {msg.queue_type})==") + queue = self.get_cmdqueue(msg.cmdqueue_addr, msg.new_queue) + work_items = list(queue.get_workitems(msg)) + if self.encoder_id_filter is not None: + for wi in work_items: + if wi.cmd.magic == 0: + # TA + if not self.encoder_id_filter(wi.cmd.struct_3.encoder_id): + return True + if wi.cmd.magic == 1: + # 3D + if not self.encoder_id_filter(wi.cmd.struct_6.encoder_id): + return True + for wi in work_items: + self.log(str(wi)) + if msg.queue_type == 2: + pass + #return self.handle_compute(wi) + elif msg.queue_type == 1: + self.handle_3d(wi) + self.queue_3d = queue + elif msg.queue_type == 0: + self.handle_ta(wi) + self.queue_ta = queue + return True + + def handle_event(self, msg): + if self.last_ta and self.redump: + self.log("Redumping TA...") + stream = self.get_stream(0, self.last_ta._addr) + last_ta = CmdBufWork.parse_stream(stream) + self.log(str(last_ta)) + self.handle_ta(last_ta) + self.queue_ta.update_info() + self.log(f"Queue info: {self.queue_ta.info}") + self.last_ta = None + if self.last_3d and self.redump: + self.log("Redumping 3D...") + stream = self.get_stream(0, self.last_3d._addr) + last_3d = CmdBufWork.parse_stream(stream) + self.log(str(last_3d)) + self.handle_3d(last_3d) + self.queue_3d.update_info() + self.log(f"Queue info: {self.queue_3d.info}") + self.last_3d = None + + def dump_buffer_manager(self, buffer_mgr, kread, read): + return + + self.log(f" buffer_mgr @ {buffer_mgr._addr:#x}: {buffer_mgr!s}") + self.log(f" page_list @ 
{buffer_mgr.page_list_addr:#x}:") + chexdump(read(buffer_mgr.page_list_addr, + buffer_mgr.page_list_size), print_fn=self.log) + self.log(f" block_list @ {buffer_mgr.block_list_addr:#x}:") + chexdump(read(buffer_mgr.block_list_addr, + 0x8000), print_fn=self.log) + #self.log(f" unkptr_d8 @ {buffer_mgr.unkptr_d8:#x}:") + #chexdump(read(buffer_mgr.unkptr_d8, 0x4000), print_fn=self.log) + + + def handle_ta(self, wi): + self.log(f"Got TA WI{wi.cmd.magic:d}") + self.last_ta = wi + + def kread(off, size): + return self.uat.ioread(0, off, size) + + if wi.cmd.magic == 6: + wi6 = wi.cmd + #self.log(f" unkptr_14 @ {wi6.unkptr_14:#x}:") + #chexdump(kread(wi6.unkptr_14, 0x100), print_fn=self.log) + + elif wi.cmd.magic == 0: + wi0 = wi.cmd + context = wi0.context_id + + def read(off, size): + return self.uat.ioread(context, off & 0x7fff_ffff_ffff_ffff, size) + + #chexdump(kread(wi0.addr, 0x600), print_fn=self.log) + self.log(f" context_id = {context:#x}") + self.dump_buffer_manager(wi0.buffer_mgr, kread, read) + #self.log(f" unk_emptybuf @ {wi0.unk_emptybuf_addr:#x}:") + #chexdump(kread(wi0.unk_emptybuf_addr, 0x1000), print_fn=self.log) + + #self.log(f" unkptr_48 @ {wi0.unkptr_48:#x}:") + #chexdump(read(wi0.unkptr_48, 0x1000), print_fn=self.log) + #self.log(f" unkptr_58 @ {wi0.unkptr_58:#x}:") + #chexdump(read(wi0.unkptr_58, 0x4000), print_fn=self.log) + #self.log(f" unkptr_60 @ {wi0.unkptr_60:#x}:") + #chexdump(read(wi0.unkptr_60, 0x4000), print_fn=self.log) + + #self.log(f" unkptr_45c @ {wi0.unkptr_45c:#x}:") + #chexdump(read(wi0.unkptr_45c, 0x1800), print_fn=self.log) + + for i in wi0.microsequence.value: + i = i.cmd + if isinstance(i, StartTACmd): + self.log(f" # StartTACmd") + self.log(f" buf_thing @ {i.buf_thing_addr:#x}: {i.buf_thing!s}") + self.log(f" unkptr_18 @ {i.buf_thing.unkptr_18:#x}:") + chexdump(read(i.buf_thing.unkptr_18, 0x100), print_fn=self.log) + self.log(f" unkptr_24 @ {i.unkptr_24:#x}:") + chexdump(read(i.unkptr_24, 0x100), print_fn=self.log) + self.log(f" 
unk_5c @ {i.unkptr_5c:#x}:") + chexdump(read(i.unkptr_5c, 0x100), print_fn=self.log) + + elif isinstance(i, FinalizeTACmd): + self.log(f" # FinalizeTACmd") + + + #self.uat.dump(context, self.log) + + def handle_3d(self, wi): + self.log(f"Got 3D WI{wi.cmdid:d}") + if wi.cmdid != 1: + return + + self.last_3d = wi + + def kread(off, size): + return self.uat.ioread(0, off, size) + + if wi.cmd.magic == 4: + wi4 = wi.cmd + #self.log(f" completion_buf @ {wi4.completion_buf_addr:#x}: {wi4.completion_buf!s} ") + #chexdump(kread(wi4.completion_buf_addr, 0x1000), print_fn=self.log) + elif wi.cmd.magic == 1: + wi1 = wi.cmd + context = wi1.context_id + def read(off, size): + return self.uat.ioread(context, off, size) + + self.log(f" context_id = {context:#x}") + cmd3d = wi1.microsequence.value[0].cmd + + self.log(f" 3D:") + self.log(f" struct1 @ {cmd3d.struct1_addr:#x}: {cmd3d.struct1!s}") + self.log(f" struct2 @ {cmd3d.struct2_addr:#x}: {cmd3d.struct2!s}") + #self.log(f" tvb_start_addr @ {cmd3d.struct2.tvb_start_addr:#x}:") + #if cmd3d.struct2.tvb_start_addr: + #chexdump(read(cmd3d.struct2.tvb_start_addr, 0x1000), print_fn=self.log) + #self.log(f" tvb_tilemap_addr @ {cmd3d.struct2.tvb_tilemap_addr:#x}:") + #if cmd3d.struct2.tvb_tilemap_addr: + #chexdump(read(cmd3d.struct2.tvb_tilemap_addr, 0x1000), print_fn=self.log) + #self.log(f" aux_fb_ptr @ {cmd3d.struct2.aux_fb_ptr:#x}:") + #chexdump(read(cmd3d.struct2.aux_fb_ptr, 0x100), print_fn=self.log) + #self.log(f" pipeline_base @ {cmd3d.struct2.pipeline_base:#x}:") + #chexdump(read(cmd3d.struct2.pipeline_base, 0x100), print_fn=self.log) + + self.log(f" buf_thing @ {cmd3d.buf_thing_addr:#x}: {cmd3d.buf_thing!s}") + #self.log(f" unkptr_18 @ {cmd3d.buf_thing.unkptr_18:#x}:") + #chexdump(read(cmd3d.buf_thing.unkptr_18, 0x1000), print_fn=self.log) + + #self.log(f" unk_24 @ {cmd3d.unkptr_24:#x}: {cmd3d.unk_24!s}") + self.log(f" struct6 @ {cmd3d.struct6_addr:#x}: {cmd3d.struct6!s}") + #self.log(f" unknown_buffer @ 
{cmd3d.struct6.unknown_buffer:#x}:") + #chexdump(read(cmd3d.struct6.unknown_buffer, 0x1000), print_fn=self.log) + self.log(f" struct7 @ {cmd3d.struct7_addr:#x}: {cmd3d.struct7!s}") + self.log(f" unk_buf_ptr @ {cmd3d.unk_buf_ptr:#x}:") + chexdump(kread(cmd3d.unk_buf_ptr, 0x11c), print_fn=self.log) + self.log(f" unk_buf2_ptr @ {cmd3d.unk_buf2_ptr:#x}:") + chexdump(kread(cmd3d.unk_buf2_ptr, 0x18), print_fn=self.log) + + for i in wi1.microsequence.value: + i = i.cmd + if not isinstance(i, Finalize3DCmd): + continue + self.log(f" Finalize:") + cmdfin = i + #self.log(f" completion:") + #chexdump(kread(cmdfin.completion, 0x4), print_fn=self.log) + self.log(f" unkptr_1c @ {cmdfin.unkptr_1c:#x}:") + chexdump(kread(cmdfin.unkptr_1c, 0x1000), print_fn=self.log) + #self.log(f" unkptr_24 @ {cmdfin.unkptr_24:#x}:") + #chexdump(kread(cmdfin.unkptr_24, 0x100), print_fn=self.log) + self.log(f" unkptr_34 @ {cmdfin.unkptr_34:#x}:") + chexdump(kread(cmdfin.unkptr_34, 0x1000), print_fn=self.log) + self.log(f" unkptr_3c @ {cmdfin.unkptr_3c:#x}:") + chexdump(kread(cmdfin.unkptr_3c, 0x1c0), print_fn=self.log) + self.log(f" unkptr_44 @ {cmdfin.unkptr_44:#x}:") + chexdump(kread(cmdfin.unkptr_44, 0x40), print_fn=self.log) + self.log(f" unkptr_64 @ {cmdfin.unkptr_64:#x}:") + chexdump(kread(cmdfin.unkptr_64, 0x118), print_fn=self.log) + + self.log(f" buf_thing @ {wi1.buf_thing_addr:#x}: {wi1.buf_thing!s}") + self.log(f" unkptr_18 @ {wi1.buf_thing.unkptr_18:#x}:") + chexdump(read(wi1.buf_thing.unkptr_18, 0x1000), print_fn=self.log) + self.dump_buffer_manager(wi1.buffer_mgr, kread, read) + #self.log(f" unk_emptybuf @ {wi1.unk_emptybuf_addr:#x}:") + #chexdump(kread(wi1.unk_emptybuf_addr, 0x1000), print_fn=self.log) + #self.log(f" tvb_addr @ {wi1.tvb_addr:#x}:") + #chexdump(read(wi1.tvb_addr, 0x1000), print_fn=self.log) + + def handle_compute(self, msg): + self.log("Got Compute Work Item") + + try: + wi = msg.workItems[0].cmd + except: + return + + def kread(off, size): + return self.uat.ioread(0, 
off, size) + + context = wi.context_id + + def read(off, size): + return self.uat.ioread(context, off, size) + + self.log(f" context_id = {context:#x}") + self.log(f" unk_c @ {wi.unkptr_c:#x}: {wi.unk_c!s} ") + #chexdump(kread(wi.unkptr_c, 0x100), print_fn=self.log) + self.log(f" unkptr_0:") + chexdump(kread(wi.unk_c.unkptr_0, 0x1000), print_fn=self.log) + + self.log("StartComputeCmd:") + try: + ccmd = wi.microsequence.value[0].cmd + except: + self.log(" MISSING!") + return + self.log(f" unkptr_4: {ccmd.unkptr_4:#x}") + chexdump(kread(ccmd.unkptr_4, 0x54), print_fn=self.log) + self.log(f" unkptr_14: {ccmd.unkptr_14:#x}") + chexdump(kread(ccmd.unkptr_14, 0x1000), print_fn=self.log) + #self.log(f" unkptr_3c: {ccmd.unkptr_3c:#x}") + #chexdump(kread(ccmd.unkptr_3c, 0xb4), print_fn=self.log) + + ci = ccmd.computeinfo + self.log(f" Compute Info: {ci!s}") + self.log(f" args:") + u0data = read(ci.args, 0x8000) + chexdump(u0data, print_fn=self.log) + args = struct.unpack("<8Q", u0data[0x7fa0:0x7fe0]) + for i, p in enumerate(args): + if p: + self.log(f" args[{i}] @ {p:#x}") + chexdump(read(p, 0x1000), print_fn=self.log) + p0, p1 = struct.unpack("<QQ", u0data[0x7fe0:0x7ff0]) + self.log(f" p0 @ {p0:#x}") + chexdump(read(p0, 0x100), print_fn=self.log) + self.log(f" p1 @ {p1:#x}") + chexdump(read(p1, 0x100), print_fn=self.log) + self.log(f" cmdlist:") + chexdump(read(ci.cmdlist, 0x8000), print_fn=self.log) + self.log(f" unkptr_10:") + chexdump(read(ci.unkptr_10, 8), print_fn=self.log) + self.log(f" unkptr_18:") + chexdump(read(ci.unkptr_18, 8), print_fn=self.log) + self.log(f" unkptr_20:") + chexdump(read(ci.unkptr_20, 8), print_fn=self.log) + self.log(f" unkptr_28:") + chexdump(read(ci.unkptr_28, 8), print_fn=self.log) + self.log(f" pipeline:") + chexdump(read(ci.pipeline_base, 0x1000), print_fn=self.log) + self.log(f" unkptr_48:") + chexdump(read(ci.unkptr_48, 0x8000), print_fn=self.log) + + ci2 = ccmd.computeinfo2 + self.log(f" Compute Info 2: {ci2!s}") + self.log(f" 
def kick(self, val):
    """React to a doorbell write by polling the channel(s) it refers to.

    Nothing happens while ``self.state.active`` is false. Otherwise the kick
    is logged and the monitor is polled, then:

    * ``0x10`` polls every channel and returns;
    * ``0x11`` selects channel 12;
    * values below ``0x10`` carry a type in bits 0-1 (3 is rejected by an
      assertion) and a priority in bits 2-3, selecting channel
      ``type + 3 * priority``;
    * anything else raises ``Exception``.

    After the selected channel, the channels from index 13 onwards
    (gfx -> cpu) are polled as well.
    """
    if not self.state.active:
        return

    self.log(f"kick~! {val:#x}")
    self.mon.poll()

    if val == 0x10:
        # Kick Firmware: no specific channel, so look at all of them.
        self.log("KickFirmware, polling")
        self.uat.invalidate_cache()
        for ch in self.channels:
            ch.poll()
        return

    if val == 0x11:
        # Device Control
        target = 12
    elif val < 0x10:
        queue_type = val & 3
        assert queue_type != 3
        target = queue_type + 3 * ((val >> 2) & 3)
    else:
        raise Exception("Unknown kick type")

    # Only reached for a recognised kick; the cache is dropped before polling.
    self.uat.invalidate_cache()
    self.channels[target].poll()

    # check the gfx -> cpu channels
    for ch in self.channels[13:]:
        ch.poll()
def dump_va(self, ctx):
    """Hexdump the pages visited by ``self.uat.foreach_page``.

    Pages whose PTE is not ``MemoryAttr.Device`` and has ``OS`` set are read
    in 0x4000-byte units and accumulated; the accumulated run is printed with
    ``chexdump32`` whenever the callback sees ``i == 0`` or ``sparse``, and
    once more at the end for whatever is left.

    NOTE(review): ``ctx`` is accepted but the walk and the reads both use
    context 0 -- confirm whether that is intentional.
    """
    data = b''
    dataStart = 0

    def dump_page(start, end, i, pte, level, sparse):
        # The accumulator lives in the enclosing function. Without this
        # declaration the assignments below make both names local to
        # dump_page and the very first ``len(data)`` raises UnboundLocalError.
        nonlocal data, dataStart

        if i == 0 or sparse:
            # Start of a new run: flush what has been collected so far.
            if len(data):
                chexdump32(data, dataStart)
            data = b''
            dataStart = 0
        if MemoryAttr(pte.AttrIndex) != MemoryAttr.Device and pte.OS:
            if dataStart == 0:
                dataStart = start
            data += self.uat.ioread(0, start, 0x4000)

    self.uat.foreach_page(0, dump_page)
    if len(data):
        chexdump32(data, dataStart)
def pause(self):
    """Stop tracing until resume() is called.

    The GPU VM tracers are always cleared. If no initdata has been recorded
    yet there is nothing else to undo; otherwise the UAT map and TTBR tracers
    are cleared, the tracer is marked inactive, every channel and command
    queue is deactivated, the read pointer of every entry in
    ``self.state.queues`` is reset to None, and the tracers on regions A, B
    and C are removed.
    """
    self.clear_gpuvm_tracers()

    initdata = self.state.initdata
    if initdata is None:
        return

    self.clear_uatmap_tracers()
    self.clear_ttbr_tracers()
    self.log("Pausing tracing")
    self.state.active = False

    for chan in self.channels:
        chan.set_active(False)
    for queue in self.cmdqueues.values():
        queue.set_active(False)
    for queue_info in self.state.queues.values():
        queue_info.rptr = None

    # (initdata attribute holding the base address, length of the region)
    regions = (
        ("regionA_addr", 0x4000),
        ("regionB_addr", 0x6bc0),
        ("regionC_addr", 0x11d40),
    )
    for attr, length in regions:
        self.untrace_uatrange(0, getattr(initdata, attr), length)
initdata.regionB.unkptr_178, 0x1c0, "unkptr_178") + #self.mon_addva(0, initdata.regionB.unkptr_180, 0x140, "unkptr_180") + self.mon_addva(0, initdata.regionB.unkptr_190, 0x80, "unkptr_190") + self.mon_addva(0, initdata.regionB.unkptr_198, 0xc0, "unkptr_198") + self.mon_addva(0, initdata.regionB.buffer_mgr_ctl_addr, 0x4000, "Buffer manager ctl") + self.mon_addva(0, initdata.unkptr_20.unkptr_0, 0x40, "unkptr_20.unkptr_0") + self.mon_addva(0, initdata.unkptr_20.unkptr_8, 0x40, "unkptr_20.unkptr_8") + + def pong_init(self, addr): + self.log("UAT at init time:") + self.uat.invalidate_cache() + self.uat.dump(0, log=self.log) + addr |= 0xfffff000_00000000 + initdata = InitData.parse_stream(self.get_stream(0, addr)) + + self.log("Initdata:") + self.log(initdata) + + self.add_mon_regions() + + #self.initdata.regionB.mon(lambda addr, size, name: self.mon_addva(0, addr, size, name)) + + self.state.initdata_addr = addr + self.state.initdata = initdata + self.state.channel_info = [] + self.state.fwlog_ring2 = initdata.regionB.fwlog_ring2 + channels = initdata.regionB.channels + for i in channelNames: + if i == "FWCtl": + chan_info = initdata.fw_status.fwctl_channel + else: + chan_info = channels[i] + self.state.channel_info.append(chan_info) + + self.init_channels() + self.mon.poll() + + self.log("Initial commands::") + for chan in self.channels: + chan.poll() + self.log("Init done") + + self.log("Mon regions") + self.mon.show_regions(log=self.log) + + + if self.pause_after_init: + self.log("Pausing tracing") + self.pause() + self.stop() + if self.shell_after_init: + self.hv.run_shell() + if self.after_init_hook: + self.after_init_hook() + +ChannelTracer = ChannelTracer._reloadcls() diff --git a/tools/proxyclient/m1n1/trace/asc.py b/tools/proxyclient/m1n1/trace/asc.py new file mode 100644 index 0000000..6d63a50 --- /dev/null +++ b/tools/proxyclient/m1n1/trace/asc.py @@ -0,0 +1,271 @@ +# SPDX-License-Identifier: MIT + +import struct +from enum import IntEnum +from ..hv import 
def handle_msg(self, direction, r0, r1):
    """Dispatch one message to its registered handler and log it.

    ``r0`` is re-wrapped in ``self.BASE_MESSAGE`` (when the endpoint defines
    one) to obtain the message type. The handler is then looked up in
    ``self.msgmap``, trying the most specific key first:
    ``(direction, type)``, ``(None, type)``, ``(direction, None)``,
    ``(None, None)``. If the matching entry carries a register type, ``r0``
    is re-wrapped in it before the handler is called.

    A handler that returns a true value suppresses the log line. In every
    case the method returns True.
    """
    msgtype = None
    if self.BASE_MESSAGE:
        r0 = self.BASE_MESSAGE(r0.value)
        msgtype = r0.TYPE

    handler = None
    name = "<unknown>"
    regtype = None

    msgids = [
        (direction, msgtype),
        (None, msgtype),
        (direction, None),
        (None, None),
    ]

    for msgid in msgids:
        entry = self.msgmap.get(msgid)
        # Only take over name/regtype on a real match, so the "<unknown>"
        # fallback survives when nothing is registered for this message.
        if entry is not None and entry[0]:
            handler, name, regtype = entry
            break

    if regtype is not None:
        r0 = regtype(r0.value)

    if handler:
        if handler.name is not None:
            name = handler.name
        if handler(r0):
            return True

    d = ">" if direction == DIR.TX else "<"
    # Without a BASE_MESSAGE there is no type to print; formatting None with
    # "#x" would raise TypeError, so use a placeholder instead.
    type_text = "?" if msgtype is None else f"{msgtype:#x}"
    self.log(f"{d}{type_text}({name}) {r0.value:016x} ({r0.str_fields()})")
    return True
def ioread(self, dva, size):
    """Read ``size`` bytes at device address ``dva``.

    When a DART is attached the read goes through ``self.dart`` on
    ``self.stream`` using the low 36 bits of ``dva``; otherwise the address
    is passed unchanged to ``self.hv.iface.readmem``.
    """
    if not self.dart:
        return self.hv.iface.readmem(dva, size)
    return self.dart.ioread(self.stream, dva & 0xFFFFFFFFF, size)
self.epmap[k] = ep + if k in self.state.ep: + ep.state.__dict__.update(self.state.ep[k]) + self.state.ep[k] = ep.state.__dict__ + if getattr(self.ep, ep.name, None): + ep.name = f"{ep.name}{k:02x}" + setattr(self.ep, ep.name, ep) + ep.start() + +# System endpoints + +## Management endpoint + +from ..fw.asc.mgmt import ManagementMessage, Mgmt_EPMap, Mgmt_EPMap_Ack, Mgmt_StartEP, Mgmt_SetAPPower, Mgmt_SetIOPPower, Mgmt_IOPPowerAck + +class Management(EP): + BASE_MESSAGE = ManagementMessage + + HELLO = msg_log(1, DIR.RX) + HELLO_ACK = msg_log(2, DIR.TX) + + @msg(5, DIR.TX, Mgmt_StartEP) + def StartEP(self, msg): + ep = self.tracer.epmap.get(msg.EP, None) + if ep: + ep.started = True + self.log(f" Starting endpoint #{msg.EP:#02x} ({ep.name})") + else: + self.log(f" Starting endpoint #{msg.EP:#02x}") + #return True + + Init = msg_log(6, DIR.TX) + + @msg(8, DIR.RX, Mgmt_EPMap) + def EPMap(self, msg): + for i in range(32): + if msg.BITMAP & (1 << i): + epno = 32 * msg.BASE + i + ep = self.tracer.epmap.get(epno, None) + if ep: + ep.present = True + self.log(f" Adding endpoint #{epno:#02x} ({ep.name})") + else: + self.log(f" Adding endpoint #{epno:#02x}") + + EPMap_Ack = msg_log(8, DIR.TX, Mgmt_EPMap_Ack) + + SetIOPPower = msg_log(6, DIR.TX, Mgmt_SetIOPPower) + SetIOPPowerAck = msg_log(7, DIR.TX, Mgmt_IOPPowerAck) + + SetAPPower = msg_log(0x0b, DIR.TX, Mgmt_SetAPPower) + SetAPPowerAck = msg_log(0x0b, DIR.RX, Mgmt_SetAPPower) + +## Syslog endpoint + +from ..fw.asc.syslog import SyslogMessage, Syslog_Init, Syslog_GetBuf, Syslog_Log + +class Syslog(EP): + BASE_MESSAGE = SyslogMessage + + @msg(8, DIR.RX, Syslog_Init) + def Init(self, msg): + self.state.count = msg.COUNT + self.state.entrysize = msg.ENTRYSIZE + + @msg(1, DIR.RX, Syslog_GetBuf) + def GetBuf(self, msg): + if msg.DVA: + self.state.syslog_buf = msg.DVA + + @msg(1, DIR.TX, Syslog_GetBuf) + def GetBuf_Ack(self, msg): + self.state.syslog_buf = msg.DVA + + @msg(5, DIR.RX, Syslog_Log) + def Log(self, msg): + buf = 
self.state.syslog_buf + stride = 0x20 + self.state.entrysize + log = self.tracer.ioread(buf + msg.INDEX * stride, stride) + hdr, unk, context, logmsg = struct.unpack(f"<II24s{self.state.entrysize}s", log) + context = context.split(b"\x00")[0].decode("ascii") + logmsg = logmsg.split(b"\x00")[0].decode("ascii").rstrip("\n") + self.log(f"* [{context}]{logmsg}") + return True + + Log_Ack = msg_ign(5, DIR.TX, Syslog_Log) + +class ASCTracer(BaseASCTracer): + ENDPOINTS = { + 0: Management, + #1: CrashLog, + 2: Syslog, + #3: KDebug, + #4: IOReporting, + } diff --git a/tools/proxyclient/m1n1/trace/dart.py b/tools/proxyclient/m1n1/trace/dart.py new file mode 100644 index 0000000..b1324f7 --- /dev/null +++ b/tools/proxyclient/m1n1/trace/dart.py @@ -0,0 +1,69 @@ +# SPDX-License-Identifier: MIT + +from ..hw.dart import * +from ..hw.dart8020 import * +from ..hw.dart8110 import * +from ..hv import TraceMode +from ..utils import * +from . import ADTDevTracer + +class DARTTracer(ADTDevTracer): + + DEFAULT_MODE = TraceMode.ASYNC + + NAMES = ["regs"] + + @classmethod + def _reloadcls(cls, force=False): + global DART8020 + global DART8020Regs + global DART8110 + global DART8110Regs + DART8020 = DART8020._reloadcls(force) + DART8020Regs = DART8020Regs._reloadcls(force) + DART8110 = DART8110._reloadcls(force) + DART8110Regs = DART8110Regs._reloadcls(force) + return super()._reloadcls(force) + + def __init__(self, hv, devpath, **kwargs): + compat = hv.adt[devpath].compatible[0] + if compat in ["dart,t6000", "dart,t8020"]: + self.REGMAPS = [DART8020Regs] + elif compat in ["dart,t8110"]: + self.REGMAPS = [DART8110Regs] + + return super().__init__(hv, devpath, **kwargs) + + def start(self): + super().start() + # prime cache + if self.dev.compatible[0] == "dart,t8110": + for i in range(16): + self.regs.TCR[i].val + self.regs.TTBR[i].val + for _ in range(8): + self.regs.ENABLE_STREAMS[_].val + else: + for i in range(16): + self.regs.TCR[i].val + for j in range(4): + self.regs.TTBR[i, j].val + 
def w_TLB_OP(self, tlb_op):
    """Handle a write to TLB_OP.

    OP 0 ("invalidate all") and OP 1 ("invalidate stream") are logged and
    both drop the whole cached DART state; any other OP is only logged.
    """
    op = tlb_op.OP
    if op not in (0, 1):
        self.log(f"Unknown TLB op {tlb_op}")
        return

    if op == 0:
        self.log("Invalidate all")
    else:
        self.log(f"Invalidate Stream: {tlb_op.STREAM}")
    self.dart.invalidate_cache()
def bits32(val, start):
    """Return ``start + n`` for every set bit ``n`` (0..31) of ``val``, ascending.

    ``val`` is converted with ``int()``; bits at position 32 and above are
    ignored.
    """
    mask = int(val)
    found = []
    for bit in range(32):
        if (mask >> bit) & 1:
            found.append(start + bit)
    return found
def handle_txn(self, txn):
    """Replay a completed transaction list onto the matching device tracer.

    ``txn`` is a sequence of ``"S"`` (start), ``"P"`` (stop) and data items.
    The item that follows a start is the address byte: bits 7..1 select the
    device from ``self.state.devices`` (falling back to
    ``self.default_dev``) and bit 0 selects read (1) or write (0). Every
    later data item is forwarded to that device's ``read`` or ``write``.
    """
    dev = self.default_dev
    reading = False
    expect_addr = False

    for item in txn:
        if item == "S":
            expect_addr = True
            continue

        if expect_addr:
            expect_addr = False
            addr = item >> 1
            dev = self.state.devices.get(addr, self.default_dev)
            reading = bool(item & 1)
            dev.start(addr, read=reading)
        elif item == "P":
            dev.stop()
        elif reading:
            dev.read(item)
        else:
            dev.write(item)
class I2CRegCache:
    """Read-only register backend that remembers the last value seen per address."""

    def __init__(self):
        # address -> most recently recorded value
        self.cache = {}

    def update(self, addr, data):
        """Record ``data`` as the current value at ``addr``."""
        self.cache[addr] = data

    def read(self, addr, width):
        """Return the recorded value at ``addr``.

        When nothing (or None) is recorded, a notice is printed and None is
        returned. ``width`` is not used.
        """
        value = self.cache.get(addr)
        if value is not None:
            return value
        print(f"I2CRegCache: no cache for {addr:#x}")
        return None

    def write(self, addr, data, width):
        """Writes are not supported; always raises NotImplementedError."""
        raise NotImplementedError("No write on I2CRegCache")
self.npagebytes: + return False + + shift = 8 * self.reg_imm + self.page &= ~(0xff << shift) + self.page |= data << shift + return True + + def write(self, data): + if self.handle_addressing(data): + return + elif self.handle_page_register(data): + pass + else: + self.regwrite(self.reg, data) + + self.reg += 1 + super().write(data) + + def read(self, data): + if self.reg_imm >= self.npagebytes: + self.regread(self.reg, data) + self.reg += 1 + super().read(data) + + def regwrite(self, reg, val): + self.regevent(reg, val, False) + + def regread(self, reg, val): + self.regevent(reg, val, True) + + def regevent(self, reg, val, read): + self._cache.update(reg, val) + r, index, rcls = self.regmap.lookup_addr(reg) + val = rcls(val) if rcls is not None else f"{val:#x}" + regname = self.regmap.get_name(reg) if r else f"{reg:#x}" + t = "R" if read else "W" + self.log(f"REG: {t.upper()}.8 {regname} = {val!s}") diff --git a/tools/proxyclient/m1n1/trace/isp.py b/tools/proxyclient/m1n1/trace/isp.py new file mode 100644 index 0000000..924bce9 --- /dev/null +++ b/tools/proxyclient/m1n1/trace/isp.py @@ -0,0 +1,118 @@ +from . 
def __init__(self, hv, dev_path, dart_dev_path, verbose):
    """Set up the ISP tracer together with a tracer for its DART.

    ``dart_dev_path`` is the ADT path of the DART in front of the ISP. It
    used to be accepted and then ignored in favour of a hard-coded
    "/arm-io/dart-isp"; it is now honoured, with that path kept as the
    fallback when a false value (e.g. None) is passed.

    The DART's clocks are enabled and its tracer is started immediately so
    that ``self.dart`` is usable afterwards.
    """
    super().__init__(hv, dev_path, verbose)

    dart_path = dart_dev_path or "/arm-io/dart-isp"

    hv.p.pmgr_adt_clocks_enable(dart_path)

    self.dart_tracer = DARTTracer(hv, dart_path)
    self.dart_tracer.start()
    self.dart = self.dart_tracer.dart

    # (address, size) ranges for which start() switches tracing off.
    self.ignored_ranges = [
        # -----------------------------------------------------------------
        # ## System clock counter (24 mhz)
        (0x23b734004, 4),
        (0x23b734008, 4),
        # ## Noisy memory addresses that are always zero
        (0x23b734868, 4),
        (0x23b73486c, 4),
        (0x23b734b38, 4),
        (0x23b734b3c, 4),
        (0x23b734b58, 4),
        (0x23b734b5c, 4),
        (0x23b734bd8, 4),
        (0x23b734bdc, 4),
        (0x23b734c18, 4),
        (0x23b734c1c, 4),
        (0x23b778128, 4),
        (0x23b77812c, 4),
        (0x23b77c128, 4),
        (0x23b77c12c, 4),
        # # Noisy memory addresses that change value
        (0x23b700248, 4),
        (0x23b700258, 4),
        (0x23b7003f8, 4),
        (0x23b700470, 4),
        # # ECPU/PCPU state report
        (0x23b738004, 4), # ecpu state report
        (0x23b738008, 4), # pcpu state report
        # -----------------------------------------------------------------
    ]
def w_ISP_GPR0(self, val):
    """Log a write to ISP_GPR0.

    When the written value is exactly 0x1812f80 and a DART is available,
    0x190 bytes are read through the DART (stream 0) at that address and
    kept in ``self.init_struct``.
    """
    self.log(f"ISP_GPR0 = ({val!s})")

    if val.value != 0x1812f80 or not self.dart:
        return
    self.init_struct = self.dart.ioread(0, val.value & 0xFFFFFFFF, 0x190)
def r_cfg_BAR(self, val, index):
    """Derive a BAR's size from a config-space read of that BAR.

    Only acts when the value stored in ``self.state.bars[index]`` has a BASE
    field of all ones (presumably the standard PCI sizing probe -- confirm).
    The size is then computed from the BASE field just read back, logged,
    and stored in ``self.state.barsize[index]``.
    """
    if self.state.bars[index].BASE != 0xfffffff:
        return

    size = (0x10000000 - val.BASE) << 4
    self.log(f"BAR{index} size = {size:#x}")
    self.state.barsize[index] = size
def trace_bar(self, idx, start, size):
    """Install a register-map tracer for BAR ``idx`` if a map is declared for it.

    Does nothing when ``self.BARMAPS`` has no entry (or a None entry) for
    ``idx``. The optional ``name`` and ``prefix`` are taken from
    ``self.NAMES`` / ``self.PREFIXES`` at the same index when those lists are
    long enough, and default to None otherwise.
    """
    if idx >= len(self.BARMAPS) or (regmap := self.BARMAPS[idx]) is None:
        return

    # These lookups previously used an undefined ``i`` and raised NameError
    # for any BAR that had a name or prefix configured.
    name = self.NAMES[idx] if idx < len(self.NAMES) else None
    prefix = self.PREFIXES[idx] if idx < len(self.PREFIXES) else None

    self.trace_regmap(start, size, regmap, name=name, prefix=prefix)
def hexdump32(s, sep=" "):
    """Format *s* as little-endian 32-bit words in hex, joined by *sep*.

    Only whole words are rendered: up to three trailing bytes that do not
    form a complete word are ignored. The word count was always floored,
    but ``struct.unpack`` requires the buffer to match the format exactly,
    so a ragged tail used to raise ``struct.error``.
    """
    count = len(s) // 4
    vals = struct.unpack("<%dI" % count, s[:count * 4])
    return sep.join(["%08x" % x for x in vals])
def chexundump(dump, base=0):
    """Parse a ``chexdump``-style listing back into a ``bytearray``.

    *dump* may be ``str``, ``bytes``/``bytearray`` (decoded as ASCII) or a
    file-like object with ``read()``. *base* is the address of the first
    byte, i.e. the ``st`` value the dump was produced with.

    Abbreviation rows (``*``) are skipped; the resulting hole, like any other
    gap between row offsets, is filled with zero bytes.

    Raises ``ValueError`` naming the offending line if a row cannot be parsed.
    """
    if isinstance(dump, (bytes, bytearray)):
        dump = dump.decode("ascii")
    elif not isinstance(dump, str):
        dump = dump.read()
        if isinstance(dump, (bytes, bytearray)):
            # file opened in binary mode
            dump = dump.decode("ascii")

    decoded = bytearray()
    for line in dump.splitlines():
        if not line:
            continue
        try:
            # Drop the trailing |ascii| column, then split offset from data
            cropped = line.split("|", 2)[0]
            mark, data = cropped.split(" ", 1)
            if data.strip() == "*":
                continue
            offset = int(mark, 16)
            data = data.replace(" ", "")
            if len(data) % 2 != 0:
                raise ValueError("odd sized data")
            # Zero-fill any hole between what we have and this row's offset
            gap = offset - base - len(decoded)
            if gap > 0:
                decoded.extend(bytes(gap))
            decoded.extend(bytes.fromhex(data))
        except (ValueError, TypeError) as exc:
            # `!r` is the repr conversion; the original `:r` was an invalid
            # format spec that raised while building this very message.
            raise ValueError(f"can't decode line: {line!r}") from exc

    return decoded
def unhex(s):
    """Convert a hex string to ``bytes``.

    ``/* ... */`` comments are removed first (including ones that span
    several lines), then all whitespace is dropped, so bytes may be grouped
    and wrapped freely.

    Raises ``ValueError`` if what remains is not valid hex.
    """
    # DOTALL lets a comment span newlines; the non-greedy match keeps
    # separate comments on one line from swallowing the data between them.
    s = re.sub(r"/\*.*?\*/", "", s, flags=re.DOTALL)
    return bytes.fromhex("".join(s.split()))
class Register(Reloadable, metaclass=RegisterMeta):
    """Integer register value with named bitfields.

    Subclasses declare fields as class attributes:

      NAME = bit                -- single-bit field
      NAME = msb, lsb           -- multi-bit field, read as ``int``
      NAME = msb, lsb, ftype    -- multi-bit field, read as ``ftype(raw)``

    Field attributes read from and write into the underlying integer, which
    is available as ``value`` or via ``int(reg)``.
    """
    _Constant = Constant

    def __init__(self, v=None, **kwargs):
        """Build from raw value *v*, or from zero if *v* is ``None``.

        With a raw value every field is decoded once so invalid contents
        fail early. Without one, fields whose type is a ``Constant`` are
        preset to that constant. Keyword arguments then set fields by name.
        """
        if v is not None:
            self._value = v
            for k in self._fields_list:
                getattr(self, k) # validate
        else:
            self._value = 0
            for k in self._fields_list:
                field = getattr(self.__class__, k)
                if isinstance(field, tuple) and len(field) >= 3 and isinstance(field[2], self._Constant):
                    setattr(self, k, field[2].value)

        for k,v in kwargs.items():
            setattr(self, k, v)

    def __getattribute__(self, attr):
        # Private names and non-field attributes use normal lookup
        if attr.startswith("_") or attr not in self._fields:
            return object.__getattribute__(self, attr)

        field = getattr(self.__class__, attr)
        value = self._value

        if isinstance(field, int):
            return (value >> field) & 1
        elif isinstance(field, tuple):
            if len(field) == 2:
                msb, lsb = field
                ftype = int
            else:
                msb, lsb, ftype = field
            return ftype((value >> lsb) & ((1 << ((msb + 1) - lsb)) - 1))
        else:
            raise AttributeError(f"Invalid field definition {attr} = {field!r}")

    def __setattr__(self, attr, fvalue):
        if attr.startswith("_"):
            self.__dict__[attr] = fvalue
            return

        # Raises AttributeError for names the class does not define
        field = getattr(self.__class__, attr)

        if isinstance(field, property):
            # Properties such as `value` must go through the descriptor;
            # previously they fell into the "invalid field" error below,
            # which made the `value` setter unreachable.
            object.__setattr__(self, attr, fvalue)
            return

        value = self._value

        if isinstance(field, int):
            self._value = (value & ~(1 << field)) | ((fvalue & 1) << field)
        elif isinstance(field, tuple):
            if len(field) == 2:
                msb, lsb = field
            else:
                msb, lsb, ftype = field
            mask = ((1 << ((msb + 1) - lsb)) - 1)
            self._value = (value & ~(mask << lsb)) | ((fvalue & mask) << lsb)
        else:
            raise AttributeError(f"Invalid field definition {attr} = {field!r}")

    def __int__(self):
        return self._value

    def _field_val(self, field_name, as_repr=False):
        """Return a display value for one field.

        Fields wider than 3 bits are shown in hex; Enum values are shown as
        ``value(name)``, or as ``str(enum)`` when *as_repr* is set.
        """
        field = getattr(self.__class__, field_name)
        val = getattr(self, field_name)
        if isinstance(val, Enum):
            if as_repr:
                return str(val)
            else:
                msb, lsb = field[:2]
                if (msb - lsb + 1) > 3:
                    return f"0x{val.value:x}({val.name})"
                else:
                    return f"{val.value}({val.name})"
        elif not isinstance(val, int):
            return val
        elif isinstance(field, int):
            return val
        elif isinstance(field, tuple):
            msb, lsb = field[:2]
            if (msb - lsb + 1) > 3:
                return f"0x{val:x}"

        return val

    @property
    def fields(self):
        """Mapping of field name to decoded value, in declaration order."""
        return {k: getattr(self, k) for k in self._fields_list}

    def str_fields(self):
        return ', '.join(f'{k}={self._field_val(k)}' for k in self._fields_list)

    def __str__(self):
        return f"0x{self._value:x} ({self.str_fields()})"

    def __repr__(self):
        return f"{type(self).__name__}({', '.join(f'{k}={self._field_val(k, True)}' for k in self._fields_list)})"

    def copy(self):
        """Return a new register of the same type holding the same raw value."""
        return type(self)(self._value)

    @property
    def value(self):
        """The raw integer value of the register."""
        return self._value
    @value.setter
    def value(self, val):
        self._value = val
ranges(self): + return (range(s, e + 1) for s, e in zip(self.__start, self.__end)) + + def items(self): + return ((range(s, e + 1), v) for s, e, v in zip(self.__start, self.__end, self.__value)) + + def _overlap_range(self, zone, split=False): + zone = self.__zone(zone) + if not zone: + return 0, 0 + + start = bisect.bisect_left(self.__end, zone.start) + + if split: + # Handle left-side overlap + if self.__contains(start, zone.start) and self.__start[start] != zone.start: + self.__split(start, zone.start) + start += 1 + assert self.__start[start] == zone.start + + for pos in range(start, len(self.__start)): + if self.__start[pos] >= zone.stop: + return start, pos + if split and (self.__end[pos] + 1) > zone.stop: + self.__split(pos, zone.stop) + return start, pos + 1 + + return start, len(self.__start) + + def populate(self, zone, default=[]): + zone = self.__zone(zone) + if len(zone) == 0: + return + + start, stop = zone.start, zone.stop + + # Starting insertion point, overlap inclusive + pos = bisect.bisect_left(self.__end, zone.start) + + # Handle left-side overlap + if self.__contains(pos, zone.start) and self.__start[pos] != zone.start: + self.__split(pos, zone.start) + pos += 1 + assert self.__start[pos] == zone.start + + # Iterate through overlapping ranges + while start < stop: + if pos == len(self.__start): + # Append to end + val = copy.copy(default) + self.__start.append(start) + self.__end.append(stop - 1) + self.__value.append(val) + yield range(start, stop), val + break + + assert self.__start[pos] >= start + if self.__start[pos] > start: + # Insert new range + boundary = stop + if pos < len(self.__start): + boundary = min(stop, self.__start[pos]) + val = copy.copy(default) + self.__start.insert(pos, start) + self.__end.insert(pos, boundary - 1) + self.__value.insert(pos, val) + yield range(start, boundary), val + start = boundary + else: + # Handle right-side overlap + if self.__end[pos] > stop - 1: + self.__split(pos, stop) + # Add to existing range 
class AddrLookup(RangeMap):
    """RangeMap whose values are lists of ``(value, zone)`` tuples.

    Several overlapping zones may be registered; each stored sub-range
    lists every registration that covers it.
    """

    def __str__(self):
        b = [""]
        for zone, values in self.items():
            b.append(f"{zone.start:#11x} - {zone.stop - 1:#11x}")
            if len(values) == 0:
                # Terminate the row so the next range starts on its own line
                b.append(f" (empty range)\n")
            elif len(values) == 1:
                b.append(f" : {values[0][0]}\n")
            else:
                b.append(f" ({len(values):d} devices)\n")
                for value, r in sorted(values, key=lambda r: r[1].start):
                    b.append(f" {r.start:#10x} - {r.stop - 1:#8x} : {value}\n")

        return "".join(b)

    def add(self, zone, value):
        """Register *value* for every address in *zone*."""
        for r, values in self.populate(zone):
            values.append((value, zone))

    def remove(self, zone, value):
        """Remove the registration ``(value, zone)`` wherever it is present.

        Sub-ranges that do not contain that exact registration are left
        untouched. Emptied ranges stay in the map until ``compact()``.
        """
        for r, values in self.overlaps(zone):
            try:
                values.remove((value, zone))
            except ValueError:
                # not registered on this sub-range
                pass

    def lookup(self, addr, default='unknown'):
        """Return the first ``(value, zone)`` covering *addr*.

        If nothing covers it, return ``(default, range(0, 1 << 64))``.
        """
        maps = super().lookup(addr)
        return maps[0] if maps else (default, range(0, 1 << 64))

    def lookup_all(self, addr):
        """Return every ``(value, zone)`` covering *addr* (possibly empty)."""
        return super().lookup(addr, [])

    def _assert(self, expect, val=lambda a:a):
        # Self-test helper: compare only the registered values, not the zones
        super()._assert(expect, lambda v: [i[0] for i in v])
class NdRange:
    """N-dimensional address range built from one or more ``range`` objects.

    An address is the sum of one element from each dimension. ``rev`` maps
    every such address back to its index: a plain int for a single
    dimension, a tuple otherwise. Dimensions given with step 1 are re-stepped
    to *min_step*.
    """

    def __init__(self, rng, min_step=1):
        dims = [rng] if isinstance(rng, range) else list(rng)
        self.ranges = dims

        least_step = dims[0].step
        for pos, dim in enumerate(dims):
            if dim.step != 1:
                assert dim.step >= min_step
                least_step = min(least_step, dim.step)
                continue
            # Unit-step dimension: stride by the register width instead
            dims[pos] = range(dim.start, dim.stop, min_step)
            least_step = min_step

        self.start = sum(dim[0] for dim in dims)
        self.stop = sum(dim[-1] for dim in dims) + least_step

        single = len(dims) == 1
        self.rev = {}
        for combo in itertools.product(*(enumerate(dim) for dim in dims)):
            indices, offsets = zip(*combo)
            self.rev[sum(offsets)] = indices[0] if single else tuple(indices)

    def index(self, item):
        """Return the index (int or tuple) of address *item*; KeyError if absent."""
        return self.rev[item]

    def __len__(self):
        return self.stop - self.start

    def __contains__(self, item):
        return item in self.rev

    def __getitem__(self, item):
        """Map an index to an address.

        A tuple of ints yields one address; a tuple containing slices yields
        an iterator over all matching addresses; a non-tuple index is only
        valid for a single-dimension range.
        """
        if not isinstance(item, tuple):
            assert len(self.ranges) == 1
            return self.ranges[0][item]

        assert len(self.ranges) == len(item)
        picked = [dim[sel] for dim, sel in zip(self.ranges, item)]
        if all(isinstance(sel, int) for sel in item):
            return sum(picked)
        axes = [[p] if isinstance(p, int) else p for p in picked]
        return map(sum, itertools.product(*axes))
class RegAccessor(Reloadable):
    """Handle for one register at a fixed address.

    ``rd(addr)`` and ``wr(addr, value)`` are the backend callables; ``cls``
    is the Register type used to decode raw values.
    """

    def __init__(self, cls, rd, wr, addr):
        self.cls, self.rd, self.wr, self.addr = cls, rd, wr, addr

    def __int__(self):
        return self.rd(self.addr)

    @property
    def val(self):
        """Raw value as returned by the backend read."""
        return self.rd(self.addr)

    @val.setter
    def val(self, value):
        self.wr(self.addr, int(value))

    @property
    def reg(self):
        """Current value decoded as ``cls``, or ``None`` if the read gave ``None``."""
        raw = self.val
        return None if raw is None else self.cls(raw)

    @reg.setter
    def reg(self, value):
        self.wr(self.addr, int(value))

    def set(self, **kwargs):
        """Read-modify-write: update the named fields and write the result back."""
        current = self.reg
        for field_name, field_value in kwargs.items():
            setattr(current, field_name, field_value)
        self.wr(self.addr, int(current))

    def __str__(self):
        return str(self.reg)
def irange(start, count, step=1):
    """Return a ``range`` of *count* items beginning at *start*, *step* apart."""
    span = count * step
    return range(start, start + span, step)
crc >> 1 +# for j in range(0, 255, 2*i): +# tbl[i + j] = crc ^ tbl[j] +# +# for i in range(0, 255, 8): +# print(f"{tbl[i]:#06x}, {tbl[i+1]:#06x}, {tbl[i+2]:#06x}, {tbl[i+3]:#06x}, {tbl[i+4]:#06x}, {tbl[i+5]:#06x}, {tbl[i+6]:#06x}, {tbl[i+7]:#06x}, ") + +_crc16_table = [ + 0x0000, 0xc0c1, 0xc181, 0x0140, 0xc301, 0x03c0, 0x0280, 0xc241, + 0xc601, 0x06c0, 0x0780, 0xc741, 0x0500, 0xc5c1, 0xc481, 0x0440, + 0xcc01, 0x0cc0, 0x0d80, 0xcd41, 0x0f00, 0xcfc1, 0xce81, 0x0e40, + 0x0a00, 0xcac1, 0xcb81, 0x0b40, 0xc901, 0x09c0, 0x0880, 0xc841, + 0xd801, 0x18c0, 0x1980, 0xd941, 0x1b00, 0xdbc1, 0xda81, 0x1a40, + 0x1e00, 0xdec1, 0xdf81, 0x1f40, 0xdd01, 0x1dc0, 0x1c80, 0xdc41, + 0x1400, 0xd4c1, 0xd581, 0x1540, 0xd701, 0x17c0, 0x1680, 0xd641, + 0xd201, 0x12c0, 0x1380, 0xd341, 0x1100, 0xd1c1, 0xd081, 0x1040, + 0xf001, 0x30c0, 0x3180, 0xf141, 0x3300, 0xf3c1, 0xf281, 0x3240, + 0x3600, 0xf6c1, 0xf781, 0x3740, 0xf501, 0x35c0, 0x3480, 0xf441, + 0x3c00, 0xfcc1, 0xfd81, 0x3d40, 0xff01, 0x3fc0, 0x3e80, 0xfe41, + 0xfa01, 0x3ac0, 0x3b80, 0xfb41, 0x3900, 0xf9c1, 0xf881, 0x3840, + 0x2800, 0xe8c1, 0xe981, 0x2940, 0xeb01, 0x2bc0, 0x2a80, 0xea41, + 0xee01, 0x2ec0, 0x2f80, 0xef41, 0x2d00, 0xedc1, 0xec81, 0x2c40, + 0xe401, 0x24c0, 0x2580, 0xe541, 0x2700, 0xe7c1, 0xe681, 0x2640, + 0x2200, 0xe2c1, 0xe381, 0x2340, 0xe101, 0x21c0, 0x2080, 0xe041, + 0xa001, 0x60c0, 0x6180, 0xa141, 0x6300, 0xa3c1, 0xa281, 0x6240, + 0x6600, 0xa6c1, 0xa781, 0x6740, 0xa501, 0x65c0, 0x6480, 0xa441, + 0x6c00, 0xacc1, 0xad81, 0x6d40, 0xaf01, 0x6fc0, 0x6e80, 0xae41, + 0xaa01, 0x6ac0, 0x6b80, 0xab41, 0x6900, 0xa9c1, 0xa881, 0x6840, + 0x7800, 0xb8c1, 0xb981, 0x7940, 0xbb01, 0x7bc0, 0x7a80, 0xba41, + 0xbe01, 0x7ec0, 0x7f80, 0xbf41, 0x7d00, 0xbdc1, 0xbc81, 0x7c40, + 0xb401, 0x74c0, 0x7580, 0xb541, 0x7700, 0xb7c1, 0xb681, 0x7640, + 0x7200, 0xb2c1, 0xb381, 0x7340, 0xb101, 0x71c0, 0x7080, 0xb041, + 0x5000, 0x90c1, 0x9181, 0x5140, 0x9301, 0x53c0, 0x5280, 0x9241, + 0x9601, 0x56c0, 0x5780, 0x9741, 0x5500, 0x95c1, 0x9481, 0x5440, + 0x9c01, 
def crc16USB(crc, data):
    """Fold the byte values in *data* into *crc* using ``_crc16_table``.

    Returns the updated CRC; call repeatedly to process data in pieces.
    """
    for byte in data:
        slot = (crc ^ byte) & 0xff
        crc = _crc16_table[slot] ^ (crc >> 8)
    return crc
(15, 19, [1, 8]), (20, 20, [2, 8]), + (21, 24, [2, 8]), (25, 29, [8]), (30, 39, [3, 8]), (40, 43, [8]), + (44, 69, [8]), (70, 79, [7, 8]), (80, 99, [8]) + ]) + a.compact() + a._assert([ + (0, 9, [0, 4, 5, 8]), (10, 14, [1, 4, 5, 8]), (15, 19, [1, 8]), (20, 24, [2, 8]), + (25, 29, [8]), (30, 39, [3, 8]), (40, 69, [8]), (70, 79, [7, 8]), + (80, 99, [8]) + ]) + a.remove(range(0, 100), 8) + a._assert([ + (0, 9, [0, 4, 5]), (10, 14, [1, 4, 5]), (15, 19, [1]), (20, 24, [2]), (25, 29, []), + (30, 39, [3]), (40, 69, []), (70, 79, [7]), (80, 99, []) + ]) + a.compact() + a._assert([ + (0, 9, [0, 4, 5]), (10, 14, [1, 4, 5]), (15, 19, [1]), (20, 24, [2]), (30, 39, [3]), + (70, 79, [7]) + ]) + a.clear(range(12, 21)) + a._assert([ + (0, 9, [0, 4, 5]), (10, 11, [1, 4, 5]), (21, 24, [2]), (30, 39, [3]), + (70, 79, [7]) + ]) + + # ScalarRangeMap test + a = ScalarRangeMap() + a[0:5] = 1 + a[5:10] = 2 + a[4:8] = 3 + del a[2:4] + expect = [1, 1, None, None, 3, 3, 3, 3, 2, 2, None] + for i,j in enumerate(expect): + assert a.get(i) == j + if j is not None: + assert a[i] == j + try: + a[10] + except KeyError: + pass + else: + assert False + + # DictRangeMap test + a = DictRangeMap() + a[0:5, 0] = 10 + a[5:8, 1] = 11 + a[4:6, 2] = 12 + del a[2:4] + expect = [{0: 10}, {0: 10}, {}, {}, {0: 10, 2: 12}, {1: 11, 2: 12}, {1: 11}, {1: 11}, {}] + for i,j in enumerate(expect): + assert a[i] == j + for k, v in j.items(): + assert a[i, k] == v + + # SetRangeMap test + a = SetRangeMap() + a[0:2] = {1,} + a[2:7] = {2,} + a.add(range(1, 4), 3) + a.discard(0, -1) + a.discard(3, 2) + del a[4] + expect = [{1,}, {1,3}, {2,3}, {3,}, set(), {2,}, {2,}, set()] + for i,j in enumerate(expect): + assert a[i] == j + + # BoolRangeMap test + a = BoolRangeMap() + a.set(range(0, 2)) + a.set(range(4, 6)) + a.set(range(5, 5)) + a.clear(range(3, 5)) + expect = [True, True, False, False, False, True, False] + for i,j in enumerate(expect): + assert a[i] == j diff --git a/tools/proxyclient/m1n1/xnutools.py 
def decode_panic(u, p_string, p_args):
    """Print a guest panic message, expanding its printf-style format.

    *p_string* is the guest virtual address of the format string and
    *p_args* the guest virtual address of a ``VaList``. Arguments are taken
    from the first 504 bytes at the va_list stack pointer, one 8-byte slot
    per argument.
    """
    p = u.proxy
    iface = u.iface

    def hv_readmem(addr, size):
        # Translate a guest VA and read from the resulting address
        addr = p.hv_translate(addr, False, False)
        assert addr != 0
        return iface.readmem(addr, size)

    string = hv_readmem(p_string, 1024).split(b"\x00")[0].decode("ascii")
    p_args = p.hv_translate(p_args, False, False)

    args = iface.readstruct(p_args, VaList)

    stack = hv_readmem(args.stack, 504)

    def va_arg(t):
        # Every argument occupies one 8-byte slot; `t` parses its low bytes
        nonlocal stack
        d, stack = stack[:8], stack[8:]
        return t.parse(d)

    # Length modifiers treated as 64-bit. "j", "z", "Z" and "L" are accepted
    # by the regex below, so they need entries here too; without them e.g.
    # "%zu" raised KeyError. "s" is kept from the original table.
    wide = ("l", "ll", "q", "s", "t", "j", "z", "Z", "L")

    utypes = {
        "hh": Int8ul,
        "h": Int16ul,
        None: Int32ul,
        **dict.fromkeys(wide, Int64ul),
    }

    stypes = {
        "hh": Int8sl,
        "h": Int16sl,
        None: Int32sl,
        **dict.fromkeys(wide, Int64sl),
    }

    def format_arg(match):
        pat, flags, width, mod, conv = match.group(0, 1, 2, 3, 4)
        if conv == "%":
            return "%"
        elif conv == "s":
            return hv_readmem(va_arg(Int64ul), 1024).split(b"\x00")[0].decode("ascii")
        elif conv in "di":
            v = va_arg(stypes[mod])
            return f"%{flags or ''}{width or ''}{conv or ''}" % v
        elif conv in "ouxX":
            v = va_arg(utypes[mod])
            return f"%{flags or ''}{width or ''}{conv or ''}" % v
        elif conv in "p":
            return f"0x{va_arg(Int64ul):x}"
        else:
            # Unsupported conversion: show the pattern and the raw slot
            return f"[{pat!r}:{va_arg(Int64ul):x}]"

    string = re.sub('%([-#0 +]*)([1-9][0-9]*)?(hh|h|l|ll|q|L|j|z|Z|t)?([diouxXeEfFgGaAcsCSpnm%])',
                    format_arg, string)
    print(string + "\n", end="")
