Diffstat (limited to 'tools/proxyclient/m1n1/agx/context.py')
-rw-r--r--   tools/proxyclient/m1n1/agx/context.py   247
1 file changed, 247 insertions, 0 deletions
diff --git a/tools/proxyclient/m1n1/agx/context.py b/tools/proxyclient/m1n1/agx/context.py
new file mode 100644
index 0000000..41ebed5
--- /dev/null
+++ b/tools/proxyclient/m1n1/agx/context.py
@@ -0,0 +1,247 @@
# SPDX-License-Identifier: MIT
from ..utils import chexdump
from ..malloc import Heap
from construct.core import *
from ..fw.agx.channels import *
from ..fw.agx.cmdqueue import *
from ..fw.agx.microsequence import *
from ..hw.uat import MemoryAttr
from .object import *
import textwrap

class GPUContext:
    def __init__(self, agx):
        self.agx = agx
        self.uat = self.agx.uat
        self.u = self.agx.u
        self.p = self.agx.p
        self.verbose = False

        #self.job_list = agx.kshared.new(JobList)
        #self.job_list.first_job = 0
        #self.job_list.last_head = self.job_list._addr # Empty list has self as last_head
        #self.job_list.unkptr_10 = 0
        #self.job_list.push()

        self.gpu_context = agx.kobj.new(GPUContextData).push()

        self.ttbr0_base = self.u.memalign(self.agx.PAGE_SIZE, self.agx.PAGE_SIZE)
        self.p.memset32(self.ttbr0_base, 0, self.agx.PAGE_SIZE)

        self.objects = {}

        # 32K VA pages since buffer manager needs that
        self.uobj = GPUAllocator(agx, "Userspace", 0x1600000000, 0x100000000, ctx=None,
                                 guard_pages=16,
                                 va_block=32768, nG=1, AP=0, PXN=1, UXN=1)

        self.gobj = GPUAllocator(agx, "GEM", 0x1500000000, 0x100000000, ctx=None,
                                 guard_pages=16, nG=1, AP=0, PXN=1, UXN=1)

        self.pipeline_base = 0x1100000000
        self.pipeline_size = 1 << 32
        self.pobj = GPUAllocator(agx, "Pipelines", self.pipeline_base + 0x10000, self.pipeline_size,
                                 ctx=None, guard_pages=1, nG=1, AP=0, PXN=1, UXN=1)

    def bind(self, ctx_id):
        self.ctx = ctx_id
        self.uobj.ctx = ctx_id
        self.gobj.ctx = ctx_id
        self.pobj.ctx = ctx_id
        self.uat.bind_context(ctx_id, self.ttbr0_base)
        self.thing = self.buf_at(0x6fffff8000, 0, 0x4000, "thing")

    def make_stream(self, base):
        return self.uat.iostream(self.ctx, base, recurse=False)

    def new_at(self, addr, objtype, name=None, track=True, **flags):
        obj = GPUObject(self, objtype)
        obj._stream = self.make_stream
        if name is not None:
            obj._name = name

        size_align = align_up(obj._size, self.agx.PAGE_SIZE)
        obj._addr = addr

        obj._paddr = self.agx.u.memalign(self.agx.PAGE_SIZE, size_align)
        #if isinstance(obj.val, ConstructClassBase):
            #obj.val._addr = obj._addr

        self.agx.log(f"[Context@{self.gpu_context._addr:#x}] Map {obj._name} size {obj._size:#x} @ {obj._addr:#x} ({obj._paddr:#x})")

        flags2 = {"AttrIndex": MemoryAttr.Shared}
        flags2.update(flags)
        obj._map_flags = flags2

        obj._size_align = size_align
        self.agx.uat.iomap_at(self.ctx, obj._addr, obj._paddr, size_align, **flags2)
        self.objects[obj._addr] = obj
        self.agx.reg_object(obj, track=track)

        return obj

    def buf_at(self, addr, is_pipeline, size, name=None, track=True):
        return self.new_at(addr, Bytes(size), name, track=track,
                           AttrIndex=MemoryAttr.Shared, PXN=1,
                           nG=1, AP=(1 if is_pipeline else 0))

    def load_blob(self, addr, is_pipeline, filename, track=True):
        data = open(filename, "rb").read()
        obj = self.new_at(addr, Bytes(len(data)), filename, track=track,
                          AttrIndex=MemoryAttr.Shared, PXN=1,
                          nG=1, AP=(1 if is_pipeline else 0))
        obj.val = data
        obj.push()

        return obj

    def free(self, obj):
        obj._dead = True
        # Unmap by remapping the VA range with VALID=0
        self.agx.uat.iomap_at(self.ctx, obj._addr, 0, obj._size_align, VALID=0)
        del self.objects[obj._addr]
        self.agx.unreg_object(obj)

    def free_at(self, addr):
        self.free(self.objects[addr])
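# A minimal usage sketch (hypothetical, not part of the commit): it assumes
# an initialized AGX driver object from the proxyclient environment; the
# context slot and addresses below are illustrative placeholders only.
#
#   ctx = GPUContext(agx)
#   ctx.bind(7)                           # bind to a free UAT context slot
#   buf = ctx.buf_at(0x1234568000, 0, 0x4000, "scratch")
#   ...
#   ctx.free_at(0x1234568000)             # unmaps the VA, unregisters the object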
class GPUWorkQueue:
    def __init__(self, agx, context, job_list):
        self.agx = agx
        self.u = agx.u
        self.p = agx.p
        self.context = context

        self.info = agx.kobj.new(CommandQueueInfo)

        self.pointers = agx.kshared.new(CommandQueuePointers).push()
        self.pmap = CommandQueuePointerMap(self.u, self.pointers._paddr)

        self.rb_size = self.pointers.rb_size
        self.ring = agx.kobj.new_buf(8 * self.rb_size, "GPUWorkQueue.RB")

        self.info.pointers = self.pointers
        self.info.rb_addr = self.ring._addr
        self.info.job_list = job_list
        self.info.gpu_buf_addr = agx.kobj.buf(0x2c18, "GPUWorkQueue.gpu_buf")
        self.info.gpu_context = context.gpu_context
        self.info.push()

        self.wptr = 0
        self.first_time = True

        self.agx.uat.flush_dirty()

    def submit(self, work):
        work.push()

        # Write the work item's address into the next ring slot, then publish
        # the new write pointer for the firmware to consume.
        self.p.write64(self.ring._paddr + 8 * self.wptr, work._addr)
        self.wptr = (self.wptr + 1) % self.rb_size
        self.agx.uat.flush_dirty()
        self.pmap.CPU_WPTR.val = self.wptr

    def wait_empty(self):
        while self.wptr != self.pmap.GPU_DONEPTR.val:
            self.agx.work()

class GPU3DWorkQueue(GPUWorkQueue):
    TYPE = 1

class GPUTAWorkQueue(GPUWorkQueue):
    TYPE = 0

class GPUMicroSequence:
    def __init__(self, agx):
        self.agx = agx
        self.off = 0
        self.ops = []
        self.obj = None

    def append(self, op):
        off = self.off
        self.ops.append(op)
        self.off += op.sizeof()
        return off

    def finalize(self):
        self.ops.append(EndCmd())
        self.size = sum(i.sizeof() for i in self.ops)
        self.obj = self.agx.kobj.new_buf(self.size, "GPUMicroSequence", track=False)
        self.obj.val = b"".join(i.build() for i in self.ops)
        self.obj.push()
        return self.obj

    def dump(self):
        chexdump(self.agx.iface.readmem(self.obj._paddr, self.size))
        print(MicroSequence.parse_stream(self.agx.uat.iostream(0, self.obj._addr)))

    def __str__(self):
        s = f"GPUMicroSequence: {len(self.ops)} ops\n"
        for i, op in enumerate(self.ops):
            op_s = textwrap.indent(str(op), ' ' * 4)
            s += f"[{i:2}:{op.sizeof():#x}] = {op_s}\n"
        return s
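# A hypothetical sketch of driving GPUMicroSequence (the concrete op classes
# come from ..fw.agx.microsequence and are not shown in this diff; `some_op`
# stands in for any op exposing sizeof()/build(), which is all this class
# requires):
#
#   ms = GPUMicroSequence(agx)
#   off = ms.append(some_op)              # byte offset of this op in the sequence
#   obj = ms.finalize()                   # appends EndCmd, packs and pushes the bytes
#   ms.dump()                             # hexdump + re-parse from GPU memory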
class GPUBufferManager:
    def __init__(self, agx, context, blocks=8):
        self.agx = agx
        self.ctx = context

        self.block_ctl_obj = agx.kshared.new(BufferManagerBlockControl)
        self.block_ctl_obj.total = blocks
        self.block_ctl_obj.wptr = 0
        self.block_ctl_obj.unk = 0
        self.block_ctl = self.block_ctl_obj.push().regmap()

        self.counter_obj = agx.kshared.new(BufferManagerCounter)
        self.counter_obj.count = 0
        self.counter = self.counter_obj.push().regmap()

        self.misc_obj = agx.kshared.new(BufferManagerMisc)
        self.misc_obj.cpu_flag = 1
        self.misc = self.misc_obj.push().regmap()

        self.page_size = 0x8000
        self.pages_per_block = 4
        self.block_size = self.pages_per_block * self.page_size

        self.page_list = context.uobj.new(Array(0x10000 // 4, Int32ul), "BM PageList", track=False)
        self.block_list = context.uobj.new(Array(0x8000 // 4, Int32ul), "BM BlockList", track=False)

        self.info = info = agx.kobj.new(BufferManagerInfo)
        info.page_list_addr = self.page_list._addr
        info.page_list_size = self.page_list._size
        info.page_count = self.block_ctl_obj.total * 4
        info.block_count = self.block_ctl_obj.total

        info.block_list_addr = self.block_list._addr
        info.block_ctl = self.block_ctl_obj
        info.last_page = info.page_count - 1
        info.block_size = self.block_size

        info.counter = self.counter_obj

        self.populate()
        self.block_ctl_obj.pull()
        self.block_list.push()
        self.page_list.push()

        info.push()

    def increment(self):
        self.counter_obj.count += 1
        self.counter_obj.push()

    def populate(self):
        # Append blocks until the ring is full: one block_list entry (stride 2)
        # and pages_per_block consecutive page numbers per block.
        idx = self.block_ctl.wptr.val
        total = self.block_ctl.total.val
        while idx < total:
            block = self.ctx.uobj.new_buf(self.block_size, "BM Block", track=False)
            self.block_list[idx * 2] = block._addr // self.page_size

            page_idx = idx * self.pages_per_block
            for i in range(self.pages_per_block):
                self.page_list[page_idx + i] = block._addr // self.page_size + i

            idx += 1
        self.block_ctl.wptr.val = idx
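For reference, the populate() bookkeeping reduces to simple page arithmetic:
each 0x20000-byte block contributes one stride-2 entry to block_list and four
consecutive 32 KiB page numbers to page_list. A self-contained sketch of that
arithmetic (plain Python; the block addresses are hypothetical stand-ins for
what the userspace allocator would return):

PAGE_SIZE = 0x8000                  # 32 KiB pages, matching page_size above
PAGES_PER_BLOCK = 4
BLOCK_SIZE = PAGES_PER_BLOCK * PAGE_SIZE

def fill_lists(block_addrs):
    # Mirrors GPUBufferManager.populate(): stride-2 block entries (odd words
    # stay zero) plus consecutive page numbers for each block.
    block_list, page_list = [], []
    for addr in block_addrs:
        base_page = addr // PAGE_SIZE
        block_list += [base_page, 0]
        page_list += [base_page + i for i in range(PAGES_PER_BLOCK)]
    return block_list, page_list

bl, pl = fill_lists([0x1600000000, 0x1600020000])   # two aligned example VAs
# bl == [0x2c0000, 0, 0x2c0004, 0]
# pl == [0x2c0000, 0x2c0001, 0x2c0002, 0x2c0003,
#        0x2c0004, 0x2c0005, 0x2c0006, 0x2c0007]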
