# SPDX-License-Identifier: MIT
from ..utils import chexdump
from ..malloc import Heap
from construct.core import *
from ..fw.agx.channels import *
from ..fw.agx.cmdqueue import *
from ..fw.agx.microsequence import *
from ..hw.uat import MemoryAttr
from .object import *
import textwrap

class GPUContext:
    """Per-context GPU state: a TTBR0 page, fixed-address mappings and VA allocators.

    The allocators are created with ctx=None; bind() must be called to give
    the context an ID before anything is mapped through new_at()/buf_at().
    """

    def __init__(self, agx):
        """Allocate the context data object, a zeroed TTBR0 page and the allocators.

        agx: owning driver object. This class uses its uat, u, p, kobj,
             PAGE_SIZE, log(), reg_object() and unreg_object() members.
        """
        self.agx = agx
        self.uat = self.agx.uat
        self.u = self.agx.u
        self.p = self.agx.p
        self.verbose = False

        #self.job_list = agx.kshared.new(JobList)
        #self.job_list.first_job = 0
        #self.job_list.last_head = self.job_list._addr # Empty list has self as last_head
        #self.job_list.unkptr_10 = 0
        #self.job_list.push()

        self.gpu_context = agx.kobj.new(GPUContextData).push()

        # One page for the translation table base, cleared before use.
        self.ttbr0_base = self.u.memalign(self.agx.PAGE_SIZE, self.agx.PAGE_SIZE)
        self.p.memset32(self.ttbr0_base, 0, self.agx.PAGE_SIZE)

        # Objects mapped through new_at(), keyed by their GPU address.
        self.objects = {}

        # 32K VA pages since buffer manager needs that
        self.uobj = GPUAllocator(agx, "Userspace", 0x1600000000, 0x100000000, ctx=None,
                                 guard_pages=16,
                                 va_block=32768, nG=1, AP=0, PXN=1, UXN=1)

        self.gobj = GPUAllocator(agx, "GEM", 0x1500000000, 0x100000000, ctx=None,
                                 guard_pages=16, nG=1, AP=0, PXN=1, UXN=1)

        self.pipeline_base = 0x1100000000
        self.pipeline_size = 1 << 32
        self.pobj = GPUAllocator(agx, "Pipelines", self.pipeline_base + 0x10000, self.pipeline_size,
                                 ctx=None, guard_pages=1, nG=1, AP=0, PXN=1, UXN=1)

    def bind(self, ctx_id):
        """Assign ctx_id to this context and its allocators, and bind the TTBR0 page.

        Also maps a 0x4000-byte buffer named "thing" at 0x6fffff8000.
        """
        self.ctx = ctx_id
        self.uobj.ctx = ctx_id
        self.gobj.ctx = ctx_id
        self.pobj.ctx = ctx_id
        self.uat.bind_context(ctx_id, self.ttbr0_base)
        # NOTE(review): the purpose of this fixed mapping is not evident from this file.
        self.thing = self.buf_at(0x6fffff8000, 0, 0x4000, "thing")

    def make_stream(self, base):
        """Return a non-recursing UAT I/O stream for this context starting at base."""
        return self.uat.iostream(self.ctx, base, recurse=False)

    def new_at(self, addr, objtype, name=None, track=True, **flags):
        """Create a GPUObject of objtype and map it at the fixed GPU address addr.

        Page-aligned backing memory is allocated for the object (stored as
        obj._paddr) and mapped with AttrIndex=MemoryAttr.Shared unless
        overridden; any extra keyword arguments are passed on as mapping flags.

        addr:    GPU address at which the object is mapped.
        objtype: type handed to GPUObject.
        name:    optional object name; left at GPUObject's default when None.
        track:   forwarded to agx.reg_object().

        Returns the new, mapped and registered GPUObject.
        """
        obj = GPUObject(self, objtype)
        obj._stream = self.make_stream
        if name is not None:
            obj._name = name

        size_align = align_up(obj._size, self.agx.PAGE_SIZE)
        obj._addr = addr

        obj._paddr = self.agx.u.memalign(self.agx.PAGE_SIZE, size_align)
        #if isinstance(obj.val, ConstructClassBase):
            #obj.val._addr = obj._addr

        self.agx.log(f"[Context@{self.gpu_context._addr:#x}] Map {obj._name} size {obj._size:#x} @ {obj._addr:#x} ({obj._paddr:#x})")

        # Caller-supplied flags take precedence over the default attribute.
        flags2 = {"AttrIndex": MemoryAttr.Shared}
        flags2.update(flags)
        obj._map_flags = flags2

        obj._size_align = size_align
        self.agx.uat.iomap_at(self.ctx, obj._addr, obj._paddr, size_align, **flags2)
        self.objects[obj._addr] = obj
        self.agx.reg_object(obj, track=track)

        return obj

    def buf_at(self, addr, is_pipeline, size, name=None, track=True):
        """Map a raw byte buffer of size bytes at addr.

        is_pipeline selects AP=1 for the mapping when truthy, AP=0 otherwise.
        Returns the GPUObject created by new_at().
        """
        return self.new_at(addr, Bytes(size), name, track=track,
                           AttrIndex=MemoryAttr.Shared, PXN=1,
                           nG=1, AP=(1 if is_pipeline else 0))

    def load_blob(self, addr, is_pipeline, filename, track=True):
        """Map a buffer at addr sized to the file's contents and push the data into it.

        The object is named after filename and mapped with the same flags as
        buf_at(). Returns the GPUObject.
        """
        # Close the file promptly instead of leaving it to the garbage collector.
        with open(filename, "rb") as blob:
            data = blob.read()

        obj = self.buf_at(addr, is_pipeline, len(data), filename, track=track)
        obj.val = data
        obj.push()

        return obj

    def free(self, obj):
        """Unmap obj, drop it from self.objects and unregister it.

        The object is flagged with _dead = True and its range is remapped with
        VALID=0. Nothing here releases the backing memory at obj._paddr.
        """
        obj._dead = True
        self.agx.uat.iomap_at(self.ctx, obj._addr, 0, obj._size_align, VALID=0)
        del self.objects[obj._addr]
        self.agx.unreg_object(obj)

    def free_at(self, addr):
        """Free the object previously mapped at addr.

        Raises KeyError if no object is recorded at that address.
        """
        self.free(self.objects[addr])

class GPUWorkQueue:
    """Command queue backed by a ring of 64-bit work item addresses.

    submit() writes a work item's address into the next ring slot and
    publishes the new write pointer; wait_empty() runs agx.work() until the
    GPU's done pointer matches it.
    """

    def __init__(self, agx, context, job_list):
        """Allocate the queue info, pointer block and ring, then push the info.

        agx:      owning driver object (kobj/kshared allocators, uat, u, p).
        context:  object whose gpu_context is recorded in the queue info.
        job_list: stored as-is in the queue info.
        """
        self.agx = agx
        self.u = agx.u
        self.p = agx.p
        self.context = context

        # Allocations happen in this order: info, pointers, ring, gpu_buf.
        info = agx.kobj.new(CommandQueueInfo)
        self.info = info

        pointers = agx.kshared.new(CommandQueuePointers).push()
        self.pointers = pointers
        self.pmap = CommandQueuePointerMap(self.u, pointers._paddr)

        # The ring holds rb_size entries of 8 bytes each.
        rb_size = pointers.rb_size
        self.rb_size = rb_size
        ring = agx.kobj.new_buf(8 * rb_size, "GPUWorkQueue.RB")
        self.ring = ring

        info.pointers = pointers
        info.rb_addr = ring._addr
        info.job_list = job_list
        info.gpu_buf_addr = agx.kobj.buf(0x2c18, "GPUWorkQueue.gpu_buf")
        info.gpu_context = context.gpu_context
        info.push()

        self.wptr = 0
        self.first_time = True

        agx.uat.flush_dirty()

    def submit(self, work):
        """Push work, store its address in the current slot and advance the write pointer."""
        work.push()

        slot_paddr = self.ring._paddr + 8 * self.wptr
        self.p.write64(slot_paddr, work._addr)

        next_wptr = (self.wptr + 1) % self.rb_size
        self.wptr = next_wptr

        # Flush before the new write pointer is published through the pointer map.
        self.agx.uat.flush_dirty()
        self.pmap.CPU_WPTR.val = next_wptr

    def wait_empty(self):
        """Call agx.work() repeatedly until GPU_DONEPTR equals our write pointer."""
        while True:
            if self.wptr == self.pmap.GPU_DONEPTR.val:
                return
            self.agx.work()

class GPU3DWorkQueue(GPUWorkQueue):
    """GPUWorkQueue subclass tagged with TYPE = 1.

    NOTE(review): TYPE is not read by any code visible in this file;
    presumably it distinguishes queue kinds for callers -- confirm.
    """
    TYPE = 1

class GPUTAWorkQueue(GPUWorkQueue):
    """GPUWorkQueue subclass tagged with TYPE = 0.

    NOTE(review): TYPE is not read by any code visible in this file;
    presumably it distinguishes queue kinds for callers -- confirm.
    """
    TYPE = 0

class GPUMicroSequence:
    """Collects microsequence ops and serializes them into a GPU buffer."""

    def __init__(self, agx):
        """agx: owning driver object (kobj allocator, iface and uat are used)."""
        self.agx = agx
        self.off = 0      # byte offset at which the next appended op will start
        self.ops = []     # ops in submission order
        self.obj = None   # buffer object holding the built sequence; set by finalize()

    def append(self, op):
        """Add op to the sequence and return the byte offset at which it starts."""
        off = self.off
        self.ops.append(op)
        self.off += op.sizeof()
        return off

    def finalize(self):
        """Terminate the sequence with EndCmd, build it into a new buffer and push it.

        Sets self.size to the total size of all ops (including the EndCmd) and
        self.obj to the allocated buffer, which is also returned.
        """
        self.ops.append(EndCmd())
        self.size = sum(i.sizeof() for i in self.ops)
        self.obj = self.agx.kobj.new_buf(self.size, "GPUMicroSequence", track=False)
        self.obj.val = b"".join(i.build() for i in self.ops)
        self.obj.push()
        return self.obj

    def dump(self):
        """Hexdump the built buffer and print it re-parsed as a MicroSequence.

        Only meaningful after finalize(): self.obj and self.size are not
        available before then.
        """
        chexdump(self.agx.iface.readmem(self.obj._paddr, self.size))
        print(MicroSequence.parse_stream(self.agx.uat.iostream(0, self.obj._addr)))

    def __str__(self):
        """Return a header line followed by one "[index:size] = op" line per op."""
        lines = [f"GPUMicroSequence: {len(self.ops)} ops\n"]
        lines.extend(
            f"[{i:2}:{op.sizeof():#x}] = {op!s}\n"
            for i, op in enumerate(self.ops)
        )
        return "".join(lines)

class GPUBufferManager:
    """Sets up buffer manager control structures and fills its block and page lists.

    Blocks are block_size bytes each (pages_per_block pages of page_size
    bytes), allocated from the context's uobj allocator.
    """

    def __init__(self, agx, context, blocks=8):
        """Allocate the control objects and lists, populate them and push everything.

        agx:     owning driver object (kshared and kobj allocators are used).
        context: object providing the uobj allocator for lists and blocks.
        blocks:  total number of blocks, stored in the block control object.
        """
        self.agx = agx
        self.ctx = context

        self.block_ctl_obj = agx.kshared.new(BufferManagerBlockControl)
        self.block_ctl_obj.total = blocks
        self.block_ctl_obj.wptr = 0
        self.block_ctl_obj.unk = 0
        self.block_ctl = self.block_ctl_obj.push().regmap()

        self.counter_obj = agx.kshared.new(BufferManagerCounter)
        self.counter_obj.count = 0
        self.counter = self.counter_obj.push().regmap()

        self.misc_obj = agx.kshared.new(BufferManagerMisc)
        self.misc_obj.cpu_flag = 1
        self.misc = self.misc_obj.push().regmap()

        self.page_size = 0x8000
        self.pages_per_block = 4
        self.block_size = self.pages_per_block * self.page_size

        self.page_list = context.uobj.new(Array(0x10000 // 4, Int32ul), "BM PageList", track=False)
        self.block_list = context.uobj.new(Array(0x8000 // 4, Int32ul), "BM BlockList", track=False)

        self.info = info = agx.kobj.new(BufferManagerInfo)
        info.page_list_addr = self.page_list._addr
        info.page_list_size = self.page_list._size
        # Derived from pages_per_block so it stays in step with populate().
        info.page_count = self.block_ctl_obj.total * self.pages_per_block
        info.block_count = self.block_ctl_obj.total

        info.block_list_addr = self.block_list._addr
        info.block_ctl = self.block_ctl_obj
        info.last_page = info.page_count - 1
        info.block_size = self.block_size

        info.counter = self.counter_obj

        self.populate()
        # NOTE(review): presumably refreshes the object after populate() wrote
        # wptr through the regmap -- confirm.
        self.block_ctl_obj.pull()
        self.block_list.push()
        self.page_list.push()

        info.push()

    def increment(self):
        """Increase the counter object's count by one and push it."""
        self.counter_obj.count += 1
        self.counter_obj.push()

    def populate(self):
        """Allocate blocks from the current wptr up to total and record their pages.

        For each block index, the block's first page number is stored at
        block_list[idx * 2] (odd entries are not touched here) and its
        pages_per_block consecutive page numbers are stored in page_list.
        Afterwards wptr is set to the index reached.
        """
        idx = self.block_ctl.wptr.val
        total = self.block_ctl.total.val
        while idx < total:
            block = self.ctx.uobj.new_buf(self.block_size, "BM Block", track=False)
            # Page number of the block's first page.
            first_page = block._addr // self.page_size
            self.block_list[idx * 2] = first_page

            page_idx = idx * self.pages_per_block
            for i in range(self.pages_per_block):
                self.page_list[page_idx + i] = first_page + i

            idx += 1
        self.block_ctl.wptr.val = idx