diff options
| author | magh <magh@maghmogh.com> | 2023-03-06 18:44:55 -0600 |
|---|---|---|
| committer | magh <magh@maghmogh.com> | 2023-03-06 18:44:55 -0600 |
| commit | e80d9d8871b325a04b18f90a9ea4bb7fd148fb25 (patch) | |
| tree | 79dbdb8506b7ff1e92549188d1b94cfc0b3503ae | /tools/src |
Diffstat (limited to 'tools/src')
157 files changed, 40067 insertions, 0 deletions
diff --git a/tools/src/adt.c b/tools/src/adt.c new file mode 100644 index 0000000..4189974 --- /dev/null +++ b/tools/src/adt.c @@ -0,0 +1,375 @@ +/* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) */ + +#include "adt.h" +#include "string.h" + +/* This API is designed to match libfdt's read-only API */ + +#define ADT_CHECK_HEADER(adt) \ + { \ + int err; \ + if ((err = adt_check_header(adt)) != 0) \ + return err; \ + } + +// #define DEBUG + +#ifdef DEBUG +#include "utils.h" +#define dprintf printf +#else +#define dprintf(...) \ + do { \ + } while (0) +#endif + +int _adt_check_node_offset(const void *adt, int offset) +{ + if ((offset < 0) || (offset % ADT_ALIGN)) + return -ADT_ERR_BADOFFSET; + + const struct adt_node_hdr *node = ADT_NODE(adt, offset); + + // Sanity check + if (node->property_count > 2048 || !node->property_count || node->child_count > 2048) + return -ADT_ERR_BADOFFSET; + + return 0; +} + +int _adt_check_prop_offset(const void *adt, int offset) +{ + if ((offset < 0) || (offset % ADT_ALIGN)) + return -ADT_ERR_BADOFFSET; + + const struct adt_property *prop = ADT_PROP(adt, offset); + + if (prop->size & 0x7ff00000) // up to 1MB properties + return -ADT_ERR_BADOFFSET; + + return 0; +} + +int adt_check_header(const void *adt) +{ + return _adt_check_node_offset(adt, 0); +} + +static int _adt_string_eq(const char *a, const char *b, size_t len) +{ + return (strlen(a) == len) && (memcmp(a, b, len) == 0); +} + +static int _adt_nodename_eq(const char *a, const char *b, size_t len) +{ + if (memcmp(a, b, len) != 0) + return 0; + + if (a[len] == '\0') + return 1; + else if (!memchr(b, '@', len) && (a[len] == '@')) + return 1; + else + return 0; +} + +const struct adt_property *adt_get_property_namelen(const void *adt, int offset, const char *name, + size_t namelen) +{ + dprintf("adt_get_property_namelen(%p, %d, \"%s\", %u)\n", adt, offset, name, namelen); + + ADT_FOREACH_PROPERTY(adt, offset, prop) + { + dprintf(" off=0x%x name=\"%s\"\n", offset, 
prop->name); + if (_adt_string_eq(prop->name, name, namelen)) + return prop; + } + + return NULL; +} + +const struct adt_property *adt_get_property(const void *adt, int nodeoffset, const char *name) +{ + return adt_get_property_namelen(adt, nodeoffset, name, strlen(name)); +} + +const void *adt_getprop_namelen(const void *adt, int nodeoffset, const char *name, size_t namelen, + u32 *lenp) +{ + const struct adt_property *prop; + + prop = adt_get_property_namelen(adt, nodeoffset, name, namelen); + + if (!prop) + return NULL; + + if (lenp) + *lenp = prop->size; + + return prop->value; +} + +const void *adt_getprop_by_offset(const void *adt, int offset, const char **namep, u32 *lenp) +{ + const struct adt_property *prop; + + prop = adt_get_property_by_offset(adt, offset); + if (!prop) + return NULL; + + if (namep) + *namep = prop->name; + if (lenp) + *lenp = prop->size; + return prop->value; +} + +const void *adt_getprop(const void *adt, int nodeoffset, const char *name, u32 *lenp) +{ + return adt_getprop_namelen(adt, nodeoffset, name, strlen(name), lenp); +} + +int adt_setprop(void *adt, int nodeoffset, const char *name, void *value, size_t len) +{ + u32 plen; + void *prop = (void *)adt_getprop(adt, nodeoffset, name, &plen); + if (!prop) + return -ADT_ERR_NOTFOUND; + + if (len != plen) + return -ADT_ERR_BADLENGTH; + + memcpy(prop, value, len); + return len; +} + +int adt_getprop_copy(const void *adt, int nodeoffset, const char *name, void *out, size_t len) +{ + u32 plen; + + const void *p = adt_getprop(adt, nodeoffset, name, &plen); + + if (!p) + return -ADT_ERR_NOTFOUND; + + if (plen != len) + return -ADT_ERR_BADLENGTH; + + memcpy(out, p, len); + return len; +} + +int adt_first_child_offset(const void *adt, int offset) +{ + const struct adt_node_hdr *node = ADT_NODE(adt, offset); + + u32 cnt = node->property_count; + offset = adt_first_property_offset(adt, offset); + + while (cnt--) { + offset = adt_next_property_offset(adt, offset); + } + + return offset; +} + +int 
adt_next_sibling_offset(const void *adt, int offset) +{ + const struct adt_node_hdr *node = ADT_NODE(adt, offset); + + u32 cnt = node->child_count; + offset = adt_first_child_offset(adt, offset); + + while (cnt--) { + offset = adt_next_sibling_offset(adt, offset); + } + + return offset; +} + +int adt_subnode_offset_namelen(const void *adt, int offset, const char *name, size_t namelen) +{ + ADT_CHECK_HEADER(adt); + + ADT_FOREACH_CHILD(adt, offset) + { + const char *cname = adt_get_name(adt, offset); + + if (_adt_nodename_eq(cname, name, namelen)) + return offset; + } + + return -ADT_ERR_NOTFOUND; +} + +int adt_subnode_offset(const void *adt, int parentoffset, const char *name) +{ + return adt_subnode_offset_namelen(adt, parentoffset, name, strlen(name)); +} + +int adt_path_offset(const void *adt, const char *path) +{ + return adt_path_offset_trace(adt, path, NULL); +} + +int adt_path_offset_trace(const void *adt, const char *path, int *offsets) +{ + const char *end = path + strlen(path); + const char *p = path; + int offset = 0; + + ADT_CHECK_HEADER(adt); + + while (*p) { + const char *q; + + while (*p == '/') + p++; + if (!*p) + break; + q = strchr(p, '/'); + if (!q) + q = end; + + offset = adt_subnode_offset_namelen(adt, offset, p, q - p); + if (offset < 0) + break; + + if (offsets) + *offsets++ = offset; + + p = q; + } + + if (offsets) + *offsets++ = 0; + + return offset; +} + +const char *adt_get_name(const void *adt, int nodeoffset) +{ + return adt_getprop(adt, nodeoffset, "name", NULL); +} + +static void get_cells(u64 *dst, const u32 **src, int cells) +{ + *dst = 0; + for (int i = 0; i < cells; i++) + *dst |= ((u64) * ((*src)++)) << (32 * i); +} + +int adt_get_reg(const void *adt, int *path, const char *prop, int idx, u64 *paddr, u64 *psize) +{ + int cur = 0; + + if (!*path) + return -ADT_ERR_BADOFFSET; + + while (path[cur + 1]) + cur++; + + int node = path[cur]; + int parent = cur > 0 ? 
path[cur - 1] : 0; + u32 a_cells = 2, s_cells = 1; + + ADT_GETPROP(adt, parent, "#address-cells", &a_cells); + ADT_GETPROP(adt, parent, "#size-cells", &s_cells); + + dprintf("adt_get_reg: node '%s' @ %d, parent @ %d, address-cells=%d size-cells=%d idx=%d\n", + adt_get_name(adt, node), node, parent, a_cells, s_cells, idx); + + if (a_cells < 1 || a_cells > 2 || s_cells > 2) { + dprintf("bad n-cells\n"); + return ADT_ERR_BADNCELLS; + } + + u32 reg_len = 0; + const u32 *reg = adt_getprop(adt, node, prop, ®_len); + + if (!reg || !reg_len) { + dprintf("reg not found or empty\n"); + return -ADT_ERR_NOTFOUND; + } + + if (reg_len < (idx + 1) * (a_cells + s_cells) * 4) { + dprintf("bad reg property length %d\n", reg_len); + return -ADT_ERR_BADVALUE; + } + + reg += idx * (a_cells + s_cells); + + u64 addr, size = 0; + get_cells(&addr, ®, a_cells); + get_cells(&size, ®, s_cells); + + dprintf(" addr=0x%lx size=0x%lx\n", addr, size); + + while (parent) { + cur--; + node = parent; + parent = cur > 0 ? path[cur - 1] : 0; + + dprintf(" walking up to %s\n", adt_get_name(adt, node)); + + u32 ranges_len; + const u32 *ranges = adt_getprop(adt, node, "ranges", &ranges_len); + if (!ranges) + break; + + u32 pa_cells = 2, ps_cells = 1; + ADT_GETPROP(adt, parent, "#address-cells", &pa_cells); + ADT_GETPROP(adt, parent, "#size-cells", &ps_cells); + + dprintf(" translate range to address-cells=%d size-cells=%d\n", pa_cells, ps_cells); + + if (pa_cells < 1 || pa_cells > 2 || ps_cells > 2) + return ADT_ERR_BADNCELLS; + + int range_cnt = ranges_len / (4 * (pa_cells + a_cells + s_cells)); + + while (range_cnt--) { + u64 c_addr, p_addr, c_size; + get_cells(&c_addr, &ranges, a_cells); + get_cells(&p_addr, &ranges, pa_cells); + get_cells(&c_size, &ranges, s_cells); + + dprintf(" ranges %lx %lx %lx\n", c_addr, p_addr, c_size); + + if (addr >= c_addr && (addr + size) <= (c_addr + c_size)) { + dprintf(" translate %lx", addr); + addr = addr - c_addr + p_addr; + dprintf(" -> %lx\n", addr); + break; + } + 
} + + a_cells = pa_cells; + s_cells = ps_cells; + } + + if (paddr) + *paddr = addr; + if (psize) + *psize = size; + + return 0; +} + +bool adt_is_compatible(const void *adt, int nodeoffset, const char *compat) +{ + u32 len; + const char *list = adt_getprop(adt, nodeoffset, "compatible", &len); + if (!list) + return false; + + const char *end = list + len; + + while (list != end) { + if (!strcmp(list, compat)) + return true; + list += strlen(list) + 1; + } + + return false; +} diff --git a/tools/src/adt.h b/tools/src/adt.h new file mode 100644 index 0000000..6373c86 --- /dev/null +++ b/tools/src/adt.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) */ + +#ifndef XDT_H +#define XDT_H + +#include <stddef.h> +#include <stdint.h> + +#include "types.h" + +#define ADT_ERR_NOTFOUND 1 +#define ADT_ERR_BADOFFSET 4 +#define ADT_ERR_BADPATH 5 +#define ADT_ERR_BADNCELLS 14 +#define ADT_ERR_BADVALUE 15 +#define ADT_ERR_BADLENGTH 20 + +#define ADT_ALIGN 4 + +extern void *adt; + +struct adt_property { + char name[32]; + u32 size; + u8 value[]; +}; + +struct adt_node_hdr { + u32 property_count; + u32 child_count; +}; + +#define ADT_NODE(adt, offset) ((const struct adt_node_hdr *)(((u8 *)(adt)) + (offset))) +#define ADT_PROP(adt, offset) ((const struct adt_property *)(((u8 *)(adt)) + (offset))) +#define ADT_SIZE(node) ((node)->size & 0x7fffffff) + +/* This API is designed to match libfdt's read-only API */ + +/* Basic sanity check */ +int adt_check_header(const void *adt); + +static inline int adt_get_property_count(const void *adt, int offset) +{ + return ADT_NODE(adt, offset)->property_count; +} + +static inline int adt_first_property_offset(const void *adt, int offset) +{ + UNUSED(adt); + return offset + sizeof(struct adt_node_hdr); +} + +static inline int adt_next_property_offset(const void *adt, int offset) +{ + const struct adt_property *prop = ADT_PROP(adt, offset); + return offset + sizeof(struct adt_property) + ((prop->size + ADT_ALIGN - 1) 
& ~(ADT_ALIGN - 1)); +} + +static inline const struct adt_property *adt_get_property_by_offset(const void *adt, int offset) +{ + return ADT_PROP(adt, offset); +} + +static inline int adt_get_child_count(const void *adt, int offset) +{ + return ADT_NODE(adt, offset)->child_count; +} + +int adt_first_child_offset(const void *adt, int offset); +int adt_next_sibling_offset(const void *adt, int offset); + +int adt_subnode_offset_namelen(const void *adt, int parentoffset, const char *name, size_t namelen); +int adt_subnode_offset(const void *adt, int parentoffset, const char *name); +int adt_path_offset(const void *adt, const char *path); +int adt_path_offset_trace(const void *adt, const char *path, int *offsets); + +const char *adt_get_name(const void *adt, int nodeoffset); +const struct adt_property *adt_get_property_namelen(const void *adt, int nodeoffset, + const char *name, size_t namelen); +const struct adt_property *adt_get_property(const void *adt, int nodeoffset, const char *name); +const void *adt_getprop_by_offset(const void *adt, int offset, const char **namep, u32 *lenp); +const void *adt_getprop_namelen(const void *adt, int nodeoffset, const char *name, size_t namelen, + u32 *lenp); +const void *adt_getprop(const void *adt, int nodeoffset, const char *name, u32 *lenp); +int adt_setprop(void *adt, int nodeoffset, const char *name, void *value, size_t len); +int adt_getprop_copy(const void *adt, int nodeoffset, const char *name, void *out, size_t len); + +#define ADT_GETPROP(adt, nodeoffset, name, val) \ + adt_getprop_copy(adt, nodeoffset, name, (val), sizeof(*(val))) + +#define ADT_GETPROP_ARRAY(adt, nodeoffset, name, arr) \ + adt_getprop_copy(adt, nodeoffset, name, (arr), sizeof(arr)) + +int adt_get_reg(const void *adt, int *path, const char *prop, int idx, u64 *addr, u64 *size); +bool adt_is_compatible(const void *adt, int nodeoffset, const char *compat); + +#define ADT_FOREACH_CHILD(adt, node) \ + for (int _child_count = adt_get_child_count(adt, node); 
_child_count; _child_count = 0) \ + for (node = adt_first_child_offset(adt, node); _child_count--; \ + node = adt_next_sibling_offset(adt, node)) + +#define ADT_FOREACH_PROPERTY(adt, node, prop) \ + for (int _prop_count = adt_get_property_count(adt, node), \ + _poff = adt_first_property_offset(adt, node); \ + _prop_count; _prop_count = 0) \ + for (const struct adt_property *prop = ADT_PROP(adt, _poff); _prop_count--; \ + prop = ADT_PROP(adt, _poff = adt_next_property_offset(adt, _poff))) + +#endif diff --git a/tools/src/afk.c b/tools/src/afk.c new file mode 100644 index 0000000..7191a21 --- /dev/null +++ b/tools/src/afk.c @@ -0,0 +1,545 @@ +/* SPDX-License-Identifier: MIT */ + +#include "afk.h" +#include "assert.h" +#include "malloc.h" +#include "string.h" +#include "utils.h" + +struct afk_rb_hdr { + u32 bufsz; + u32 unk; + u32 _pad1[14]; + u32 rptr; + u32 _pad2[15]; + u32 wptr; + u32 _pad3[15]; +}; + +struct afk_rb { + bool ready; + struct afk_rb_hdr *hdr; + u32 rptr; + void *buf; + size_t bufsz; +}; + +enum EPICType { + TYPE_NOTIFY = 0, + TYPE_COMMAND = 3, + TYPE_REPLY = 4, + TYPE_NOTIFY_ACK = 8, +}; + +enum EPICCategory { + CAT_REPORT = 0x00, + CAT_NOTIFY = 0x10, + CAT_REPLY = 0x20, + CAT_COMMAND = 0x30, +}; + +enum EPICMessage { + CODE_ANNOUNCE = 0x30, +}; + +struct afk_qe { + u32 magic; + u32 size; + u32 channel; + u32 type; + u8 data[]; +}; + +struct epic_hdr { + u8 version; + u16 seq; + u8 _pad; + u32 unk; + u64 timestamp; +} PACKED; + +struct epic_sub_hdr { + u32 length; + u8 version; + u8 category; + u16 code; + u64 timestamp; + u16 seq; + u16 unk; + u32 unk2; +} PACKED; + +struct epic_announce { + char name[32]; + u8 props[]; +} PACKED; + +struct epic_cmd { + u32 retcode; + u64 rxbuf; + u64 txbuf; + u32 rxlen; + u32 txlen; +} PACKED; + +struct afk_epic_ep { + int ep; + rtkit_dev_t *rtk; + + struct rtkit_buffer buf; + u16 tag; + + struct afk_rb tx; + struct afk_rb rx; + + struct rtkit_buffer txbuf; + struct rtkit_buffer rxbuf; + + bool started; +}; + +enum 
RBEP_MSG { + RBEP_INIT = 0x80, + RBEP_INIT_ACK = 0xa0, + RBEP_GETBUF = 0x89, + RBEP_GETBUF_ACK = 0xa1, + RBEP_INIT_TX = 0x8a, + RBEP_INIT_RX = 0x8b, + RBEP_START = 0xa3, + RBEP_START_ACK = 0x86, + RBEP_SEND = 0xa2, + RBEP_RECV = 0x85, + RBEP_SHUTDOWN = 0xc0, + RBEP_SHUTDOWN_ACK = 0xc1, +}; + +#define BLOCK_SHIFT 6 +#define QE_MAGIC ' POI' + +#define RBEP_TYPE GENMASK(63, 48) + +#define GETBUF_SIZE GENMASK(31, 16) +#define GETBUF_TAG GENMASK(15, 0) +#define GETBUF_ACK_DVA GENMASK(47, 0) + +#define INITRB_OFFSET GENMASK(47, 32) +#define INITRB_SIZE GENMASK(31, 16) +#define INITRB_TAG GENMASK(15, 0) + +#define SEND_WPTR GENMASK(31, 0) + +bool afk_rb_init(afk_epic_ep_t *epic, struct afk_rb *rb, u64 base, u64 size) +{ + rb->hdr = epic->buf.bfr + base; + + if (rb->hdr->bufsz + sizeof(*rb->hdr) != size) { + printf("AFK: ring buffer size mismatch\n"); + return false; + } + + rb->buf = rb->hdr + 1; + rb->bufsz = rb->hdr->bufsz; + rb->ready = true; + + return true; +} + +static int afk_epic_poll(afk_epic_ep_t *epic) +{ + int ret; + struct rtkit_message msg; + + while ((ret = rtkit_recv(epic->rtk, &msg)) == 0) + ; + + if (ret < 0) { + printf("EPIC: rtkit_recv failed!\n"); + return ret; + } + + if (msg.ep != epic->ep) { + printf("EPIC: received message for unexpected endpoint %d\n", msg.ep); + return 0; + } + + int type = FIELD_GET(RBEP_TYPE, msg.msg); + u64 base, size, tag; + switch (type) { + case RBEP_INIT_ACK: + break; + + case RBEP_GETBUF: + size = FIELD_GET(GETBUF_SIZE, msg.msg) << BLOCK_SHIFT; + epic->tag = FIELD_GET(GETBUF_TAG, msg.msg); + if (!rtkit_alloc_buffer(epic->rtk, &epic->buf, size)) { + printf("EPIC: failed to allocate buffer\n"); + return -1; + } + msg.msg = (FIELD_PREP(RBEP_TYPE, RBEP_GETBUF_ACK) | + FIELD_PREP(GETBUF_ACK_DVA, epic->buf.dva)); + if (!rtkit_send(epic->rtk, &msg)) { + printf("EPIC: failed to send buffer address\n"); + return -1; + } + break; + + case RBEP_INIT_TX: + case RBEP_INIT_RX: + base = FIELD_GET(INITRB_OFFSET, msg.msg) << BLOCK_SHIFT; 
+ size = FIELD_GET(INITRB_SIZE, msg.msg) << BLOCK_SHIFT; + tag = FIELD_GET(INITRB_TAG, msg.msg); + if (tag != epic->tag) { + printf("EPIC: wrong tag (0x%x != 0x%lx)\n", epic->tag, tag); + return -1; + } + + struct afk_rb *rb; + if (type == RBEP_INIT_RX) + rb = &epic->rx; + else + rb = &epic->tx; + + if (!afk_rb_init(epic, rb, base, size)) + return -1; + + if (epic->rx.ready && epic->tx.ready) { + msg.msg = FIELD_PREP(RBEP_TYPE, RBEP_START); + if (!rtkit_send(epic->rtk, &msg)) { + printf("EPIC: failed to send start\n"); + return -1; + } + } + break; + + case RBEP_RECV: + return 1; + + case RBEP_START_ACK: + epic->started = true; + break; + + case RBEP_SHUTDOWN_ACK: + epic->started = false; + break; + + default: + printf("EPIC: received unknown message type 0x%x\n", type); + return 0; + break; + } + + return 0; +} + +static int afk_epic_rx(afk_epic_ep_t *epic, struct afk_qe **qe) +{ + int ret; + struct afk_rb *rb = &epic->rx; + + u32 rptr = rb->hdr->rptr; + + while (rptr == rb->hdr->wptr) { + do { + ret = afk_epic_poll(epic); + if (ret < 0) + return ret; + } while (ret == 0); + dma_rmb(); + } + + struct afk_qe *hdr = rb->buf + rptr; + + if (hdr->magic != QE_MAGIC) { + printf("EPIC: bad queue entry magic!\n"); + return -1; + } + + if (rptr + hdr->size > rb->bufsz) { + rptr = 0; + hdr = rb->buf + rptr; + if (hdr->magic != QE_MAGIC) { + printf("EPIC: bad queue entry magic!\n"); + return -1; + } + rb->hdr->rptr = rptr; + } + + *qe = hdr; + + return 1; +} + +static int afk_epic_tx(afk_epic_ep_t *epic, u32 channel, u32 type, void *data, size_t size) +{ + struct afk_rb *rb = &epic->tx; + + u32 rptr = rb->hdr->rptr; + u32 wptr = rb->hdr->wptr; + struct afk_qe *hdr = rb->buf + wptr; + + if (wptr < rptr && (wptr + sizeof(struct afk_qe) > rptr)) { + printf("EPIC: TX ring buffer is full\n"); + return -1; + } + + hdr->magic = QE_MAGIC; + hdr->channel = channel; + hdr->type = type; + hdr->size = size; + + wptr += sizeof(struct afk_qe); + + if (size > rb->bufsz - wptr) { + if (rptr 
< sizeof(struct afk_qe)) { + printf("EPIC: TX ring buffer is full\n"); + return -1; + } + *(struct afk_qe *)rb->buf = *hdr; + hdr = rb->buf; + wptr = sizeof(struct afk_qe); + } + + if (wptr < rptr && (wptr + size > rptr)) { + printf("EPIC: TX ring buffer is full\n"); + return -1; + } + + wptr += size; + wptr = ALIGN_UP(wptr, 1 << BLOCK_SHIFT); + + memcpy(hdr + 1, data, size); + + dma_mb(); + rb->hdr->wptr = wptr; + dma_wmb(); + + struct rtkit_message msg = { + epic->ep, + FIELD_PREP(RBEP_TYPE, RBEP_SEND) | FIELD_PREP(SEND_WPTR, wptr), + }; + + if (!rtkit_send(epic->rtk, &msg)) { + printf("EPIC: failed to send TX WPTR message\n"); + return -1; + } + + return 1; +} + +static void afk_epic_rx_ack(afk_epic_ep_t *epic) +{ + struct afk_rb *rb = &epic->rx; + u32 rptr = rb->hdr->rptr; + struct afk_qe *hdr = rb->buf + rptr; + + if (hdr->magic != QE_MAGIC) { + printf("EPIC: bad queue entry magic!\n"); + } + + dma_mb(); + + rptr = ALIGN_UP(rptr + sizeof(*hdr) + hdr->size, 1 << BLOCK_SHIFT); + assert(rptr < rb->bufsz); + if (rptr == rb->bufsz) + rptr = 0; + rb->hdr->rptr = rptr; +} + +int afk_epic_command(afk_epic_ep_t *epic, int channel, u16 code, void *txbuf, size_t txsize, + void *rxbuf, size_t *rxsize) +{ + struct { + struct epic_hdr hdr; + struct epic_sub_hdr sub; + struct epic_cmd cmd; + } PACKED msg; + + assert(txsize <= epic->txbuf.sz); + assert(!rxsize || *rxsize <= epic->rxbuf.sz); + + memset(&msg, 0, sizeof(msg)); + + msg.hdr.version = 2; + msg.hdr.seq = 0; + msg.sub.length = sizeof(msg.cmd); + msg.sub.version = 3; + msg.sub.category = CAT_COMMAND; + msg.sub.code = code; + msg.sub.seq = 0; + msg.cmd.txbuf = epic->txbuf.dva; + msg.cmd.txlen = txsize; + msg.cmd.rxbuf = epic->rxbuf.dva; + msg.cmd.rxlen = rxsize ? 
*rxsize : 0; + + memcpy(epic->txbuf.bfr, txbuf, txsize); + + int ret = afk_epic_tx(epic, channel, TYPE_COMMAND, &msg, sizeof msg); + if (ret < 0) { + printf("EPIC: failed to transmit command\n"); + return ret; + } + + struct afk_qe *rmsg; + struct epic_cmd *rcmd; + + while (true) { + ret = afk_epic_rx(epic, &rmsg); + if (ret < 0) + return ret; + + if (rmsg->type != TYPE_REPLY && rmsg->type != TYPE_NOTIFY) { + printf("EPIC: got unexpected message type %d during command\n", rmsg->type); + afk_epic_rx_ack(epic); + continue; + } + + struct epic_hdr *hdr = (void *)(rmsg + 1); + struct epic_sub_hdr *sub = (void *)(hdr + 1); + + if (sub->category != CAT_REPLY || sub->code != code) { + printf("EPIC: got unexpected message %02x:%04x during command\n", sub->category, + sub->code); + afk_epic_rx_ack(epic); + continue; + } + + rcmd = (void *)(sub + 1); + break; + } + + if (rcmd->retcode != 0) { + printf("EPIC: IOP returned 0x%x\n", rcmd->retcode); + afk_epic_rx_ack(epic); + return rcmd->retcode; // should be negative already + } + + assert(*rxsize >= rcmd->rxlen); + *rxsize = rcmd->rxlen; + + if (rxsize && *rxsize && rcmd->rxbuf) + memcpy(rxbuf, epic->rxbuf.bfr, *rxsize); + + afk_epic_rx_ack(epic); + + return 0; +} + +afk_epic_ep_t *afk_epic_init(rtkit_dev_t *rtk, int endpoint) +{ + afk_epic_ep_t *epic = malloc(sizeof(afk_epic_ep_t)); + if (!epic) + return NULL; + + memset(epic, 0, sizeof(*epic)); + epic->ep = endpoint; + epic->rtk = rtk; + + if (!rtkit_start_ep(rtk, endpoint)) { + printf("EPIC: failed to start endpoint %d\n", endpoint); + goto err; + } + + struct rtkit_message msg = {endpoint, FIELD_PREP(RBEP_TYPE, RBEP_INIT)}; + if (!rtkit_send(rtk, &msg)) { + printf("EPIC: failed to send init message\n"); + goto err; + } + + while (!epic->started) { + int ret = afk_epic_poll(epic); + if (ret < 0) + break; + else if (ret > 0) + printf("EPIC: received unexpected message during init\n"); + } + + return epic; + +err: + free(epic); + return NULL; +} + +int 
afk_epic_shutdown(afk_epic_ep_t *epic) +{ + struct rtkit_message msg = {epic->ep, FIELD_PREP(RBEP_TYPE, RBEP_SHUTDOWN)}; + if (!rtkit_send(epic->rtk, &msg)) { + printf("EPIC: failed to send shutdown message\n"); + return -1; + } + + while (epic->started) { + int ret = afk_epic_poll(epic); + if (ret < 0) + break; + } + + rtkit_free_buffer(epic->rtk, &epic->buf); + rtkit_free_buffer(epic->rtk, &epic->rxbuf); + rtkit_free_buffer(epic->rtk, &epic->txbuf); + + free(epic); + return 0; +} + +int afk_epic_start_interface(afk_epic_ep_t *epic, char *name, size_t txsize, size_t rxsize) +{ + int channel = -1; + struct afk_qe *msg; + struct epic_announce *announce; + + for (int tries = 0; tries < 20; tries += 1) { + + int ret = afk_epic_rx(epic, &msg); + if (ret < 0) + return ret; + + if (msg->type != TYPE_NOTIFY) { + printf("EPIC: got unexpected message type %d during iface start\n", msg->type); + afk_epic_rx_ack(epic); + continue; + } + + struct epic_hdr *hdr = (void *)(msg + 1); + struct epic_sub_hdr *sub = (void *)(hdr + 1); + + if (sub->category != CAT_REPORT || sub->code != CODE_ANNOUNCE) { + printf("EPIC: got unexpected message %02x:%04x during iface start\n", sub->category, + sub->code); + afk_epic_rx_ack(epic); + continue; + } + + announce = (void *)(sub + 1); + + if (strncmp(name, announce->name, sizeof(announce->name))) { + printf("EPIC: ignoring channel %d: %s\n", msg->channel, announce->name); + afk_epic_rx_ack(epic); + continue; + } + + channel = msg->channel; + break; + } + + if (channel == -1) { + printf("EPIC: too many unexpected messages, giving up\n"); + return -1; + } + + if (!rtkit_alloc_buffer(epic->rtk, &epic->rxbuf, rxsize)) { + printf("EPIC: failed to allocate rx buffer\n"); + return -1; + } + + if (!rtkit_alloc_buffer(epic->rtk, &epic->txbuf, txsize)) { + printf("EPIC: failed to allocate tx buffer\n"); + return -1; + } + + printf("EPIC: started interface %d (%s)\n", msg->channel, announce->name); + + afk_epic_rx_ack(epic); + + return channel; +} diff 
--git a/tools/src/afk.h b/tools/src/afk.h new file mode 100644 index 0000000..e76ade3 --- /dev/null +++ b/tools/src/afk.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef DCP_AFK_H +#define DCP_AFK_H + +#include "rtkit.h" + +typedef struct afk_epic_ep afk_epic_ep_t; + +afk_epic_ep_t *afk_epic_init(rtkit_dev_t *rtkit, int endpoint); +int afk_epic_shutdown(afk_epic_ep_t *epic); + +int afk_epic_start_interface(afk_epic_ep_t *epic, char *name, size_t insize, size_t outsize); +int afk_epic_command(afk_epic_ep_t *epic, int channel, u16 code, void *txbuf, size_t txsize, + void *rxbuf, size_t *rxsize); + +#endif diff --git a/tools/src/aic.c b/tools/src/aic.c new file mode 100644 index 0000000..6974aac --- /dev/null +++ b/tools/src/aic.c @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: MIT */ + +#include "aic.h" +#include "adt.h" +#include "aic_regs.h" +#include "assert.h" +#include "utils.h" + +#define MASK_REG(x) (4 * ((x) >> 5)) +#define MASK_BIT(x) BIT((x)&GENMASK(4, 0)) + +static struct aic aic1 = { + .version = 1, + .nr_die = 1, + .max_die = 1, + .regs = + { + .reg_size = AIC_REG_SIZE, + .event = AIC_EVENT, + .tgt_cpu = AIC_TARGET_CPU, + .sw_set = AIC_SW_SET, + .sw_clr = AIC_SW_CLR, + .mask_set = AIC_MASK_SET, + .mask_clr = AIC_MASK_CLR, + }, +}; + +static struct aic aic2 = { + .version = 2, + .regs = + { + .config = AIC2_IRQ_CFG, + }, +}; + +struct aic *aic; + +static int aic2_init(int node) +{ + int ret = ADT_GETPROP(adt, node, "aic-iack-offset", &aic->regs.event); + if (ret < 0) { + printf("AIC: failed to get property aic-iack-offset\n"); + return ret; + } + + u32 info1 = read32(aic->base + AIC2_INFO1); + aic->nr_die = FIELD_GET(AIC2_INFO1_LAST_DIE, info1) + 1; + aic->nr_irq = FIELD_GET(AIC2_INFO1_NR_IRQ, info1); + + u32 info3 = read32(aic->base + AIC2_INFO3); + aic->max_die = FIELD_GET(AIC2_INFO3_MAX_DIE, info3); + aic->max_irq = FIELD_GET(AIC2_INFO3_MAX_IRQ, info3); + + if (aic->nr_die > AIC_MAX_DIES) { + printf("AIC: more dies than supported: %u\n", 
aic->max_die); + return -1; + } + + if (aic->max_irq > AIC_MAX_HW_NUM) { + printf("AIC: more IRQs than supported: %u\n", aic->max_irq); + return -1; + } + + const u64 start_off = aic->regs.config; + u64 off = start_off + sizeof(u32) * aic->max_irq; /* IRQ_CFG */ + + aic->regs.sw_set = off; + off += sizeof(u32) * (aic->max_irq >> 5); /* SW_SET */ + aic->regs.sw_clr = off; + off += sizeof(u32) * (aic->max_irq >> 5); /* SW_CLR */ + aic->regs.mask_set = off; + off += sizeof(u32) * (aic->max_irq >> 5); /* MASK_SET */ + aic->regs.mask_clr = off; + off += sizeof(u32) * (aic->max_irq >> 5); /* MASK_CLR */ + off += sizeof(u32) * (aic->max_irq >> 5); /* HW_STATE */ + + aic->die_stride = off - start_off; + aic->regs.reg_size = aic->regs.event + 4; + + printf("AIC: AIC2 with %u/%u dies, %u/%u IRQs, reg_size:%05lx die_stride:%05x\n", aic->nr_die, + aic->max_die, aic->nr_irq, aic->max_irq, aic->regs.reg_size, aic->die_stride); + + u32 ext_intr_config_len; + const u8 *ext_intr_config = adt_getprop(adt, node, "aic-ext-intr-cfg", &ext_intr_config_len); + + if (ext_intr_config) { + printf("AIC: Configuring %d external interrupts\n", ext_intr_config_len / 3); + for (u32 i = 0; i < ext_intr_config_len; i += 3) { + u8 die = ext_intr_config[i + 1] >> 4; + u16 irq = ext_intr_config[i] | ((ext_intr_config[i + 1] & 0xf) << 8); + u8 target = ext_intr_config[i + 2]; + assert(die < aic->nr_die); + assert(irq < aic->nr_irq); + mask32(aic->base + aic->regs.config + die * aic->die_stride + 4 * irq, + AIC2_IRQ_CFG_TARGET, FIELD_PREP(AIC2_IRQ_CFG_TARGET, target)); + } + } + + return 0; +} + +void aic_init(void) +{ + int path[8]; + int node = adt_path_offset_trace(adt, "/arm-io/aic", path); + + if (node < 0) { + printf("AIC node not found!\n"); + return; + } + + if (adt_is_compatible(adt, node, "aic,1")) { + aic = &aic1; + } else if (adt_is_compatible(adt, node, "aic,2")) { + aic = &aic2; + } else { + printf("AIC: Error: Unsupported version\n"); + return; + } + + if (adt_get_reg(adt, path, "reg", 
0, &aic->base, NULL)) { + printf("Failed to get AIC reg property!\n"); + return; + } + + if (aic->version == 1) { + printf("AIC: Version 1 @ 0x%lx\n", aic->base); + aic->nr_irq = FIELD_GET(AIC_INFO_NR_HW, read32(aic->base + AIC_INFO)); + aic->max_irq = AIC1_MAX_IRQ; + } else if (aic->version == 2) { + printf("AIC: Version 2 @ 0x%lx\n", aic->base); + int ret = aic2_init(node); + if (ret < 0) + aic = NULL; + } +} + +void aic_set_sw(int irq, bool active) +{ + u32 die = irq / aic->max_irq; + irq = irq % aic->max_irq; + if (active) + write32(aic->base + aic->regs.sw_set + die * aic->die_stride + MASK_REG(irq), + MASK_BIT(irq)); + else + write32(aic->base + aic->regs.sw_clr + die * aic->die_stride + MASK_REG(irq), + MASK_BIT(irq)); +} + +uint32_t aic_ack(void) +{ + return read32(aic->base + aic->regs.event); +} diff --git a/tools/src/aic.h b/tools/src/aic.h new file mode 100644 index 0000000..1f401b1 --- /dev/null +++ b/tools/src/aic.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef AIC_H +#define AIC_H + +#include "types.h" + +#define AIC_MAX_DIES 4 + +struct aic_regs { + uint64_t reg_size; + uint64_t event; + uint64_t tgt_cpu; + uint64_t config; + uint64_t sw_set; + uint64_t sw_clr; + uint64_t mask_set; + uint64_t mask_clr; +}; + +struct aic { + uint64_t base; + uint32_t version; + + uint32_t nr_irq; + uint32_t nr_die; + uint32_t max_irq; + uint32_t max_die; + uint32_t die_stride; + + struct aic_regs regs; +}; + +extern struct aic *aic; + +void aic_init(void); +void aic_set_sw(int irq, bool active); +uint32_t aic_ack(void); + +#endif diff --git a/tools/src/aic_regs.h b/tools/src/aic_regs.h new file mode 100644 index 0000000..8cc360b --- /dev/null +++ b/tools/src/aic_regs.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: MIT */ + +#define AIC_REG_SIZE 0x8000 +#define AIC_INFO 0x0004 +#define AIC_WHOAMI 0x2000 +#define AIC_EVENT 0x2004 +#define AIC_IPI_SEND 0x2008 +#define AIC_IPI_ACK 0x200c +#define AIC_IPI_MASK_SET 0x2024 +#define AIC_IPI_MASK_CLR 0x2028 
+#define AIC_TARGET_CPU 0x3000 +#define AIC_SW_SET 0x4000 +#define AIC_SW_CLR 0x4080 +#define AIC_MASK_SET 0x4100 +#define AIC_MASK_CLR 0x4180 + +#define AIC_CPU_IPI_SET(cpu) (0x5008 + ((cpu) << 7)) +#define AIC_CPU_IPI_CLR(cpu) (0x500c + ((cpu) << 7)) +#define AIC_CPU_IPI_MASK_SET(cpu) (0x5024 + ((cpu) << 7)) +#define AIC_CPU_IPI_MASK_CLR(cpu) (0x5028 + ((cpu) << 7)) + +#define AIC2_INFO1 0x0004 +#define AIC2_INFO2 0x0008 +#define AIC2_INFO3 0x000c +#define AIC2_LATENCY 0x0204 +#define AIC2_IRQ_CFG 0x2000 + +#define AIC2_IRQ_CFG_TARGET GENMASK(3, 0) + +#define AIC_INFO_NR_HW GENMASK(15, 0) + +#define AIC2_INFO1_NR_IRQ GENMASK(15, 0) +#define AIC2_INFO1_LAST_DIE GENMASK(27, 24) + +#define AIC2_INFO3_MAX_IRQ GENMASK(15, 0) +#define AIC2_INFO3_MAX_DIE GENMASK(27, 24) + +#define AIC_EVENT_DIE GENMASK(31, 24) +#define AIC_EVENT_TYPE GENMASK(23, 16) +#define AIC_EVENT_NUM GENMASK(15, 0) + +#define AIC_EVENT_TYPE_HW 1 +#define AIC_EVENT_TYPE_IPI 4 +#define AIC_EVENT_IPI_OTHER 1 +#define AIC_EVENT_IPI_SELF 2 + +#define AIC_IPI_SEND_CPU(cpu) BIT(cpu) + +#define AIC_IPI_OTHER BIT(0) +#define AIC_IPI_SELF BIT(31) + +#define AIC1_MAX_IRQ 0x400 +#define AIC_MAX_HW_NUM (0x80 * 32) // max_irq of the M1 Max diff --git a/tools/src/arm_cpu_regs.h b/tools/src/arm_cpu_regs.h new file mode 100644 index 0000000..06cc919 --- /dev/null +++ b/tools/src/arm_cpu_regs.h @@ -0,0 +1,338 @@ +/* SPDX-License-Identifier: MIT */ + +#include "types.h" + +#define SYS_ACTLR_EL1 sys_reg(3, 0, 1, 0, 1) +#define SYS_ACTLR_EL2 sys_reg(3, 4, 1, 0, 1) +#define SYS_ACTLR_EL3 sys_reg(3, 6, 1, 0, 1) + +#define SYS_CNTHCTL_EL2 sys_reg(3, 4, 14, 1, 0) +// HCR_EL2.E2H == 1 +#define CNTHCTL_EVNTIS BIT(17) +#define CNTHCTL_EL1NVVCT BIT(16) +#define CNTHCTL_EL1NVPCT BIT(15) +#define CNTHCTL_EL1TVCT BIT(14) +#define CNTHCTL_EL1TVT BIT(13) +#define CNTHCTL_ECV BIT(12) +#define CNTHCTL_EL1PTEN BIT(11) +#define CNTHCTL_EL1PCTEN BIT(10) +#define CNTHCTL_EL0PTEN BIT(9) +#define CNTHCTL_EL0VTEN BIT(8) +#define 
CNTHCTL_EVNTI GENMASK(7, 4) +#define CNTHCTL_EVNTDIR BIT(3) +#define CNTHCTL_EVNTEN BIT(2) +#define CNTHCTL_EL0VCTEN BIT(1) +#define CNTHCTL_EL0PCTEN BIT(0) + +#define SYS_CNTV_CTL_EL0 sys_reg(3, 3, 14, 3, 1) +#define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1) +#define SYS_CNTHV_CTL_EL2 sys_reg(3, 4, 14, 3, 1) +#define SYS_CNTHP_CTL_EL2 sys_reg(3, 4, 14, 2, 1) +#define CNTx_CTL_ISTATUS BIT(2) +#define CNTx_CTL_IMASK BIT(1) +#define CNTx_CTL_ENABLE BIT(0) + +#define SYS_ESR_EL2 sys_reg(3, 4, 5, 2, 0) +#define ESR_ISS2 GENMASK(36, 32) +#define ESR_EC GENMASK(31, 26) +#define ESR_IL BIT(25) +#define ESR_ISS GENMASK(24, 0) + +#define ESR_EC_UNKNOWN 0b000000 +#define ESR_EC_WFI 0b000001 +#define ESR_EC_FP_TRAP 0b000111 +#define ESR_EC_PAUTH_TRAP 0b001000 +#define ESR_EC_LS64 0b001010 +#define ESR_EC_BTI 0b001101 +#define ESR_EC_ILLEGAL 0b001110 +#define ESR_EC_SVC 0b010101 +#define ESR_EC_HVC 0b010110 +#define ESR_EC_SMC 0b010111 +#define ESR_EC_MSR 0b011000 +#define ESR_EC_SVE 0b011001 +#define ESR_EC_PAUTH_FAIL 0b011100 +#define ESR_EC_IABORT_LOWER 0b100000 +#define ESR_EC_IABORT 0b100001 +#define ESR_EC_PC_ALIGN 0b100010 +#define ESR_EC_DABORT_LOWER 0b100100 +#define ESR_EC_DABORT 0b100101 +#define ESR_EC_SP_ALIGN 0b100110 +#define ESR_EC_FP_EXC 0b101100 +#define ESR_EC_SERROR 0b101111 +#define ESR_EC_BKPT_LOWER 0b110000 +#define ESR_EC_BKPT 0b110001 +#define ESR_EC_SSTEP_LOWER 0b110010 +#define ESR_EC_SSTEP 0b110011 +#define ESR_EC_WATCH_LOWER 0b110100 +#define ESR_EC_WATCH 0b110101 +#define ESR_EC_BRK 0b111100 + +#define ESR_ISS_DABORT_ISV BIT(24) +#define ESR_ISS_DABORT_SAS GENMASK(23, 22) +#define ESR_ISS_DABORT_SSE BIT(21) +#define ESR_ISS_DABORT_SRT GENMASK(20, 16) +#define ESR_ISS_DABORT_SF BIT(15) +#define ESR_ISS_DABORT_AR BIT(14) +#define ESR_ISS_DABORT_VNCR BIT(13) +#define ESR_ISS_DABORT_SET GENMASK(12, 11) +#define ESR_ISS_DABORT_LSR GENMASK(12, 11) +#define ESR_ISS_DABORT_FnV BIT(10) +#define ESR_ISS_DABORT_EA BIT(9) +#define ESR_ISS_DABORT_CM BIT(8) 
+#define ESR_ISS_DABORT_S1PTR BIT(7) +#define ESR_ISS_DABORT_WnR BIT(6) +#define ESR_ISS_DABORT_DFSC GENMASK(5, 0) + +#define SAS_8B 0 +#define SAS_16B 1 +#define SAS_32B 2 +#define SAS_64B 3 + +#define ESR_ISS_MSR_OP0 GENMASK(21, 20) +#define ESR_ISS_MSR_OP0_SHIFT 20 +#define ESR_ISS_MSR_OP2 GENMASK(19, 17) +#define ESR_ISS_MSR_OP2_SHIFT 17 +#define ESR_ISS_MSR_OP1 GENMASK(16, 14) +#define ESR_ISS_MSR_OP1_SHIFT 14 +#define ESR_ISS_MSR_CRn GENMASK(13, 10) +#define ESR_ISS_MSR_CRn_SHIFT 10 +#define ESR_ISS_MSR_Rt GENMASK(9, 5) +#define ESR_ISS_MSR_CRm GENMASK(4, 1) +#define ESR_ISS_MSR_CRm_SHIFT 1 +#define ESR_ISS_MSR_DIR BIT(0) + +#define SYS_HCR_EL2 sys_reg(3, 4, 1, 1, 0) +#define HCR_TWEDEL GENMASK(63, 60) +#define HCR_TWEDEn BIT(59) +#define HCR_TID5 BIT(58) +#define HCR_DCT BIT(57) +#define HCR_ATA BIT(56) +#define HCR_TTLBOS BIT(55) +#define HCR_TTLBIS BIT(54) +#define HCR_EnSCXT BIT(53) +#define HCR_TOCU BIT(52) +#define HCR_AMVOFFEN BIT(51) +#define HCR_TICAB BIT(50) +#define HCR_TID4 BIT(49) +#define HCR_FIEN BIT(47) +#define HCR_FWB BIT(46) +#define HCR_NV2 BIT(45) +#define HCR_AT BIT(44) +#define HCR_NV1 BIT(43) +#define HCR_NV1 BIT(43) +#define HCR_NV BIT(42) +#define HCR_NV BIT(42) +#define HCR_API BIT(41) +#define HCR_APK BIT(40) +#define HCR_MIOCNCE BIT(38) +#define HCR_TEA BIT(37) +#define HCR_TERR BIT(36) +#define HCR_TLOR BIT(35) +#define HCR_E2H BIT(34) +#define HCR_ID BIT(33) +#define HCR_CD BIT(32) +#define HCR_RW BIT(31) +#define HCR_TRVM BIT(30) +#define HCR_HCD BIT(29) +#define HCR_TDZ BIT(28) +#define HCR_TGE BIT(27) +#define HCR_TVM BIT(26) +#define HCR_TTLB BIT(25) +#define HCR_TPU BIT(24) +#define HCR_TPCP BIT(23) +#define HCR_TPC BIT(23) +#define HCR_TSW BIT(22) +#define HCR_TACR BIT(21) +#define HCR_TIDCP BIT(20) +#define HCR_TSC BIT(19) +#define HCR_TID3 BIT(18) +#define HCR_TID2 BIT(17) +#define HCR_TID1 BIT(16) +#define HCR_TID0 BIT(15) +#define HCR_TWE BIT(14) +#define HCR_TWI BIT(13) +#define HCR_DC BIT(12) +#define HCR_BSU 
GENMASK(11, 10) +#define HCR_FB BIT(9) +#define HCR_VSE BIT(8) +#define HCR_VI BIT(7) +#define HCR_VF BIT(6) +#define HCR_AMO BIT(5) +#define HCR_IMO BIT(4) +#define HCR_FMO BIT(3) +#define HCR_PTW BIT(2) +#define HCR_SWIO BIT(1) +#define HCR_VM BIT(0) + +#define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0) +#define ID_AA64MMFR0_ECV GENMASK(63, 60) +#define ID_AA64MMFR0_FGT GENMASK(59, 56) +#define ID_AA64MMFR0_ExS GENMASK(47, 44) +#define ID_AA64MMFR0_TGran4_2 GENMASK(43, 40) +#define ID_AA64MMFR0_TGran64_2 GENMASK(39, 36) +#define ID_AA64MMFR0_TGran16_2 GENMASK(35, 32) +#define ID_AA64MMFR0_TGran4 GENMASK(31, 28) +#define ID_AA64MMFR0_TGran64 GENMASK(27, 24) +#define ID_AA64MMFR0_TGran16 GENMASK(23, 20) +#define ID_AA64MMFR0_BigEndEL0 GENMASK(19, 16) +#define ID_AA64MMFR0_SNSMem GENMASK(15, 12) +#define ID_AA64MMFR0_BigEnd GENMASK(11, 8) +#define ID_AA64MMFR0_ASIDBits GENMASK(7, 4) +#define ID_AA64MMFR0_PARange GENMASK(3, 0) + +#define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0) +// AArch64-PAR_EL1.F == 0b0 +#define PAR_ATTR GENMASK(63, 56) +#define PAR_PA GENMASK(51, 12) +#define PAR_NS BIT(9) +#define PAR_SH GENMASK(8, 7) +#define PAR_F BIT(0) +// AArch64-PAR_EL1.F == 0b1 +#define PAR_S BIT(9) +#define PAR_PTW BIT(8) +#define PAR_FST GENMASK(6, 1) + +#define SYS_SCTLR_EL1 sys_reg(3, 0, 1, 0, 0) +#define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0) +#define SCTLR_EPAN BIT(57) +#define SCTLR_EnALS BIT(56) +#define SCTLR_EnAS0 BIT(55) +#define SCTLR_EnASR BIT(54) +#define SCTLR_TWEDEL GENMASK(49, 46) +#define SCTLR_TWEDEn BIT(45) +#define SCTLR_DSSBS BIT(44) +#define SCTLR_ATA BIT(43) +#define SCTLR_ATA0 BIT(42) +#define SCTLR_TCF GENMASK(41, 40) +#define SCTLR_TCF0 GENMASK(39, 38) +#define SCTLR_ITFSB BIT(37) +#define SCTLR_BT1 BIT(36) +#define SCTLR_BT0 BIT(35) +#define SCTLR_EnIA BIT(31) +#define SCTLR_EnIB BIT(30) +#define SCTLR_LSMAOE BIT(29) +#define SCTLR_nTLSMD BIT(28) +#define SCTLR_EnDA BIT(27) +#define SCTLR_UCI BIT(26) +#define SCTLR_EE BIT(25) +#define SCTLR_E0E 
BIT(24) +#define SCTLR_SPAN BIT(23) +#define SCTLR_EIS BIT(22) +#define SCTLR_IESB BIT(21) +#define SCTLR_TSCXT BIT(20) +#define SCTLR_WXN BIT(19) +#define SCTLR_nTWE BIT(18) +#define SCTLR_nTWI BIT(16) +#define SCTLR_UCT BIT(15) +#define SCTLR_DZE BIT(14) +#define SCTLR_EnDB BIT(13) +#define SCTLR_I BIT(12) +#define SCTLR_EOS BIT(11) +#define SCTLR_EnRCTX BIT(10) +#define SCTLR_UMA BIT(9) +#define SCTLR_SED BIT(8) +#define SCTLR_ITD BIT(7) +#define SCTLR_nAA BIT(6) +#define SCTLR_CP15BEN BIT(5) +#define SCTLR_SA0 BIT(4) +#define SCTLR_SA BIT(3) +#define SCTLR_C BIT(2) +#define SCTLR_A BIT(1) +#define SCTLR_M BIT(0) + +#define SYS_SPSR_EL1 sys_reg(3, 0, 4, 0, 0) +#define SYS_SPSR_EL12 sys_reg(3, 5, 4, 0, 0) +#define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0) +// exception taken from AArch64 +#define SPSR_N BIT(31) +#define SPSR_Z BIT(30) +#define SPSR_C BIT(29) +#define SPSR_V BIT(28) +#define SPSR_TCO BIT(25) +#define SPSR_DIT BIT(24) +#define SPSR_UAO BIT(23) +#define SPSR_PAN BIT(22) +#define SPSR_SS BIT(21) +#define SPSR_IL BIT(20) +#define SPSR_SSBS BIT(12) +#define SPSR_BTYPE GENMASK(11, 10) +#define SPSR_D BIT(9) +#define SPSR_A BIT(8) +#define SPSR_I BIT(7) +#define SPSR_F BIT(6) +#define SPSR_M GENMASK(4, 0) + +#define SYS_TCR_EL1 sys_reg(3, 0, 2, 0, 2) +#define TCR_DS BIT(59) +#define TCR_TCMA1 BIT(58) +#define TCR_TCMA0 BIT(57) +#define TCR_E0PD1 BIT(56) +#define TCR_E0PD0 BIT(55) +#define TCR_NFD1 BIT(54) +#define TCR_NFD0 BIT(53) +#define TCR_TBID1 BIT(52) +#define TCR_TBID0 BIT(51) +#define TCR_HWU162 BIT(50) +#define TCR_HWU161 BIT(49) +#define TCR_HWU160 BIT(48) +#define TCR_HWU159 BIT(47) +#define TCR_HWU062 BIT(46) +#define TCR_HWU061 BIT(45) +#define TCR_HWU060 BIT(44) +#define TCR_HWU059 BIT(43) +#define TCR_HPD1 BIT(42) +#define TCR_HPD0 BIT(41) +#define TCR_HD BIT(40) +#define TCR_HA BIT(39) +#define TCR_TBI1 BIT(38) +#define TCR_TBI0 BIT(37) +#define TCR_AS BIT(36) +#define TCR_IPS GENMASK(34, 32) +#define TCR_IPS_1TB 0b010UL +#define TCR_IPS_4TB 
0b011UL +#define TCR_IPS_16TB 0b100UL +#define TCR_TG1 GENMASK(31, 30) +#define TCR_TG1_16K 0b01UL +#define TCR_SH1 GENMASK(29, 28) +#define TCR_SH1_IS 0b11UL +#define TCR_ORGN1 GENMASK(27, 26) +#define TCR_ORGN1_WBWA 0b01UL +#define TCR_IRGN1 GENMASK(25, 24) +#define TCR_IRGN1_WBWA 0b01UL +#define TCR_EPD1 BIT(23) +#define TCR_A1 BIT(22) +#define TCR_T1SZ GENMASK(21, 16) +#define TCR_T1SZ_48BIT 16UL +#define TCR_TG0 GENMASK(15, 14) +#define TCR_TG0_16K 0b10UL +#define TCR_SH0 GENMASK(13, 12) +#define TCR_SH0_IS 0b11UL +#define TCR_ORGN0 GENMASK(11, 10) +#define TCR_ORGN0_WBWA 0b01UL +#define TCR_IRGN0 GENMASK(9, 8) +#define TCR_IRGN0_WBWA 0b01UL +#define TCR_EPD0 BIT(7) +#define TCR_T0SZ GENMASK(5, 0) +#define TCR_T0SZ_48BIT 16UL + +#define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2) +// Profile(A) +#define VTCR_SL2 BIT(33) +#define VTCR_DS BIT(32) +#define VTCR_NSA BIT(30) +#define VTCR_NSW BIT(29) +#define VTCR_HWU62 BIT(28) +#define VTCR_HWU61 BIT(27) +#define VTCR_HWU60 BIT(26) +#define VTCR_HWU59 BIT(25) +#define VTCR_HD BIT(22) +#define VTCR_HA BIT(21) +#define VTCR_VS BIT(19) +#define VTCR_PS GENMASK(18, 16) +#define VTCR_TG0 GENMASK(15, 14) +#define VTCR_SH0 GENMASK(13, 12) +#define VTCR_ORGN0 GENMASK(11, 10) +#define VTCR_IRGN0 GENMASK(9, 8) +#define VTCR_SL0 GENMASK(7, 6) +#define VTCR_SL0 GENMASK(7, 6) +#define VTCR_T0SZ GENMASK(5, 0) diff --git a/tools/src/asc.c b/tools/src/asc.c new file mode 100644 index 0000000..67c9d46 --- /dev/null +++ b/tools/src/asc.c @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: MIT */ + +#include "adt.h" +#include "asc.h" +#include "malloc.h" +#include "utils.h" + +#define ASC_CPU_CONTROL 0x44 +#define ASC_CPU_CONTROL_START 0x10 + +#define ASC_MBOX_CONTROL_FULL BIT(16) +#define ASC_MBOX_CONTROL_EMPTY BIT(17) + +#define ASC_MBOX_A2I_CONTROL 0x110 +#define ASC_MBOX_A2I_SEND0 0x800 +#define ASC_MBOX_A2I_SEND1 0x808 +#define ASC_MBOX_A2I_RECV0 0x810 +#define ASC_MBOX_A2I_RECV1 0x818 + +#define ASC_MBOX_I2A_CONTROL 0x114 +#define 
ASC_MBOX_I2A_SEND0 0x820 +#define ASC_MBOX_I2A_SEND1 0x828 +#define ASC_MBOX_I2A_RECV0 0x830 +#define ASC_MBOX_I2A_RECV1 0x838 + +struct asc_dev { + uintptr_t cpu_base; + uintptr_t base; + int iop_node; +}; + +asc_dev_t *asc_init(const char *path) +{ + int asc_path[8]; + int node = adt_path_offset_trace(adt, path, asc_path); + if (node < 0) { + printf("asc: Error getting ASC node %s\n", path); + return NULL; + } + + u64 base; + if (adt_get_reg(adt, asc_path, "reg", 0, &base, NULL) < 0) { + printf("asc: Error getting ASC %s base address.\n", path); + return NULL; + } + + asc_dev_t *asc = malloc(sizeof(*asc)); + if (!asc) + return NULL; + + asc->iop_node = adt_first_child_offset(adt, node); + asc->cpu_base = base; + asc->base = base + 0x8000; + + clear32(base + ASC_CPU_CONTROL, ASC_CPU_CONTROL_START); + return asc; +} + +void asc_free(asc_dev_t *asc) +{ + free(asc); +} + +int asc_get_iop_node(asc_dev_t *asc) +{ + return asc->iop_node; +} + +void asc_cpu_start(asc_dev_t *asc) +{ + set32(asc->cpu_base + ASC_CPU_CONTROL, ASC_CPU_CONTROL_START); +} + +void asc_cpu_stop(asc_dev_t *asc) +{ + clear32(asc->cpu_base + ASC_CPU_CONTROL, ASC_CPU_CONTROL_START); +} + +bool asc_can_recv(asc_dev_t *asc) +{ + return !(read32(asc->base + ASC_MBOX_I2A_CONTROL) & ASC_MBOX_CONTROL_EMPTY); +} + +bool asc_recv(asc_dev_t *asc, struct asc_message *msg) +{ + if (!asc_can_recv(asc)) + return false; + + msg->msg0 = read64(asc->base + ASC_MBOX_I2A_RECV0); + msg->msg1 = (u32)read64(asc->base + ASC_MBOX_I2A_RECV1); + dma_rmb(); + + // printf("received msg: %lx %x\n", msg->msg0, msg->msg1); + + return true; +} + +bool asc_recv_timeout(asc_dev_t *asc, struct asc_message *msg, u32 delay_usec) +{ + u64 timeout = timeout_calculate(delay_usec); + while (!timeout_expired(timeout)) { + if (asc_recv(asc, msg)) + return true; + } + return false; +} + +bool asc_can_send(asc_dev_t *asc) +{ + return !(read32(asc->base + ASC_MBOX_A2I_CONTROL) & ASC_MBOX_CONTROL_FULL); +} + +bool asc_send(asc_dev_t *asc, const 
struct asc_message *msg) +{ + if (poll32(asc->base + ASC_MBOX_A2I_CONTROL, ASC_MBOX_CONTROL_FULL, 0, 200000)) { + printf("asc: A2I mailbox full for 200ms. Is the ASC stuck?"); + return false; + } + + dma_wmb(); + write64(asc->base + ASC_MBOX_A2I_SEND0, msg->msg0); + write64(asc->base + ASC_MBOX_A2I_SEND1, msg->msg1); + + // printf("sent msg: %lx %x\n", msg->msg0, msg->msg1); + return true; +} diff --git a/tools/src/asc.h b/tools/src/asc.h new file mode 100644 index 0000000..0aac349 --- /dev/null +++ b/tools/src/asc.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef ASC_H +#define ASC_H + +#include "types.h" + +struct asc_message { + u64 msg0; + u32 msg1; +}; + +typedef struct asc_dev asc_dev_t; + +asc_dev_t *asc_init(const char *path); +void asc_free(asc_dev_t *asc); + +int asc_get_iop_node(asc_dev_t *asc); + +void asc_cpu_start(asc_dev_t *asc); +void asc_cpu_stop(asc_dev_t *asc); + +bool asc_can_recv(asc_dev_t *asc); +bool asc_can_send(asc_dev_t *asc); + +bool asc_recv(asc_dev_t *asc, struct asc_message *msg); +bool asc_recv_timeout(asc_dev_t *asc, struct asc_message *msg, u32 delay_usec); +bool asc_send(asc_dev_t *asc, const struct asc_message *msg); + +#endif diff --git a/tools/src/chainload.c b/tools/src/chainload.c new file mode 100644 index 0000000..1dd7c68 --- /dev/null +++ b/tools/src/chainload.c @@ -0,0 +1,148 @@ +/* SPDX-License-Identifier: MIT */ + +#include "../build/build_cfg.h" + +#include "chainload.h" +#include "adt.h" +#include "malloc.h" +#include "memory.h" +#include "nvme.h" +#include "string.h" +#include "types.h" +#include "utils.h" +#include "xnuboot.h" + +#ifdef CHAINLOADING +int rust_load_image(const char *spec, void **image, size_t *size); +#endif + +extern u8 _chainload_stub_start[]; +extern u8 _chainload_stub_end[]; + +int chainload_image(void *image, size_t size, char **vars, size_t var_cnt) +{ + u64 new_base = (u64)_base; + size_t image_size = size; + + printf("chainload: Preparing image...\n"); + + // m1n1 variables + 
for (size_t i = 0; i < var_cnt; i++) + image_size += strlen(vars[i]) + 1; + + // pad to end payload + image_size += 4; + image_size = ALIGN_UP(image_size, SZ_16K); + + // SEPFW + size_t sepfw_off = image_size; + + int anode = adt_path_offset(adt, "/chosen/memory-map"); + if (anode < 0) { + printf("chainload: /chosen/memory-map not found\n"); + return -1; + } + u64 sepfw[2]; + if (ADT_GETPROP_ARRAY(adt, anode, "SEPFW", sepfw) < 0) { + printf("chainload: Failed to find SEPFW\n"); + return -1; + } + + image_size += sepfw[1]; + image_size = ALIGN_UP(image_size, SZ_16K); + + // Bootargs + size_t bootargs_off = image_size; + const size_t bootargs_size = SZ_16K; + image_size += bootargs_size; + + printf("chainload: Total image size: 0x%lx\n", image_size); + + size_t stub_size = _chainload_stub_end - _chainload_stub_start; + + void *new_image = malloc(image_size + stub_size); + + // Copy m1n1 + memcpy(new_image, image, size); + + // Add vars + u8 *p = new_image + size; + for (size_t i = 0; i < var_cnt; i++) { + size_t len = strlen(vars[i]); + + memcpy(p, vars[i], len); + p[len] = '\n'; + p += len + 1; + } + + // Add end padding + memset(p, 0, 4); + + // Copy SEPFW + memcpy(new_image + sepfw_off, (void *)sepfw[0], sepfw[1]); + + // Adjust ADT SEPFW address + sepfw[0] = new_base + sepfw_off; + if (adt_setprop(adt, anode, "SEPFW", &sepfw, sizeof(sepfw)) < 0) { + printf("chainload: Failed to set SEPFW prop\n"); + free(new_image); + return -1; + } + + // Copy bootargs + struct boot_args *new_boot_args = new_image + bootargs_off; + *new_boot_args = cur_boot_args; + new_boot_args->top_of_kernel_data = new_base + image_size; + + // Copy chainload stub + void *stub = new_image + image_size; + memcpy(stub, _chainload_stub_start, stub_size); + dc_cvau_range(stub, stub_size); + ic_ivau_range(stub, stub_size); + + // Set up next stage + next_stage.entry = stub; + next_stage.args[0] = new_base + bootargs_off; + next_stage.args[1] = (u64)new_image; + next_stage.args[2] = new_base; + 
next_stage.args[3] = image_size; + next_stage.args[4] = new_base + 0x800; // m1n1 entrypoint + next_stage.restore_logo = false; + + return 0; +} + +#ifdef CHAINLOADING + +int chainload_load(const char *spec, char **vars, size_t var_cnt) +{ + void *image; + size_t size; + int ret; + + if (!nvme_init()) { + printf("chainload: NVME init failed\n"); + return -1; + } + + ret = rust_load_image(spec, &image, &size); + nvme_shutdown(); + if (ret < 0) + return ret; + + return chainload_image(image, size, vars, var_cnt); +} + +#else + +int chainload_load(const char *spec, char **vars, size_t var_cnt) +{ + UNUSED(spec); + UNUSED(vars); + UNUSED(var_cnt); + + printf("Chainloading files not supported in this build!\n"); + return -1; +} + +#endif diff --git a/tools/src/chainload.h b/tools/src/chainload.h new file mode 100644 index 0000000..206f482 --- /dev/null +++ b/tools/src/chainload.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef __CHAINLOAD_H__ +#define __CHAINLOAD_H__ + +#include "types.h" + +int chainload_image(void *base, size_t size, char **vars, size_t var_cnt); +int chainload_load(const char *spec, char **vars, size_t var_cnt); + +#endif diff --git a/tools/src/chainload_asm.S b/tools/src/chainload_asm.S new file mode 100644 index 0000000..361ec8f --- /dev/null +++ b/tools/src/chainload_asm.S @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ + +.text + +.globl _chainload_stub_start +.globl _chainload_stub_end +.type _chainload_stub_start, @function + +_chainload_stub_start: +1: + ldp x5, x6, [x1], #16 + stp x5, x6, [x2] + dc cvau, x2 + ic ivau, x2 + add x2, x2, #16 + sub x3, x3, #16 + cbnz x3, 1b + + br x4 +_chainload_stub_end: diff --git a/tools/src/chickens.c b/tools/src/chickens.c new file mode 100644 index 0000000..68a7eee --- /dev/null +++ b/tools/src/chickens.c @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: MIT */ + +#include "chickens.h" +#include "cpu_regs.h" +#include "uart.h" +#include "utils.h" + +/* Part IDs in MIDR_EL1 */ +#define 
MIDR_PART_T8181_ICESTORM 0x20 +#define MIDR_PART_T8181_FIRESTORM 0x21 +#define MIDR_PART_T8103_ICESTORM 0x22 +#define MIDR_PART_T8103_FIRESTORM 0x23 +#define MIDR_PART_T6000_ICESTORM 0x24 +#define MIDR_PART_T6000_FIRESTORM 0x25 +#define MIDR_PART_T6001_ICESTORM 0x28 +#define MIDR_PART_T6001_FIRESTORM 0x29 +#define MIDR_PART_T8110_BLIZZARD 0x30 +#define MIDR_PART_T8110_AVALANCHE 0x31 +#define MIDR_PART_T8112_BLIZZARD 0x32 +#define MIDR_PART_T8112_AVALANCHE 0x33 + +#define MIDR_REV_LOW GENMASK(3, 0) +#define MIDR_PART GENMASK(15, 4) +#define MIDR_REV_HIGH GENMASK(23, 20) + +void init_m1_icestorm(void); +void init_t8103_firestorm(int rev); +void init_t6000_firestorm(int rev); +void init_t6001_firestorm(int rev); + +void init_m2_blizzard(void); +void init_t8112_avalanche(int rev); + +const char *init_cpu(void) +{ + const char *cpu = "Unknown"; + + msr(OSLAR_EL1, 0); + + /* This is performed unconditionally on all cores (necessary?) */ + if (is_ecore()) + reg_set(SYS_IMP_APL_EHID4, HID4_DISABLE_DC_MVA | HID4_DISABLE_DC_SW_L2_OPS); + else + reg_set(SYS_IMP_APL_HID4, HID4_DISABLE_DC_MVA | HID4_DISABLE_DC_SW_L2_OPS); + + uint64_t midr = mrs(MIDR_EL1); + int part = FIELD_GET(MIDR_PART, midr); + int rev = (FIELD_GET(MIDR_REV_HIGH, midr) << 4) | FIELD_GET(MIDR_REV_LOW, midr); + + printf(" CPU part: 0x%x rev: 0x%x\n", part, rev); + + switch (part) { + case MIDR_PART_T8103_FIRESTORM: + cpu = "M1 Firestorm"; + init_t8103_firestorm(rev); + break; + + case MIDR_PART_T6000_FIRESTORM: + cpu = "M1 Pro Firestorm"; + init_t6000_firestorm(rev); + break; + + case MIDR_PART_T6001_FIRESTORM: + cpu = "M1 Max Firestorm"; + init_t6001_firestorm(rev); + break; + + case MIDR_PART_T8103_ICESTORM: + cpu = "M1 Icestorm"; + init_m1_icestorm(); + break; + + case MIDR_PART_T6000_ICESTORM: + cpu = "M1 Pro Icestorm"; + init_m1_icestorm(); + break; + + case MIDR_PART_T6001_ICESTORM: + cpu = "M1 Max Icestorm"; + init_m1_icestorm(); + break; + + case MIDR_PART_T8112_AVALANCHE: + cpu = "M2 Avalanche"; + 
init_t8112_avalanche(rev); + break; + + case MIDR_PART_T8112_BLIZZARD: + cpu = "M2 Blizzard"; + init_m2_blizzard(); + break; + + default: + uart_puts(" Unknown CPU type"); + break; + } + + int core = mrs(MPIDR_EL1) & 0xff; + + // Unknown, related to SMP? + msr(s3_4_c15_c5_0, core); + msr(SYS_IMP_APL_AMX_CTL_EL1, 0x100); + + // Enable IRQs (at least necessary on t600x) + msr(s3_4_c15_c10_4, 0); + + sysop("isb"); + + /* Unmask external IRQs, set WFI mode to up (2) */ + reg_mask(SYS_IMP_APL_CYC_OVRD, + CYC_OVRD_FIQ_MODE_MASK | CYC_OVRD_IRQ_MODE_MASK | CYC_OVRD_WFI_MODE_MASK, + CYC_OVRD_FIQ_MODE(0) | CYC_OVRD_IRQ_MODE(0) | CYC_OVRD_WFI_MODE(2)); + + /* Enable branch prediction state retention across ACC sleep */ + reg_mask(SYS_IMP_APL_ACC_CFG, ACC_CFG_BP_SLEEP_MASK, ACC_CFG_BP_SLEEP(3)); + + return cpu; +} diff --git a/tools/src/chickens.h b/tools/src/chickens.h new file mode 100644 index 0000000..c1cb5a6 --- /dev/null +++ b/tools/src/chickens.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef __CHICKENS_H__ +#define __CHICKENS_H__ + +const char *init_cpu(void); + +#endif diff --git a/tools/src/chickens_avalanche.c b/tools/src/chickens_avalanche.c new file mode 100644 index 0000000..faf7a6b --- /dev/null +++ b/tools/src/chickens_avalanche.c @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: MIT */ + +#include "cpu_regs.h" +#include "utils.h" + +static void init_common_avalanche(void) +{ + reg_mask(SYS_IMP_APL_HID1, HID1_ZCL_RF_MISPREDICT_THRESHOLD_MASK, + HID1_ZCL_RF_MISPREDICT_THRESHOLD(1)); + reg_mask(SYS_IMP_APL_HID1, HID1_ZCL_RF_RESTART_THRESHOLD_MASK, + HID1_ZCL_RF_RESTART_THRESHOLD(3)); + + reg_set(SYS_IMP_APL_HID11, HID11_DISABLE_LD_NT_WIDGET); + + reg_set(SYS_IMP_APL_HID9, HID9_TSO_ALLOW_DC_ZVA_WC | HID9_AVL_UNK17); + + // "configure dummy cycles to work around incorrect temp sensor readings on + // NEX power gating" (maybe) + reg_mask(SYS_IMP_APL_HID13, + HID13_POST_OFF_CYCLES_MASK | HID13_POST_ON_CYCLES_MASK | HID13_PRE_CYCLES_MASK | + 
HID13_GROUP0_FF1_DELAY_MASK | HID13_GROUP0_FF2_DELAY_MASK | + HID13_GROUP0_FF3_DELAY_MASK | HID13_GROUP0_FF4_DELAY_MASK | + HID13_GROUP0_FF5_DELAY_MASK | HID13_GROUP0_FF6_DELAY_MASK | + HID13_GROUP0_FF7_DELAY_MASK | HID13_RESET_CYCLES_MASK, + HID13_POST_OFF_CYCLES(8) | HID13_POST_ON_CYCLES(8) | HID13_PRE_CYCLES(1) | + HID13_GROUP0_FF1_DELAY(4) | HID13_GROUP0_FF2_DELAY(4) | HID13_GROUP0_FF3_DELAY(4) | + HID13_GROUP0_FF4_DELAY(4) | HID13_GROUP0_FF5_DELAY(4) | HID13_GROUP0_FF6_DELAY(4) | + HID13_GROUP0_FF7_DELAY(4) | HID13_RESET_CYCLES(0)); + + reg_mask(SYS_IMP_APL_HID26, HID26_GROUP1_OFFSET_MASK | HID26_GROUP2_OFFSET_MASK, + HID26_GROUP1_OFFSET(26) | HID26_GROUP2_OFFSET(31)); + reg_mask(SYS_IMP_APL_HID27, HID27_GROUP3_OFFSET_MASK, HID27_GROUP3_OFFSET(31)); +} + +static void init_m2_avalanche(void) +{ + init_common_avalanche(); + + reg_mask(SYS_IMP_APL_HID3, HID3_DEV_PCIE_THROTTLE_LIMIT_MASK, HID3_DEV_PCIE_THROTTLE_LIMIT(60)); + reg_set(SYS_IMP_APL_HID3, HID3_DEV_PCIE_THROTTLE_ENABLE); + reg_set(SYS_IMP_APL_HID18, HID18_AVL_UNK27 | HID18_AVL_UNK29); + reg_set(SYS_IMP_APL_HID16, HID16_AVL_UNK12); +} + +void init_t8112_avalanche(int rev) +{ + UNUSED(rev); + + init_m2_avalanche(); +} diff --git a/tools/src/chickens_blizzard.c b/tools/src/chickens_blizzard.c new file mode 100644 index 0000000..8b88b6c --- /dev/null +++ b/tools/src/chickens_blizzard.c @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ + +#include "cpu_regs.h" +#include "utils.h" + +static void init_common_blizzard(void) +{ + reg_set(SYS_IMP_APL_EHID0, EHID0_BLI_UNK32); +} + +void init_m2_blizzard(void) +{ + init_common_blizzard(); + + reg_mask(SYS_IMP_APL_EHID9, EHID9_DEV_2_THROTTLE_LIMIT_MASK, EHID9_DEV_2_THROTTLE_LIMIT(60)); + reg_set(SYS_IMP_APL_EHID9, EHID9_DEV_2_THROTTLE_ENABLE); + reg_set(SYS_IMP_APL_EHID18, EHID18_BLZ_UNK34); +} diff --git a/tools/src/chickens_firestorm.c b/tools/src/chickens_firestorm.c new file mode 100644 index 0000000..7754820 --- /dev/null +++ b/tools/src/chickens_firestorm.c 
@@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: MIT */ + +#include "cpu_regs.h" +#include "utils.h" + +static void init_common_firestorm(void) +{ + reg_set(SYS_IMP_APL_HID0, HID0_SAME_PG_POWER_OPTIMIZATION); + + // Disable SMC trapping to EL2 + reg_clr(SYS_IMP_APL_HID1, HID1_TRAP_SMC); + + reg_clr(SYS_IMP_APL_HID3, HID3_DEV_PCIE_THROTTLE_ENABLE | HID3_DISABLE_ARBITER_FIX_BIF_CRD); + + // "Post-silicon tuning of STNT widget contiguous counter threshold" + reg_mask(SYS_IMP_APL_HID4, HID4_STNT_COUNTER_THRESHOLD_MASK, HID4_STNT_COUNTER_THRESHOLD(3)); + + // "Sibling Merge in LLC can cause UC load to violate ARM Memory Ordering Rules." + reg_set(SYS_IMP_APL_HID5, HID5_DISABLE_FILL_2C_MERGE); + + reg_set(SYS_IMP_APL_HID9, HID9_TSO_ALLOW_DC_ZVA_WC); + + reg_set(SYS_IMP_APL_HID11, HID11_DISABLE_LD_NT_WIDGET); + + // "configure dummy cycles to work around incorrect temp sensor readings on + // NEX power gating" + reg_mask(SYS_IMP_APL_HID13, HID13_PRE_CYCLES_MASK, HID13_PRE_CYCLES(4)); + + // Best bit names... + // Maybe: "RF bank and Multipass conflict forward progress widget does not + // handle 3+ cycle livelock" + reg_set(SYS_IMP_APL_HID16, HID16_SPAREBIT0 | HID16_SPAREBIT3 | HID16_ENABLE_MPX_PICK_45 | + HID16_ENABLE_MP_CYCLONE_7); +} + +static void init_m1_firestorm(void) +{ + init_common_firestorm(); + + // "Cross-beat Crypto(AES/PMUL) ICache fusion is not disabled for branch + // uncondtional "recoded instruction." 
+ reg_set(SYS_IMP_APL_HID0, HID0_FETCH_WIDTH_DISABLE | HID0_CACHE_FUSION_DISABLE); + + reg_set(SYS_IMP_APL_HID7, HID7_FORCE_NONSPEC_IF_STEPPING | + HID7_FORCE_NONSPEC_IF_SPEC_FLUSH_POINTER_INVALID_AND_MP_VALID); + + reg_mask(SYS_IMP_APL_HID7, HID7_FORCE_NONSPEC_TARGET_TIMER_SEL_MASK, + HID7_FORCE_NONSPEC_TARGET_TIMER_SEL(3)); + + reg_set(SYS_IMP_APL_HID9, HID9_TSO_SERIALIZE_VLD_MICROOPS | HID9_FIX_BUG_51667805); + + reg_set(SYS_IMP_APL_HID18, HID18_HVC_SPECULATION_DISABLE); + + reg_clr(SYS_IMP_APL_HID21, HID21_ENABLE_LDREX_FILL_REPLY); +} + +void init_t8103_firestorm(int rev) +{ + init_m1_firestorm(); + + reg_mask(SYS_IMP_APL_HID6, HID6_UP_CRD_TKN_INIT_C2_MASK, HID6_UP_CRD_TKN_INIT_C2(0)); + + if (rev >= 0x10) { + reg_set(SYS_IMP_APL_HID4, + HID4_ENABLE_LFSR_STALL_LOAD_PIPE_2_ISSUE | HID4_ENABLE_LFSR_STALL_STQ_REPLAY); + + reg_set(SYS_IMP_APL_HID9, HID9_FIX_BUG_55719865); + reg_set(SYS_IMP_APL_HID11, HID11_ENABLE_FIX_UC_55719865); + } + + if (rev == 0x11) + reg_set(SYS_IMP_APL_HID1, HID1_ENABLE_MDSB_STALL_PIPELINE_ECO | HID1_ENABLE_BR_KILL_LIMIT); + + if (rev >= 0x11) + reg_set(SYS_IMP_APL_HID18, HID18_SPAREBIT17); +} + +void init_t6000_firestorm(int rev) +{ + init_m1_firestorm(); + + reg_set(SYS_IMP_APL_HID9, HID9_FIX_BUG_55719865); + reg_set(SYS_IMP_APL_HID11, HID11_ENABLE_FIX_UC_55719865); + + if (rev >= 0x10) { + reg_set(SYS_IMP_APL_HID1, HID1_ENABLE_MDSB_STALL_PIPELINE_ECO | HID1_ENABLE_BR_KILL_LIMIT); + + reg_set(SYS_IMP_APL_HID4, + HID4_ENABLE_LFSR_STALL_LOAD_PIPE_2_ISSUE | HID4_ENABLE_LFSR_STALL_STQ_REPLAY); + + reg_set(SYS_IMP_APL_HID18, HID18_SPAREBIT17); + } +} + +void init_t6001_firestorm(int rev) +{ + init_m1_firestorm(); + + reg_set(SYS_IMP_APL_HID1, HID1_ENABLE_MDSB_STALL_PIPELINE_ECO); + + reg_set(SYS_IMP_APL_HID4, + HID4_ENABLE_LFSR_STALL_LOAD_PIPE_2_ISSUE | HID4_ENABLE_LFSR_STALL_STQ_REPLAY); + + reg_set(SYS_IMP_APL_HID9, HID9_FIX_BUG_55719865); + + reg_set(SYS_IMP_APL_HID11, HID11_ENABLE_FIX_UC_55719865); + + if (rev >= 0x10) { + 
reg_set(SYS_IMP_APL_HID1, HID1_ENABLE_BR_KILL_LIMIT); + + reg_set(SYS_IMP_APL_HID18, HID18_SPAREBIT17); + } +} diff --git a/tools/src/chickens_icestorm.c b/tools/src/chickens_icestorm.c new file mode 100644 index 0000000..bc0cfb8 --- /dev/null +++ b/tools/src/chickens_icestorm.c @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: MIT */ + +#include "cpu_regs.h" +#include "utils.h" + +static void init_common_icestorm(void) +{ + // "Sibling Merge in LLC can cause UC load to violate ARM Memory Ordering Rules." + reg_set(SYS_IMP_APL_HID5, HID5_DISABLE_FILL_2C_MERGE); + + reg_clr(SYS_IMP_APL_EHID9, EHID9_DEV_2_THROTTLE_ENABLE); + + // "Prevent store-to-load forwarding for UC memory to avoid barrier ordering + // violation" + reg_set(SYS_IMP_APL_EHID10, HID10_FORCE_WAIT_STATE_DRAIN_UC | HID10_DISABLE_ZVA_TEMPORAL_TSO); + + // Disable SMC trapping to EL2 + reg_clr(SYS_IMP_APL_EHID20, EHID20_TRAP_SMC); +} + +void init_m1_icestorm(void) +{ + init_common_icestorm(); + + reg_set(SYS_IMP_APL_EHID20, EHID20_FORCE_NONSPEC_IF_OLDEST_REDIR_VALID_AND_OLDER | + EHID20_FORCE_NONSPEC_IF_SPEC_FLUSH_POINTER_NE_BLK_RTR_POINTER); + + reg_mask(SYS_IMP_APL_EHID20, EHID20_FORCE_NONSPEC_TARGETED_TIMER_SEL_MASK, + EHID20_FORCE_NONSPEC_TARGETED_TIMER_SEL(3)); +} diff --git a/tools/src/clk.c b/tools/src/clk.c new file mode 100644 index 0000000..ec0c77d --- /dev/null +++ b/tools/src/clk.c @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: MIT */ + +#include "clk.h" +#include "adt.h" +#include "types.h" +#include "utils.h" + +#define CLK_MUX GENMASK(27, 24) + +#define NCO_BASE 5 +#define NUM_NCOS 5 + +void clk_init(void) +{ + int path[8]; + int node = adt_path_offset_trace(adt, "/arm-io/mca-switch", path); + + if (node < 0) { + printf("mca-switch node not found!\n"); + return; + } + + u64 mca_clk_base, mca_clk_size; + if (adt_get_reg(adt, path, "reg", 2, &mca_clk_base, &mca_clk_size)) { + printf("Failed to get mca-switch reg property!\n"); + return; + } + + printf("CLK: MCA clock registers @ 0x%lx 
(0x%lx)\n", mca_clk_base, mca_clk_size); + + unsigned int i; + for (i = 0; i < (mca_clk_size / 4); i++) + mask32(mca_clk_base + 4 * i, CLK_MUX, FIELD_PREP(CLK_MUX, NCO_BASE + min(NUM_NCOS - 1, i))); + + printf("CLK: Initialized %d MCA clock muxes\n", i); +} diff --git a/tools/src/clk.h b/tools/src/clk.h new file mode 100644 index 0000000..bb79fa4 --- /dev/null +++ b/tools/src/clk.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef __CLK_H__ +#define __CLK_H__ + +void clk_init(void); + +#endif diff --git a/tools/src/cpu_regs.h b/tools/src/cpu_regs.h new file mode 100644 index 0000000..236b53e --- /dev/null +++ b/tools/src/cpu_regs.h @@ -0,0 +1,290 @@ +/* SPDX-License-Identifier: MIT */ + +#include "arm_cpu_regs.h" +#include "types.h" + +/* ARM extensions */ +#define ESR_EC_IMPDEF 0b111111 +#define ESR_ISS_IMPDEF_MSR 0x20 + +#define SYS_IMP_APL_ACTLR_EL12 sys_reg(3, 6, 15, 14, 6) + +#define SYS_IMP_APL_AMX_CTL_EL1 sys_reg(3, 4, 15, 1, 4) +#define SYS_IMP_APL_AMX_CTL_EL2 sys_reg(3, 4, 15, 4, 7) +#define SYS_IMP_APL_AMX_CTL_EL12 sys_reg(3, 4, 15, 4, 6) + +#define AMX_CTL_EN BIT(63) +#define AMX_CTL_EN_EL1 BIT(62) + +#define SYS_IMP_APL_CNTVCT_ALIAS_EL0 sys_reg(3, 4, 15, 10, 6) + +/* HID registers */ +#define SYS_IMP_APL_HID0 sys_reg(3, 0, 15, 0, 0) +#define HID0_FETCH_WIDTH_DISABLE BIT(28) +#define HID0_CACHE_FUSION_DISABLE BIT(36) +#define HID0_SAME_PG_POWER_OPTIMIZATION BIT(45) + +#define SYS_IMP_APL_EHID0 sys_reg(3, 0, 15, 0, 1) +#define EHID0_BLI_UNK32 BIT(32) + +#define SYS_IMP_APL_HID1 sys_reg(3, 0, 15, 1, 0) +#define HID1_TRAP_SMC BIT(54) +#define HID1_ENABLE_MDSB_STALL_PIPELINE_ECO BIT(58) +#define HID1_ENABLE_BR_KILL_LIMIT BIT(60) + +#define HID1_ZCL_RF_RESTART_THRESHOLD_MASK GENMASK(23, 22) +#define HID1_ZCL_RF_RESTART_THRESHOLD(x) (((unsigned long)x) << 22) +#define HID1_ZCL_RF_MISPREDICT_THRESHOLD_MASK GENMASK(43, 42) +#define HID1_ZCL_RF_MISPREDICT_THRESHOLD(x) (((unsigned long)x) << 42) + +#define SYS_IMP_APL_HID3 sys_reg(3, 0, 15, 3, 0) 
+#define HID3_DISABLE_ARBITER_FIX_BIF_CRD BIT(44) +#define HID3_DEV_PCIE_THROTTLE_LIMIT_MASK GENMASK(62, 57) +#define HID3_DEV_PCIE_THROTTLE_LIMIT(x) (((unsigned long)x) << 57) +#define HID3_DEV_PCIE_THROTTLE_ENABLE BIT(63) + +#define SYS_IMP_APL_HID4 sys_reg(3, 0, 15, 4, 0) +#define SYS_IMP_APL_EHID4 sys_reg(3, 0, 15, 4, 1) +#define HID4_DISABLE_DC_MVA BIT(11) +#define HID4_DISABLE_DC_SW_L2_OPS BIT(44) +#define HID4_STNT_COUNTER_THRESHOLD(x) (((unsigned long)x) << 40) +#define HID4_STNT_COUNTER_THRESHOLD_MASK (3UL << 40) +#define HID4_ENABLE_LFSR_STALL_LOAD_PIPE_2_ISSUE BIT(49) +#define HID4_ENABLE_LFSR_STALL_STQ_REPLAY BIT(53) + +#define SYS_IMP_APL_HID5 sys_reg(3, 0, 15, 5, 0) +#define HID5_DISABLE_FILL_2C_MERGE BIT(61) + +#define SYS_IMP_APL_HID6 sys_reg(3, 0, 15, 6, 0) +#define HID6_UP_CRD_TKN_INIT_C2(x) (((unsigned long)x) << 5) +#define HID6_UP_CRD_TKN_INIT_C2_MASK (0x1FUL << 5) + +#define SYS_IMP_APL_HID7 sys_reg(3, 0, 15, 7, 0) +#define HID7_FORCE_NONSPEC_IF_SPEC_FLUSH_POINTER_INVALID_AND_MP_VALID BIT(16) +#define HID7_FORCE_NONSPEC_IF_STEPPING BIT(20) +#define HID7_FORCE_NONSPEC_TARGET_TIMER_SEL(x) (((unsigned long)x) << 24) +#define HID7_FORCE_NONSPEC_TARGET_TIMER_SEL_MASK (3UL << 24) + +#define SYS_IMP_APL_HID9 sys_reg(3, 0, 15, 9, 0) +#define HID9_AVL_UNK17 BIT(17) +#define HID9_TSO_ALLOW_DC_ZVA_WC BIT(26) +#define HID9_TSO_SERIALIZE_VLD_MICROOPS BIT(29) +#define HID9_FIX_BUG_51667805 BIT(48) +#define HID9_FIX_BUG_55719865 BIT(55) + +#define SYS_IMP_APL_EHID9 sys_reg(3, 0, 15, 9, 1) +#define EHID9_DEV_2_THROTTLE_ENABLE BIT(5) +#define EHID9_DEV_2_THROTTLE_LIMIT_MASK GENMASK(11, 6) +#define EHID9_DEV_2_THROTTLE_LIMIT(x) (((unsigned long)x) << 6) + +#define SYS_IMP_APL_HID10 sys_reg(3, 0, 15, 10, 0) +#define SYS_IMP_APL_EHID10 sys_reg(3, 0, 15, 10, 1) +#define HID10_FORCE_WAIT_STATE_DRAIN_UC BIT(32) +#define HID10_DISABLE_ZVA_TEMPORAL_TSO BIT(49) + +#define SYS_IMP_APL_HID11 sys_reg(3, 0, 15, 11, 0) +#define HID11_ENABLE_FIX_UC_55719865 BIT(15) +#define 
HID11_DISABLE_LD_NT_WIDGET BIT(59) + +#define SYS_IMP_APL_HID13 sys_reg(3, 0, 15, 14, 0) +#define HID13_POST_OFF_CYCLES(x) (((unsigned long)x)) +#define HID13_POST_OFF_CYCLES_MASK GENMASK(6, 0) +#define HID13_POST_ON_CYCLES(x) (((unsigned long)x) << 7) +#define HID13_POST_ON_CYCLES_MASK GENMASK(13, 7) +#define HID13_PRE_CYCLES(x) (((unsigned long)x) << 14) +#define HID13_PRE_CYCLES_MASK GENMASK(17, 14) +#define HID13_GROUP0_FF1_DELAY(x) (((unsigned long)x) << 26) +#define HID13_GROUP0_FF1_DELAY_MASK GENMASK(29, 26) +#define HID13_GROUP0_FF2_DELAY(x) (((unsigned long)x) << 30) +#define HID13_GROUP0_FF2_DELAY_MASK GENMASK(33, 30) +#define HID13_GROUP0_FF3_DELAY(x) (((unsigned long)x) << 34) +#define HID13_GROUP0_FF3_DELAY_MASK GENMASK(37, 34) +#define HID13_GROUP0_FF4_DELAY(x) (((unsigned long)x) << 38) +#define HID13_GROUP0_FF4_DELAY_MASK GENMASK(41, 38) +#define HID13_GROUP0_FF5_DELAY(x) (((unsigned long)x) << 42) +#define HID13_GROUP0_FF5_DELAY_MASK GENMASK(45, 42) +#define HID13_GROUP0_FF6_DELAY(x) (((unsigned long)x) << 46) +#define HID13_GROUP0_FF6_DELAY_MASK GENMASK(49, 46) +#define HID13_GROUP0_FF7_DELAY(x) (((unsigned long)x) << 50) +#define HID13_GROUP0_FF7_DELAY_MASK GENMASK(53, 50) +#define HID13_RESET_CYCLES(x) (((unsigned long)x) << 60) +#define HID13_RESET_CYCLES_MASK (0xFUL << 60) + +#define SYS_IMP_APL_HID16 sys_reg(3, 0, 15, 15, 2) +#define HID16_AVL_UNK12 BIT(12) +#define HID16_SPAREBIT0 BIT(56) +#define HID16_SPAREBIT3 BIT(59) +#define HID16_ENABLE_MPX_PICK_45 BIT(61) +#define HID16_ENABLE_MP_CYCLONE_7 BIT(62) + +#define SYS_IMP_APL_HID18 sys_reg(3, 0, 15, 11, 2) +#define HID18_HVC_SPECULATION_DISABLE BIT(14) +#define HID18_AVL_UNK27 BIT(27) +#define HID18_AVL_UNK29 BIT(29) +#define HID18_SPAREBIT7 BIT(39) +#define HID18_SPAREBIT17 BIT(49) + +#define SYS_IMP_APL_EHID18 sys_reg(3, 0, 15, 11, 3) +#define EHID18_BLZ_UNK34 BIT(34) + +#define SYS_IMP_APL_EHID20 sys_reg(3, 0, 15, 1, 2) +#define EHID20_TRAP_SMC BIT(8) +#define 
EHID20_FORCE_NONSPEC_IF_OLDEST_REDIR_VALID_AND_OLDER BIT(15) +#define EHID20_FORCE_NONSPEC_IF_SPEC_FLUSH_POINTER_NE_BLK_RTR_POINTER BIT(16) +#define EHID20_FORCE_NONSPEC_TARGETED_TIMER_SEL(x) (((unsigned long)x) << 21) +#define EHID20_FORCE_NONSPEC_TARGETED_TIMER_SEL_MASK (3UL << 21) + +#define SYS_IMP_APL_HID21 sys_reg(3, 0, 15, 1, 3) +#define HID21_ENABLE_LDREX_FILL_REPLY BIT(19) +#define HID21_LDQ_RTR_WAIT_FOR_OLD_ST_REL_COMPLETION BIT(33) +#define HID21_DISABLE_CDP_REPLY_PURGED_TRANSACTION BIT(34) +#define HID21_AVL_UNK52 BIT(52) + +#define SYS_IMP_APL_HID26 sys_reg(3, 0, 15, 0, 3) +#define HID26_GROUP1_OFFSET(x) (((unsigned long)x) << 0) +#define HID26_GROUP1_OFFSET_MASK (0xffUL << 0) +#define HID26_GROUP2_OFFSET(x) (((unsigned long)x) << 36) +#define HID26_GROUP2_OFFSET_MASK (0xffUL << 36) + +#define SYS_IMP_APL_HID27 sys_reg(3, 0, 15, 0, 4) +#define HID27_GROUP3_OFFSET(x) (((unsigned long)x) << 8) +#define HID27_GROUP3_OFFSET_MASK (0xffUL << 8) + +#define SYS_IMP_APL_PMCR0 sys_reg(3, 1, 15, 0, 0) +#define PMCR0_CNT_EN_MASK (MASK(8) | GENMASK(33, 32)) +#define PMCR0_IMODE_OFF (0 << 8) +#define PMCR0_IMODE_PMI (1 << 8) +#define PMCR0_IMODE_AIC (2 << 8) +#define PMCR0_IMODE_HALT (3 << 8) +#define PMCR0_IMODE_FIQ (4 << 8) +#define PMCR0_IMODE_MASK (7 << 8) +#define PMCR0_IACT (BIT(11)) +#define PMCR0_PMI_SHIFT 12 +#define PMCR0_CNT_MASK (PMCR0_CNT_EN_MASK | (PMCR0_CNT_EN_MASK << PMCR0_PMI_SHIFT)) + +#define SYS_IMP_APL_PMCR1 sys_reg(3, 1, 15, 1, 0) +#define SYS_IMP_APL_PMCR2 sys_reg(3, 1, 15, 2, 0) +#define SYS_IMP_APL_PMCR3 sys_reg(3, 1, 15, 3, 0) +#define SYS_IMP_APL_PMCR4 sys_reg(3, 1, 15, 4, 0) + +#define SYS_IMP_APL_PMESR0 sys_reg(3, 1, 15, 5, 0) +#define SYS_IMP_APL_PMESR1 sys_reg(3, 1, 15, 6, 0) + +#define SYS_IMP_APL_PMSR sys_reg(3, 1, 15, 13, 0) + +#define SYS_IMP_APL_PMC0 sys_reg(3, 2, 15, 0, 0) +#define SYS_IMP_APL_PMC1 sys_reg(3, 2, 15, 1, 0) +#define SYS_IMP_APL_PMC2 sys_reg(3, 2, 15, 2, 0) +#define SYS_IMP_APL_PMC3 sys_reg(3, 2, 15, 3, 0) +#define 
SYS_IMP_APL_PMC4 sys_reg(3, 2, 15, 4, 0) +#define SYS_IMP_APL_PMC5 sys_reg(3, 2, 15, 5, 0) +#define SYS_IMP_APL_PMC6 sys_reg(3, 2, 15, 6, 0) +#define SYS_IMP_APL_PMC7 sys_reg(3, 2, 15, 7, 0) +#define SYS_IMP_APL_PMC8 sys_reg(3, 2, 15, 9, 0) +#define SYS_IMP_APL_PMC9 sys_reg(3, 2, 15, 10, 0) + +#define SYS_IMP_APL_LSU_ERR_STS sys_reg(3, 3, 15, 0, 0) +#define SYS_IMP_APL_E_LSU_ERR_STS sys_reg(3, 3, 15, 2, 0) + +#define SYS_IMP_APL_L2C_ERR_STS sys_reg(3, 3, 15, 8, 0) + +#define L2C_ERR_STS_RECURSIVE_FAULT BIT(1) +#define L2C_ERR_STS_ACCESS_FAULT BIT(7) +#define L2C_ERR_STS_ENABLE_W1C BIT(56) + +#define SYS_IMP_APL_L2C_ERR_ADR sys_reg(3, 3, 15, 9, 0) +#define SYS_IMP_APL_L2C_ERR_INF sys_reg(3, 3, 15, 10, 0) + +#define SYS_IMP_APL_FED_ERR_STS sys_reg(3, 4, 15, 0, 0) +#define SYS_IMP_APL_E_FED_ERR_STS sys_reg(3, 4, 15, 0, 2) + +#define SYS_IMP_APL_MMU_ERR_STS sys_reg(3, 6, 15, 0, 0) +#define SYS_IMP_APL_E_MMU_ERR_STS sys_reg(3, 6, 15, 2, 0) + +/* ACC/CYC Registers */ +#define SYS_IMP_APL_ACC_CFG sys_reg(3, 5, 15, 4, 0) +#define ACC_CFG_BP_SLEEP(x) (((unsigned long)x) << 2) +#define ACC_CFG_BP_SLEEP_MASK (3UL << 2) + +#define SYS_IMP_APL_CYC_OVRD sys_reg(3, 5, 15, 5, 0) +#define CYC_OVRD_FIQ_MODE(x) (((unsigned long)x) << 20) +#define CYC_OVRD_FIQ_MODE_MASK (3UL << 20) +#define CYC_OVRD_IRQ_MODE(x) (((unsigned long)x) << 22) +#define CYC_OVRD_IRQ_MODE_MASK (3UL << 22) +#define CYC_OVRD_WFI_MODE(x) (((unsigned long)x) << 24) +#define CYC_OVRD_WFI_MODE_MASK (3UL << 24) +#define CYC_OVRD_DISABLE_WFI_RET BIT(0) + +#define SYS_IMP_APL_UPMCR0 sys_reg(3, 7, 15, 0, 4) +#define UPMCR0_IMODE_OFF (0 << 16) +#define UPMCR0_IMODE_AIC (2 << 16) +#define UPMCR0_IMODE_HALT (3 << 16) +#define UPMCR0_IMODE_FIQ (4 << 16) +#define UPMCR0_IMODE_MASK (7 << 16) + +#define SYS_IMP_APL_UPMSR sys_reg(3, 7, 15, 6, 4) +#define UPMSR_IACT (BIT(0)) + +/* SPRR and GXF registers */ +#define SYS_IMP_APL_SPRR_CONFIG_EL1 sys_reg(3, 6, 15, 1, 0) +#define SPRR_CONFIG_EN BIT(0) +#define 
SPRR_CONFIG_LOCK_CONFIG BIT(1) +#define SPRR_CONFIG_LOCK_PERM BIT(4) +#define SPRR_CONFIG_LOCK_KERNEL_PERM BIT(5) + +#define SYS_IMP_APL_GXF_CONFIG_EL1 sys_reg(3, 6, 15, 1, 2) +#define GXF_CONFIG_EN BIT(0) + +#define SYS_IMP_APL_GXF_STATUS_EL1 sys_reg(3, 6, 15, 8, 0) +#define GXF_STATUS_GUARDED BIT(0) + +#define SYS_IMP_APL_GXF_ABORT_EL1 sys_reg(3, 6, 15, 8, 2) +#define SYS_IMP_APL_GXF_ENTER_EL1 sys_reg(3, 6, 15, 8, 1) + +#define SYS_IMP_APL_GXF_ABORT_EL12 sys_reg(3, 6, 15, 15, 3) +#define SYS_IMP_APL_GXF_ENTER_EL12 sys_reg(3, 6, 15, 15, 2) + +#define SYS_IMP_APL_SPRR_PERM_EL0 sys_reg(3, 6, 15, 1, 5) +#define SYS_IMP_APL_SPRR_PERM_EL1 sys_reg(3, 6, 15, 1, 6) +#define SYS_IMP_APL_SPRR_PERM_EL02 sys_reg(3, 4, 15, 5, 2) +#define SYS_IMP_APL_SPRR_PERM_EL12 sys_reg(3, 6, 15, 15, 7) + +#define SYS_IMP_APL_TPIDR_GL1 sys_reg(3, 6, 15, 10, 1) +#define SYS_IMP_APL_VBAR_GL1 sys_reg(3, 6, 15, 10, 2) +#define SYS_IMP_APL_SPSR_GL1 sys_reg(3, 6, 15, 10, 3) +#define SYS_IMP_APL_ASPSR_GL1 sys_reg(3, 6, 15, 10, 4) +#define SYS_IMP_APL_ESR_GL1 sys_reg(3, 6, 15, 10, 5) +#define SYS_IMP_APL_ELR_GL1 sys_reg(3, 6, 15, 10, 6) +#define SYS_IMP_APL_FAR_GL1 sys_reg(3, 6, 15, 10, 7) + +#define SYS_IMP_APL_VBAR_GL12 sys_reg(3, 6, 15, 9, 2) +#define SYS_IMP_APL_SPSR_GL12 sys_reg(3, 6, 15, 9, 3) +#define SYS_IMP_APL_ASPSR_GL12 sys_reg(3, 6, 15, 9, 4) +#define SYS_IMP_APL_ESR_GL12 sys_reg(3, 6, 15, 9, 5) +#define SYS_IMP_APL_ELR_GL12 sys_reg(3, 6, 15, 9, 6) +#define SYS_IMP_APL_SP_GL12 sys_reg(3, 6, 15, 10, 0) + +#define SYS_IMP_APL_AFSR1_GL1 sys_reg(3, 6, 15, 0, 1) + +/* PAuth registers */ +#define SYS_IMP_APL_APVMKEYLO_EL2 sys_reg(3, 6, 15, 14, 4) +#define SYS_IMP_APL_APVMKEYHI_EL2 sys_reg(3, 6, 15, 14, 5) +#define SYS_IMP_APL_APSTS_EL12 sys_reg(3, 6, 15, 14, 7) + +#define SYS_IMP_APL_APCTL_EL1 sys_reg(3, 4, 15, 0, 4) +#define SYS_IMP_APL_APCTL_EL2 sys_reg(3, 6, 15, 12, 2) +#define SYS_IMP_APL_APCTL_EL12 sys_reg(3, 6, 15, 15, 0) + +/* VM registers */ +#define SYS_IMP_APL_VM_TMR_FIQ_ENA_EL2 
sys_reg(3, 5, 15, 1, 3) +#define VM_TMR_FIQ_ENA_ENA_V BIT(0) +#define VM_TMR_FIQ_ENA_ENA_P BIT(1) + +/* IPI registers */ +#define SYS_IMP_APL_IPI_RR_LOCAL_EL1 sys_reg(3, 5, 15, 0, 0) +#define SYS_IMP_APL_IPI_RR_GLOBAL_EL1 sys_reg(3, 5, 15, 0, 1) + +#define SYS_IMP_APL_IPI_SR_EL1 sys_reg(3, 5, 15, 1, 1) +#define IPI_SR_PENDING BIT(0) + +#define SYS_IMP_APL_IPI_CR_EL1 sys_reg(3, 5, 15, 3, 1) diff --git a/tools/src/cpufreq.c b/tools/src/cpufreq.c new file mode 100644 index 0000000..e7c4f41 --- /dev/null +++ b/tools/src/cpufreq.c @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: MIT */ + +#include "cpufreq.h" +#include "adt.h" +#include "soc.h" +#include "utils.h" + +#define CLUSTER_PSTATE 0x20 +#define CLUSTER_CONFIG 0x6b8 + +#define CLUSTER_PSTATE_BUSY BIT(31) +#define CLUSTER_PSTATE_SET BIT(25) +#define CLUSTER_PSTATE_DESIRED2 GENMASK(15, 12) +#define CLUSTER_PSTATE_DESIRED1 GENMASK(3, 0) + +#define CLUSTER_CONFIG_ENABLE BIT(63) +#define CLUSTER_CONFIG_DVMR1 BIT(32) +#define CLUSTER_CONFIG_DVMR2 BIT(31) + +#define CLUSTER_SWITCH_TIMEOUT 100 + +struct cluster_t { + const char *name; + u64 base; + bool dvmr; + uint32_t boot_pstate; +}; + +int cpufreq_init_cluster(const struct cluster_t *cluster) +{ + u64 enable = CLUSTER_CONFIG_ENABLE; + if (cluster->dvmr) + enable |= CLUSTER_CONFIG_DVMR1 | CLUSTER_CONFIG_DVMR2; + + u64 val = read64(cluster->base + CLUSTER_CONFIG); + if ((val & enable) != enable) { + printf("cpufreq: Configuring cluster %s (dvmr: %d)\n", cluster->name, cluster->dvmr); + write64(cluster->base + CLUSTER_CONFIG, val | enable); + } + + val = read64(cluster->base + CLUSTER_PSTATE); + + if (FIELD_GET(CLUSTER_PSTATE_DESIRED1, val) != cluster->boot_pstate) { + val &= CLUSTER_PSTATE_DESIRED1 | CLUSTER_PSTATE_DESIRED2; + val |= CLUSTER_PSTATE_SET | FIELD_PREP(CLUSTER_PSTATE_DESIRED1, cluster->boot_pstate) | + FIELD_PREP(CLUSTER_PSTATE_DESIRED2, cluster->boot_pstate); + printf("cpufreq: Switching cluster %s to P-State %d\n", cluster->name, + cluster->boot_pstate); 
+ write64(cluster->base + CLUSTER_PSTATE, val); + if (poll32(cluster->base + CLUSTER_PSTATE, CLUSTER_PSTATE_BUSY, 0, CLUSTER_SWITCH_TIMEOUT) < + 0) { + printf("cpufreq: Timed out waiting for cluster %s P-State switch\n", cluster->name); + return -1; + } + } + + return 0; +} + +static const struct cluster_t t8103_clusters[] = { + {"ECPU", 0x210e20000, false, 5}, + {"PCPU", 0x211e20000, true, 7}, + {}, +}; + +static const struct cluster_t t6000_clusters[] = { + {"ECPU0", 0x210e20000, false, 5}, + {"PCPU0", 0x211e20000, false, 7}, + {"PCPU1", 0x212e20000, false, 7}, + {}, +}; + +static const struct cluster_t t6002_clusters[] = { + {"ECPU0", 0x0210e20000, false, 5}, + {"PCPU0", 0x0211e20000, false, 7}, + {"PCPU1", 0x0212e20000, false, 7}, + {"ECPU1", 0x2210e20000, false, 5}, + {"PCPU2", 0x2211e20000, false, 7}, + {"PCPU3", 0x2212e20000, false, 7}, + {}, +}; + +static const struct cluster_t t8112_clusters[] = { + {"ECPU", 0x210e20000, false, 7}, + {"PCPU", 0x211e20000, true, 6}, + {}, +}; + +int cpufreq_init(void) +{ + printf("cpufreq: Initializing clusters\n"); + + const struct cluster_t *cluster; + + switch (chip_id) { + case T8103: + cluster = t8103_clusters; + break; + case T6000: + case T6001: + cluster = t6000_clusters; + break; + case T6002: + cluster = t6002_clusters; + break; + case T8112: + cluster = t8112_clusters; + break; + default: + printf("cpufreq: Chip 0x%x is unsupported\n", chip_id); + return -1; + } + + bool err = false; + while (cluster->base) { + err |= cpufreq_init_cluster(cluster++); + } + + return err ? 
-1 : 0; +} diff --git a/tools/src/cpufreq.h b/tools/src/cpufreq.h new file mode 100644 index 0000000..7710f20 --- /dev/null +++ b/tools/src/cpufreq.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef CPUFREQ_H +#define CPUFREQ_H + +int cpufreq_init(void); + +#endif diff --git a/tools/src/dapf.c b/tools/src/dapf.c new file mode 100644 index 0000000..cbeb576 --- /dev/null +++ b/tools/src/dapf.c @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: MIT */ + +#include "dapf.h" +#include "adt.h" +#include "assert.h" +#include "malloc.h" +#include "memory.h" +#include "string.h" +#include "utils.h" + +struct dapf_t8020_config { + u64 start; + u64 end; + u8 unk1; + u8 r0_hi; + u8 r0_lo; + u8 unk2; + u32 r4; +} PACKED; + +static int dapf_init_t8020(const char *path, u64 base, int node) +{ + u32 length; + const char *prop = "filter-data-instance-0"; + const struct dapf_t8020_config *config = adt_getprop(adt, node, prop, &length); + + if (!config || !length || (length % sizeof(*config)) != 0) { + printf("dapf: Error getting ADT node %s property %s.\n", path, prop); + return -1; + } + + int count = length / sizeof(*config); + + for (int i = 0; i < count; i++) { + write32(base + 0x04, config[i].r4); + write64(base + 0x08, config[i].start); + write64(base + 0x10, config[i].end); + write32(base + 0x00, (config[i].r0_hi << 4) | config[i].r0_lo); + base += 0x40; + } + return 0; +} + +struct dapf_t8110_config { + u64 start; + u64 end; + u32 r20; + u32 unk1; + u32 r4; + u32 unk2[5]; + u8 unk3; + u8 r0_hi; + u8 r0_lo; + u8 unk4; +} PACKED; + +static int dapf_init_t8110(const char *path, u64 base, int node) +{ + u32 length; + const char *prop = "dapf-instance-0"; + const struct dapf_t8110_config *config = adt_getprop(adt, node, prop, &length); + + if (!config || !length) { + printf("dapf: Error getting ADT node %s property %s.\n", path, prop); + return -1; + } + + if (length % sizeof(*config) != 0) { + printf("dapf: Invalid length for %s property %s\n", path, prop); + return -1; 
+ } + + int count = length / sizeof(*config); + + for (int i = 0; i < count; i++) { + write32(base + 0x04, config[i].r4); + write64(base + 0x08, config[i].start); + write64(base + 0x10, config[i].end); + write32(base + 0x00, (config[i].r0_hi << 4) | config[i].r0_lo); + write32(base + 0x20, config[i].r20); + base += 0x40; + } + return 0; +} + +int dapf_init(const char *path) +{ + int ret; + int dart_path[8]; + int node = adt_path_offset_trace(adt, path, dart_path); + if (node < 0) { + printf("dapf: Error getting DAPF %s node.\n", path); + return -1; + } + + u64 base; + if (adt_get_reg(adt, dart_path, "reg", 1, &base, NULL) < 0) { + printf("dapf: Error getting DAPF %s base address.\n", path); + return -1; + } + + if (adt_is_compatible(adt, node, "dart,t8020")) { + ret = dapf_init_t8020(path, base, node); + } else if (adt_is_compatible(adt, node, "dart,t6000")) { + ret = dapf_init_t8020(path, base, node); + } else if (adt_is_compatible(adt, node, "dart,t8110")) { + ret = dapf_init_t8110(path, base, node); + } else { + printf("dapf: DAPF %s at 0x%lx is of an unknown type\n", path, base); + return -1; + } + + if (!ret) + printf("dapf: Initialized %s\n", path); + + return ret; +} + +const char *dapf_paths[] = {"/arm-io/dart-aop", "/arm-io/dart-mtp", "/arm-io/dart-pmp", NULL}; + +int dapf_init_all(void) +{ + int ret = 0; + int count = 0; + + for (const char **path = dapf_paths; *path; path++) { + if (adt_path_offset(adt, *path) < 0) + continue; + + if (dapf_init(*path) < 0) { + ret = -1; + } + count += 1; + } + + return ret ? 
ret : count; +} diff --git a/tools/src/dapf.h b/tools/src/dapf.h new file mode 100644 index 0000000..2a7e1bf --- /dev/null +++ b/tools/src/dapf.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef DAPF_H +#define DAPF_H + +int dapf_init_all(void); +int dapf_init(const char *path); + +#endif diff --git a/tools/src/dart.c b/tools/src/dart.c new file mode 100644 index 0000000..96c4261 --- /dev/null +++ b/tools/src/dart.c @@ -0,0 +1,714 @@ +/* SPDX-License-Identifier: MIT */ + +#include "dart.h" +#include "adt.h" +#include "assert.h" +#include "devicetree.h" +#include "malloc.h" +#include "memory.h" +#include "string.h" +#include "utils.h" + +#include "libfdt/libfdt.h" + +#define DART_T8020_CONFIG 0x60 +#define DART_T8020_CONFIG_LOCK BIT(15) + +#define DART_T8020_ERROR 0x40 +#define DART_T8020_ERROR_STREAM_SHIFT 24 +#define DART_T8020_ERROR_STREAM_MASK 0xf +#define DART_T8020_ERROR_CODE_MASK 0xffffff +#define DART_T8020_ERROR_FLAG BIT(31) +#define DART_T8020_ERROR_READ_FAULT BIT(4) +#define DART_T8020_ERROR_WRITE_FAULT BIT(3) +#define DART_T8020_ERROR_NO_PTE BIT(2) +#define DART_T8020_ERROR_NO_PMD BIT(1) +#define DART_T8020_ERROR_NO_TTBR BIT(0) + +#define DART_T8020_STREAM_SELECT 0x34 + +#define DART_T8020_STREAM_COMMAND 0x20 +#define DART_T8020_STREAM_COMMAND_BUSY BIT(2) +#define DART_T8020_STREAM_COMMAND_INVALIDATE BIT(20) + +#define DART_T8020_STREAM_COMMAND_BUSY_TIMEOUT 100 + +#define DART_T8020_STREAM_REMAP 0x80 + +#define DART_T8020_ERROR_ADDR_HI 0x54 +#define DART_T8020_ERROR_ADDR_LO 0x50 + +#define DART_T8020_ENABLED_STREAMS 0xfc + +#define DART_T8020_TCR_OFF 0x100 +#define DART_T8020_TCR_TRANSLATE_ENABLE BIT(7) +#define DART_T8020_TCR_BYPASS_DART BIT(8) +#define DART_T8020_TCR_BYPASS_DAPF BIT(12) + +#define DART_T8020_TTBR_OFF 0x200 +#define DART_T8020_TTBR_VALID BIT(31) +#define DART_T8020_TTBR_ADDR GENMASK(30, 0) +#define DART_T8020_TTBR_SHIFT 12 + +#define DART_PTE_OFFSET_SHIFT 14 +#define DART_PTE_SP_START GENMASK(63, 52) +#define 
DART_PTE_SP_END GENMASK(51, 40) +#define DART_T8020_PTE_OFFSET GENMASK(39, 14) +#define DART_T6000_PTE_OFFSET GENMASK(39, 10) +#define DART_T8020_PTE_DISABLE_SP BIT(1) +#define DART_T6000_PTE_REALTIME BIT(1) +#define DART_PTE_VALID BIT(0) + +#define DART_T8110_TTBR_OFF 0x1400 +#define DART_T8110_TTBR_VALID BIT(0) +#define DART_T8110_TTBR_ADDR GENMASK(29, 2) +#define DART_T8110_TTBR_SHIFT 14 + +#define DART_T8110_TCR_OFF 0x1000 +#define DART_T8110_TCR_REMAP GENMASK(11, 8) +#define DART_T8110_TCR_REMAP_EN BIT(7) +#define DART_T8110_TCR_BYPASS_DAPF BIT(2) +#define DART_T8110_TCR_BYPASS_DART BIT(1) +#define DART_T8110_TCR_TRANSLATE_ENABLE BIT(0) + +#define DART_T8110_TLB_CMD 0x80 +#define DART_T8110_TLB_CMD_BUSY BIT(31) +#define DART_T8110_TLB_CMD_OP GENMASK(10, 8) +#define DART_T8110_TLB_CMD_OP_FLUSH_ALL 0 +#define DART_T8110_TLB_CMD_OP_FLUSH_SID 1 +#define DART_T8110_TLB_CMD_STREAM GENMASK(7, 0) + +#define DART_T8110_PROTECT 0x200 +#define DART_T8110_PROTECT_TTBR_TCR BIT(0) + +#define DART_T8110_ENABLE_STREAMS 0xc00 +#define DART_T8110_DISABLE_STREAMS 0xc20 + +#define DART_MAX_TTBR_COUNT 4 + +#define DART_TCR(dart) (dart->regs + dart->params->tcr_off + 4 * dart->device) +#define DART_TTBR(dart, idx) \ + (dart->regs + dart->params->ttbr_off + 4 * dart->params->ttbr_count * dart->device + 4 * idx) + +struct dart_params { + int sid_count; + + u64 pte_flags; + u64 offset_mask; + + u64 tcr_enabled; + u64 tcr_disabled; + u64 tcr_off; + + u64 ttbr_valid; + u64 ttbr_addr; + u64 ttbr_shift; + u64 ttbr_off; + int ttbr_count; + + void (*tlb_invalidate)(dart_dev_t *dart); +}; + +struct dart_dev { + bool locked; + bool keep; + uintptr_t regs; + u8 device; + enum dart_type_t type; + const struct dart_params *params; + u64 vm_base; + + u64 *l1[DART_MAX_TTBR_COUNT]; +}; + +static void dart_t8020_tlb_invalidate(dart_dev_t *dart) +{ + write32(dart->regs + DART_T8020_STREAM_SELECT, BIT(dart->device)); + + /* ensure that the DART can see the updated pagetables before invalidating */ + 
dma_wmb(); + write32(dart->regs + DART_T8020_STREAM_COMMAND, DART_T8020_STREAM_COMMAND_INVALIDATE); + + if (poll32(dart->regs + DART_T8020_STREAM_COMMAND, DART_T8020_STREAM_COMMAND_BUSY, 0, 100)) + printf("dart: DART_T8020_STREAM_COMMAND_BUSY did not clear.\n"); +} + +static void dart_t8110_tlb_invalidate(dart_dev_t *dart) +{ + /* ensure that the DART can see the updated pagetables before invalidating */ + dma_wmb(); + write32(dart->regs + DART_T8110_TLB_CMD, + FIELD_PREP(DART_T8110_TLB_CMD_OP, DART_T8110_TLB_CMD_OP_FLUSH_SID) | + FIELD_PREP(DART_T8110_TLB_CMD_STREAM, dart->device)); + + if (poll32(dart->regs + DART_T8110_TLB_CMD_OP, DART_T8110_TLB_CMD_BUSY, 0, 100)) + printf("dart: DART_T8110_TLB_CMD_BUSY did not clear.\n"); +} + +const struct dart_params dart_t8020 = { + .sid_count = 32, + .pte_flags = FIELD_PREP(DART_PTE_SP_END, 0xfff) | FIELD_PREP(DART_PTE_SP_START, 0) | + DART_T8020_PTE_DISABLE_SP | DART_PTE_VALID, + .offset_mask = DART_T8020_PTE_OFFSET, + .tcr_enabled = DART_T8020_TCR_TRANSLATE_ENABLE, + .tcr_disabled = DART_T8020_TCR_BYPASS_DAPF | DART_T8020_TCR_BYPASS_DART, + .tcr_off = DART_T8020_TCR_OFF, + .ttbr_valid = DART_T8020_TTBR_VALID, + .ttbr_addr = DART_T8020_TTBR_ADDR, + .ttbr_shift = DART_T8020_TTBR_SHIFT, + .ttbr_off = DART_T8020_TTBR_OFF, + .ttbr_count = 4, + .tlb_invalidate = dart_t8020_tlb_invalidate, +}; + +const struct dart_params dart_t6000 = { + .sid_count = 32, + .pte_flags = + FIELD_PREP(DART_PTE_SP_END, 0xfff) | FIELD_PREP(DART_PTE_SP_START, 0) | DART_PTE_VALID, + .offset_mask = DART_T6000_PTE_OFFSET, + .tcr_enabled = DART_T8020_TCR_TRANSLATE_ENABLE, + .tcr_disabled = DART_T8020_TCR_BYPASS_DAPF | DART_T8020_TCR_BYPASS_DART, + .tcr_off = DART_T8020_TCR_OFF, + .ttbr_valid = DART_T8020_TTBR_VALID, + .ttbr_addr = DART_T8020_TTBR_ADDR, + .ttbr_shift = DART_T8020_TTBR_SHIFT, + .ttbr_off = DART_T8020_TTBR_OFF, + .ttbr_count = 4, + .tlb_invalidate = dart_t8020_tlb_invalidate, +}; + +const struct dart_params dart_t8110 = { + .sid_count = 256, 
+ .pte_flags = + FIELD_PREP(DART_PTE_SP_END, 0xfff) | FIELD_PREP(DART_PTE_SP_START, 0) | DART_PTE_VALID, + .offset_mask = DART_T6000_PTE_OFFSET, + .tcr_enabled = DART_T8110_TCR_TRANSLATE_ENABLE, + .tcr_disabled = DART_T8110_TCR_BYPASS_DAPF | DART_T8110_TCR_BYPASS_DART, + .tcr_off = DART_T8110_TCR_OFF, + .ttbr_valid = DART_T8110_TTBR_VALID, + .ttbr_addr = DART_T8110_TTBR_ADDR, + .ttbr_shift = DART_T8110_TTBR_SHIFT, + .ttbr_off = DART_T8110_TTBR_OFF, + .ttbr_count = 1, + .tlb_invalidate = dart_t8110_tlb_invalidate, +}; + +dart_dev_t *dart_init(uintptr_t base, u8 device, bool keep_pts, enum dart_type_t type) +{ + dart_dev_t *dart = malloc(sizeof(*dart)); + if (!dart) + return NULL; + + memset(dart, 0, sizeof(*dart)); + + dart->regs = base; + dart->device = device; + dart->type = type; + + switch (type) { + case DART_T8020: + dart->params = &dart_t8020; + break; + case DART_T8110: + dart->params = &dart_t8110; + break; + case DART_T6000: + dart->params = &dart_t6000; + break; + } + + if (device >= dart->params->sid_count) { + printf("dart: device %d is too big for this DART type\n", device); + free(dart); + return NULL; + } + + switch (type) { + case DART_T8020: + case DART_T6000: + if (read32(dart->regs + DART_T8020_CONFIG) & DART_T8020_CONFIG_LOCK) + dart->locked = true; + set32(dart->regs + DART_T8020_ENABLED_STREAMS, BIT(device & 0x1f)); + break; + case DART_T8110: + // TODO locked dart + write32(dart->regs + DART_T8110_ENABLE_STREAMS + 4 * (device >> 5), BIT(device & 0x1f)); + break; + } + + dart->keep = keep_pts; + + if (dart->locked || keep_pts) { + for (int i = 0; i < dart->params->ttbr_count; i++) { + u32 ttbr = read32(DART_TTBR(dart, i)); + if (ttbr & dart->params->ttbr_valid) + dart->l1[i] = + (u64 *)(FIELD_GET(dart->params->ttbr_addr, ttbr) << dart->params->ttbr_shift); + } + } + + for (int i = 0; i < dart->params->ttbr_count; i++) { + if (dart->l1[i]) + continue; + + dart->l1[i] = memalign(SZ_16K, SZ_16K); + if (!dart->l1[i]) + goto error; + 
memset(dart->l1[i], 0, SZ_16K); + + write32(DART_TTBR(dart, i), + dart->params->ttbr_valid | + FIELD_PREP(dart->params->ttbr_addr, + ((uintptr_t)dart->l1[i]) >> dart->params->ttbr_shift)); + } + + if (!dart->locked && !keep_pts) + write32(DART_TCR(dart), dart->params->tcr_enabled); + + dart->params->tlb_invalidate(dart); + return dart; + +error: + if (!dart->locked) + free(dart->l1); + free(dart); + return NULL; +} + +dart_dev_t *dart_init_adt(const char *path, int instance, int device, bool keep_pts) +{ + int dart_path[8]; + int node = adt_path_offset_trace(adt, path, dart_path); + if (node < 0) { + printf("dart: Error getting DART node %s\n", path); + return NULL; + } + + u64 base; + if (adt_get_reg(adt, dart_path, "reg", instance, &base, NULL) < 0) { + printf("dart: Error getting DART %s base address.\n", path); + return NULL; + } + + enum dart_type_t type; + const char *type_s; + + if (adt_is_compatible(adt, node, "dart,t8020")) { + type = DART_T8020; + type_s = "t8020"; + } else if (adt_is_compatible(adt, node, "dart,t6000")) { + type = DART_T6000; + type_s = "t6000"; + } else if (adt_is_compatible(adt, node, "dart,t8110")) { + type = DART_T8110; + type_s = "t8110"; + } else { + printf("dart: dart %s at 0x%lx is of an unknown type\n", path, base); + return NULL; + } + + dart_dev_t *dart = dart_init(base, device, keep_pts, type); + + if (!dart) + return NULL; + + printf("dart: dart %s at 0x%lx is a %s%s\n", path, base, type_s, + dart->locked ? 
" (locked)" : ""); + + if (adt_getprop(adt, node, "real-time", NULL)) { + for (int i = 0; i < dart->params->ttbr_count; i++) { + printf("dart: dart %s.%d.%d L1 %d is real-time at %p\n", path, instance, device, i, + dart->l1[i]); + } + } + if (ADT_GETPROP(adt, node, "vm-base", &dart->vm_base) < 0) + dart->vm_base = 0; + + return dart; +} + +void dart_lock_adt(const char *path, int instance) +{ + int dart_path[8]; + int node = adt_path_offset_trace(adt, path, dart_path); + if (node < 0) { + printf("dart: Error getting DART node %s\n", path); + return; + } + + u64 base; + if (adt_get_reg(adt, dart_path, "reg", instance, &base, NULL) < 0) { + printf("dart: Error getting DART %s base address.\n", path); + return; + } + + if (adt_is_compatible(adt, node, "dart,t8020") || adt_is_compatible(adt, node, "dart,t6000")) { + if (!(read32(base + DART_T8020_CONFIG) & DART_T8020_CONFIG_LOCK)) + set32(base + DART_T8020_CONFIG, DART_T8020_CONFIG_LOCK); + } else if (adt_is_compatible(adt, node, "dart,t8110")) { + if (!(read32(base + DART_T8110_PROTECT) & DART_T8110_PROTECT_TTBR_TCR)) + set32(base + DART_T8110_PROTECT, DART_T8110_PROTECT_TTBR_TCR); + } else { + printf("dart: dart %s at 0x%lx is of an unknown type\n", path, base); + } +} + +dart_dev_t *dart_init_fdt(void *dt, u32 phandle, int device, bool keep_pts) +{ + int node = fdt_node_offset_by_phandle(dt, phandle); + if (node < 0) { + printf("FDT: node for phandle %u not found\n", phandle); + return NULL; + } + + u64 base = dt_get_address(dt, node); + if (!base) + return NULL; + + enum dart_type_t type; + const char *type_s; + const char *name = fdt_get_name(dt, node, NULL); + + if (fdt_node_check_compatible(dt, node, "apple,t8103-dart") == 0) { + type = DART_T8020; + type_s = "t8020"; + } else if (fdt_node_check_compatible(dt, node, "apple,t6000-dart") == 0) { + type = DART_T6000; + type_s = "t6000"; + } else if (fdt_node_check_compatible(dt, node, "apple,t8110-dart") == 0) { + type = DART_T8110; + type_s = "t8110"; + } else { + 
printf("dart: dart %s at 0x%lx is of an unknown type\n", name, base); + return NULL; + } + + dart_dev_t *dart = dart_init(base, device, keep_pts, type); + + if (!dart) + return NULL; + + printf("dart: dart %s at 0x%lx is a %s%s\n", name, base, type_s, + dart->locked ? " (locked)" : ""); + + return dart; +} + +int dart_setup_pt_region(dart_dev_t *dart, const char *path, int device, u64 vm_base) +{ + int node = adt_path_offset(adt, path); + if (node < 0) { + printf("dart: Error getting DART node %s\n", path); + return -1; + } + char pt_region_str[24]; + snprintf(pt_region_str, sizeof(pt_region_str), "pt-region-%d", device); + char l2_tt_str[24]; + snprintf(l2_tt_str, sizeof(l2_tt_str), "l2-tt-%d", device); + + const struct adt_property *pt_region = adt_get_property(adt, node, pt_region_str); + if (pt_region && pt_region->size == 16) { + u64 region[2]; + memcpy(region, pt_region->value, sizeof(region)); + u64 tbl_count = (region[1] - region[0]) / SZ_16K; + if (tbl_count > 64) { + printf("dart: dart %s ignoring large %s, %lu L2 tables\n", path, pt_region_str, + tbl_count); + return -1; + } + /* first index is the l1 table, cap at 2 or else macOS hates it */ + tbl_count = min(2, tbl_count - 1); + u64 l2_start = region[0] + SZ_16K; + u64 vmstart = vm_base >> (14 + 11); + for (u64 index = 0; index < tbl_count; index++) { + int ttbr = (vmstart + index) >> 11; + int idx = (vmstart + index) & 0x7ff; + u64 l2tbl = l2_start + index * SZ_16K; + + if (dart->l1[ttbr][idx] & DART_PTE_VALID) { + u64 off = FIELD_GET(dart->params->offset_mask, dart->l1[ttbr][idx]) + << DART_PTE_OFFSET_SHIFT; + if (off != l2tbl) + printf("dart: unexpected L2 tbl at index:%lu. 
0x%016lx != 0x%016lx\n", index, + off, l2tbl); + continue; + } else { + printf("dart: allocating L2 tbl at %d, %d to 0x%lx\n", ttbr, idx, l2tbl); + memset((void *)l2tbl, 0, SZ_16K); + } + + u64 offset = FIELD_PREP(dart->params->offset_mask, l2tbl >> DART_PTE_OFFSET_SHIFT); + dart->l1[ttbr][idx] = offset | DART_PTE_VALID; + } + + u64 l2_tt[2] = {region[0], tbl_count}; + int ret = adt_setprop(adt, node, l2_tt_str, &l2_tt, sizeof(l2_tt)); + if (ret < 0) { + printf("dart: failed to update '%s/%s'\n", path, l2_tt_str); + } + + dart->params->tlb_invalidate(dart); + } + + return 0; +} + +static u64 *dart_get_l2(dart_dev_t *dart, u32 idx) +{ + int ttbr = idx >> 11; + idx &= 0x7ff; + + if (dart->l1[ttbr][idx] & DART_PTE_VALID) { + u64 off = FIELD_GET(dart->params->offset_mask, dart->l1[ttbr][idx]) + << DART_PTE_OFFSET_SHIFT; + return (u64 *)off; + } + + u64 *tbl = memalign(SZ_16K, SZ_16K); + if (!tbl) + return NULL; + + memset(tbl, 0, SZ_16K); + + u64 offset = FIELD_PREP(dart->params->offset_mask, ((u64)tbl) >> DART_PTE_OFFSET_SHIFT); + + dart->l1[ttbr][idx] = offset | DART_PTE_VALID; + + return tbl; +} + +static int dart_map_page(dart_dev_t *dart, uintptr_t iova, uintptr_t paddr) +{ + u32 l1_index = (iova >> 25) & 0x1fff; + u32 l2_index = (iova >> 14) & 0x7ff; + + u64 *l2 = dart_get_l2(dart, l1_index); + if (!l2) { + printf("dart: couldn't create l2 for iova %lx\n", iova); + return -1; + } + + if (l2[l2_index] & DART_PTE_VALID) { + printf("dart: iova %lx already has a valid PTE: %lx\n", iova, l2[l2_index]); + return -1; + } + + u64 offset = FIELD_PREP(dart->params->offset_mask, paddr >> DART_PTE_OFFSET_SHIFT); + + l2[l2_index] = offset | dart->params->pte_flags; + + return 0; +} + +int dart_map(dart_dev_t *dart, uintptr_t iova, void *bfr, size_t len) +{ + uintptr_t paddr = (uintptr_t)bfr; + u64 offset = 0; + + if (len % SZ_16K) + return -1; + if (paddr % SZ_16K) + return -1; + if (iova % SZ_16K) + return -1; + + while (offset < len) { + int ret = dart_map_page(dart, iova + 
offset, paddr + offset); + + if (ret) { + dart_unmap(dart, iova, offset); + return ret; + } + + offset += SZ_16K; + } + + dart->params->tlb_invalidate(dart); + return 0; +} + +static void dart_unmap_page(dart_dev_t *dart, uintptr_t iova) +{ + u32 ttbr = (iova >> 36) & 0x3; + u32 l1_index = (iova >> 25) & 0x7ff; + u32 l2_index = (iova >> 14) & 0x7ff; + + if (!(dart->l1[ttbr][l1_index] & DART_PTE_VALID)) + return; + + u64 *l2 = dart_get_l2(dart, l1_index); + l2[l2_index] = 0; +} + +void dart_unmap(dart_dev_t *dart, uintptr_t iova, size_t len) +{ + if (len % SZ_16K) + return; + if (iova % SZ_16K) + return; + + while (len) { + dart_unmap_page(dart, iova); + + len -= SZ_16K; + iova += SZ_16K; + } + + dart->params->tlb_invalidate(dart); +} + +void dart_free_l2(dart_dev_t *dart, uintptr_t iova) +{ + if (iova & ((1 << 25) - 1)) { + printf("dart: %08lx is not at the start of L2 table\n", iova); + return; + } + + u32 ttbr = (iova >> 36) & 0x3; + u32 l1_index = (iova >> 25) & 0x7ff; + + if (!(dart->l1[ttbr][l1_index] & DART_PTE_VALID)) + return; + + u64 *l2 = dart_get_l2(dart, l1_index); + + for (u32 idx = 0; idx < 2048; idx++) { + if (l2[idx] & DART_PTE_VALID) { + printf("dart: %08lx is still mapped\n", iova + (idx << 14)); + return; + } + } + dart->l1[ttbr][l1_index] = 0; + free(l2); +} + +static void *dart_translate_internal(dart_dev_t *dart, uintptr_t iova, int silent) +{ + u32 ttbr = (iova >> 36) & 0x3; + u32 l1_index = (iova >> 25) & 0x7ff; + + if ((int)ttbr >= dart->params->ttbr_count) { + printf("dart[%lx %u]: ttbr out of range: %d\n", dart->regs, dart->device, ttbr); + return NULL; + } + + if (!dart->l1[ttbr]) { + printf("dart[%lx %u]: l1[%u] is not set\n", dart->regs, dart->device, ttbr); + return NULL; + } + + if (!(dart->l1[ttbr][l1_index] & DART_PTE_VALID) && !silent) { + printf("dart[%lx %u]: l1 translation failure %x %lx\n", dart->regs, dart->device, l1_index, + iova); + return NULL; + } + + u32 l2_index = (iova >> 14) & 0x7ff; + u64 *l2 = (u64 
*)(FIELD_GET(dart->params->offset_mask, dart->l1[ttbr][l1_index]) + << DART_PTE_OFFSET_SHIFT); + + if (!(l2[l2_index] & DART_PTE_VALID) && !silent) { + printf("dart[%lx %u]: l2 translation failure %x:%x %lx\n", dart->regs, dart->device, + l1_index, l2_index, iova); + return NULL; + } + + u32 offset = iova & 0x3fff; + void *base = + (void *)(FIELD_GET(dart->params->offset_mask, l2[l2_index]) << DART_PTE_OFFSET_SHIFT); + + return base + offset; +} + +void *dart_translate(dart_dev_t *dart, uintptr_t iova) +{ + return dart_translate_internal(dart, iova, 0); +} + +u64 dart_search(dart_dev_t *dart, void *paddr) +{ + for (int ttbr = 0; ttbr < dart->params->ttbr_count; ++ttbr) { + if (!dart->l1[ttbr]) + continue; + for (u32 l1_index = 0; l1_index < 0x7ff; l1_index++) { + if (!(dart->l1[ttbr][l1_index] & DART_PTE_VALID)) + continue; + + u64 *l2 = (u64 *)(FIELD_GET(dart->params->offset_mask, dart->l1[ttbr][l1_index]) + << DART_PTE_OFFSET_SHIFT); + for (u32 l2_index = 0; l2_index < 0x7ff; l2_index++) { + if (!(l2[l2_index] & DART_PTE_VALID)) + continue; + u64 *dst = (u64 *)(FIELD_GET(dart->params->offset_mask, l2[l2_index]) + << DART_PTE_OFFSET_SHIFT); + if (dst == paddr) + return ((u64)ttbr << 36) | ((u64)l1_index << 25) | (l2_index << 14); + } + } + } + + return DART_PTR_ERR; +} + +u64 dart_find_iova(dart_dev_t *dart, s64 start, size_t len) +{ + if (len % SZ_16K) + return -1; + if (start < 0 || start % SZ_16K) + return -1; + + uintptr_t end = 1LLU << 36; + uintptr_t iova = start; + + while (iova + len <= end) { + + if (dart_translate_internal(dart, iova, 1) == NULL) { + size_t size; + for (size = SZ_16K; size < len; size += SZ_16K) { + if (dart_translate_internal(dart, iova + size, 1) != NULL) + break; + } + if (size == len) + return iova; + + iova += size + SZ_16K; + } else + iova += SZ_16K; + } + + return DART_PTR_ERR; +} + +void dart_shutdown(dart_dev_t *dart) +{ + if (!dart->locked && !dart->keep) + write32(DART_TCR(dart), dart->params->tcr_disabled); + + for (int i = 
0; i < dart->params->ttbr_count; ++i) + if (is_heap(dart->l1[i])) + write32(DART_TTBR(dart, i), 0); + + for (int ttbr = 0; ttbr < dart->params->ttbr_count; ++ttbr) { + for (int i = 0; i < SZ_16K / 8; ++i) { + if (dart->l1[ttbr][i] & DART_PTE_VALID) { + void *l2 = dart_get_l2(dart, i); + if (is_heap(l2)) { + free(l2); + dart->l1[ttbr][i] = 0; + } + } + } + } + + dart->params->tlb_invalidate(dart); + + for (int i = 0; i < dart->params->ttbr_count; ++i) + if (is_heap(dart->l1[i])) + free(dart->l1[i]); + free(dart); +} + +u64 dart_vm_base(dart_dev_t *dart) +{ + return dart->vm_base; +} diff --git a/tools/src/dart.h b/tools/src/dart.h new file mode 100644 index 0000000..7d8474b --- /dev/null +++ b/tools/src/dart.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef DART_H +#define DART_H + +#include "types.h" + +#define DART_PTR_ERR BIT(63) +#define DART_IS_ERR(val) FIELD_GET(DART_PTR_ERR, val) + +typedef struct dart_dev dart_dev_t; + +enum dart_type_t { + DART_T8020, + DART_T8110, + DART_T6000, +}; + +dart_dev_t *dart_init(uintptr_t base, u8 device, bool keep_pts, enum dart_type_t type); +dart_dev_t *dart_init_adt(const char *path, int instance, int device, bool keep_pts); +void dart_lock_adt(const char *path, int instance); +dart_dev_t *dart_init_fdt(void *dt, u32 phandle, int device, bool keep_pts); +int dart_setup_pt_region(dart_dev_t *dart, const char *path, int device, u64 vm_base); +int dart_map(dart_dev_t *dart, uintptr_t iova, void *bfr, size_t len); +void dart_unmap(dart_dev_t *dart, uintptr_t iova, size_t len); +void dart_free_l2(dart_dev_t *dart, uintptr_t iova); +void *dart_translate(dart_dev_t *dart, uintptr_t iova); +u64 dart_search(dart_dev_t *dart, void *paddr); +u64 dart_find_iova(dart_dev_t *dart, s64 start, size_t len); +void dart_shutdown(dart_dev_t *dart); +u64 dart_vm_base(dart_dev_t *dart); + +#endif diff --git a/tools/src/dcp.c b/tools/src/dcp.c new file mode 100644 index 0000000..e9f0503 --- /dev/null +++ b/tools/src/dcp.c @@ -0,0 
+1,92 @@ +/* SPDX-License-Identifier: MIT */ + +#include "dcp.h" +#include "adt.h" +#include "malloc.h" +#include "pmgr.h" +#include "rtkit.h" +#include "utils.h" + +dcp_dev_t *dcp_init(const char *dcp_path, const char *dcp_dart_path, const char *disp_dart_path) +{ + u32 sid; + + int node = adt_path_offset(adt, "/arm-io/dart-dcp/mapper-dcp"); + if (node < 0) { + printf("dcp: mapper-dcp not found!\n"); + return NULL; + } + if (ADT_GETPROP(adt, node, "reg", &sid) < 0) { + printf("dcp: failed to read dart stream ID!\n"); + return NULL; + } + + dcp_dev_t *dcp = malloc(sizeof(dcp_dev_t)); + if (!dcp) + return NULL; + + dcp->dart_dcp = dart_init_adt(dcp_dart_path, 0, sid, true); + if (!dcp->dart_dcp) { + printf("dcp: failed to initialize DCP DART\n"); + goto out_free; + } + u64 vm_base = dart_vm_base(dcp->dart_dcp); + dart_setup_pt_region(dcp->dart_dcp, dcp_dart_path, sid, vm_base); + + dcp->dart_disp = dart_init_adt(disp_dart_path, 0, 0, true); + if (!dcp->dart_disp) { + printf("dcp: failed to initialize DISP DART\n"); + goto out_dart_dcp; + } + // set disp0's page tables at dart-dcp's vm-base + dart_setup_pt_region(dcp->dart_disp, disp_dart_path, 0, vm_base); + + dcp->iovad_dcp = iovad_init(vm_base + 0x10000000, vm_base + 0x20000000); + + dcp->asc = asc_init(dcp_path); + if (!dcp->asc) { + printf("dcp: failed to initialize ASC\n"); + goto out_iovad; + } + + dcp->rtkit = rtkit_init("dcp", dcp->asc, dcp->dart_dcp, dcp->iovad_dcp, NULL); + if (!dcp->rtkit) { + printf("dcp: failed to initialize RTKit\n"); + goto out_iovad; + } + + if (!rtkit_boot(dcp->rtkit)) { + printf("dcp: failed to boot RTKit\n"); + goto out_iovad; + } + + return dcp; + + rtkit_quiesce(dcp->rtkit); + rtkit_free(dcp->rtkit); +out_iovad: + iovad_shutdown(dcp->iovad_dcp, dcp->dart_dcp); + dart_shutdown(dcp->dart_disp); +out_dart_dcp: + dart_shutdown(dcp->dart_dcp); +out_free: + free(dcp); + return NULL; +} + +int dcp_shutdown(dcp_dev_t *dcp, bool sleep) +{ + if (sleep) { + rtkit_sleep(dcp->rtkit); + 
pmgr_reset(0, "DISP0_CPU0"); + } else { + rtkit_quiesce(dcp->rtkit); + } + rtkit_free(dcp->rtkit); + dart_shutdown(dcp->dart_disp); + iovad_shutdown(dcp->iovad_dcp, dcp->dart_dcp); + dart_shutdown(dcp->dart_dcp); + free(dcp); + + return 0; +} diff --git a/tools/src/dcp.h b/tools/src/dcp.h new file mode 100644 index 0000000..c9de8f2 --- /dev/null +++ b/tools/src/dcp.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef DCP_H +#define DCP_H + +#include "asc.h" +#include "dart.h" +#include "rtkit.h" + +typedef struct { + dart_dev_t *dart_dcp; + dart_dev_t *dart_disp; + iova_domain_t *iovad_dcp; + asc_dev_t *asc; + rtkit_dev_t *rtkit; +} dcp_dev_t; + +dcp_dev_t *dcp_init(const char *dcp_path, const char *dcp_dart_path, const char *disp_dart_path); + +int dcp_shutdown(dcp_dev_t *dcp, bool sleep); + +#endif diff --git a/tools/src/dcp_iboot.c b/tools/src/dcp_iboot.c new file mode 100644 index 0000000..8f8a374 --- /dev/null +++ b/tools/src/dcp_iboot.c @@ -0,0 +1,224 @@ +/* SPDX-License-Identifier: MIT */ + +#include "dcp_iboot.h" +#include "afk.h" +#include "assert.h" +#include "malloc.h" +#include "string.h" +#include "utils.h" + +#define DCP_IBOOT_ENDPOINT 0x23 + +#define TXBUF_LEN 0x4000 +#define RXBUF_LEN 0x4000 + +struct txcmd { + u32 op; + u32 len; + u32 unk1; + u32 unk2; + u8 payload[]; +}; + +struct rxcmd { + u32 op; + u32 len; + u8 payload[]; +}; + +struct dcp_iboot_if { + dcp_dev_t *dcp; + afk_epic_ep_t *epic; + int channel; + + union { + u8 txbuf[TXBUF_LEN]; + struct txcmd txcmd; + }; + + union { + u8 rxbuf[RXBUF_LEN]; + struct rxcmd rxcmd; + }; +}; + +enum IBootCmd { + IBOOT_SET_POWER = 2, + IBOOT_GET_HPD = 3, + IBOOT_GET_TIMING_MODES = 4, + IBOOT_GET_COLOR_MODES = 5, + IBOOT_SET_MODE = 6, + IBOOT_SWAP_BEGIN = 15, + IBOOT_SWAP_SET_LAYER = 16, + IBOOT_SWAP_END = 18, +}; + +struct get_hpd_resp { + u8 hpd; + u8 pad[3]; + u32 timing_cnt; + u32 color_cnt; +}; + +struct get_tmode_resp { + u32 count; + dcp_timing_mode_t modes[]; +}; + +struct 
get_cmode_resp { + u32 count; + dcp_color_mode_t modes[]; +}; + +struct swap_start_resp { + u32 unk1, unk2, unk3; + u32 swap_id; + u32 unk4; +}; + +struct swap_set_layer_cmd { + u32 unk; + u32 layer_id; + dcp_layer_t layer; + dcp_rect_t src; + dcp_rect_t dst; + u32 unk2; +} PACKED; + +dcp_iboot_if_t *dcp_ib_init(dcp_dev_t *dcp) +{ + dcp_iboot_if_t *iboot = malloc(sizeof(dcp_iboot_if_t)); + if (!iboot) + return NULL; + + iboot->dcp = dcp; + iboot->epic = afk_epic_init(dcp->rtkit, DCP_IBOOT_ENDPOINT); + if (!iboot->epic) { + printf("dcp-iboot: failed to initialize EPIC\n"); + goto err_free; + } + + iboot->channel = afk_epic_start_interface(iboot->epic, "disp0-service", TXBUF_LEN, RXBUF_LEN); + + if (iboot->channel < 0) { + printf("dcp-iboot: failed to initialize disp0 service\n"); + goto err_shutdown; + } + + return iboot; + +err_shutdown: + afk_epic_shutdown(iboot->epic); +err_free: + free(iboot); + return NULL; +} + +int dcp_ib_shutdown(dcp_iboot_if_t *iboot) +{ + afk_epic_shutdown(iboot->epic); + + free(iboot); + return 0; +} + +static int dcp_ib_cmd(dcp_iboot_if_t *iboot, int op, size_t in_size) +{ + size_t rxsize = RXBUF_LEN; + assert(in_size <= TXBUF_LEN - sizeof(struct txcmd)); + + iboot->txcmd.op = op; + iboot->txcmd.len = sizeof(struct txcmd) + in_size; + + return afk_epic_command(iboot->epic, iboot->channel, 0xc0, iboot->txbuf, + sizeof(struct txcmd) + in_size, iboot->rxbuf, &rxsize); +} + +int dcp_ib_set_power(dcp_iboot_if_t *iboot, bool power) +{ + u32 *pwr = (void *)iboot->txcmd.payload; + *pwr = power; + + return dcp_ib_cmd(iboot, IBOOT_SET_POWER, 1); +} + +int dcp_ib_get_hpd(dcp_iboot_if_t *iboot, int *timing_cnt, int *color_cnt) +{ + struct get_hpd_resp *resp = (void *)iboot->rxcmd.payload; + int ret = dcp_ib_cmd(iboot, IBOOT_GET_HPD, 0); + + if (ret < 0) + return ret; + + if (timing_cnt) + *timing_cnt = resp->timing_cnt; + if (color_cnt) + *color_cnt = resp->color_cnt; + + return !!resp->hpd; +} + +int dcp_ib_get_timing_modes(dcp_iboot_if_t *iboot, 
dcp_timing_mode_t **modes) +{ + struct get_tmode_resp *resp = (void *)iboot->rxcmd.payload; + int ret = dcp_ib_cmd(iboot, IBOOT_GET_TIMING_MODES, 0); + + if (ret < 0) + return ret; + + *modes = resp->modes; + return resp->count; +} + +int dcp_ib_get_color_modes(dcp_iboot_if_t *iboot, dcp_color_mode_t **modes) +{ + struct get_cmode_resp *resp = (void *)iboot->rxcmd.payload; + int ret = dcp_ib_cmd(iboot, IBOOT_GET_COLOR_MODES, 0); + + if (ret < 0) + return ret; + + *modes = resp->modes; + return resp->count; +} + +int dcp_ib_set_mode(dcp_iboot_if_t *iboot, dcp_timing_mode_t *tmode, dcp_color_mode_t *cmode) +{ + struct { + dcp_timing_mode_t tmode; + dcp_color_mode_t cmode; + } *cmd = (void *)iboot->txcmd.payload; + + cmd->tmode = *tmode; + cmd->cmode = *cmode; + return dcp_ib_cmd(iboot, IBOOT_SET_MODE, sizeof(*cmd)); +} + +int dcp_ib_swap_begin(dcp_iboot_if_t *iboot) +{ + struct swap_start_resp *resp = (void *)iboot->rxcmd.payload; + int ret = dcp_ib_cmd(iboot, IBOOT_SWAP_BEGIN, 0); + if (ret < 0) + return ret; + + return resp->swap_id; +} + +int dcp_ib_swap_set_layer(dcp_iboot_if_t *iboot, int layer_id, dcp_layer_t *layer, + dcp_rect_t *src_rect, dcp_rect_t *dst_rect) +{ + struct swap_set_layer_cmd *cmd = (void *)iboot->txcmd.payload; + memset(cmd, 0, sizeof(*cmd)); + cmd->layer_id = layer_id; + cmd->layer = *layer; + cmd->src = *src_rect; + cmd->dst = *dst_rect; + + return dcp_ib_cmd(iboot, IBOOT_SWAP_SET_LAYER, sizeof(*cmd)); +} + +int dcp_ib_swap_end(dcp_iboot_if_t *iboot) +{ + memset(iboot->txcmd.payload, 0, 12); + return dcp_ib_cmd(iboot, IBOOT_SWAP_END, 12); +} diff --git a/tools/src/dcp_iboot.h b/tools/src/dcp_iboot.h new file mode 100644 index 0000000..adb449e --- /dev/null +++ b/tools/src/dcp_iboot.h @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef DCP_IBOOT_H +#define DCP_IBOOT_H + +#include "dcp.h" + +typedef struct dcp_iboot_if dcp_iboot_if_t; + +enum DCPEOTF { + EOTF_GAMMA_SDR = 1, + EOTF_GAMMA_HDR = 2, +}; + +enum DCPEncoding { + ENC_RGB 
= 1, + ENC_YCBCR_444 = 3, + ENC_YCBCR_422 = 4, + ENC_YCBCR_420 = 5, +}; + +enum DCPColorimetry { + CLR_BT601_709 = 1, + CLR_BT2020 = 2, + CLR_DCIP3 = 3, +}; + +enum DCPSurfaceFmt { + FMT_BGRA = 1, + FMT_RGBA = 3, + FMT_w18p = 4, + FMT_444v = 6, + FMT_422v = 7, + FMT_420v = 8, + FMT_w30r = 9, + FMT_w40a = 10, +}; + +enum DCPTransform { + XFRM_NONE = 0, + XFRM_XFLIP = 1, + XFRM_YFLIP = 2, + XFRM_ROT_90 = 3, + XFRM_ROT_180 = 4, + XFRM_ROT_270 = 5, +}; + +enum AddrFormat { + ADDR_PLANAR = 1, + ADDR_TILED = 2, + ADDR_AGX = 3, +}; + +typedef struct { + u32 valid; + u32 width; + u32 height; + u32 fps; + u8 pad[8]; +} PACKED dcp_timing_mode_t; + +typedef struct { + u32 valid; + u32 colorimetry; + u32 eotf; + u32 encoding; + u32 bpp; + u8 pad[4]; +} PACKED dcp_color_mode_t; + +typedef struct { + u32 unk1; + u64 addr; + u32 tile_size; + u32 stride; + u32 unk2[4]; + u32 addr_format; + u32 unk3; +} PACKED dcp_plane_t; + +typedef struct { + dcp_plane_t planes[3]; + u32 unk; + u32 plane_cnt; + u32 width; + u32 height; + u32 surface_fmt; + u32 colorspace; + u32 eotf; + u8 transform; + u8 padding[3]; +} PACKED dcp_layer_t; + +typedef struct { + u32 w, h, x, y; +} PACKED dcp_rect_t; + +dcp_iboot_if_t *dcp_ib_init(dcp_dev_t *dcp); +int dcp_ib_shutdown(dcp_iboot_if_t *iboot); + +int dcp_ib_set_power(dcp_iboot_if_t *iboot, bool power); +int dcp_ib_get_hpd(dcp_iboot_if_t *iboot, int *timing_cnt, int *color_cnt); +int dcp_ib_get_timing_modes(dcp_iboot_if_t *iboot, dcp_timing_mode_t **modes); +int dcp_ib_get_color_modes(dcp_iboot_if_t *iboot, dcp_color_mode_t **modes); +int dcp_ib_set_mode(dcp_iboot_if_t *iboot, dcp_timing_mode_t *timing, dcp_color_mode_t *color); +int dcp_ib_swap_begin(dcp_iboot_if_t *iboot); +int dcp_ib_swap_set_layer(dcp_iboot_if_t *iboot, int layer_id, dcp_layer_t *layer, + dcp_rect_t *src_rect, dcp_rect_t *dst_rect); +int dcp_ib_swap_end(dcp_iboot_if_t *iboot); + +#endif diff --git a/tools/src/devicetree.c b/tools/src/devicetree.c new file mode 100644 index 
0000000..f0c9193 --- /dev/null +++ b/tools/src/devicetree.c @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: MIT */ + +#include "devicetree.h" + +#include "libfdt/libfdt.h" + +void dt_parse_ranges(void *dt, int node, struct dt_ranges_tbl *ranges) +{ + int len; + const struct fdt_property *ranges_prop = fdt_get_property(dt, node, "ranges", &len); + if (ranges_prop && len > 0) { + int idx = 0; + int num_entries = len / sizeof(fdt64_t); + if (num_entries > DT_MAX_RANGES) + num_entries = DT_MAX_RANGES; + + const fdt64_t *entry = (const fdt64_t *)ranges_prop->data; + for (int i = 0; i < num_entries; ++i) { + u64 start = fdt64_ld(entry++); + u64 parent = fdt64_ld(entry++); + u64 size = fdt64_ld(entry++); + if (size) { + ranges[idx].start = start; + ranges[idx].parent = parent; + ranges[idx].size = size; + idx++; + } + } + } +} + +u64 dt_translate(struct dt_ranges_tbl *ranges, const fdt64_t *reg) +{ + u64 addr = fdt64_ld(reg); + for (int idx = 0; idx < DT_MAX_RANGES; ++idx) { + if (ranges[idx].size == 0) + break; + if (addr >= ranges[idx].start && addr < ranges[idx].start + ranges[idx].size) + return ranges[idx].parent - ranges[idx].start + addr; + } + + return addr; +} + +u64 dt_get_address(void *dt, int node) +{ + int parent = fdt_parent_offset(dt, node); + + // find parent with "ranges" property + while (parent >= 0) { + if (fdt_getprop(dt, parent, "ranges", NULL)) + break; + + parent = fdt_parent_offset(dt, parent); + } + + if (parent < 0) + return 0; + + // parse ranges for address translation + struct dt_ranges_tbl ranges[DT_MAX_RANGES] = {0}; + dt_parse_ranges(dt, parent, ranges); + + const fdt64_t *reg = fdt_getprop(dt, node, "reg", NULL); + if (!reg) + return 0; + + return dt_translate(ranges, reg); +} diff --git a/tools/src/devicetree.h b/tools/src/devicetree.h new file mode 100644 index 0000000..855f038 --- /dev/null +++ b/tools/src/devicetree.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef DEVICETREE_H +#define DEVICETREE_H + +#include "types.h" 
+ +#include "libfdt/libfdt.h" + +#define DT_MAX_RANGES 8 + +struct dt_ranges_tbl { + u64 start; + u64 parent; + u64 size; +}; + +void dt_parse_ranges(void *dt, int node, struct dt_ranges_tbl *ranges); +u64 dt_translate(struct dt_ranges_tbl *ranges, const fdt64_t *reg); +u64 dt_get_address(void *dt, int node); + +#endif diff --git a/tools/src/display.c b/tools/src/display.c new file mode 100644 index 0000000..3dbf49e --- /dev/null +++ b/tools/src/display.c @@ -0,0 +1,514 @@ +/* SPDX-License-Identifier: MIT */ + +#include "display.h" +#include "adt.h" +#include "assert.h" +#include "dcp.h" +#include "dcp_iboot.h" +#include "fb.h" +#include "memory.h" +#include "string.h" +#include "utils.h" +#include "xnuboot.h" + +#define DISPLAY_STATUS_DELAY 100 +#define DISPLAY_STATUS_RETRIES 20 + +#define COMPARE(a, b) \ + if ((a) > (b)) { \ + *best = modes[i]; \ + continue; \ + } else if ((a) < (b)) { \ + continue; \ + } + +static dcp_dev_t *dcp; +static dcp_iboot_if_t *iboot; +static u64 fb_dva; +static u64 fb_size; +bool display_is_external; + +#define abs(x) ((x) >= 0 ? 
(x) : -(x)) + +u64 display_mode_fb_size(dcp_timing_mode_t *mode) +{ + // assume 4 byte per pixel (either BGRA x2r10b10g10) + return mode->width * mode->height * 4; +} + +static void display_choose_timing_mode(dcp_timing_mode_t *modes, int cnt, dcp_timing_mode_t *best, + dcp_timing_mode_t *want) +{ + *best = modes[0]; + + for (int i = 1; i < cnt; i++) { + COMPARE(modes[i].valid, best->valid); + if (want && want->valid) { + COMPARE(modes[i].width == want->width && modes[i].height == want->height, + best->width == want->width && best->height == want->height); + COMPARE(-abs((long)modes[i].fps - (long)want->fps), + -abs((long)best->fps - (long)want->fps)); + } else { + COMPARE(display_mode_fb_size(&modes[i]) <= fb_size, + display_mode_fb_size(best) <= fb_size); + } + + COMPARE(modes[i].width <= 1920, best->width <= 1920); + COMPARE(modes[i].height <= 1200, best->height <= 1200); + COMPARE(modes[i].fps <= 60 << 16, best->fps <= 60 << 16); + COMPARE(modes[i].width, best->width); + COMPARE(modes[i].height, best->height); + COMPARE(modes[i].fps, best->fps); + } + + printf("display: timing mode: valid=%d %dx%d %d.%02d Hz\n", best->valid, best->width, + best->height, best->fps >> 16, ((best->fps & 0xffff) * 100 + 0x7fff) >> 16); +} + +static void display_choose_color_mode(dcp_color_mode_t *modes, int cnt, dcp_color_mode_t *best) +{ + *best = modes[0]; + + for (int i = 1; i < cnt; i++) { + COMPARE(modes[i].valid, best->valid); + COMPARE(modes[i].bpp <= 32, best->bpp <= 32); + COMPARE(modes[i].bpp, best->bpp); + COMPARE(-modes[i].colorimetry, -best->colorimetry); + COMPARE(-modes[i].encoding, -best->encoding); + COMPARE(-modes[i].eotf, -best->eotf); + } + + printf("display: color mode: valid=%d colorimetry=%d eotf=%d encoding=%d bpp=%d\n", best->valid, + best->colorimetry, best->eotf, best->encoding, best->bpp); +} + +int display_get_vram(u64 *paddr, u64 *size) +{ + int ret = 0; + int adt_path[4]; + int node = adt_path_offset_trace(adt, "/vram", adt_path); + + if (node < 0) { 
+ printf("display: '/vram' not found\n"); + return -1; + } + + int pp = 0; + while (adt_path[pp]) + pp++; + adt_path[pp + 1] = 0; + + ret = adt_get_reg(adt, adt_path, "reg", 0, paddr, size); + if (ret < 0) { + printf("display: failed to read /vram/reg\n"); + return -1; + } + + if (*paddr != cur_boot_args.video.base) { + printf("display: vram does not match boot_args.video.base\n"); + return -1; + } + + return 0; +} + +static uintptr_t display_map_fb(uintptr_t iova, u64 paddr, u64 size) +{ + if (iova == 0) { + u64 iova_disp0 = 0; + u64 iova_dcp = 0; + + // start scanning for free iova space on vm-base + iova_dcp = dart_find_iova(dcp->dart_dcp, dart_vm_base(dcp->dart_dcp), size); + if (DART_IS_ERR(iova_dcp)) { + printf("display: failed to find IOVA for fb of %06zx bytes (dcp)\n", size); + return iova_dcp; + } + + // try to map the fb to the same IOVA on disp0 + iova_disp0 = dart_find_iova(dcp->dart_disp, iova_dcp, size); + if (DART_IS_ERR(iova_disp0)) { + printf("display: failed to find IOVA for fb of %06zx bytes (disp0)\n", size); + return iova_disp0; + } + + // assume this results in the same IOVA, not sure if this is required but matches what iboot + // does on other models. 
+ if (iova_disp0 != iova_dcp) { + printf("display: IOVA mismatch for fb between dcp (%08lx) and disp0 (%08lx)\n", + (u64)iova_dcp, (u64)iova_disp0); + return DART_PTR_ERR; + } + + iova = iova_dcp; + } + + int ret = dart_map(dcp->dart_disp, iova, (void *)paddr, size); + if (ret < 0) { + printf("display: failed to map fb to dart-disp0\n"); + return DART_PTR_ERR; + } + + ret = dart_map(dcp->dart_dcp, iova, (void *)paddr, size); + if (ret < 0) { + printf("display: failed to map fb to dart-dcp\n"); + dart_unmap(dcp->dart_disp, iova, size); + return DART_PTR_ERR; + } + + return iova; +} + +int display_start_dcp(void) +{ + if (iboot) + return 0; + + dcp = dcp_init("/arm-io/dcp", "/arm-io/dart-dcp", "/arm-io/dart-disp0"); + if (!dcp) { + printf("display: failed to initialize DCP\n"); + return -1; + } + + // determine frame buffer PA and size from "/vram" + u64 pa, size; + if (display_get_vram(&pa, &size)) { + // use a safe fb_size + fb_size = cur_boot_args.video.stride * cur_boot_args.video.height * + ((cur_boot_args.video.depth + 7) / 8); + } else { + fb_size = size; + } + + // Find the framebuffer DVA + fb_dva = dart_search(dcp->dart_disp, (void *)cur_boot_args.video.base); + // framebuffer is not mapped on the M1 Ultra Mac Studio + if (DART_IS_ERR(fb_dva)) + fb_dva = display_map_fb(0, pa, size); + if (DART_IS_ERR(fb_dva)) { + printf("display: failed to find display DVA\n"); + fb_dva = 0; + dcp_shutdown(dcp, false); + return -1; + } + + iboot = dcp_ib_init(dcp); + if (!iboot) { + printf("display: failed to initialize DCP iBoot interface\n"); + dcp_shutdown(dcp, false); + return -1; + } + + return 0; +} + +struct display_options { + bool retina; +}; + +int display_parse_mode(const char *config, dcp_timing_mode_t *mode, struct display_options *opts) +{ + memset(mode, 0, sizeof(*mode)); + + if (!config || !strcmp(config, "auto")) + return 0; + + const char *s_w = config; + const char *s_h = strchr(config, 'x'); + const char *s_fps = strchr(config, '@'); + + if (s_w && s_h) 
{ + mode->width = atol(s_w); + mode->height = atol(s_h + 1); + mode->valid = mode->width && mode->height; + } + + if (s_fps) { + mode->fps = atol(s_fps + 1) << 16; + + const char *s_fps_frac = strchr(s_fps + 1, '.'); + if (s_fps_frac) { + // Assumes two decimals... + mode->fps += (atol(s_fps_frac + 1) << 16) / 100; + } + } + + const char *option = config; + while (option && opts) { + if (!strncmp(option + 1, "retina", 6)) + opts->retina = true; + option = strchr(option + 1, ','); + } + + printf("display: want mode: valid=%d %dx%d %d.%02d Hz\n", mode->valid, mode->width, + mode->height, mode->fps >> 16, ((mode->fps & 0xffff) * 100 + 0x7fff) >> 16); + + return mode->valid; +} + +static int display_swap(u64 iova, u32 stride, u32 width, u32 height) +{ + int ret; + int swap_id = ret = dcp_ib_swap_begin(iboot); + if (swap_id < 0) { + printf("display: failed to start swap\n"); + return -1; + } + + dcp_layer_t layer = { + .planes = {{ + .addr = iova, + .stride = stride, + .addr_format = ADDR_PLANAR, + }}, + .plane_cnt = 1, + .width = width, + .height = height, + .surface_fmt = FMT_w30r, + .colorspace = 2, + .eotf = EOTF_GAMMA_SDR, + .transform = XFRM_NONE, + }; + + dcp_rect_t rect = {width, height, 0, 0}; + + if ((ret = dcp_ib_swap_set_layer(iboot, 0, &layer, &rect, &rect)) < 0) { + printf("display: failed to set layer\n"); + return -1; + } + + if ((ret = dcp_ib_swap_end(iboot)) < 0) { + printf("display: failed to complete swap\n"); + return -1; + } + + return swap_id; +} + +int display_configure(const char *config) +{ + dcp_timing_mode_t want; + struct display_options opts = {0}; + + display_parse_mode(config, &want, &opts); + + u64 start_time = get_ticks(); + + int ret = display_start_dcp(); + if (ret < 0) + return ret; + + // Power on + if ((ret = dcp_ib_set_power(iboot, true)) < 0) { + printf("display: failed to set power\n"); + return ret; + } + + // Detect if display is connected + int timing_cnt, color_cnt; + int hpd = 0, retries = 0; + + /* After boot DCP does not 
immediately report a connected display. Retry getting display + * information for 2 seconds. + */ + while (retries++ < DISPLAY_STATUS_RETRIES) { + hpd = dcp_ib_get_hpd(iboot, &timing_cnt, &color_cnt); + if (hpd < 0) + ret = hpd; + else if (hpd && timing_cnt && color_cnt) + break; + if (retries < DISPLAY_STATUS_RETRIES) + mdelay(DISPLAY_STATUS_DELAY); + } + printf("display: waited %d ms for display status\n", (retries - 1) * DISPLAY_STATUS_DELAY); + if (ret < 0) { + printf("display: failed to get display status\n"); + return 0; + } + + printf("display: connected:%d timing_cnt:%d color_cnt:%d\n", hpd, timing_cnt, color_cnt); + + if (!hpd || !timing_cnt || !color_cnt) + return 0; + + // Find best modes + dcp_timing_mode_t *tmodes, tbest; + if ((ret = dcp_ib_get_timing_modes(iboot, &tmodes)) < 0) { + printf("display: failed to get timing modes\n"); + return -1; + } + assert(ret == timing_cnt); + display_choose_timing_mode(tmodes, timing_cnt, &tbest, &want); + + dcp_color_mode_t *cmodes, cbest; + if ((ret = dcp_ib_get_color_modes(iboot, &cmodes)) < 0) { + printf("display: failed to get color modes\n"); + return -1; + } + assert(ret == color_cnt); + display_choose_color_mode(cmodes, color_cnt, &cbest); + + // Set mode + if ((ret = dcp_ib_set_mode(iboot, &tbest, &cbest)) < 0) { + printf("display: failed to set mode\n"); + return -1; + } + + u64 fb_pa = cur_boot_args.video.base; + u64 tmp_dva = 0; + + size_t size = + ALIGN_UP(tbest.width * tbest.height * ((cbest.bpp + 7) / 8) + 24 * SZ_16K, SZ_16K); + + if (fb_size < size) { + printf("display: current framebuffer is too small for new mode\n"); + + /* rtkit uses 0x10000000 as DVA offset, FB starts in the first page */ + if ((s64)size > 7 * SZ_32M) { + printf("display: not enough reserved L2 DVA space for fb size 0x%zx\n", size); + return -1; + } + + cur_boot_args.mem_size -= size; + fb_pa = cur_boot_args.phys_base + cur_boot_args.mem_size; + /* add guard page between RAM and framebuffer */ + // TODO: update mapping? 
+ cur_boot_args.mem_size -= SZ_16K; + + memset((void *)fb_pa, 0, size); + + tmp_dva = iova_alloc(dcp->iovad_dcp, size); + + tmp_dva = display_map_fb(tmp_dva, fb_pa, size); + if (DART_IS_ERR(tmp_dva)) { + printf("display: failed to map new fb\n"); + return -1; + } + + // Swap! + u32 stride = tbest.width * 4; + ret = display_swap(tmp_dva, stride, tbest.width, tbest.height); + if (ret < 0) + return ret; + + /* wait for swap durations + 1ms */ + u32 delay = (((1000 << 16) + tbest.fps - 1) / tbest.fps) + 1; + mdelay(delay); + dart_unmap(dcp->dart_disp, fb_dva, fb_size); + dart_unmap(dcp->dart_dcp, fb_dva, fb_size); + + fb_dva = display_map_fb(fb_dva, fb_pa, size); + if (DART_IS_ERR(fb_dva)) { + printf("display: failed to map new fb\n"); + fb_dva = 0; + return -1; + } + + fb_size = size; + mmu_map_framebuffer(fb_pa, fb_size); + + /* update ADT with the physical address of the new framebuffer */ + u64 fb_reg[2] = {fb_pa, size}; + int node = adt_path_offset(adt, "vram"); + if (node >= 0) { + // TODO: adt_set_reg(adt, node, "vram", fb_pa, size);? + ret = adt_setprop(adt, node, "reg", &fb_reg, sizeof(fb_reg)); + if (ret < 0) + printf("display: failed to update '/vram'\n"); + } + node = adt_path_offset(adt, "/chosen/carveout-memory-map"); + if (node >= 0) { + // TODO: adt_set_reg(adt, node, "vram", fb_pa, size);? + ret = adt_setprop(adt, node, "region-id-14", &fb_reg, sizeof(fb_reg)); + if (ret < 0) + printf("display: failed to update '/chosen/carveout-memory-map/region-id-14'\n"); + } + } + + // Swap! + u32 stride = tbest.width * 4; + ret = display_swap(fb_dva, stride, tbest.width, tbest.height); + if (ret < 0) + return ret; + + printf("display: swapped! 
(swap_id=%d)\n", ret); + + if (fb_pa != cur_boot_args.video.base || cur_boot_args.video.stride != stride || + cur_boot_args.video.width != tbest.width || cur_boot_args.video.height != tbest.height || + cur_boot_args.video.depth != 30) { + cur_boot_args.video.base = fb_pa; + cur_boot_args.video.stride = stride; + cur_boot_args.video.width = tbest.width; + cur_boot_args.video.height = tbest.height; + cur_boot_args.video.depth = 30 | (opts.retina ? FB_DEPTH_FLAG_RETINA : 0); + fb_reinit(); + } + + /* Update for python / subsequent stages */ + memcpy((void *)boot_args_addr, &cur_boot_args, sizeof(cur_boot_args)); + + if (tmp_dva) { + // unmap / free temporary dva + dart_unmap(dcp->dart_disp, tmp_dva, size); + dart_unmap(dcp->dart_dcp, tmp_dva, size); + iova_free(dcp->iovad_dcp, tmp_dva, size); + } + + u64 msecs = ticks_to_msecs(get_ticks() - start_time); + printf("display: Modeset took %ld ms\n", msecs); + + return 1; +} + +int display_init(void) +{ + int node = adt_path_offset(adt, "/arm-io/disp0"); + + if (node < 0) { + printf("DISP0 node not found!\n"); + return -1; + } + + display_is_external = adt_getprop(adt, node, "external", NULL); + if (display_is_external) + printf("display: Display is external\n"); + else + printf("display: Display is internal\n"); + + if (cur_boot_args.video.width == 640 && cur_boot_args.video.height == 1136) { + printf("display: Dummy framebuffer found, initializing display\n"); + return display_configure(NULL); + } else if (display_is_external) { + printf("display: External display found, reconfiguring\n"); + return display_configure(NULL); + } else { + printf("display: Display is already initialized (%ldx%ld)\n", cur_boot_args.video.width, + cur_boot_args.video.height); + return 0; + } +} + +void display_shutdown(dcp_shutdown_mode mode) +{ + if (iboot) { + dcp_ib_shutdown(iboot); + switch (mode) { + case DCP_QUIESCED: + printf("display: Quiescing DCP (unconditional)\n"); + dcp_shutdown(dcp, false); + break; + case DCP_SLEEP_IF_EXTERNAL: 
+ if (!display_is_external) + printf("display: Quiescing DCP (internal)\n"); + else + printf("display: Sleeping DCP (external)\n"); + dcp_shutdown(dcp, display_is_external); + break; + case DCP_SLEEP: + printf("display: Sleeping DCP (unconditional)\n"); + dcp_shutdown(dcp, true); + break; + } + iboot = NULL; + } +} diff --git a/tools/src/display.h b/tools/src/display.h new file mode 100644 index 0000000..992088e --- /dev/null +++ b/tools/src/display.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef DISPLAY_H +#define DISPLAY_H + +#include "types.h" + +typedef enum _dcp_shutdown_mode { + DCP_QUIESCED = 0, + DCP_SLEEP_IF_EXTERNAL = 1, + DCP_SLEEP = 2, +} dcp_shutdown_mode; + +extern bool display_is_external; + +int display_init(void); +int display_start_dcp(void); +int display_configure(const char *config); +void display_shutdown(dcp_shutdown_mode mode); + +#endif diff --git a/tools/src/dlmalloc/malloc.c b/tools/src/dlmalloc/malloc.c new file mode 100644 index 0000000..31c9d21 --- /dev/null +++ b/tools/src/dlmalloc/malloc.c @@ -0,0 +1,6286 @@ +#include "malloc_config.h" + +/* + This is a version (aka dlmalloc) of malloc/free/realloc written by + Doug Lea and released to the public domain, as explained at + http://creativecommons.org/publicdomain/zero/1.0/ Send questions, + comments, complaints, performance data, etc to dl@cs.oswego.edu + +* Version 2.8.6 Wed Aug 29 06:57:58 2012 Doug Lea + Note: There may be an updated version of this malloc obtainable at + ftp://gee.cs.oswego.edu/pub/misc/malloc.c + Check before installing! + +* Quickstart + + This library is all in one file to simplify the most common usage: + ftp it, compile it (-O3), and link it into another program. All of + the compile-time options default to reasonable values for use on + most platforms. You might later want to step through various + compile-time and dynamic tuning options. 
+ + For convenience, an include file for code using this malloc is at: + ftp://gee.cs.oswego.edu/pub/misc/malloc-2.8.6.h + You don't really need this .h file unless you call functions not + defined in your system include files. The .h file contains only the + excerpts from this file needed for using this malloc on ANSI C/C++ + systems, so long as you haven't changed compile-time options about + naming and tuning parameters. If you do, then you can create your + own malloc.h that does include all settings by cutting at the point + indicated below. Note that you may already by default be using a C + library containing a malloc that is based on some version of this + malloc (for example in linux). You might still want to use the one + in this file to customize settings or to avoid overheads associated + with library versions. + +* Vital statistics: + + Supported pointer/size_t representation: 4 or 8 bytes + size_t MUST be an unsigned type of the same width as + pointers. (If you are using an ancient system that declares + size_t as a signed type, or need it to be a different width + than pointers, you can use a previous release of this malloc + (e.g. 2.7.2) supporting these.) + + Alignment: 8 bytes (minimum) + This suffices for nearly all current machines and C compilers. + However, you can define MALLOC_ALIGNMENT to be wider than this + if necessary (up to 128bytes), at the expense of using more space. + + Minimum overhead per allocated chunk: 4 or 8 bytes (if 4byte sizes) + 8 or 16 bytes (if 8byte sizes) + Each malloced chunk has a hidden word of overhead holding size + and status information, and additional cross-check word + if FOOTERS is defined. + + Minimum allocated size: 4-byte ptrs: 16 bytes (including overhead) + 8-byte ptrs: 32 bytes (including overhead) + + Even a request for zero bytes (i.e., malloc(0)) returns a + pointer to something of the minimum allocatable size. 
+ The maximum overhead wastage (i.e., number of extra bytes + allocated than were requested in malloc) is less than or equal + to the minimum size, except for requests >= mmap_threshold that + are serviced via mmap(), where the worst case wastage is about + 32 bytes plus the remainder from a system page (the minimal + mmap unit); typically 4096 or 8192 bytes. + + Security: static-safe; optionally more or less + The "security" of malloc refers to the ability of malicious + code to accentuate the effects of errors (for example, freeing + space that is not currently malloc'ed or overwriting past the + ends of chunks) in code that calls malloc. This malloc + guarantees not to modify any memory locations below the base of + heap, i.e., static variables, even in the presence of usage + errors. The routines additionally detect most improper frees + and reallocs. All this holds as long as the static bookkeeping + for malloc itself is not corrupted by some other means. This + is only one aspect of security -- these checks do not, and + cannot, detect all possible programming errors. + + If FOOTERS is defined nonzero, then each allocated chunk + carries an additional check word to verify that it was malloced + from its space. These check words are the same within each + execution of a program using malloc, but differ across + executions, so externally crafted fake chunks cannot be + freed. This improves security by rejecting frees/reallocs that + could corrupt heap memory, in addition to the checks preventing + writes to statics that are always on. This may further improve + security at the expense of time and space overhead. (Note that + FOOTERS may also be worth using with MSPACES.) + + By default detected errors cause the program to abort (calling + "abort()"). You can override this to instead proceed past + errors by defining PROCEED_ON_ERROR. 
In this case, a bad free + has no effect, and a malloc that encounters a bad address + caused by user overwrites will ignore the bad address by + dropping pointers and indices to all known memory. This may + be appropriate for programs that should continue if at all + possible in the face of programming errors, although they may + run out of memory because dropped memory is never reclaimed. + + If you don't like either of these options, you can define + CORRUPTION_ERROR_ACTION and USAGE_ERROR_ACTION to do anything + else. And if you are sure that your program using malloc has + no errors or vulnerabilities, you can define INSECURE to 1, + which might (or might not) provide a small performance improvement. + + It is also possible to limit the maximum total allocatable + space, using malloc_set_footprint_limit. This is not + designed as a security feature in itself (calls to set limits + are not screened or privileged), but may be useful as one + aspect of a secure implementation. + + Thread-safety: NOT thread-safe unless USE_LOCKS defined non-zero + When USE_LOCKS is defined, each public call to malloc, free, + etc is surrounded with a lock. By default, this uses a plain + pthread mutex, win32 critical section, or a spin-lock if + available for the platform and not disabled by setting + USE_SPIN_LOCKS=0. However, if USE_RECURSIVE_LOCKS is defined, + recursive versions are used instead (which are not required for + base functionality but may be needed in layered extensions). + Using a global lock is not especially fast, and can be a major + bottleneck. It is designed only to provide minimal protection + in concurrent environments, and to provide a basis for + extensions. If you are using malloc in a concurrent program, + consider instead using nedmalloc + (http://www.nedprod.com/programs/portable/nedmalloc/) or + ptmalloc (See http://www.malloc.de), which are derived from + versions of this malloc. 
+ + System requirements: Any combination of MORECORE and/or MMAP/MUNMAP + This malloc can use unix sbrk or any emulation (invoked using + the CALL_MORECORE macro) and/or mmap/munmap or any emulation + (invoked using CALL_MMAP/CALL_MUNMAP) to get and release system + memory. On most unix systems, it tends to work best if both + MORECORE and MMAP are enabled. On Win32, it uses emulations + based on VirtualAlloc. It also uses common C library functions + like memset. + + Compliance: I believe it is compliant with the Single Unix Specification + (See http://www.unix.org). Also SVID/XPG, ANSI C, and probably + others as well. + +* Overview of algorithms + + This is not the fastest, most space-conserving, most portable, or + most tunable malloc ever written. However it is among the fastest + while also being among the most space-conserving, portable and + tunable. Consistent balance across these factors results in a good + general-purpose allocator for malloc-intensive programs. + + In most ways, this malloc is a best-fit allocator. Generally, it + chooses the best-fitting existing chunk for a request, with ties + broken in approximately least-recently-used order. (This strategy + normally maintains low fragmentation.) However, for requests less + than 256bytes, it deviates from best-fit when there is not an + exactly fitting available chunk by preferring to use space adjacent + to that used for the previous small request, as well as by breaking + ties in approximately most-recently-used order. (These enhance + locality of series of small allocations.) And for very large requests + (>= 256Kb by default), it relies on system memory mapping + facilities, if supported. (This helps avoid carrying around and + possibly fragmenting memory used only for large chunks.) 
+ + All operations (except malloc_stats and mallinfo) have execution + times that are bounded by a constant factor of the number of bits in + a size_t, not counting any clearing in calloc or copying in realloc, + or actions surrounding MORECORE and MMAP that have times + proportional to the number of non-contiguous regions returned by + system allocation routines, which is often just 1. In real-time + applications, you can optionally suppress segment traversals using + NO_SEGMENT_TRAVERSAL, which assures bounded execution even when + system allocators return non-contiguous spaces, at the typical + expense of carrying around more memory and increased fragmentation. + + The implementation is not very modular and seriously overuses + macros. Perhaps someday all C compilers will do as good a job + inlining modular code as can now be done by brute-force expansion, + but now, enough of them seem not to. + + Some compilers issue a lot of warnings about code that is + dead/unreachable only on some platforms, and also about intentional + uses of negation on unsigned types. All known cases of each can be + ignored. + + For a longer but out of date high-level description, see + http://gee.cs.oswego.edu/dl/html/malloc.html + +* MSPACES + If MSPACES is defined, then in addition to malloc, free, etc., + this file also defines mspace_malloc, mspace_free, etc. These + are versions of malloc routines that take an "mspace" argument + obtained using create_mspace, to control all internal bookkeeping. + If ONLY_MSPACES is defined, only these versions are compiled. + So if you would like to use this allocator for only some allocations, + and your system malloc for others, you can compile with + ONLY_MSPACES and then do something like... + static mspace mymspace = create_mspace(0,0); // for example + #define mymalloc(bytes) mspace_malloc(mymspace, bytes) + + (Note: If you only need one instance of an mspace, you can instead + use "USE_DL_PREFIX" to relabel the global malloc.) 
+ + You can similarly create thread-local allocators by storing + mspaces as thread-locals. For example: + static __thread mspace tlms = 0; + void* tlmalloc(size_t bytes) { + if (tlms == 0) tlms = create_mspace(0, 0); + return mspace_malloc(tlms, bytes); + } + void tlfree(void* mem) { mspace_free(tlms, mem); } + + Unless FOOTERS is defined, each mspace is completely independent. + You cannot allocate from one and free to another (although + conformance is only weakly checked, so usage errors are not always + caught). If FOOTERS is defined, then each chunk carries around a tag + indicating its originating mspace, and frees are directed to their + originating spaces. Normally, this requires use of locks. + + ------------------------- Compile-time options --------------------------- + +Be careful in setting #define values for numerical constants of type +size_t. On some systems, literal values are not automatically extended +to size_t precision unless they are explicitly casted. You can also +use the symbolic values MAX_SIZE_T, SIZE_T_ONE, etc below. + +WIN32 default: defined if _WIN32 defined + Defining WIN32 sets up defaults for MS environment and compilers. + Otherwise defaults are for unix. Beware that there seem to be some + cases where this malloc might not be a pure drop-in replacement for + Win32 malloc: Random-looking failures from Win32 GDI API's (eg; + SetDIBits()) may be due to bugs in some video driver implementations + when pixel buffers are malloc()ed, and the region spans more than + one VirtualAlloc()ed region. Because dlmalloc uses a small (64Kb) + default granularity, pixel buffers may straddle virtual allocation + regions more often than when using the Microsoft allocator. You can + avoid this by using VirtualAlloc() and VirtualFree() for all pixel + buffers rather than using malloc(). If this is not possible, + recompile this malloc with a larger DEFAULT_GRANULARITY. 
Note: + in cases where MSC and gcc (cygwin) are known to differ on WIN32, + conditions use _MSC_VER to distinguish them. + +DLMALLOC_EXPORT default: extern + Defines how public APIs are declared. If you want to export via a + Windows DLL, you might define this as + #define DLMALLOC_EXPORT extern __declspec(dllexport) + If you want a POSIX ELF shared object, you might use + #define DLMALLOC_EXPORT extern __attribute__((visibility("default"))) + +MALLOC_ALIGNMENT default: (size_t)(2 * sizeof(void *)) + Controls the minimum alignment for malloc'ed chunks. It must be a + power of two and at least 8, even on machines for which smaller + alignments would suffice. It may be defined as larger than this + though. Note however that code and data structures are optimized for + the case of 8-byte alignment. + +MSPACES default: 0 (false) + If true, compile in support for independent allocation spaces. + This is only supported if HAVE_MMAP is true. + +ONLY_MSPACES default: 0 (false) + If true, only compile in mspace versions, not regular versions. + +USE_LOCKS default: 0 (false) + Causes each call to each public routine to be surrounded with + pthread or WIN32 mutex lock/unlock. (If set true, this can be + overridden on a per-mspace basis for mspace versions.) If set to a + non-zero value other than 1, locks are used, but their + implementation is left out, so lock functions must be supplied manually, + as described below. + +USE_SPIN_LOCKS default: 1 iff USE_LOCKS and spin locks available + If true, uses custom spin locks for locking. This is currently + supported only gcc >= 4.1, older gccs on x86 platforms, and recent + MS compilers. Otherwise, posix locks or win32 critical sections are + used. + +USE_RECURSIVE_LOCKS default: not defined + If defined nonzero, uses recursive (aka reentrant) locks, otherwise + uses plain mutexes. This is not required for malloc proper, but may + be needed for layered allocators such as nedmalloc. 
+ +LOCK_AT_FORK default: not defined + If defined nonzero, performs pthread_atfork upon initialization + to initialize child lock while holding parent lock. The implementation + assumes that pthread locks (not custom locks) are being used. In other + cases, you may need to customize the implementation. + +FOOTERS default: 0 + If true, provide extra checking and dispatching by placing + information in the footers of allocated chunks. This adds + space and time overhead. + +INSECURE default: 0 + If true, omit checks for usage errors and heap space overwrites. + +USE_DL_PREFIX default: NOT defined + Causes compiler to prefix all public routines with the string 'dl'. + This can be useful when you only want to use this malloc in one part + of a program, using your regular system malloc elsewhere. + +MALLOC_INSPECT_ALL default: NOT defined + If defined, compiles malloc_inspect_all and mspace_inspect_all, that + perform traversal of all heap space. Unless access to these + functions is otherwise restricted, you probably do not want to + include them in secure implementations. + +ABORT default: defined as abort() + Defines how to abort on failed checks. On most systems, a failed + check cannot die with an "assert" or even print an informative + message, because the underlying print routines in turn call malloc, + which will fail again. Generally, the best policy is to simply call + abort(). It's not very useful to do more than this because many + errors due to overwriting will show up as address faults (null, odd + addresses etc) rather than malloc-triggered checks, so will also + abort. Also, most compilers know that abort() does not return, so + can better optimize code conditionally calling it. + +PROCEED_ON_ERROR default: defined as 0 (false) + Controls whether detected bad addresses cause them to be bypassed + rather than aborting. If set, detected bad arguments to free and + realloc are ignored. 
And all bookkeeping information is zeroed out + upon a detected overwrite of freed heap space, thus losing the + ability to ever return it from malloc again, but enabling the + application to proceed. If PROCEED_ON_ERROR is defined, the + static variable malloc_corruption_error_count is compiled in + and can be examined to see if errors have occurred. This option + generates slower code than the default abort policy. + +DEBUG default: NOT defined + The DEBUG setting is mainly intended for people trying to modify + this code or diagnose problems when porting to new platforms. + However, it may also be able to better isolate user errors than just + using runtime checks. The assertions in the check routines spell + out in more detail the assumptions and invariants underlying the + algorithms. The checking is fairly extensive, and will slow down + execution noticeably. Calling malloc_stats or mallinfo with DEBUG + set will attempt to check every non-mmapped allocated and free chunk + in the course of computing the summaries. + +ABORT_ON_ASSERT_FAILURE default: defined as 1 (true) + Debugging assertion failures can be nearly impossible if your + version of the assert macro causes malloc to be called, which will + lead to a cascade of further failures, blowing the runtime stack. + ABORT_ON_ASSERT_FAILURE cause assertions failures to call abort(), + which will usually make debugging easier. + +MALLOC_FAILURE_ACTION default: sets errno to ENOMEM, or no-op on win32 + The action to take before "return 0" when malloc fails to be able to + return memory because there is none available. + +HAVE_MORECORE default: 1 (true) unless win32 or ONLY_MSPACES + True if this system supports sbrk or an emulation of it. + +MORECORE default: sbrk + The name of the sbrk-style system routine to call to obtain more + memory. See below for guidance on writing custom MORECORE + functions. The type of the argument to sbrk/MORECORE varies across + systems. 
It cannot be size_t, because it supports negative + arguments, so it is normally the signed type of the same width as + size_t (sometimes declared as "intptr_t"). It doesn't much matter + though. Internally, we only call it with arguments less than half + the max value of a size_t, which should work across all reasonable + possibilities, although sometimes generating compiler warnings. + +MORECORE_CONTIGUOUS default: 1 (true) if HAVE_MORECORE + If true, take advantage of fact that consecutive calls to MORECORE + with positive arguments always return contiguous increasing + addresses. This is true of unix sbrk. It does not hurt too much to + set it true anyway, since malloc copes with non-contiguities. + Setting it false when definitely non-contiguous saves time + and possibly wasted space it would take to discover this though. + +MORECORE_CANNOT_TRIM default: NOT defined + True if MORECORE cannot release space back to the system when given + negative arguments. This is generally necessary only if you are + using a hand-crafted MORECORE function that cannot handle negative + arguments. + +NO_SEGMENT_TRAVERSAL default: 0 + If non-zero, suppresses traversals of memory segments + returned by either MORECORE or CALL_MMAP. This disables + merging of segments that are contiguous, and selectively + releasing them to the OS if unused, but bounds execution times. + +HAVE_MMAP default: 1 (true) + True if this system supports mmap or an emulation of it. If so, and + HAVE_MORECORE is not true, MMAP is used for all system + allocation. If set and HAVE_MORECORE is true as well, MMAP is + primarily used to directly allocate very large blocks. It is also + used as a backup strategy in cases where MORECORE fails to provide + space from system. Note: A single call to MUNMAP is assumed to be + able to unmap memory that may have be allocated using multiple calls + to MMAP, so long as they are adjacent. 
+ +HAVE_MREMAP default: 1 on linux, else 0 + If true realloc() uses mremap() to re-allocate large blocks and + extend or shrink allocation spaces. + +MMAP_CLEARS default: 1 except on WINCE. + True if mmap clears memory so calloc doesn't need to. This is true + for standard unix mmap using /dev/zero and on WIN32 except for WINCE. + +USE_BUILTIN_FFS default: 0 (i.e., not used) + Causes malloc to use the builtin ffs() function to compute indices. + Some compilers may recognize and intrinsify ffs to be faster than the + supplied C version. Also, the case of x86 using gcc is special-cased + to an asm instruction, so is already as fast as it can be, and so + this setting has no effect. Similarly for Win32 under recent MS compilers. + (On most x86s, the asm version is only slightly faster than the C version.) + +malloc_getpagesize default: derive from system includes, or 4096. + The system page size. To the extent possible, this malloc manages + memory from the system in page-size units. This may be (and + usually is) a function rather than a constant. This is ignored + if WIN32, where page size is determined using getSystemInfo during + initialization. + +USE_DEV_RANDOM default: 0 (i.e., not used) + Causes malloc to use /dev/random to initialize secure magic seed for + stamping footers. Otherwise, the current time is used. + +NO_MALLINFO default: 0 + If defined, don't compile "mallinfo". This can be a simple way + of dealing with mismatches between system declarations and + those in this file. + +MALLINFO_FIELD_TYPE default: size_t + The type of the fields in the mallinfo struct. This was originally + defined as "int" in SVID etc, but is more usefully defined as + size_t. The value is used only if HAVE_USR_INCLUDE_MALLOC_H is not set + +NO_MALLOC_STATS default: 0 + If defined, don't compile "malloc_stats". This avoids calls to + fprintf and bringing in stdio dependencies you might not want. 
+ +REALLOC_ZERO_BYTES_FREES default: not defined + This should be set if a call to realloc with zero bytes should + be the same as a call to free. Some people think it should. Otherwise, + since this malloc returns a unique pointer for malloc(0), so does + realloc(p, 0). + +LACKS_UNISTD_H, LACKS_FCNTL_H, LACKS_SYS_PARAM_H, LACKS_SYS_MMAN_H +LACKS_STRINGS_H, LACKS_STRING_H, LACKS_SYS_TYPES_H, LACKS_ERRNO_H +LACKS_STDLIB_H LACKS_SCHED_H LACKS_TIME_H default: NOT defined unless on WIN32 + Define these if your system does not have these header files. + You might need to manually insert some of the declarations they provide. + +DEFAULT_GRANULARITY default: page size if MORECORE_CONTIGUOUS, + system_info.dwAllocationGranularity in WIN32, + otherwise 64K. + Also settable using mallopt(M_GRANULARITY, x) + The unit for allocating and deallocating memory from the system. On + most systems with contiguous MORECORE, there is no reason to + make this more than a page. However, systems with MMAP tend to + either require or encourage larger granularities. You can increase + this value to prevent system allocation functions to be called so + often, especially if they are slow. The value must be at least one + page and must be a power of two. Setting to 0 causes initialization + to either page size or win32 region size. (Note: In previous + versions of malloc, the equivalent of this option was called + "TOP_PAD") + +DEFAULT_TRIM_THRESHOLD default: 2MB + Also settable using mallopt(M_TRIM_THRESHOLD, x) + The maximum amount of unused top-most memory to keep before + releasing via malloc_trim in free(). Automatic trimming is mainly + useful in long-lived programs using contiguous MORECORE. Because + trimming via sbrk can be slow on some systems, and can sometimes be + wasteful (in cases where programs immediately afterward allocate + more large chunks) the value should be high enough so that your + overall system performance would improve by releasing this much + memory. 
As a rough guide, you might set to a value close to the + average size of a process (program) running on your system. + Releasing this much memory would allow such a process to run in + memory. Generally, it is worth tuning trim thresholds when a + program undergoes phases where several large chunks are allocated + and released in ways that can reuse each other's storage, perhaps + mixed with phases where there are no such chunks at all. The trim + value must be greater than page size to have any useful effect. To + disable trimming completely, you can set to MAX_SIZE_T. Note that the trick + some people use of mallocing a huge space and then freeing it at + program startup, in an attempt to reserve system memory, doesn't + have the intended effect under automatic trimming, since that memory + will immediately be returned to the system. + +DEFAULT_MMAP_THRESHOLD default: 256K + Also settable using mallopt(M_MMAP_THRESHOLD, x) + The request size threshold for using MMAP to directly service a + request. Requests of at least this size that cannot be allocated + using already-existing space will be serviced via mmap. (If enough + normal freed space already exists it is used instead.) Using mmap + segregates relatively large chunks of memory so that they can be + individually obtained and released from the host system. A request + serviced through mmap is never reused by any other request (at least + not directly; the system may just so happen to remap successive + requests to the same locations). Segregating space in this way has + the benefits that: Mmapped space can always be individually released + back to the system, which helps keep the system level memory demands + of a long-lived program low. Also, mapped memory doesn't become + `locked' between other chunks, as can happen with normally allocated + chunks, which means that even trimming via malloc_trim would not + release them. 
However, it has the disadvantage that the space + cannot be reclaimed, consolidated, and then used to service later + requests, as happens with normal chunks. The advantages of mmap + nearly always outweigh disadvantages for "large" chunks, but the + value of "large" may vary across systems. The default is an + empirically derived value that works well in most systems. You can + disable mmap by setting to MAX_SIZE_T. + +MAX_RELEASE_CHECK_RATE default: 4095 unless not HAVE_MMAP + The number of consolidated frees between checks to release + unused segments when freeing. When using non-contiguous segments, + especially with multiple mspaces, checking only for topmost space + doesn't always suffice to trigger trimming. To compensate for this, + free() will, with a period of MAX_RELEASE_CHECK_RATE (or the + current number of segments, if greater) try to release unused + segments to the OS when freeing chunks that result in + consolidation. The best value for this parameter is a compromise + between slowing down frees with relatively costly checks that + rarely trigger versus holding on to unused memory. To effectively + disable, set to MAX_SIZE_T. This may lead to a very slight speed + improvement at the expense of carrying around more memory. 
+*/ + +/* Version identifier to allow people to support multiple versions */ +#ifndef DLMALLOC_VERSION +#define DLMALLOC_VERSION 20806 +#endif /* DLMALLOC_VERSION */ + +#ifndef DLMALLOC_EXPORT +#define DLMALLOC_EXPORT extern +#endif + +#ifndef WIN32 +#ifdef _WIN32 +#define WIN32 1 +#endif /* _WIN32 */ +#ifdef _WIN32_WCE +#define LACKS_FCNTL_H +#define WIN32 1 +#endif /* _WIN32_WCE */ +#endif /* WIN32 */ +#ifdef WIN32 +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +#include <tchar.h> +#define HAVE_MMAP 1 +#define HAVE_MORECORE 0 +#define LACKS_UNISTD_H +#define LACKS_SYS_PARAM_H +#define LACKS_SYS_MMAN_H +#define LACKS_STRING_H +#define LACKS_STRINGS_H +#define LACKS_SYS_TYPES_H +#define LACKS_ERRNO_H +#define LACKS_SCHED_H +#ifndef MALLOC_FAILURE_ACTION +#define MALLOC_FAILURE_ACTION +#endif /* MALLOC_FAILURE_ACTION */ +#ifndef MMAP_CLEARS +#ifdef _WIN32_WCE /* WINCE reportedly does not clear */ +#define MMAP_CLEARS 0 +#else +#define MMAP_CLEARS 1 +#endif /* _WIN32_WCE */ +#endif /*MMAP_CLEARS */ +#endif /* WIN32 */ + +#if defined(DARWIN) || defined(_DARWIN) +/* Mac OSX docs advise not to use sbrk; it seems better to use mmap */ +#ifndef HAVE_MORECORE +#define HAVE_MORECORE 0 +#define HAVE_MMAP 1 +/* OSX allocators provide 16 byte alignment */ +#ifndef MALLOC_ALIGNMENT +#define MALLOC_ALIGNMENT ((size_t)16U) +#endif +#endif /* HAVE_MORECORE */ +#endif /* DARWIN */ + +#ifndef LACKS_SYS_TYPES_H +#include <sys/types.h> /* For size_t */ +#endif /* LACKS_SYS_TYPES_H */ + +/* The maximum possible size_t value has all bits set */ +#define MAX_SIZE_T (~(size_t)0) + +#ifndef USE_LOCKS /* ensure true if spin or recursive locks set */ +#if ((defined(USE_SPIN_LOCKS) && USE_SPIN_LOCKS != 0) || \ + (defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0)) +#define USE_LOCKS 1 +#else +#define USE_LOCKS 0 +#endif +#endif /* USE_LOCKS */ + +#if USE_LOCKS /* Spin locks for gcc >= 4.1, older gcc on x86, MSC >= 1310 */ +#if ((defined(__GNUC__) && \ + ((__GNUC__ > 4 || (__GNUC__ 
== 4 && __GNUC_MINOR__ >= 1)) || \ + defined(__i386__) || defined(__x86_64__))) || \ + (defined(_MSC_VER) && _MSC_VER>=1310)) +#ifndef USE_SPIN_LOCKS +#define USE_SPIN_LOCKS 1 +#endif /* USE_SPIN_LOCKS */ +#elif USE_SPIN_LOCKS +#error "USE_SPIN_LOCKS defined without implementation" +#endif /* ... locks available... */ +#elif !defined(USE_SPIN_LOCKS) +#define USE_SPIN_LOCKS 0 +#endif /* USE_LOCKS */ + +#ifndef ONLY_MSPACES +#define ONLY_MSPACES 0 +#endif /* ONLY_MSPACES */ +#ifndef MSPACES +#if ONLY_MSPACES +#define MSPACES 1 +#else /* ONLY_MSPACES */ +#define MSPACES 0 +#endif /* ONLY_MSPACES */ +#endif /* MSPACES */ +#ifndef MALLOC_ALIGNMENT +#define MALLOC_ALIGNMENT ((size_t)(2 * sizeof(void *))) +#endif /* MALLOC_ALIGNMENT */ +#ifndef FOOTERS +#define FOOTERS 0 +#endif /* FOOTERS */ +#ifndef ABORT +#define ABORT abort() +#endif /* ABORT */ +#ifndef ABORT_ON_ASSERT_FAILURE +#define ABORT_ON_ASSERT_FAILURE 1 +#endif /* ABORT_ON_ASSERT_FAILURE */ +#ifndef PROCEED_ON_ERROR +#define PROCEED_ON_ERROR 0 +#endif /* PROCEED_ON_ERROR */ + +#ifndef INSECURE +#define INSECURE 0 +#endif /* INSECURE */ +#ifndef MALLOC_INSPECT_ALL +#define MALLOC_INSPECT_ALL 0 +#endif /* MALLOC_INSPECT_ALL */ +#ifndef HAVE_MMAP +#define HAVE_MMAP 1 +#endif /* HAVE_MMAP */ +#ifndef MMAP_CLEARS +#define MMAP_CLEARS 1 +#endif /* MMAP_CLEARS */ +#ifndef HAVE_MREMAP +#ifdef linux +#define HAVE_MREMAP 1 +#define _GNU_SOURCE /* Turns on mremap() definition */ +#else /* linux */ +#define HAVE_MREMAP 0 +#endif /* linux */ +#endif /* HAVE_MREMAP */ +#ifndef MALLOC_FAILURE_ACTION +#define MALLOC_FAILURE_ACTION errno = ENOMEM; +#endif /* MALLOC_FAILURE_ACTION */ +#ifndef HAVE_MORECORE +#if ONLY_MSPACES +#define HAVE_MORECORE 0 +#else /* ONLY_MSPACES */ +#define HAVE_MORECORE 1 +#endif /* ONLY_MSPACES */ +#endif /* HAVE_MORECORE */ +#if !HAVE_MORECORE +#define MORECORE_CONTIGUOUS 0 +#else /* !HAVE_MORECORE */ +#define MORECORE_DEFAULT sbrk +#ifndef MORECORE_CONTIGUOUS +#define MORECORE_CONTIGUOUS 1 +#endif 
/* MORECORE_CONTIGUOUS */ +#endif /* HAVE_MORECORE */ +#ifndef DEFAULT_GRANULARITY +#if (MORECORE_CONTIGUOUS || defined(WIN32)) +#define DEFAULT_GRANULARITY (0) /* 0 means to compute in init_mparams */ +#else /* MORECORE_CONTIGUOUS */ +#define DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U) +#endif /* MORECORE_CONTIGUOUS */ +#endif /* DEFAULT_GRANULARITY */ +#ifndef DEFAULT_TRIM_THRESHOLD +#ifndef MORECORE_CANNOT_TRIM +#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U) +#else /* MORECORE_CANNOT_TRIM */ +#define DEFAULT_TRIM_THRESHOLD MAX_SIZE_T +#endif /* MORECORE_CANNOT_TRIM */ +#endif /* DEFAULT_TRIM_THRESHOLD */ +#ifndef DEFAULT_MMAP_THRESHOLD +#if HAVE_MMAP +#define DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U) +#else /* HAVE_MMAP */ +#define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T +#endif /* HAVE_MMAP */ +#endif /* DEFAULT_MMAP_THRESHOLD */ +#ifndef MAX_RELEASE_CHECK_RATE +#if HAVE_MMAP +#define MAX_RELEASE_CHECK_RATE 4095 +#else +#define MAX_RELEASE_CHECK_RATE MAX_SIZE_T +#endif /* HAVE_MMAP */ +#endif /* MAX_RELEASE_CHECK_RATE */ +#ifndef USE_BUILTIN_FFS +#define USE_BUILTIN_FFS 0 +#endif /* USE_BUILTIN_FFS */ +#ifndef USE_DEV_RANDOM +#define USE_DEV_RANDOM 0 +#endif /* USE_DEV_RANDOM */ +#ifndef NO_MALLINFO +#define NO_MALLINFO 0 +#endif /* NO_MALLINFO */ +#ifndef MALLINFO_FIELD_TYPE +#define MALLINFO_FIELD_TYPE size_t +#endif /* MALLINFO_FIELD_TYPE */ +#ifndef NO_MALLOC_STATS +#define NO_MALLOC_STATS 0 +#endif /* NO_MALLOC_STATS */ +#ifndef NO_SEGMENT_TRAVERSAL +#define NO_SEGMENT_TRAVERSAL 0 +#endif /* NO_SEGMENT_TRAVERSAL */ + +/* + mallopt tuning options. SVID/XPG defines four standard parameter + numbers for mallopt, normally defined in malloc.h. None of these + are used in this malloc, so setting them has no effect. But this + malloc does support the following options. 
+*/ + +#define M_TRIM_THRESHOLD (-1) +#define M_GRANULARITY (-2) +#define M_MMAP_THRESHOLD (-3) + +/* ------------------------ Mallinfo declarations ------------------------ */ + +#if !NO_MALLINFO +/* + This version of malloc supports the standard SVID/XPG mallinfo + routine that returns a struct containing usage properties and + statistics. It should work on any system that has a + /usr/include/malloc.h defining struct mallinfo. The main + declaration needed is the mallinfo struct that is returned (by-copy) + by mallinfo(). The mallinfo struct contains a bunch of fields that + are not even meaningful in this version of malloc. These fields + are instead filled by mallinfo() with other numbers that might be of + interest. + + HAVE_USR_INCLUDE_MALLOC_H should be set if you have a + /usr/include/malloc.h file that includes a declaration of struct + mallinfo. If so, it is included; else a compliant version is + declared below. These must be precisely the same for mallinfo() to + work. The original SVID version of this struct, defined on most + systems with mallinfo, declares all fields as ints. But some others + define as unsigned long. If your system defines the fields using a + type of different width than listed here, you MUST #include your + system version and #define HAVE_USR_INCLUDE_MALLOC_H. +*/ + +/* #define HAVE_USR_INCLUDE_MALLOC_H */ + +#ifdef HAVE_USR_INCLUDE_MALLOC_H +#include "/usr/include/malloc.h" +#else /* HAVE_USR_INCLUDE_MALLOC_H */ +#ifndef STRUCT_MALLINFO_DECLARED +/* HP-UX (and others?) 
redefines mallinfo unless _STRUCT_MALLINFO is defined */ +#define _STRUCT_MALLINFO +#define STRUCT_MALLINFO_DECLARED 1 +struct mallinfo { + MALLINFO_FIELD_TYPE arena; /* non-mmapped space allocated from system */ + MALLINFO_FIELD_TYPE ordblks; /* number of free chunks */ + MALLINFO_FIELD_TYPE smblks; /* always 0 */ + MALLINFO_FIELD_TYPE hblks; /* always 0 */ + MALLINFO_FIELD_TYPE hblkhd; /* space in mmapped regions */ + MALLINFO_FIELD_TYPE usmblks; /* maximum total allocated space */ + MALLINFO_FIELD_TYPE fsmblks; /* always 0 */ + MALLINFO_FIELD_TYPE uordblks; /* total allocated space */ + MALLINFO_FIELD_TYPE fordblks; /* total free space */ + MALLINFO_FIELD_TYPE keepcost; /* releasable (via malloc_trim) space */ +}; +#endif /* STRUCT_MALLINFO_DECLARED */ +#endif /* HAVE_USR_INCLUDE_MALLOC_H */ +#endif /* NO_MALLINFO */ + +/* + Try to persuade compilers to inline. The most critical functions for + inlining are defined as macros, so these aren't used for them. +*/ + +#ifndef FORCEINLINE + #if defined(__GNUC__) +#define FORCEINLINE __inline __attribute__ ((always_inline)) + #elif defined(_MSC_VER) + #define FORCEINLINE __forceinline + #endif +#endif +#ifndef NOINLINE + #if defined(__GNUC__) + #define NOINLINE __attribute__ ((noinline)) + #elif defined(_MSC_VER) + #define NOINLINE __declspec(noinline) + #else + #define NOINLINE + #endif +#endif + +#ifdef __cplusplus +extern "C" { +#ifndef FORCEINLINE + #define FORCEINLINE inline +#endif +#endif /* __cplusplus */ +#ifndef FORCEINLINE + #define FORCEINLINE +#endif + +#if !ONLY_MSPACES + +/* ------------------- Declarations of public routines ------------------- */ + +#ifndef USE_DL_PREFIX +#define dlcalloc calloc +#define dlfree free +#define dlmalloc malloc +#define dlmemalign memalign +#define dlposix_memalign posix_memalign +#define dlrealloc realloc +#define dlrealloc_in_place realloc_in_place +#define dlvalloc valloc +#define dlpvalloc pvalloc +#define dlmallinfo mallinfo +#define dlmallopt mallopt +#define 
dlmalloc_trim malloc_trim +#define dlmalloc_stats malloc_stats +#define dlmalloc_usable_size malloc_usable_size +#define dlmalloc_footprint malloc_footprint +#define dlmalloc_max_footprint malloc_max_footprint +#define dlmalloc_footprint_limit malloc_footprint_limit +#define dlmalloc_set_footprint_limit malloc_set_footprint_limit +#define dlmalloc_inspect_all malloc_inspect_all +#define dlindependent_calloc independent_calloc +#define dlindependent_comalloc independent_comalloc +#define dlbulk_free bulk_free +#endif /* USE_DL_PREFIX */ + +/* + malloc(size_t n) + Returns a pointer to a newly allocated chunk of at least n bytes, or + null if no space is available, in which case errno is set to ENOMEM + on ANSI C systems. + + If n is zero, malloc returns a minimum-sized chunk. (The minimum + size is 16 bytes on most 32bit systems, and 32 bytes on 64bit + systems.) Note that size_t is an unsigned type, so calls with + arguments that would be negative if signed are interpreted as + requests for huge amounts of space, which will often fail. The + maximum supported value of n differs across systems, but is in all + cases less than the maximum representable value of a size_t. +*/ +DLMALLOC_EXPORT void* dlmalloc(size_t); + +/* + free(void* p) + Releases the chunk of memory pointed to by p, that had been previously + allocated using malloc or a related routine such as realloc. + It has no effect if p is null. If p was not malloced or already + freed, free(p) will by default cause the current program to abort. +*/ +DLMALLOC_EXPORT void dlfree(void*); + +/* + calloc(size_t n_elements, size_t element_size); + Returns a pointer to n_elements * element_size bytes, with all locations + set to zero. +*/ +DLMALLOC_EXPORT void* dlcalloc(size_t, size_t); + +/* + realloc(void* p, size_t n) + Returns a pointer to a chunk of size n that contains the same data + as does chunk p up to the minimum of (n, p's size) bytes, or null + if no space is available. 
+ + The returned pointer may or may not be the same as p. The algorithm + prefers extending p in most cases when possible, otherwise it + employs the equivalent of a malloc-copy-free sequence. + + If p is null, realloc is equivalent to malloc. + + If space is not available, realloc returns null, errno is set (if on + ANSI) and p is NOT freed. + + if n is for fewer bytes than already held by p, the newly unused + space is lopped off and freed if possible. realloc with a size + argument of zero (re)allocates a minimum-sized chunk. + + The old unix realloc convention of allowing the last-free'd chunk + to be used as an argument to realloc is not supported. +*/ +DLMALLOC_EXPORT void* dlrealloc(void*, size_t); + +/* + realloc_in_place(void* p, size_t n) + Resizes the space allocated for p to size n, only if this can be + done without moving p (i.e., only if there is adjacent space + available if n is greater than p's current allocated size, or n is + less than or equal to p's size). This may be used instead of plain + realloc if an alternative allocation strategy is needed upon failure + to expand space; for example, reallocation of a buffer that must be + memory-aligned or cleared. You can use realloc_in_place to trigger + these alternatives only when needed. + + Returns p if successful; otherwise null. +*/ +DLMALLOC_EXPORT void* dlrealloc_in_place(void*, size_t); + +/* + memalign(size_t alignment, size_t n); + Returns a pointer to a newly allocated chunk of n bytes, aligned + in accord with the alignment argument. + + The alignment argument should be a power of two. If the argument is + not a power of two, the nearest greater power is used. + 8-byte alignment is guaranteed by normal malloc calls, so don't + bother calling memalign with an argument of 8 or less. + + Overreliance on memalign is a sure way to fragment space. 
+*/ +DLMALLOC_EXPORT void* dlmemalign(size_t, size_t); + +/* + int posix_memalign(void** pp, size_t alignment, size_t n); + Allocates a chunk of n bytes, aligned in accord with the alignment + argument. Differs from memalign only in that it (1) assigns the + allocated memory to *pp rather than returning it, (2) fails and + returns EINVAL if the alignment is not a power of two (3) fails and + returns ENOMEM if memory cannot be allocated. +*/ +DLMALLOC_EXPORT int dlposix_memalign(void**, size_t, size_t); + +/* + valloc(size_t n); + Equivalent to memalign(pagesize, n), where pagesize is the page + size of the system. If the pagesize is unknown, 4096 is used. +*/ +DLMALLOC_EXPORT void* dlvalloc(size_t); + +/* + mallopt(int parameter_number, int parameter_value) + Sets tunable parameters The format is to provide a + (parameter-number, parameter-value) pair. mallopt then sets the + corresponding parameter to the argument value if it can (i.e., so + long as the value is meaningful), and returns 1 if successful else + 0. To workaround the fact that mallopt is specified to use int, + not size_t parameters, the value -1 is specially treated as the + maximum unsigned size_t value. + + SVID/XPG/ANSI defines four standard param numbers for mallopt, + normally defined in malloc.h. None of these are use in this malloc, + so setting them has no effect. But this malloc also supports other + options in mallopt. See below for details. Briefly, supported + parameters are as follows (listed defaults are for "typical" + configurations). + + Symbol param # default allowed param values + M_TRIM_THRESHOLD -1 2*1024*1024 any (-1 disables) + M_GRANULARITY -2 page size any power of 2 >= page size + M_MMAP_THRESHOLD -3 256*1024 any (or 0 if no MMAP support) +*/ +DLMALLOC_EXPORT int dlmallopt(int, int); + +/* + malloc_footprint(); + Returns the number of bytes obtained from the system. The total + number of bytes allocated by malloc, realloc etc., is less than this + value. 
Unlike mallinfo, this function returns only a precomputed + result, so can be called frequently to monitor memory consumption. + Even if locks are otherwise defined, this function does not use them, + so results might not be up to date. +*/ +DLMALLOC_EXPORT size_t dlmalloc_footprint(void); + +/* + malloc_max_footprint(); + Returns the maximum number of bytes obtained from the system. This + value will be greater than current footprint if deallocated space + has been reclaimed by the system. The peak number of bytes allocated + by malloc, realloc etc., is less than this value. Unlike mallinfo, + this function returns only a precomputed result, so can be called + frequently to monitor memory consumption. Even if locks are + otherwise defined, this function does not use them, so results might + not be up to date. +*/ +DLMALLOC_EXPORT size_t dlmalloc_max_footprint(void); + +/* + malloc_footprint_limit(); + Returns the number of bytes that the heap is allowed to obtain from + the system, returning the last value returned by + malloc_set_footprint_limit, or the maximum size_t value if + never set. The returned value reflects a permission. There is no + guarantee that this number of bytes can actually be obtained from + the system. +*/ +DLMALLOC_EXPORT size_t dlmalloc_footprint_limit(void); + +/* + malloc_set_footprint_limit(); + Sets the maximum number of bytes to obtain from the system, causing + failure returns from malloc and related functions upon attempts to + exceed this value. The argument value may be subject to page + rounding to an enforceable limit; this actual value is returned. + Using an argument of the maximum possible size_t effectively + disables checks. If the argument is less than or equal to the + current malloc_footprint, then all future allocations that require + additional system memory will fail. However, invocation cannot + retroactively deallocate existing used memory. 
+*/ +DLMALLOC_EXPORT size_t dlmalloc_set_footprint_limit(size_t bytes); + +#if MALLOC_INSPECT_ALL +/* + malloc_inspect_all(void(*handler)(void *start, + void *end, + size_t used_bytes, + void* callback_arg), + void* arg); + Traverses the heap and calls the given handler for each managed + region, skipping all bytes that are (or may be) used for bookkeeping + purposes. Traversal does not include include chunks that have been + directly memory mapped. Each reported region begins at the start + address, and continues up to but not including the end address. The + first used_bytes of the region contain allocated data. If + used_bytes is zero, the region is unallocated. The handler is + invoked with the given callback argument. If locks are defined, they + are held during the entire traversal. It is a bad idea to invoke + other malloc functions from within the handler. + + For example, to count the number of in-use chunks with size greater + than 1000, you could write: + static int count = 0; + void count_chunks(void* start, void* end, size_t used, void* arg) { + if (used >= 1000) ++count; + } + then: + malloc_inspect_all(count_chunks, NULL); + + malloc_inspect_all is compiled only if MALLOC_INSPECT_ALL is defined. +*/ +DLMALLOC_EXPORT void dlmalloc_inspect_all(void(*handler)(void*, void *, size_t, void*), + void* arg); + +#endif /* MALLOC_INSPECT_ALL */ + +#if !NO_MALLINFO +/* + mallinfo() + Returns (by copy) a struct containing various summary statistics: + + arena: current total non-mmapped bytes allocated from system + ordblks: the number of free chunks + smblks: always zero. + hblks: current number of mmapped regions + hblkhd: total bytes held in mmapped regions + usmblks: the maximum total allocated space. This will be greater + than current total if trimming has occurred. 
+ fsmblks: always zero + uordblks: current total allocated space (normal or mmapped) + fordblks: total free space + keepcost: the maximum number of bytes that could ideally be released + back to system via malloc_trim. ("ideally" means that + it ignores page restrictions etc.) + + Because these fields are ints, but internal bookkeeping may + be kept as longs, the reported values may wrap around zero and + thus be inaccurate. +*/ +DLMALLOC_EXPORT struct mallinfo dlmallinfo(void); +#endif /* NO_MALLINFO */ + +/* + independent_calloc(size_t n_elements, size_t element_size, void* chunks[]); + + independent_calloc is similar to calloc, but instead of returning a + single cleared space, it returns an array of pointers to n_elements + independent elements that can hold contents of size elem_size, each + of which starts out cleared, and can be independently freed, + realloc'ed etc. The elements are guaranteed to be adjacently + allocated (this is not guaranteed to occur with multiple callocs or + mallocs), which may also improve cache locality in some + applications. + + The "chunks" argument is optional (i.e., may be null, which is + probably the most typical usage). If it is null, the returned array + is itself dynamically allocated and should also be freed when it is + no longer needed. Otherwise, the chunks array must be of at least + n_elements in length. It is filled in with the pointers to the + chunks. + + In either case, independent_calloc returns this pointer array, or + null if the allocation failed. If n_elements is zero and "chunks" + is null, it returns a chunk representing an array with zero elements + (which should be freed if not wanted). + + Each element must be freed when it is no longer needed. This can be + done all at once using bulk_free. + + independent_calloc simplifies and speeds up implementations of many + kinds of pools. 
It may also be useful when constructing large data + structures that initially have a fixed number of fixed-sized nodes, + but the number is not known at compile time, and some of the nodes + may later need to be freed. For example: + + struct Node { int item; struct Node* next; }; + + struct Node* build_list() { + struct Node** pool; + int n = read_number_of_nodes_needed(); + if (n <= 0) return 0; + pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0); + if (pool == 0) die(); + // organize into a linked list... + struct Node* first = pool[0]; + for (i = 0; i < n-1; ++i) + pool[i]->next = pool[i+1]; + free(pool); // Can now free the array (or not, if it is needed later) + return first; + } +*/ +DLMALLOC_EXPORT void** dlindependent_calloc(size_t, size_t, void**); + +/* + independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]); + + independent_comalloc allocates, all at once, a set of n_elements + chunks with sizes indicated in the "sizes" array. It returns + an array of pointers to these elements, each of which can be + independently freed, realloc'ed etc. The elements are guaranteed to + be adjacently allocated (this is not guaranteed to occur with + multiple callocs or mallocs), which may also improve cache locality + in some applications. + + The "chunks" argument is optional (i.e., may be null). If it is null + the returned array is itself dynamically allocated and should also + be freed when it is no longer needed. Otherwise, the chunks array + must be of at least n_elements in length. It is filled in with the + pointers to the chunks. + + In either case, independent_comalloc returns this pointer array, or + null if the allocation failed. If n_elements is zero and chunks is + null, it returns a chunk representing an array with zero elements + (which should be freed if not wanted). + + Each element must be freed when it is no longer needed. This can be + done all at once using bulk_free. 
independent_comalloc differs from independent_calloc in that each
+ */ +DLMALLOC_EXPORT void* dlpvalloc(size_t); + +/* + malloc_trim(size_t pad); + + If possible, gives memory back to the system (via negative arguments + to sbrk) if there is unused memory at the `high' end of the malloc + pool or in unused MMAP segments. You can call this after freeing + large blocks of memory to potentially reduce the system-level memory + requirements of a program. However, it cannot guarantee to reduce + memory. Under some allocation patterns, some large free blocks of + memory will be locked between two used chunks, so they cannot be + given back to the system. + + The `pad' argument to malloc_trim represents the amount of free + trailing space to leave untrimmed. If this argument is zero, only + the minimum amount of memory to maintain internal data structures + will be left. Non-zero arguments can be supplied to maintain enough + trailing space to service future expected allocations without having + to re-obtain memory from the system. + + Malloc_trim returns 1 if it actually released any memory, else 0. +*/ +DLMALLOC_EXPORT int dlmalloc_trim(size_t); + +/* + malloc_stats(); + Prints on stderr the amount of space obtained from the system (both + via sbrk and mmap), the maximum amount (which may be more than + current if malloc_trim and/or munmap got called), and the current + number of bytes allocated via malloc (or realloc, etc) but not yet + freed. Note that this is the number of bytes allocated, not the + number requested. It will be larger than the number requested + because of alignment and bookkeeping overhead. Because it includes + alignment wastage as being in use, this figure may be greater than + zero even when no user-level chunks are allocated. + + The reported current and maximum system memory can be inaccurate if + a program makes other calls to system memory allocation functions + (normally sbrk) outside of malloc. + + malloc_stats prints only the most commonly interesting statistics. 
+ More information can be obtained by calling mallinfo. +*/ +DLMALLOC_EXPORT void dlmalloc_stats(void); + +/* + malloc_usable_size(void* p); + + Returns the number of bytes you can actually use in + an allocated chunk, which may be more than you requested (although + often not) due to alignment and minimum size constraints. + You can use this many bytes without worrying about + overwriting other allocated objects. This is not a particularly great + programming practice. malloc_usable_size can be more useful in + debugging and assertions, for example: + + p = malloc(n); + assert(malloc_usable_size(p) >= 256); +*/ +size_t dlmalloc_usable_size(void*); + +#endif /* ONLY_MSPACES */ + +#if MSPACES + +/* + mspace is an opaque type representing an independent + region of space that supports mspace_malloc, etc. +*/ +typedef void* mspace; + +/* + create_mspace creates and returns a new independent space with the + given initial capacity, or, if 0, the default granularity size. It + returns null if there is no system memory available to create the + space. If argument locked is non-zero, the space uses a separate + lock to control access. The capacity of the space will grow + dynamically as needed to service mspace_malloc requests. You can + control the sizes of incremental increases of this space by + compiling with a different DEFAULT_GRANULARITY or dynamically + setting with mallopt(M_GRANULARITY, value). +*/ +DLMALLOC_EXPORT mspace create_mspace(size_t capacity, int locked); + +/* + destroy_mspace destroys the given space, and attempts to return all + of its memory back to the system, returning the total number of + bytes freed. After destruction, the results of access to all memory + used by the space become undefined. +*/ +DLMALLOC_EXPORT size_t destroy_mspace(mspace msp); + +/* + create_mspace_with_base uses the memory supplied as the initial base + of a new mspace. 
Part (less than 128*sizeof(size_t) bytes) of this + space is used for bookkeeping, so the capacity must be at least this + large. (Otherwise 0 is returned.) When this initial space is + exhausted, additional memory will be obtained from the system. + Destroying this space will deallocate all additionally allocated + space (if possible) but not the initial base. +*/ +DLMALLOC_EXPORT mspace create_mspace_with_base(void* base, size_t capacity, int locked); + +/* + mspace_track_large_chunks controls whether requests for large chunks + are allocated in their own untracked mmapped regions, separate from + others in this mspace. By default large chunks are not tracked, + which reduces fragmentation. However, such chunks are not + necessarily released to the system upon destroy_mspace. Enabling + tracking by setting to true may increase fragmentation, but avoids + leakage when relying on destroy_mspace to release all memory + allocated using this space. The function returns the previous + setting. +*/ +DLMALLOC_EXPORT int mspace_track_large_chunks(mspace msp, int enable); + + +/* + mspace_malloc behaves as malloc, but operates within + the given space. +*/ +DLMALLOC_EXPORT void* mspace_malloc(mspace msp, size_t bytes); + +/* + mspace_free behaves as free, but operates within + the given space. + + If compiled with FOOTERS==1, mspace_free is not actually needed. + free may be called instead of mspace_free because freed chunks from + any space are handled by their originating spaces. +*/ +DLMALLOC_EXPORT void mspace_free(mspace msp, void* mem); + +/* + mspace_realloc behaves as realloc, but operates within + the given space. + + If compiled with FOOTERS==1, mspace_realloc is not actually + needed. realloc may be called instead of mspace_realloc because + realloced chunks from any space are handled by their originating + spaces. 
+*/ +DLMALLOC_EXPORT void* mspace_realloc(mspace msp, void* mem, size_t newsize); + +/* + mspace_calloc behaves as calloc, but operates within + the given space. +*/ +DLMALLOC_EXPORT void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size); + +/* + mspace_memalign behaves as memalign, but operates within + the given space. +*/ +DLMALLOC_EXPORT void* mspace_memalign(mspace msp, size_t alignment, size_t bytes); + +/* + mspace_independent_calloc behaves as independent_calloc, but + operates within the given space. +*/ +DLMALLOC_EXPORT void** mspace_independent_calloc(mspace msp, size_t n_elements, + size_t elem_size, void* chunks[]); + +/* + mspace_independent_comalloc behaves as independent_comalloc, but + operates within the given space. +*/ +DLMALLOC_EXPORT void** mspace_independent_comalloc(mspace msp, size_t n_elements, + size_t sizes[], void* chunks[]); + +/* + mspace_footprint() returns the number of bytes obtained from the + system for this space. +*/ +DLMALLOC_EXPORT size_t mspace_footprint(mspace msp); + +/* + mspace_max_footprint() returns the peak number of bytes obtained from the + system for this space. +*/ +DLMALLOC_EXPORT size_t mspace_max_footprint(mspace msp); + + +#if !NO_MALLINFO +/* + mspace_mallinfo behaves as mallinfo, but reports properties of + the given space. +*/ +DLMALLOC_EXPORT struct mallinfo mspace_mallinfo(mspace msp); +#endif /* NO_MALLINFO */ + +/* + malloc_usable_size(void* p) behaves the same as malloc_usable_size; +*/ +DLMALLOC_EXPORT size_t mspace_usable_size(const void* mem); + +/* + mspace_malloc_stats behaves as malloc_stats, but reports + properties of the given space. +*/ +DLMALLOC_EXPORT void mspace_malloc_stats(mspace msp); + +/* + mspace_trim behaves as malloc_trim, but + operates within the given space. +*/ +DLMALLOC_EXPORT int mspace_trim(mspace msp, size_t pad); + +/* + An alias for mallopt. 
+*/ +DLMALLOC_EXPORT int mspace_mallopt(int, int); + +#endif /* MSPACES */ + +#ifdef __cplusplus +} /* end of extern "C" */ +#endif /* __cplusplus */ + +/* + ======================================================================== + To make a fully customizable malloc.h header file, cut everything + above this line, put into file malloc.h, edit to suit, and #include it + on the next line, as well as in programs that use this malloc. + ======================================================================== +*/ + +/* #include "malloc.h" */ + +/*------------------------------ internal #includes ---------------------- */ + +#ifdef _MSC_VER +#pragma warning( disable : 4146 ) /* no "unsigned" warnings */ +#endif /* _MSC_VER */ +#if !NO_MALLOC_STATS +#include <stdio.h> /* for printing in malloc_stats */ +#endif /* NO_MALLOC_STATS */ +#ifndef LACKS_ERRNO_H +#include <errno.h> /* for MALLOC_FAILURE_ACTION */ +#endif /* LACKS_ERRNO_H */ +#ifdef DEBUG +#if ABORT_ON_ASSERT_FAILURE +#undef assert +#define assert(x) if(!(x)) ABORT +#else /* ABORT_ON_ASSERT_FAILURE */ +#include <assert.h> +#endif /* ABORT_ON_ASSERT_FAILURE */ +#else /* DEBUG */ +#ifndef assert +#define assert(x) +#endif +#define DEBUG 0 +#endif /* DEBUG */ +#if !defined(WIN32) && !defined(LACKS_TIME_H) +#include <time.h> /* for magic initialization */ +#endif /* WIN32 */ +#ifndef LACKS_STDLIB_H +#include <stdlib.h> /* for abort() */ +#endif /* LACKS_STDLIB_H */ +#ifndef LACKS_STRING_H +#include <string.h> /* for memset etc */ +#endif /* LACKS_STRING_H */ +#if USE_BUILTIN_FFS +#ifndef LACKS_STRINGS_H +#include <strings.h> /* for ffs */ +#endif /* LACKS_STRINGS_H */ +#endif /* USE_BUILTIN_FFS */ +#if HAVE_MMAP +#ifndef LACKS_SYS_MMAN_H +/* On some versions of linux, mremap decl in mman.h needs __USE_GNU set */ +#if (defined(linux) && !defined(__USE_GNU)) +#define __USE_GNU 1 +#include <sys/mman.h> /* for mmap */ +#undef __USE_GNU +#else +#include <sys/mman.h> /* for mmap */ +#endif /* linux */ +#endif /* 
LACKS_SYS_MMAN_H */ +#ifndef LACKS_FCNTL_H +#include <fcntl.h> +#endif /* LACKS_FCNTL_H */ +#endif /* HAVE_MMAP */ +#ifndef LACKS_UNISTD_H +#include <unistd.h> /* for sbrk, sysconf */ +#else /* LACKS_UNISTD_H */ +#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) +extern void* sbrk(ptrdiff_t); +#endif /* FreeBSD etc */ +#endif /* LACKS_UNISTD_H */ + +/* Declarations for locking */ +#if USE_LOCKS +#ifndef WIN32 +#if defined (__SVR4) && defined (__sun) /* solaris */ +#include <thread.h> +#elif !defined(LACKS_SCHED_H) +#include <sched.h> +#endif /* solaris or LACKS_SCHED_H */ +#if (defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0) || !USE_SPIN_LOCKS +#include <pthread.h> +#endif /* USE_RECURSIVE_LOCKS ... */ +#elif defined(_MSC_VER) +#ifndef _M_AMD64 +/* These are already defined on AMD64 builds */ +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ +LONG __cdecl _InterlockedCompareExchange(LONG volatile *Dest, LONG Exchange, LONG Comp); +LONG __cdecl _InterlockedExchange(LONG volatile *Target, LONG Value); +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* _M_AMD64 */ +#pragma intrinsic (_InterlockedCompareExchange) +#pragma intrinsic (_InterlockedExchange) +#define interlockedcompareexchange _InterlockedCompareExchange +#define interlockedexchange _InterlockedExchange +#elif defined(WIN32) && defined(__GNUC__) +#define interlockedcompareexchange(a, b, c) __sync_val_compare_and_swap(a, c, b) +#define interlockedexchange __sync_lock_test_and_set +#endif /* Win32 */ +#else /* USE_LOCKS */ +#endif /* USE_LOCKS */ + +#ifndef LOCK_AT_FORK +#define LOCK_AT_FORK 0 +#endif + +/* Declarations for bit scanning on win32 */ +#if defined(_MSC_VER) && _MSC_VER>=1300 +#ifndef BitScanForward /* Try to avoid pulling in WinNT.h */ +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ +unsigned char _BitScanForward(unsigned long *index, unsigned long mask); +unsigned char _BitScanReverse(unsigned long *index, unsigned long mask); 
+#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#define BitScanForward _BitScanForward +#define BitScanReverse _BitScanReverse +#pragma intrinsic(_BitScanForward) +#pragma intrinsic(_BitScanReverse) +#endif /* BitScanForward */ +#endif /* defined(_MSC_VER) && _MSC_VER>=1300 */ + +#ifndef WIN32 +#ifndef malloc_getpagesize +# ifdef _SC_PAGESIZE /* some SVR4 systems omit an underscore */ +# ifndef _SC_PAGE_SIZE +# define _SC_PAGE_SIZE _SC_PAGESIZE +# endif +# endif +# ifdef _SC_PAGE_SIZE +# define malloc_getpagesize sysconf(_SC_PAGE_SIZE) +# else +# if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE) + extern size_t getpagesize(); +# define malloc_getpagesize getpagesize() +# else +# ifdef WIN32 /* use supplied emulation of getpagesize */ +# define malloc_getpagesize getpagesize() +# else +# ifndef LACKS_SYS_PARAM_H +# include <sys/param.h> +# endif +# ifdef EXEC_PAGESIZE +# define malloc_getpagesize EXEC_PAGESIZE +# else +# ifdef NBPG +# ifndef CLSIZE +# define malloc_getpagesize NBPG +# else +# define malloc_getpagesize (NBPG * CLSIZE) +# endif +# else +# ifdef NBPC +# define malloc_getpagesize NBPC +# else +# ifdef PAGESIZE +# define malloc_getpagesize PAGESIZE +# else /* just guess */ +# define malloc_getpagesize ((size_t)4096U) +# endif +# endif +# endif +# endif +# endif +# endif +# endif +#endif +#endif + +/* ------------------- size_t and alignment properties -------------------- */ + +/* The byte and bit size of a size_t */ +#define SIZE_T_SIZE (sizeof(size_t)) +#define SIZE_T_BITSIZE (sizeof(size_t) << 3) + +/* Some constants coerced to size_t */ +/* Annoying but necessary to avoid errors on some platforms */ +#define SIZE_T_ZERO ((size_t)0) +#define SIZE_T_ONE ((size_t)1) +#define SIZE_T_TWO ((size_t)2) +#define SIZE_T_FOUR ((size_t)4) +#define TWO_SIZE_T_SIZES (SIZE_T_SIZE<<1) +#define FOUR_SIZE_T_SIZES (SIZE_T_SIZE<<2) +#define SIX_SIZE_T_SIZES (FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES) +#define HALF_MAX_SIZE_T (MAX_SIZE_T / 2U) + +/* The bit mask 
value corresponding to MALLOC_ALIGNMENT */ +#define CHUNK_ALIGN_MASK (MALLOC_ALIGNMENT - SIZE_T_ONE) + +/* True if address a has acceptable alignment */ +#define is_aligned(A) (((size_t)((A)) & (CHUNK_ALIGN_MASK)) == 0) + +/* the number of bytes to offset an address to align it */ +#define align_offset(A)\ + ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 0 :\ + ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK)) + +/* -------------------------- MMAP preliminaries ------------------------- */ + +/* + If HAVE_MORECORE or HAVE_MMAP are false, we just define calls and + checks to fail so compiler optimizer can delete code rather than + using so many "#if"s. +*/ + + +/* MORECORE and MMAP must return MFAIL on failure */ +#define MFAIL ((void*)(MAX_SIZE_T)) +#define CMFAIL ((char*)(MFAIL)) /* defined for convenience */ + +#if HAVE_MMAP + +#ifndef WIN32 +#define MUNMAP_DEFAULT(a, s) munmap((a), (s)) +#define MMAP_PROT (PROT_READ|PROT_WRITE) +#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) +#define MAP_ANONYMOUS MAP_ANON +#endif /* MAP_ANON */ +#ifdef MAP_ANONYMOUS +#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) +#define MMAP_DEFAULT(s) mmap(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0) +#else /* MAP_ANONYMOUS */ +/* + Nearly all versions of mmap support MAP_ANONYMOUS, so the following + is unlikely to be needed, but is supplied just in case. +*/ +#define MMAP_FLAGS (MAP_PRIVATE) +static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */ +#define MMAP_DEFAULT(s) ((dev_zero_fd < 0) ? \ + (dev_zero_fd = open("/dev/zero", O_RDWR), \ + mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \ + mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) +#endif /* MAP_ANONYMOUS */ + +#define DIRECT_MMAP_DEFAULT(s) MMAP_DEFAULT(s) + +#else /* WIN32 */ + +/* Win32 MMAP via VirtualAlloc */ +static FORCEINLINE void* win32mmap(size_t size) { + void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); + return (ptr != 0)? 
ptr: MFAIL; +} + +/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ +static FORCEINLINE void* win32direct_mmap(size_t size) { + void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, + PAGE_READWRITE); + return (ptr != 0)? ptr: MFAIL; +} + +/* This function supports releasing coalesed segments */ +static FORCEINLINE int win32munmap(void* ptr, size_t size) { + MEMORY_BASIC_INFORMATION minfo; + char* cptr = (char*)ptr; + while (size) { + if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0) + return -1; + if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr || + minfo.State != MEM_COMMIT || minfo.RegionSize > size) + return -1; + if (VirtualFree(cptr, 0, MEM_RELEASE) == 0) + return -1; + cptr += minfo.RegionSize; + size -= minfo.RegionSize; + } + return 0; +} + +#define MMAP_DEFAULT(s) win32mmap(s) +#define MUNMAP_DEFAULT(a, s) win32munmap((a), (s)) +#define DIRECT_MMAP_DEFAULT(s) win32direct_mmap(s) +#endif /* WIN32 */ +#endif /* HAVE_MMAP */ + +#if HAVE_MREMAP +#ifndef WIN32 +#define MREMAP_DEFAULT(addr, osz, nsz, mv) mremap((addr), (osz), (nsz), (mv)) +#endif /* WIN32 */ +#endif /* HAVE_MREMAP */ + +/** + * Define CALL_MORECORE + */ +#if HAVE_MORECORE + #ifdef MORECORE + #define CALL_MORECORE(S) MORECORE(S) + #else /* MORECORE */ + #define CALL_MORECORE(S) MORECORE_DEFAULT(S) + #endif /* MORECORE */ +#else /* HAVE_MORECORE */ + #define CALL_MORECORE(S) MFAIL +#endif /* HAVE_MORECORE */ + +/** + * Define CALL_MMAP/CALL_MUNMAP/CALL_DIRECT_MMAP + */ +#if HAVE_MMAP + #define USE_MMAP_BIT (SIZE_T_ONE) + + #ifdef MMAP + #define CALL_MMAP(s) MMAP(s) + #else /* MMAP */ + #define CALL_MMAP(s) MMAP_DEFAULT(s) + #endif /* MMAP */ + #ifdef MUNMAP + #define CALL_MUNMAP(a, s) MUNMAP((a), (s)) + #else /* MUNMAP */ + #define CALL_MUNMAP(a, s) MUNMAP_DEFAULT((a), (s)) + #endif /* MUNMAP */ + #ifdef DIRECT_MMAP + #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s) + #else /* DIRECT_MMAP */ + #define CALL_DIRECT_MMAP(s) DIRECT_MMAP_DEFAULT(s) + #endif /* 
DIRECT_MMAP */ +#else /* HAVE_MMAP */ + #define USE_MMAP_BIT (SIZE_T_ZERO) + + #define MMAP(s) MFAIL + #define MUNMAP(a, s) (-1) + #define DIRECT_MMAP(s) MFAIL + #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s) + #define CALL_MMAP(s) MMAP(s) + #define CALL_MUNMAP(a, s) MUNMAP((a), (s)) +#endif /* HAVE_MMAP */ + +/** + * Define CALL_MREMAP + */ +#if HAVE_MMAP && HAVE_MREMAP + #ifdef MREMAP + #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP((addr), (osz), (nsz), (mv)) + #else /* MREMAP */ + #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP_DEFAULT((addr), (osz), (nsz), (mv)) + #endif /* MREMAP */ +#else /* HAVE_MMAP && HAVE_MREMAP */ + #define CALL_MREMAP(addr, osz, nsz, mv) MFAIL +#endif /* HAVE_MMAP && HAVE_MREMAP */ + +/* mstate bit set if continguous morecore disabled or failed */ +#define USE_NONCONTIGUOUS_BIT (4U) + +/* segment bit set in create_mspace_with_base */ +#define EXTERN_BIT (8U) + + +/* --------------------------- Lock preliminaries ------------------------ */ + +/* + When locks are defined, there is one global lock, plus + one per-mspace lock. + + The global lock_ensures that mparams.magic and other unique + mparams values are initialized only once. It also protects + sequences of calls to MORECORE. In many cases sys_alloc requires + two calls, that should not be interleaved with calls by other + threads. This does not protect against direct calls to MORECORE + by other threads not using this lock, so there is still code to + cope the best we can on interference. + + Per-mspace locks surround calls to malloc, free, etc. + By default, locks are simple non-reentrant mutexes. + + Because lock-protected regions generally have bounded times, it is + OK to use the supplied simple spinlocks. Spinlocks are likely to + improve performance for lightly contended applications, but worsen + performance under heavy contention. 
+ + If USE_LOCKS is > 1, the definitions of lock routines here are + bypassed, in which case you will need to define the type MLOCK_T, + and at least INITIAL_LOCK, DESTROY_LOCK, ACQUIRE_LOCK, RELEASE_LOCK + and TRY_LOCK. You must also declare a + static MLOCK_T malloc_global_mutex = { initialization values };. + +*/ + +#if !USE_LOCKS +#define USE_LOCK_BIT (0U) +#define INITIAL_LOCK(l) (0) +#define DESTROY_LOCK(l) (0) +#define ACQUIRE_MALLOC_GLOBAL_LOCK() +#define RELEASE_MALLOC_GLOBAL_LOCK() + +#else +#if USE_LOCKS > 1 +/* ----------------------- User-defined locks ------------------------ */ +/* Define your own lock implementation here */ +/* #define INITIAL_LOCK(lk) ... */ +/* #define DESTROY_LOCK(lk) ... */ +/* #define ACQUIRE_LOCK(lk) ... */ +/* #define RELEASE_LOCK(lk) ... */ +/* #define TRY_LOCK(lk) ... */ +/* static MLOCK_T malloc_global_mutex = ... */ + +#elif USE_SPIN_LOCKS + +/* First, define CAS_LOCK and CLEAR_LOCK on ints */ +/* Note CAS_LOCK defined to return 0 on success */ + +#if defined(__GNUC__)&& (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) +#define CAS_LOCK(sl) __sync_lock_test_and_set(sl, 1) +#define CLEAR_LOCK(sl) __sync_lock_release(sl) + +#elif (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) +/* Custom spin locks for older gcc on x86 */ +static FORCEINLINE int x86_cas_lock(int *sl) { + int ret; + int val = 1; + int cmp = 0; + __asm__ __volatile__ ("lock; cmpxchgl %1, %2" + : "=a" (ret) + : "r" (val), "m" (*(sl)), "0"(cmp) + : "memory", "cc"); + return ret; +} + +static FORCEINLINE void x86_clear_lock(int* sl) { + assert(*sl != 0); + int prev = 0; + int ret; + __asm__ __volatile__ ("lock; xchgl %0, %1" + : "=r" (ret) + : "m" (*(sl)), "0"(prev) + : "memory"); +} + +#define CAS_LOCK(sl) x86_cas_lock(sl) +#define CLEAR_LOCK(sl) x86_clear_lock(sl) + +#else /* Win32 MSC */ +#define CAS_LOCK(sl) interlockedexchange(sl, (LONG)1) +#define CLEAR_LOCK(sl) interlockedexchange (sl, (LONG)0) + +#endif /* ... 
gcc spin locks ... */
+*/ +#define THREAD_ID_T pthread_t +#define CURRENT_THREAD pthread_self() +#define EQ_OWNER(X,Y) pthread_equal(X, Y) +#endif + +struct malloc_recursive_lock { + int sl; + unsigned int c; + THREAD_ID_T threadid; +}; + +#define MLOCK_T struct malloc_recursive_lock +static MLOCK_T malloc_global_mutex = { 0, 0, (THREAD_ID_T)0}; + +static FORCEINLINE void recursive_release_lock(MLOCK_T *lk) { + assert(lk->sl != 0); + if (--lk->c == 0) { + CLEAR_LOCK(&lk->sl); + } +} + +static FORCEINLINE int recursive_acquire_lock(MLOCK_T *lk) { + THREAD_ID_T mythreadid = CURRENT_THREAD; + int spins = 0; + for (;;) { + if (*((volatile int *)(&lk->sl)) == 0) { + if (!CAS_LOCK(&lk->sl)) { + lk->threadid = mythreadid; + lk->c = 1; + return 0; + } + } + else if (EQ_OWNER(lk->threadid, mythreadid)) { + ++lk->c; + return 0; + } + if ((++spins & SPINS_PER_YIELD) == 0) { + SPIN_LOCK_YIELD; + } + } +} + +static FORCEINLINE int recursive_try_lock(MLOCK_T *lk) { + THREAD_ID_T mythreadid = CURRENT_THREAD; + if (*((volatile int *)(&lk->sl)) == 0) { + if (!CAS_LOCK(&lk->sl)) { + lk->threadid = mythreadid; + lk->c = 1; + return 1; + } + } + else if (EQ_OWNER(lk->threadid, mythreadid)) { + ++lk->c; + return 1; + } + return 0; +} + +#define RELEASE_LOCK(lk) recursive_release_lock(lk) +#define TRY_LOCK(lk) recursive_try_lock(lk) +#define ACQUIRE_LOCK(lk) recursive_acquire_lock(lk) +#define INITIAL_LOCK(lk) ((lk)->threadid = (THREAD_ID_T)0, (lk)->sl = 0, (lk)->c = 0) +#define DESTROY_LOCK(lk) (0) +#endif /* USE_RECURSIVE_LOCKS */ + +#elif defined(WIN32) /* Win32 critical sections */ +#define MLOCK_T CRITICAL_SECTION +#define ACQUIRE_LOCK(lk) (EnterCriticalSection(lk), 0) +#define RELEASE_LOCK(lk) LeaveCriticalSection(lk) +#define TRY_LOCK(lk) TryEnterCriticalSection(lk) +#define INITIAL_LOCK(lk) (!InitializeCriticalSectionAndSpinCount((lk), 0x80000000|4000)) +#define DESTROY_LOCK(lk) (DeleteCriticalSection(lk), 0) +#define NEED_GLOBAL_LOCK_INIT + +static MLOCK_T malloc_global_mutex; +static volatile LONG 
malloc_global_mutex_status; + +/* Use spin loop to initialize global lock */ +static void init_malloc_global_mutex() { + for (;;) { + long stat = malloc_global_mutex_status; + if (stat > 0) + return; + /* transition to < 0 while initializing, then to > 0) */ + if (stat == 0 && + interlockedcompareexchange(&malloc_global_mutex_status, (LONG)-1, (LONG)0) == 0) { + InitializeCriticalSection(&malloc_global_mutex); + interlockedexchange(&malloc_global_mutex_status, (LONG)1); + return; + } + SleepEx(0, FALSE); + } +} + +#else /* pthreads-based locks */ +#define MLOCK_T pthread_mutex_t +#define ACQUIRE_LOCK(lk) pthread_mutex_lock(lk) +#define RELEASE_LOCK(lk) pthread_mutex_unlock(lk) +#define TRY_LOCK(lk) (!pthread_mutex_trylock(lk)) +#define INITIAL_LOCK(lk) pthread_init_lock(lk) +#define DESTROY_LOCK(lk) pthread_mutex_destroy(lk) + +#if defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0 && defined(linux) && !defined(PTHREAD_MUTEX_RECURSIVE) +/* Cope with old-style linux recursive lock initialization by adding */ +/* skipped internal declaration from pthread.h */ +extern int pthread_mutexattr_setkind_np __P ((pthread_mutexattr_t *__attr, + int __kind)); +#define PTHREAD_MUTEX_RECURSIVE PTHREAD_MUTEX_RECURSIVE_NP +#define pthread_mutexattr_settype(x,y) pthread_mutexattr_setkind_np(x,y) +#endif /* USE_RECURSIVE_LOCKS ... */ + +static MLOCK_T malloc_global_mutex = PTHREAD_MUTEX_INITIALIZER; + +static int pthread_init_lock (MLOCK_T *lk) { + pthread_mutexattr_t attr; + if (pthread_mutexattr_init(&attr)) return 1; +#if defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0 + if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE)) return 1; +#endif + if (pthread_mutex_init(lk, &attr)) return 1; + if (pthread_mutexattr_destroy(&attr)) return 1; + return 0; +} + +#endif /* ... lock types ... 
*/ + +/* Common code for all lock types */ +#define USE_LOCK_BIT (2U) + +#ifndef ACQUIRE_MALLOC_GLOBAL_LOCK +#define ACQUIRE_MALLOC_GLOBAL_LOCK() ACQUIRE_LOCK(&malloc_global_mutex); +#endif + +#ifndef RELEASE_MALLOC_GLOBAL_LOCK +#define RELEASE_MALLOC_GLOBAL_LOCK() RELEASE_LOCK(&malloc_global_mutex); +#endif + +#endif /* USE_LOCKS */ + +/* ----------------------- Chunk representations ------------------------ */ + +/* + (The following includes lightly edited explanations by Colin Plumb.) + + The malloc_chunk declaration below is misleading (but accurate and + necessary). It declares a "view" into memory allowing access to + necessary fields at known offsets from a given base. + + Chunks of memory are maintained using a `boundary tag' method as + originally described by Knuth. (See the paper by Paul Wilson + ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a survey of such + techniques.) Sizes of free chunks are stored both in the front of + each chunk and at the end. This makes consolidating fragmented + chunks into bigger chunks fast. The head fields also hold bits + representing whether chunks are free or in use. + + Here are some pictures to make it clearer. They are "exploded" to + show that the state of a chunk can be thought of as extending from + the high 31 bits of the head field of its header through the + prev_foot and PINUSE_BIT bit of the following chunk header. 
+ + A chunk that's in use looks like: + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk (if P = 0) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P| + | Size of this chunk 1| +-+ + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + +- -+ + | | + +- -+ + | : + +- size - sizeof(size_t) available payload bytes -+ + : | + chunk-> +- -+ + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |1| + | Size of next chunk (may or may not be in use) | +-+ + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + And if it's free, it looks like this: + + chunk-> +- -+ + | User payload (must be in use, or we would have merged!) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P| + | Size of this chunk 0| +-+ + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Next pointer | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Prev pointer | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | : + +- size - sizeof(struct chunk) unused bytes -+ + : | + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of this chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |0| + | Size of next chunk (must be in use, or we would have merged)| +-+ + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | : + +- User payload -+ + : | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |0| + +-+ + Note that since we always merge adjacent free chunks, the chunks + adjacent to a free chunk must be in use. 
+ + Given a pointer to a chunk (which can be derived trivially from the + payload pointer) we can, in O(1) time, find out whether the adjacent + chunks are free, and if so, unlink them from the lists that they + are on and merge them with the current chunk. + + Chunks always begin on even word boundaries, so the mem portion + (which is returned to the user) is also on an even word boundary, and + thus at least double-word aligned. + + The P (PINUSE_BIT) bit, stored in the unused low-order bit of the + chunk size (which is always a multiple of two words), is an in-use + bit for the *previous* chunk. If that bit is *clear*, then the + word before the current chunk size contains the previous chunk + size, and can be used to find the front of the previous chunk. + The very first chunk allocated always has this bit set, preventing + access to non-existent (or non-owned) memory. If pinuse is set for + any given chunk, then you CANNOT determine the size of the + previous chunk, and might even get a memory addressing fault when + trying to do so. + + The C (CINUSE_BIT) bit, stored in the unused second-lowest bit of + the chunk size redundantly records whether the current chunk is + inuse (unless the chunk is mmapped). This redundancy enables usage + checks within free and realloc, and reduces indirection when freeing + and consolidating chunks. + + Each freshly allocated chunk must have both cinuse and pinuse set. + That is, each allocated chunk borders either a previously allocated + and still in-use chunk, or the base of its memory arena. This is + ensured by making all allocations from the `lowest' part of any + found chunk. Further, no free chunk physically borders another one, + so each free chunk is known to be preceded and followed by either + inuse chunks or the ends of memory. + + Note that the `foot' of the current chunk is actually represented + as the prev_foot of the NEXT chunk. 
This makes it easier to + deal with alignments etc but can be very confusing when trying + to extend or adapt this code. + + The exceptions to all this are + + 1. The special chunk `top' is the top-most available chunk (i.e., + the one bordering the end of available memory). It is treated + specially. Top is never included in any bin, is used only if + no other chunk is available, and is released back to the + system if it is very large (see M_TRIM_THRESHOLD). In effect, + the top chunk is treated as larger (and thus less well + fitting) than any other available chunk. The top chunk + doesn't update its trailing size field since there is no next + contiguous chunk that would have to index off it. However, + space is still allocated for it (TOP_FOOT_SIZE) to enable + separation or merging when space is extended. + + 3. Chunks allocated via mmap, have both cinuse and pinuse bits + cleared in their head fields. Because they are allocated + one-by-one, each must carry its own prev_foot field, which is + also used to hold the offset this chunk has within its mmapped + region, which is needed to preserve alignment. Each mmapped + chunk is trailed by the first two fields of a fake next-chunk + for sake of usage checks. + +*/ + +struct malloc_chunk { + size_t prev_foot; /* Size of previous chunk (if free). */ + size_t head; /* Size and inuse bits. */ + struct malloc_chunk* fd; /* double links -- used only if free. 
*/ + struct malloc_chunk* bk; +}; + +typedef struct malloc_chunk mchunk; +typedef struct malloc_chunk* mchunkptr; +typedef struct malloc_chunk* sbinptr; /* The type of bins of chunks */ +typedef unsigned int bindex_t; /* Described below */ +typedef unsigned int binmap_t; /* Described below */ +typedef unsigned int flag_t; /* The type of various bit flag sets */ + +/* ------------------- Chunks sizes and alignments ----------------------- */ + +#define MCHUNK_SIZE (sizeof(mchunk)) + +#if FOOTERS +#define CHUNK_OVERHEAD (TWO_SIZE_T_SIZES) +#else /* FOOTERS */ +#define CHUNK_OVERHEAD (SIZE_T_SIZE) +#endif /* FOOTERS */ + +/* MMapped chunks need a second word of overhead ... */ +#define MMAP_CHUNK_OVERHEAD (TWO_SIZE_T_SIZES) +/* ... and additional padding for fake next-chunk at foot */ +#define MMAP_FOOT_PAD (FOUR_SIZE_T_SIZES) + +/* The smallest size we can malloc is an aligned minimal chunk */ +#define MIN_CHUNK_SIZE\ + ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK) + +/* conversion from malloc headers to user pointers, and back */ +#define chunk2mem(p) ((void*)((char*)(p) + TWO_SIZE_T_SIZES)) +#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - TWO_SIZE_T_SIZES)) +/* chunk associated with aligned address A */ +#define align_as_chunk(A) (mchunkptr)((A) + align_offset(chunk2mem(A))) + +/* Bounds on request (not chunk) sizes. */ +#define MAX_REQUEST ((-MIN_CHUNK_SIZE) << 2) +#define MIN_REQUEST (MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE) + +/* pad request bytes into a usable size */ +#define pad_request(req) \ + (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK) + +/* pad request, checking for minimum (but not maximum) */ +#define request2size(req) \ + (((req) < MIN_REQUEST)? 
MIN_CHUNK_SIZE : pad_request(req)) + + +/* ------------------ Operations on head and foot fields ----------------- */ + +/* + The head field of a chunk is or'ed with PINUSE_BIT when previous + adjacent chunk in use, and or'ed with CINUSE_BIT if this chunk is in + use, unless mmapped, in which case both bits are cleared. + + FLAG4_BIT is not used by this malloc, but might be useful in extensions. +*/ + +#define PINUSE_BIT (SIZE_T_ONE) +#define CINUSE_BIT (SIZE_T_TWO) +#define FLAG4_BIT (SIZE_T_FOUR) +#define INUSE_BITS (PINUSE_BIT|CINUSE_BIT) +#define FLAG_BITS (PINUSE_BIT|CINUSE_BIT|FLAG4_BIT) + +/* Head value for fenceposts */ +#define FENCEPOST_HEAD (INUSE_BITS|SIZE_T_SIZE) + +/* extraction of fields from head words */ +#define cinuse(p) ((p)->head & CINUSE_BIT) +#define pinuse(p) ((p)->head & PINUSE_BIT) +#define flag4inuse(p) ((p)->head & FLAG4_BIT) +#define is_inuse(p) (((p)->head & INUSE_BITS) != PINUSE_BIT) +#define is_mmapped(p) (((p)->head & INUSE_BITS) == 0) + +#define chunksize(p) ((p)->head & ~(FLAG_BITS)) + +#define clear_pinuse(p) ((p)->head &= ~PINUSE_BIT) +#define set_flag4(p) ((p)->head |= FLAG4_BIT) +#define clear_flag4(p) ((p)->head &= ~FLAG4_BIT) + +/* Treat space at ptr +/- offset as a chunk */ +#define chunk_plus_offset(p, s) ((mchunkptr)(((char*)(p)) + (s))) +#define chunk_minus_offset(p, s) ((mchunkptr)(((char*)(p)) - (s))) + +/* Ptr to next or previous physical malloc_chunk. 
*/ +#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->head & ~FLAG_BITS))) +#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_foot) )) + +/* extract next chunk's pinuse bit */ +#define next_pinuse(p) ((next_chunk(p)->head) & PINUSE_BIT) + +/* Get/set size at footer */ +#define get_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot) +#define set_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot = (s)) + +/* Set size, pinuse bit, and foot */ +#define set_size_and_pinuse_of_free_chunk(p, s)\ + ((p)->head = (s|PINUSE_BIT), set_foot(p, s)) + +/* Set size, pinuse bit, foot, and clear next pinuse */ +#define set_free_with_pinuse(p, s, n)\ + (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s)) + +/* Get the internal overhead associated with chunk p */ +#define overhead_for(p)\ + (is_mmapped(p)? MMAP_CHUNK_OVERHEAD : CHUNK_OVERHEAD) + +/* Return true if malloced space is not necessarily cleared */ +#if MMAP_CLEARS +#define calloc_must_clear(p) (!is_mmapped(p)) +#else /* MMAP_CLEARS */ +#define calloc_must_clear(p) (1) +#endif /* MMAP_CLEARS */ + +/* ---------------------- Overlaid data structures ----------------------- */ + +/* + When chunks are not in use, they are treated as nodes of either + lists or trees. + + "Small" chunks are stored in circular doubly-linked lists, and look + like this: + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `head:' | Size of chunk, in bytes |P| + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Forward pointer to next chunk in list | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Back pointer to previous chunk in list | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Unused space (may be 0 bytes long) . + . . + . 
| +nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `foot:' | Size of chunk, in bytes | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Larger chunks are kept in a form of bitwise digital trees (aka + tries) keyed on chunksizes. Because malloc_tree_chunks are only for + free chunks greater than 256 bytes, their size doesn't impose any + constraints on user chunk sizes. Each node looks like: + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `head:' | Size of chunk, in bytes |P| + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Forward pointer to next chunk of same size | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Back pointer to previous chunk of same size | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Pointer to left child (child[0]) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Pointer to right child (child[1]) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Pointer to parent | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | bin index of this chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Unused space . + . | +nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `foot:' | Size of chunk, in bytes | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Each tree holding treenodes is a tree of unique chunk sizes. Chunks + of the same size are arranged in a circularly-linked list, with only + the oldest chunk (the next to be used, in our FIFO ordering) + actually in the tree. (Tree members are distinguished by a non-null + parent pointer.) 
If a chunk with the same size an an existing node + is inserted, it is linked off the existing node using pointers that + work in the same way as fd/bk pointers of small chunks. + + Each tree contains a power of 2 sized range of chunk sizes (the + smallest is 0x100 <= x < 0x180), which is is divided in half at each + tree level, with the chunks in the smaller half of the range (0x100 + <= x < 0x140 for the top nose) in the left subtree and the larger + half (0x140 <= x < 0x180) in the right subtree. This is, of course, + done by inspecting individual bits. + + Using these rules, each node's left subtree contains all smaller + sizes than its right subtree. However, the node at the root of each + subtree has no particular ordering relationship to either. (The + dividing line between the subtree sizes is based on trie relation.) + If we remove the last chunk of a given size from the interior of the + tree, we need to replace it with a leaf node. The tree ordering + rules permit a node to be replaced by any leaf below it. + + The smallest chunk in a tree (a common operation in a best-fit + allocator) can be found by walking a path to the leftmost leaf in + the tree. Unlike a usual binary tree, where we follow left child + pointers until we reach a null, here we follow the right child + pointer any time the left one is null, until we reach a leaf with + both child pointers null. The smallest chunk in the tree will be + somewhere along that path. + + The worst case number of steps to add, find, or remove a node is + bounded by the number of bits differentiating chunks within + bins. Under current bin calculations, this ranges from 6 up to 21 + (for 32 bit sizes) or up to 53 (for 64 bit sizes). The typical case + is of course much better. 
+*/ + +struct malloc_tree_chunk { + /* The first four fields must be compatible with malloc_chunk */ + size_t prev_foot; + size_t head; + struct malloc_tree_chunk* fd; + struct malloc_tree_chunk* bk; + + struct malloc_tree_chunk* child[2]; + struct malloc_tree_chunk* parent; + bindex_t index; +}; + +typedef struct malloc_tree_chunk tchunk; +typedef struct malloc_tree_chunk* tchunkptr; +typedef struct malloc_tree_chunk* tbinptr; /* The type of bins of trees */ + +/* A little helper macro for trees */ +#define leftmost_child(t) ((t)->child[0] != 0? (t)->child[0] : (t)->child[1]) + +/* ----------------------------- Segments -------------------------------- */ + +/* + Each malloc space may include non-contiguous segments, held in a + list headed by an embedded malloc_segment record representing the + top-most space. Segments also include flags holding properties of + the space. Large chunks that are directly allocated by mmap are not + included in this list. They are instead independently created and + destroyed without otherwise keeping track of them. + + Segment management mainly comes into play for spaces allocated by + MMAP. Any call to MMAP might or might not return memory that is + adjacent to an existing segment. MORECORE normally contiguously + extends the current space, so this space is almost always adjacent, + which is simpler and faster to deal with. (This is why MORECORE is + used preferentially to MMAP when both are available -- see + sys_alloc.) When allocating using MMAP, we don't use any of the + hinting mechanisms (inconsistently) supported in various + implementations of unix mmap, or distinguish reserving from + committing memory. Instead, we just ask for space, and exploit + contiguity when we get it. It is probably possible to do + better than this on some systems, but no general scheme seems + to be significantly better. 
+ + Management entails a simpler variant of the consolidation scheme + used for chunks to reduce fragmentation -- new adjacent memory is + normally prepended or appended to an existing segment. However, + there are limitations compared to chunk consolidation that mostly + reflect the fact that segment processing is relatively infrequent + (occurring only when getting memory from system) and that we + don't expect to have huge numbers of segments: + + * Segments are not indexed, so traversal requires linear scans. (It + would be possible to index these, but is not worth the extra + overhead and complexity for most programs on most platforms.) + * New segments are only appended to old ones when holding top-most + memory; if they cannot be prepended to others, they are held in + different segments. + + Except for the top-most segment of an mstate, each segment record + is kept at the tail of its segment. Segments are added by pushing + segment records onto the list headed by &mstate.seg for the + containing mstate. + + Segment flags control allocation/merge/deallocation policies: + * If EXTERN_BIT set, then we did not allocate this segment, + and so should not try to deallocate or merge with others. + (This currently holds only for the initial segment passed + into create_mspace_with_base.) + * If USE_MMAP_BIT set, the segment may be merged with + other surrounding mmapped segments and trimmed/de-allocated + using munmap. + * If neither bit is set, then the segment was obtained using + MORECORE so can be merged with surrounding MORECORE'd segments + and deallocated/trimmed using MORECORE with negative arguments. 
+*/ + +struct malloc_segment { + char* base; /* base address */ + size_t size; /* allocated size */ + struct malloc_segment* next; /* ptr to next segment */ + flag_t sflags; /* mmap and extern flag */ +}; + +#define is_mmapped_segment(S) ((S)->sflags & USE_MMAP_BIT) +#define is_extern_segment(S) ((S)->sflags & EXTERN_BIT) + +typedef struct malloc_segment msegment; +typedef struct malloc_segment* msegmentptr; + +/* ---------------------------- malloc_state ----------------------------- */ + +/* + A malloc_state holds all of the bookkeeping for a space. + The main fields are: + + Top + The topmost chunk of the currently active segment. Its size is + cached in topsize. The actual size of topmost space is + topsize+TOP_FOOT_SIZE, which includes space reserved for adding + fenceposts and segment records if necessary when getting more + space from the system. The size at which to autotrim top is + cached from mparams in trim_check, except that it is disabled if + an autotrim fails. + + Designated victim (dv) + This is the preferred chunk for servicing small requests that + don't have exact fits. It is normally the chunk split off most + recently to service another small request. Its size is cached in + dvsize. The link fields of this chunk are not maintained since it + is not kept in a bin. + + SmallBins + An array of bin headers for free chunks. These bins hold chunks + with sizes less than MIN_LARGE_SIZE bytes. Each bin contains + chunks of all the same size, spaced 8 bytes apart. To simplify + use in double-linked lists, each bin header acts as a malloc_chunk + pointing to the real first node, if it exists (else pointing to + itself). This avoids special-casing for headers. But to avoid + waste, we allocate only the fd/bk pointers of bins, and then use + repositioning tricks to treat these as the fields of a chunk. + + TreeBins + Treebins are pointers to the roots of trees holding a range of + sizes. 
There are 2 equally spaced treebins for each power of two + from TREE_SHIFT to TREE_SHIFT+16. The last bin holds anything + larger. + + Bin maps + There is one bit map for small bins ("smallmap") and one for + treebins ("treemap). Each bin sets its bit when non-empty, and + clears the bit when empty. Bit operations are then used to avoid + bin-by-bin searching -- nearly all "search" is done without ever + looking at bins that won't be selected. The bit maps + conservatively use 32 bits per map word, even if on 64bit system. + For a good description of some of the bit-based techniques used + here, see Henry S. Warren Jr's book "Hacker's Delight" (and + supplement at http://hackersdelight.org/). Many of these are + intended to reduce the branchiness of paths through malloc etc, as + well as to reduce the number of memory locations read or written. + + Segments + A list of segments headed by an embedded malloc_segment record + representing the initial space. + + Address check support + The least_addr field is the least address ever obtained from + MORECORE or MMAP. Attempted frees and reallocs of any address less + than this are trapped (unless INSECURE is defined). + + Magic tag + A cross-check field that should always hold same value as mparams.magic. + + Max allowed footprint + The maximum allowed bytes to allocate from system (zero means no limit) + + Flags + Bits recording whether to use MMAP, locks, or contiguous MORECORE + + Statistics + Each space keeps track of current and maximum system memory + obtained via MORECORE or MMAP. + + Trim support + Fields holding the amount of unused topmost memory that should trigger + trimming, and a counter to force periodic scanning to release unused + non-topmost segments. + + Locking + If USE_LOCKS is defined, the "mutex" lock is acquired and released + around every public call using this mspace. + + Extension support + A void* pointer and a size_t field that can be used to help implement + extensions to this malloc. 
+*/ + +/* Bin types, widths and sizes */ +#define NSMALLBINS (32U) +#define NTREEBINS (32U) +#define SMALLBIN_SHIFT (3U) +#define SMALLBIN_WIDTH (SIZE_T_ONE << SMALLBIN_SHIFT) +#define TREEBIN_SHIFT (8U) +#define MIN_LARGE_SIZE (SIZE_T_ONE << TREEBIN_SHIFT) +#define MAX_SMALL_SIZE (MIN_LARGE_SIZE - SIZE_T_ONE) +#define MAX_SMALL_REQUEST (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD) + +struct malloc_state { + binmap_t smallmap; + binmap_t treemap; + size_t dvsize; + size_t topsize; + char* least_addr; + mchunkptr dv; + mchunkptr top; + size_t trim_check; + size_t release_checks; + size_t magic; + mchunkptr smallbins[(NSMALLBINS+1)*2]; + tbinptr treebins[NTREEBINS]; + size_t footprint; + size_t max_footprint; + size_t footprint_limit; /* zero means no limit */ + flag_t mflags; +#if USE_LOCKS + MLOCK_T mutex; /* locate lock among fields that rarely change */ +#endif /* USE_LOCKS */ + msegment seg; + void* extp; /* Unused but available for extensions */ + size_t exts; +}; + +typedef struct malloc_state* mstate; + +/* ------------- Global malloc_state and malloc_params ------------------- */ + +/* + malloc_params holds global properties, including those that can be + dynamically set using mallopt. There is a single instance, mparams, + initialized in init_mparams. Note that the non-zeroness of "magic" + also serves as an initialization flag. 
+*/ + +struct malloc_params { + size_t magic; + size_t page_size; + size_t granularity; + size_t mmap_threshold; + size_t trim_threshold; + flag_t default_mflags; +}; + +static struct malloc_params mparams; + +/* Ensure mparams initialized */ +#define ensure_initialization() (void)(mparams.magic != 0 || init_mparams()) + +#if !ONLY_MSPACES + +/* The global malloc_state used for all non-"mspace" calls */ +static struct malloc_state _gm_; +#define gm (&_gm_) +#define is_global(M) ((M) == &_gm_) + +#endif /* !ONLY_MSPACES */ + +#define is_initialized(M) ((M)->top != 0) + +/* -------------------------- system alloc setup ------------------------- */ + +/* Operations on mflags */ + +#define use_lock(M) ((M)->mflags & USE_LOCK_BIT) +#define enable_lock(M) ((M)->mflags |= USE_LOCK_BIT) +#if USE_LOCKS +#define disable_lock(M) ((M)->mflags &= ~USE_LOCK_BIT) +#else +#define disable_lock(M) +#endif + +#define use_mmap(M) ((M)->mflags & USE_MMAP_BIT) +#define enable_mmap(M) ((M)->mflags |= USE_MMAP_BIT) +#if HAVE_MMAP +#define disable_mmap(M) ((M)->mflags &= ~USE_MMAP_BIT) +#else +#define disable_mmap(M) +#endif + +#define use_noncontiguous(M) ((M)->mflags & USE_NONCONTIGUOUS_BIT) +#define disable_contiguous(M) ((M)->mflags |= USE_NONCONTIGUOUS_BIT) + +#define set_lock(M,L)\ + ((M)->mflags = (L)?\ + ((M)->mflags | USE_LOCK_BIT) :\ + ((M)->mflags & ~USE_LOCK_BIT)) + +/* page-align a size */ +#define page_align(S)\ + (((S) + (mparams.page_size - SIZE_T_ONE)) & ~(mparams.page_size - SIZE_T_ONE)) + +/* granularity-align a size */ +#define granularity_align(S)\ + (((S) + (mparams.granularity - SIZE_T_ONE))\ + & ~(mparams.granularity - SIZE_T_ONE)) + + +/* For mmap, use granularity alignment on windows, else page-align */ +#ifdef WIN32 +#define mmap_align(S) granularity_align(S) +#else +#define mmap_align(S) page_align(S) +#endif + +/* For sys_alloc, enough padding to ensure can malloc request on success */ +#define SYS_ALLOC_PADDING (TOP_FOOT_SIZE + MALLOC_ALIGNMENT) + +#define 
is_page_aligned(S)\
+ (((size_t)(S) & (mparams.page_size - SIZE_T_ONE)) == 0)
+#define is_granularity_aligned(S)\
+ (((size_t)(S) & (mparams.granularity - SIZE_T_ONE)) == 0)
+
+/* True if segment S holds address A */
+#define segment_holds(S, A)\
+ ((char*)(A) >= S->base && (char*)(A) < S->base + S->size)
+
+/* Return segment holding given address */
+/* Linear walk of the singly-linked segment list rooted at m->seg;
+ returns 0 if no segment contains addr. */
+static msegmentptr segment_holding(mstate m, char* addr) {
+ msegmentptr sp = &m->seg;
+ for (;;) {
+ if (addr >= sp->base && addr < sp->base + sp->size)
+ return sp;
+ if ((sp = sp->next) == 0)
+ return 0;
+ }
+}
+
+/* Return true if segment contains a segment link */
+/* i.e. some malloc_segment record in m's list lies inside
+ [ss->base, ss->base + ss->size). Note the comparison is against the
+ record's own address (char*)sp, not its base. */
+static int has_segment_link(mstate m, msegmentptr ss) {
+ msegmentptr sp = &m->seg;
+ for (;;) {
+ if ((char*)sp >= ss->base && (char*)sp < ss->base + ss->size)
+ return 1;
+ if ((sp = sp->next) == 0)
+ return 0;
+ }
+}
+
+#ifndef MORECORE_CANNOT_TRIM
+#define should_trim(M,s) ((s) > (M)->trim_check)
+#else /* MORECORE_CANNOT_TRIM */
+#define should_trim(M,s) (0)
+#endif /* MORECORE_CANNOT_TRIM */
+
+/*
+ TOP_FOOT_SIZE is padding at the end of a segment, including space
+ that may be needed to place segment records and fenceposts when new
+ noncontiguous segments are added.
+*/
+#define TOP_FOOT_SIZE\
+ (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE)
+
+
+/* ------------------------------- Hooks -------------------------------- */
+
+/*
+ PREACTION should be defined to return 0 on success, and nonzero on
+ failure. If you are not using locking, you can redefine these to do
+ anything you like.
+*/
+
+#if USE_LOCKS
+#define PREACTION(M) ((use_lock(M))? ACQUIRE_LOCK(&(M)->mutex) : 0)
+#define POSTACTION(M) { if (use_lock(M)) RELEASE_LOCK(&(M)->mutex); }
+#else /* USE_LOCKS */
+
+#ifndef PREACTION
+#define PREACTION(M) (0)
+#endif /* PREACTION */
+
+#ifndef POSTACTION
+#define POSTACTION(M)
+#endif /* POSTACTION */
+
+#endif /* USE_LOCKS */
+
+/*
+ CORRUPTION_ERROR_ACTION is triggered upon detected bad addresses.
+ USAGE_ERROR_ACTION is triggered on detected bad frees and + reallocs. The argument p is an address that might have triggered the + fault. It is ignored by the two predefined actions, but might be + useful in custom actions that try to help diagnose errors. +*/ + +#if PROCEED_ON_ERROR + +/* A count of the number of corruption errors causing resets */ +int malloc_corruption_error_count; + +/* default corruption action */ +static void reset_on_error(mstate m); + +#define CORRUPTION_ERROR_ACTION(m) reset_on_error(m) +#define USAGE_ERROR_ACTION(m, p) + +#else /* PROCEED_ON_ERROR */ + +#ifndef CORRUPTION_ERROR_ACTION +#define CORRUPTION_ERROR_ACTION(m) ABORT +#endif /* CORRUPTION_ERROR_ACTION */ + +#ifndef USAGE_ERROR_ACTION +#define USAGE_ERROR_ACTION(m,p) ABORT +#endif /* USAGE_ERROR_ACTION */ + +#endif /* PROCEED_ON_ERROR */ + + +/* -------------------------- Debugging setup ---------------------------- */ + +#if ! DEBUG + +#define check_free_chunk(M,P) +#define check_inuse_chunk(M,P) +#define check_malloced_chunk(M,P,N) +#define check_mmapped_chunk(M,P) +#define check_malloc_state(M) +#define check_top_chunk(M,P) + +#else /* DEBUG */ +#define check_free_chunk(M,P) do_check_free_chunk(M,P) +#define check_inuse_chunk(M,P) do_check_inuse_chunk(M,P) +#define check_top_chunk(M,P) do_check_top_chunk(M,P) +#define check_malloced_chunk(M,P,N) do_check_malloced_chunk(M,P,N) +#define check_mmapped_chunk(M,P) do_check_mmapped_chunk(M,P) +#define check_malloc_state(M) do_check_malloc_state(M) + +static void do_check_any_chunk(mstate m, mchunkptr p); +static void do_check_top_chunk(mstate m, mchunkptr p); +static void do_check_mmapped_chunk(mstate m, mchunkptr p); +static void do_check_inuse_chunk(mstate m, mchunkptr p); +static void do_check_free_chunk(mstate m, mchunkptr p); +static void do_check_malloced_chunk(mstate m, void* mem, size_t s); +static void do_check_tree(mstate m, tchunkptr t); +static void do_check_treebin(mstate m, bindex_t i); +static void 
do_check_smallbin(mstate m, bindex_t i); +static void do_check_malloc_state(mstate m); +static int bin_find(mstate m, mchunkptr x); +static size_t traverse_and_check(mstate m); +#endif /* DEBUG */ + +/* ---------------------------- Indexing Bins ---------------------------- */ + +#define is_small(s) (((s) >> SMALLBIN_SHIFT) < NSMALLBINS) +#define small_index(s) (bindex_t)((s) >> SMALLBIN_SHIFT) +#define small_index2size(i) ((i) << SMALLBIN_SHIFT) +#define MIN_SMALL_INDEX (small_index(MIN_CHUNK_SIZE)) + +/* addressing by index. See above about smallbin repositioning */ +#define smallbin_at(M, i) ((sbinptr)((char*)&((M)->smallbins[(i)<<1]))) +#define treebin_at(M,i) (&((M)->treebins[i])) + +/* assign tree index for size S to variable I. Use x86 asm if possible */ +#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) +#define compute_tree_index(S, I)\ +{\ + unsigned int X = S >> TREEBIN_SHIFT;\ + if (X == 0)\ + I = 0;\ + else if (X > 0xFFFF)\ + I = NTREEBINS-1;\ + else {\ + unsigned int K = (unsigned) sizeof(X)*__CHAR_BIT__ - 1 - (unsigned) __builtin_clz(X); \ + I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ + }\ +} + +#elif defined (__INTEL_COMPILER) +#define compute_tree_index(S, I)\ +{\ + size_t X = S >> TREEBIN_SHIFT;\ + if (X == 0)\ + I = 0;\ + else if (X > 0xFFFF)\ + I = NTREEBINS-1;\ + else {\ + unsigned int K = _bit_scan_reverse (X); \ + I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ + }\ +} + +#elif defined(_MSC_VER) && _MSC_VER>=1300 +#define compute_tree_index(S, I)\ +{\ + size_t X = S >> TREEBIN_SHIFT;\ + if (X == 0)\ + I = 0;\ + else if (X > 0xFFFF)\ + I = NTREEBINS-1;\ + else {\ + unsigned int K;\ + _BitScanReverse((DWORD *) &K, (DWORD) X);\ + I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ + }\ +} + +#else /* GNUC */ +#define compute_tree_index(S, I)\ +{\ + size_t X = S >> TREEBIN_SHIFT;\ + if (X == 0)\ + I = 0;\ + else if (X > 0xFFFF)\ + I = NTREEBINS-1;\ + else {\ + unsigned int Y 
= (unsigned int)X;\ + unsigned int N = ((Y - 0x100) >> 16) & 8;\ + unsigned int K = (((Y <<= N) - 0x1000) >> 16) & 4;\ + N += K;\ + N += K = (((Y <<= K) - 0x4000) >> 16) & 2;\ + K = 14 - N + ((Y <<= K) >> 15);\ + I = (K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1));\ + }\ +} +#endif /* GNUC */ + +/* Bit representing maximum resolved size in a treebin at i */ +#define bit_for_tree_index(i) \ + (i == NTREEBINS-1)? (SIZE_T_BITSIZE-1) : (((i) >> 1) + TREEBIN_SHIFT - 2) + +/* Shift placing maximum resolved bit in a treebin at i as sign bit */ +#define leftshift_for_tree_index(i) \ + ((i == NTREEBINS-1)? 0 : \ + ((SIZE_T_BITSIZE-SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2))) + +/* The size of the smallest chunk held in bin with index i */ +#define minsize_for_tree_index(i) \ + ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) | \ + (((size_t)((i) & SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1))) + + +/* ------------------------ Operations on bin maps ----------------------- */ + +/* bit corresponding to given index */ +#define idx2bit(i) ((binmap_t)(1) << (i)) + +/* Mark/Clear bits with given index */ +#define mark_smallmap(M,i) ((M)->smallmap |= idx2bit(i)) +#define clear_smallmap(M,i) ((M)->smallmap &= ~idx2bit(i)) +#define smallmap_is_marked(M,i) ((M)->smallmap & idx2bit(i)) + +#define mark_treemap(M,i) ((M)->treemap |= idx2bit(i)) +#define clear_treemap(M,i) ((M)->treemap &= ~idx2bit(i)) +#define treemap_is_marked(M,i) ((M)->treemap & idx2bit(i)) + +/* isolate the least set bit of a bitmap */ +#define least_bit(x) ((x) & -(x)) + +/* mask with all bits to left of least bit of x on */ +#define left_bits(x) ((x<<1) | -(x<<1)) + +/* mask with all bits to left of or equal to least bit of x on */ +#define same_or_left_bits(x) ((x) | -(x)) + +/* index corresponding to given bit. 
Use x86 asm if possible */ + +#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) +#define compute_bit2idx(X, I)\ +{\ + unsigned int J;\ + J = __builtin_ctz(X); \ + I = (bindex_t)J;\ +} + +#elif defined (__INTEL_COMPILER) +#define compute_bit2idx(X, I)\ +{\ + unsigned int J;\ + J = _bit_scan_forward (X); \ + I = (bindex_t)J;\ +} + +#elif defined(_MSC_VER) && _MSC_VER>=1300 +#define compute_bit2idx(X, I)\ +{\ + unsigned int J;\ + _BitScanForward((DWORD *) &J, X);\ + I = (bindex_t)J;\ +} + +#elif USE_BUILTIN_FFS +#define compute_bit2idx(X, I) I = ffs(X)-1 + +#else +#define compute_bit2idx(X, I)\ +{\ + unsigned int Y = X - 1;\ + unsigned int K = Y >> (16-4) & 16;\ + unsigned int N = K; Y >>= K;\ + N += K = Y >> (8-3) & 8; Y >>= K;\ + N += K = Y >> (4-2) & 4; Y >>= K;\ + N += K = Y >> (2-1) & 2; Y >>= K;\ + N += K = Y >> (1-0) & 1; Y >>= K;\ + I = (bindex_t)(N + Y);\ +} +#endif /* GNUC */ + + +/* ----------------------- Runtime Check Support ------------------------- */ + +/* + For security, the main invariant is that malloc/free/etc never + writes to a static address other than malloc_state, unless static + malloc_state itself has been corrupted, which cannot occur via + malloc (because of these checks). In essence this means that we + believe all pointers, sizes, maps etc held in malloc_state, but + check all of those linked or offsetted from other embedded data + structures. These checks are interspersed with main code in a way + that tends to minimize their run-time cost. + + When FOOTERS is defined, in addition to range checking, we also + verify footer fields of inuse chunks, which can be used guarantee + that the mstate controlling malloc/free is intact. 
This is a + streamlined version of the approach described by William Robertson + et al in "Run-time Detection of Heap-based Overflows" LISA'03 + http://www.usenix.org/events/lisa03/tech/robertson.html The footer + of an inuse chunk holds the xor of its mstate and a random seed, + that is checked upon calls to free() and realloc(). This is + (probabalistically) unguessable from outside the program, but can be + computed by any code successfully malloc'ing any chunk, so does not + itself provide protection against code that has already broken + security through some other means. Unlike Robertson et al, we + always dynamically check addresses of all offset chunks (previous, + next, etc). This turns out to be cheaper than relying on hashes. +*/ + +#if !INSECURE +/* Check if address a is at least as high as any from MORECORE or MMAP */ +#define ok_address(M, a) ((char*)(a) >= (M)->least_addr) +/* Check if address of next chunk n is higher than base chunk p */ +#define ok_next(p, n) ((char*)(p) < (char*)(n)) +/* Check if p has inuse status */ +#define ok_inuse(p) is_inuse(p) +/* Check if p has its pinuse bit on */ +#define ok_pinuse(p) pinuse(p) + +#else /* !INSECURE */ +#define ok_address(M, a) (1) +#define ok_next(b, n) (1) +#define ok_inuse(p) (1) +#define ok_pinuse(p) (1) +#endif /* !INSECURE */ + +#if (FOOTERS && !INSECURE) +/* Check if (alleged) mstate m has expected magic field */ +#define ok_magic(M) ((M)->magic == mparams.magic) +#else /* (FOOTERS && !INSECURE) */ +#define ok_magic(M) (1) +#endif /* (FOOTERS && !INSECURE) */ + +/* In gcc, use __builtin_expect to minimize impact of checks */ +#if !INSECURE +#if defined(__GNUC__) && __GNUC__ >= 3 +#define RTCHECK(e) __builtin_expect(e, 1) +#else /* GNUC */ +#define RTCHECK(e) (e) +#endif /* GNUC */ +#else /* !INSECURE */ +#define RTCHECK(e) (1) +#endif /* !INSECURE */ + +/* macros to set up inuse chunks with or without footers */ + +#if !FOOTERS + +#define mark_inuse_foot(M,p,s) + +/* Macros for setting head/foot 
of non-mmapped chunks */ + +/* Set cinuse bit and pinuse bit of next chunk */ +#define set_inuse(M,p,s)\ + ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\ + ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT) + +/* Set cinuse and pinuse of this chunk and pinuse of next chunk */ +#define set_inuse_and_pinuse(M,p,s)\ + ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ + ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT) + +/* Set size, cinuse and pinuse bit of this chunk */ +#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\ + ((p)->head = (s|PINUSE_BIT|CINUSE_BIT)) + +#else /* FOOTERS */ + +/* Set foot of inuse chunk to be xor of mstate and seed */ +#define mark_inuse_foot(M,p,s)\ + (((mchunkptr)((char*)(p) + (s)))->prev_foot = ((size_t)(M) ^ mparams.magic)) + +#define get_mstate_for(p)\ + ((mstate)(((mchunkptr)((char*)(p) +\ + (chunksize(p))))->prev_foot ^ mparams.magic)) + +#define set_inuse(M,p,s)\ + ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\ + (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT), \ + mark_inuse_foot(M,p,s)) + +#define set_inuse_and_pinuse(M,p,s)\ + ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ + (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT),\ + mark_inuse_foot(M,p,s)) + +#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\ + ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ + mark_inuse_foot(M, p, s)) + +#endif /* !FOOTERS */ + +/* ---------------------------- setting mparams -------------------------- */ + +#if LOCK_AT_FORK +static void pre_fork(void) { ACQUIRE_LOCK(&(gm)->mutex); } +static void post_fork_parent(void) { RELEASE_LOCK(&(gm)->mutex); } +static void post_fork_child(void) { INITIAL_LOCK(&(gm)->mutex); } +#endif /* LOCK_AT_FORK */ + +/* Initialize mparams */ +static int init_mparams(void) { +#ifdef NEED_GLOBAL_LOCK_INIT + if (malloc_global_mutex_status <= 0) + init_malloc_global_mutex(); +#endif + + ACQUIRE_MALLOC_GLOBAL_LOCK(); + if (mparams.magic == 0) { + size_t magic; + size_t psize; + size_t gsize; + +#ifndef 
WIN32 + psize = malloc_getpagesize; + gsize = ((DEFAULT_GRANULARITY != 0)? DEFAULT_GRANULARITY : psize); +#else /* WIN32 */ + { + SYSTEM_INFO system_info; + GetSystemInfo(&system_info); + psize = system_info.dwPageSize; + gsize = ((DEFAULT_GRANULARITY != 0)? + DEFAULT_GRANULARITY : system_info.dwAllocationGranularity); + } +#endif /* WIN32 */ + + /* Sanity-check configuration: + size_t must be unsigned and as wide as pointer type. + ints must be at least 4 bytes. + alignment must be at least 8. + Alignment, min chunk size, and page size must all be powers of 2. + */ + if ((sizeof(size_t) != sizeof(char*)) || + (MAX_SIZE_T < MIN_CHUNK_SIZE) || + (sizeof(int) < 4) || + (MALLOC_ALIGNMENT < (size_t)8U) || + ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-SIZE_T_ONE)) != 0) || + ((MCHUNK_SIZE & (MCHUNK_SIZE-SIZE_T_ONE)) != 0) || + ((gsize & (gsize-SIZE_T_ONE)) != 0) || + ((psize & (psize-SIZE_T_ONE)) != 0)) + ABORT; + mparams.granularity = gsize; + mparams.page_size = psize; + mparams.mmap_threshold = DEFAULT_MMAP_THRESHOLD; + mparams.trim_threshold = DEFAULT_TRIM_THRESHOLD; +#if MORECORE_CONTIGUOUS + mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT; +#else /* MORECORE_CONTIGUOUS */ + mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT|USE_NONCONTIGUOUS_BIT; +#endif /* MORECORE_CONTIGUOUS */ + +#if !ONLY_MSPACES + /* Set up lock for main malloc area */ + gm->mflags = mparams.default_mflags; + (void)INITIAL_LOCK(&gm->mutex); +#endif +#if LOCK_AT_FORK + pthread_atfork(&pre_fork, &post_fork_parent, &post_fork_child); +#endif + + { +#if USE_DEV_RANDOM + int fd; + unsigned char buf[sizeof(size_t)]; + /* Try to use /dev/urandom, else fall back on using time */ + if ((fd = open("/dev/urandom", O_RDONLY)) >= 0 && + read(fd, buf, sizeof(buf)) == sizeof(buf)) { + magic = *((size_t *) buf); + close(fd); + } + else +#endif /* USE_DEV_RANDOM */ +#ifdef WIN32 + magic = (size_t)(GetTickCount() ^ (size_t)0x55555555U); +#elif defined(LACKS_TIME_H) + magic = (size_t)&magic ^ (size_t)0x55555555U; 
+#else + magic = (size_t)(time(0) ^ (size_t)0x55555555U); +#endif + magic |= (size_t)8U; /* ensure nonzero */ + magic &= ~(size_t)7U; /* improve chances of fault for bad values */ + /* Until memory modes commonly available, use volatile-write */ + (*(volatile size_t *)(&(mparams.magic))) = magic; + } + } + + RELEASE_MALLOC_GLOBAL_LOCK(); + return 1; +} + +/* support for mallopt */ +static int change_mparam(int param_number, int value) { + size_t val; + ensure_initialization(); + val = (value == -1)? MAX_SIZE_T : (size_t)value; + switch(param_number) { + case M_TRIM_THRESHOLD: + mparams.trim_threshold = val; + return 1; + case M_GRANULARITY: + if (val >= mparams.page_size && ((val & (val-1)) == 0)) { + mparams.granularity = val; + return 1; + } + else + return 0; + case M_MMAP_THRESHOLD: + mparams.mmap_threshold = val; + return 1; + default: + return 0; + } +} + +#if DEBUG +/* ------------------------- Debugging Support --------------------------- */ + +/* Check properties of any chunk, whether free, inuse, mmapped etc */ +static void do_check_any_chunk(mstate m, mchunkptr p) { + assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); + assert(ok_address(m, p)); +} + +/* Check properties of top chunk */ +static void do_check_top_chunk(mstate m, mchunkptr p) { + msegmentptr sp = segment_holding(m, (char*)p); + size_t sz = p->head & ~INUSE_BITS; /* third-lowest bit can be set! 
*/ + assert(sp != 0); + assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); + assert(ok_address(m, p)); + assert(sz == m->topsize); + assert(sz > 0); + assert(sz == ((sp->base + sp->size) - (char*)p) - TOP_FOOT_SIZE); + assert(pinuse(p)); + assert(!pinuse(chunk_plus_offset(p, sz))); +} + +/* Check properties of (inuse) mmapped chunks */ +static void do_check_mmapped_chunk(mstate m, mchunkptr p) { + size_t sz = chunksize(p); + size_t len = (sz + (p->prev_foot) + MMAP_FOOT_PAD); + assert(is_mmapped(p)); + assert(use_mmap(m)); + assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); + assert(ok_address(m, p)); + assert(!is_small(sz)); + assert((len & (mparams.page_size-SIZE_T_ONE)) == 0); + assert(chunk_plus_offset(p, sz)->head == FENCEPOST_HEAD); + assert(chunk_plus_offset(p, sz+SIZE_T_SIZE)->head == 0); +} + +/* Check properties of inuse chunks */ +static void do_check_inuse_chunk(mstate m, mchunkptr p) { + do_check_any_chunk(m, p); + assert(is_inuse(p)); + assert(next_pinuse(p)); + /* If not pinuse and not mmapped, previous chunk has OK offset */ + assert(is_mmapped(p) || pinuse(p) || next_chunk(prev_chunk(p)) == p); + if (is_mmapped(p)) + do_check_mmapped_chunk(m, p); +} + +/* Check properties of free chunks */ +static void do_check_free_chunk(mstate m, mchunkptr p) { + size_t sz = chunksize(p); + mchunkptr next = chunk_plus_offset(p, sz); + do_check_any_chunk(m, p); + assert(!is_inuse(p)); + assert(!next_pinuse(p)); + assert (!is_mmapped(p)); + if (p != m->dv && p != m->top) { + if (sz >= MIN_CHUNK_SIZE) { + assert((sz & CHUNK_ALIGN_MASK) == 0); + assert(is_aligned(chunk2mem(p))); + assert(next->prev_foot == sz); + assert(pinuse(p)); + assert (next == m->top || is_inuse(next)); + assert(p->fd->bk == p); + assert(p->bk->fd == p); + } + else /* markers are always of size SIZE_T_SIZE */ + assert(sz == SIZE_T_SIZE); + } +} + +/* Check properties of malloced chunks at the point they are malloced */ +static void do_check_malloced_chunk(mstate 
m, void* mem, size_t s) { + if (mem != 0) { + mchunkptr p = mem2chunk(mem); + size_t sz = p->head & ~INUSE_BITS; + do_check_inuse_chunk(m, p); + assert((sz & CHUNK_ALIGN_MASK) == 0); + assert(sz >= MIN_CHUNK_SIZE); + assert(sz >= s); + /* unless mmapped, size is less than MIN_CHUNK_SIZE more than request */ + assert(is_mmapped(p) || sz < (s + MIN_CHUNK_SIZE)); + } +} + +/* Check a tree and its subtrees. */ +static void do_check_tree(mstate m, tchunkptr t) { + tchunkptr head = 0; + tchunkptr u = t; + bindex_t tindex = t->index; + size_t tsize = chunksize(t); + bindex_t idx; + compute_tree_index(tsize, idx); + assert(tindex == idx); + assert(tsize >= MIN_LARGE_SIZE); + assert(tsize >= minsize_for_tree_index(idx)); + assert((idx == NTREEBINS-1) || (tsize < minsize_for_tree_index((idx+1)))); + + do { /* traverse through chain of same-sized nodes */ + do_check_any_chunk(m, ((mchunkptr)u)); + assert(u->index == tindex); + assert(chunksize(u) == tsize); + assert(!is_inuse(u)); + assert(!next_pinuse(u)); + assert(u->fd->bk == u); + assert(u->bk->fd == u); + if (u->parent == 0) { + assert(u->child[0] == 0); + assert(u->child[1] == 0); + } + else { + assert(head == 0); /* only one node on chain has parent */ + head = u; + assert(u->parent != u); + assert (u->parent->child[0] == u || + u->parent->child[1] == u || + *((tbinptr*)(u->parent)) == u); + if (u->child[0] != 0) { + assert(u->child[0]->parent == u); + assert(u->child[0] != u); + do_check_tree(m, u->child[0]); + } + if (u->child[1] != 0) { + assert(u->child[1]->parent == u); + assert(u->child[1] != u); + do_check_tree(m, u->child[1]); + } + if (u->child[0] != 0 && u->child[1] != 0) { + assert(chunksize(u->child[0]) < chunksize(u->child[1])); + } + } + u = u->fd; + } while (u != t); + assert(head != 0); +} + +/* Check all the chunks in a treebin. 
*/ +static void do_check_treebin(mstate m, bindex_t i) { + tbinptr* tb = treebin_at(m, i); + tchunkptr t = *tb; + int empty = (m->treemap & (1U << i)) == 0; + if (t == 0) + assert(empty); + if (!empty) + do_check_tree(m, t); +} + +/* Check all the chunks in a smallbin. */ +static void do_check_smallbin(mstate m, bindex_t i) { + sbinptr b = smallbin_at(m, i); + mchunkptr p = b->bk; + unsigned int empty = (m->smallmap & (1U << i)) == 0; + if (p == b) + assert(empty); + if (!empty) { + for (; p != b; p = p->bk) { + size_t size = chunksize(p); + mchunkptr q; + /* each chunk claims to be free */ + do_check_free_chunk(m, p); + /* chunk belongs in bin */ + assert(small_index(size) == i); + assert(p->bk == b || chunksize(p->bk) == chunksize(p)); + /* chunk is followed by an inuse chunk */ + q = next_chunk(p); + if (q->head != FENCEPOST_HEAD) + do_check_inuse_chunk(m, q); + } + } +} + +/* Find x in a bin. Used in other check functions. */ +static int bin_find(mstate m, mchunkptr x) { + size_t size = chunksize(x); + if (is_small(size)) { + bindex_t sidx = small_index(size); + sbinptr b = smallbin_at(m, sidx); + if (smallmap_is_marked(m, sidx)) { + mchunkptr p = b; + do { + if (p == x) + return 1; + } while ((p = p->fd) != b); + } + } + else { + bindex_t tidx; + compute_tree_index(size, tidx); + if (treemap_is_marked(m, tidx)) { + tchunkptr t = *treebin_at(m, tidx); + size_t sizebits = size << leftshift_for_tree_index(tidx); + while (t != 0 && chunksize(t) != size) { + t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]; + sizebits <<= 1; + } + if (t != 0) { + tchunkptr u = t; + do { + if (u == (tchunkptr)x) + return 1; + } while ((u = u->fd) != t); + } + } + } + return 0; +} + +/* Traverse each chunk and check it; return total */ +static size_t traverse_and_check(mstate m) { + size_t sum = 0; + if (is_initialized(m)) { + msegmentptr s = &m->seg; + sum += m->topsize + TOP_FOOT_SIZE; + while (s != 0) { + mchunkptr q = align_as_chunk(s->base); + mchunkptr lastq = 0; + 
assert(pinuse(q)); + while (segment_holds(s, q) && + q != m->top && q->head != FENCEPOST_HEAD) { + sum += chunksize(q); + if (is_inuse(q)) { + assert(!bin_find(m, q)); + do_check_inuse_chunk(m, q); + } + else { + assert(q == m->dv || bin_find(m, q)); + assert(lastq == 0 || is_inuse(lastq)); /* Not 2 consecutive free */ + do_check_free_chunk(m, q); + } + lastq = q; + q = next_chunk(q); + } + s = s->next; + } + } + return sum; +} + + +/* Check all properties of malloc_state. */ +static void do_check_malloc_state(mstate m) { + bindex_t i; + size_t total; + /* check bins */ + for (i = 0; i < NSMALLBINS; ++i) + do_check_smallbin(m, i); + for (i = 0; i < NTREEBINS; ++i) + do_check_treebin(m, i); + + if (m->dvsize != 0) { /* check dv chunk */ + do_check_any_chunk(m, m->dv); + assert(m->dvsize == chunksize(m->dv)); + assert(m->dvsize >= MIN_CHUNK_SIZE); + assert(bin_find(m, m->dv) == 0); + } + + if (m->top != 0) { /* check top chunk */ + do_check_top_chunk(m, m->top); + /*assert(m->topsize == chunksize(m->top)); redundant */ + assert(m->topsize > 0); + assert(bin_find(m, m->top) == 0); + } + + total = traverse_and_check(m); + assert(total <= m->footprint); + assert(m->footprint <= m->max_footprint); +} +#endif /* DEBUG */ + +/* ----------------------------- statistics ------------------------------ */ + +#if !NO_MALLINFO +static struct mallinfo internal_mallinfo(mstate m) { + struct mallinfo nm = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + ensure_initialization(); + if (!PREACTION(m)) { + check_malloc_state(m); + if (is_initialized(m)) { + size_t nfree = SIZE_T_ONE; /* top always free */ + size_t mfree = m->topsize + TOP_FOOT_SIZE; + size_t sum = mfree; + msegmentptr s = &m->seg; + while (s != 0) { + mchunkptr q = align_as_chunk(s->base); + while (segment_holds(s, q) && + q != m->top && q->head != FENCEPOST_HEAD) { + size_t sz = chunksize(q); + sum += sz; + if (!is_inuse(q)) { + mfree += sz; + ++nfree; + } + q = next_chunk(q); + } + s = s->next; + } + + nm.arena = sum; + 
nm.ordblks = nfree; + nm.hblkhd = m->footprint - sum; + nm.usmblks = m->max_footprint; + nm.uordblks = m->footprint - mfree; + nm.fordblks = mfree; + nm.keepcost = m->topsize; + } + + POSTACTION(m); + } + return nm; +} +#endif /* !NO_MALLINFO */ + +#if !NO_MALLOC_STATS +static void internal_malloc_stats(mstate m) { + ensure_initialization(); + if (!PREACTION(m)) { + size_t maxfp = 0; + size_t fp = 0; + size_t used = 0; + check_malloc_state(m); + if (is_initialized(m)) { + msegmentptr s = &m->seg; + maxfp = m->max_footprint; + fp = m->footprint; + used = fp - (m->topsize + TOP_FOOT_SIZE); + + while (s != 0) { + mchunkptr q = align_as_chunk(s->base); + while (segment_holds(s, q) && + q != m->top && q->head != FENCEPOST_HEAD) { + if (!is_inuse(q)) + used -= chunksize(q); + q = next_chunk(q); + } + s = s->next; + } + } + POSTACTION(m); /* drop lock */ + fprintf(stderr, "max system bytes = %10lu\n", (unsigned long)(maxfp)); + fprintf(stderr, "system bytes = %10lu\n", (unsigned long)(fp)); + fprintf(stderr, "in use bytes = %10lu\n", (unsigned long)(used)); + } +} +#endif /* NO_MALLOC_STATS */ + +/* ----------------------- Operations on smallbins ----------------------- */ + +/* + Various forms of linking and unlinking are defined as macros. Even + the ones for trees, which are very long but have very short typical + paths. This is ugly but reduces reliance on inlining support of + compilers. 
+*/ + +/* Link a free chunk into a smallbin */ +#define insert_small_chunk(M, P, S) {\ + bindex_t I = small_index(S);\ + mchunkptr B = smallbin_at(M, I);\ + mchunkptr F = B;\ + assert(S >= MIN_CHUNK_SIZE);\ + if (!smallmap_is_marked(M, I))\ + mark_smallmap(M, I);\ + else if (RTCHECK(ok_address(M, B->fd)))\ + F = B->fd;\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + B->fd = P;\ + F->bk = P;\ + P->fd = F;\ + P->bk = B;\ +} + +/* Unlink a chunk from a smallbin */ +#define unlink_small_chunk(M, P, S) {\ + mchunkptr F = P->fd;\ + mchunkptr B = P->bk;\ + bindex_t I = small_index(S);\ + assert(P != B);\ + assert(P != F);\ + assert(chunksize(P) == small_index2size(I));\ + if (RTCHECK(F == smallbin_at(M,I) || (ok_address(M, F) && F->bk == P))) { \ + if (B == F) {\ + clear_smallmap(M, I);\ + }\ + else if (RTCHECK(B == smallbin_at(M,I) ||\ + (ok_address(M, B) && B->fd == P))) {\ + F->bk = B;\ + B->fd = F;\ + }\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + }\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ +} + +/* Unlink the first chunk from a smallbin */ +#define unlink_first_small_chunk(M, B, P, I) {\ + mchunkptr F = P->fd;\ + assert(P != B);\ + assert(P != F);\ + assert(chunksize(P) == small_index2size(I));\ + if (B == F) {\ + clear_smallmap(M, I);\ + }\ + else if (RTCHECK(ok_address(M, F) && F->bk == P)) {\ + F->bk = B;\ + B->fd = F;\ + }\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ +} + +/* Replace dv node, binning the old one */ +/* Used only when dvsize known to be small */ +#define replace_dv(M, P, S) {\ + size_t DVS = M->dvsize;\ + assert(is_small(DVS));\ + if (DVS != 0) {\ + mchunkptr DV = M->dv;\ + insert_small_chunk(M, DV, DVS);\ + }\ + M->dvsize = S;\ + M->dv = P;\ +} + +/* ------------------------- Operations on trees ------------------------- */ + +/* Insert chunk into tree */ +#define insert_large_chunk(M, X, S) {\ + tbinptr* H;\ + bindex_t I;\ + compute_tree_index(S, I);\ + H = treebin_at(M, I);\ + X->index = I;\ + X->child[0] = X->child[1] = 0;\ + if 
(!treemap_is_marked(M, I)) {\ + mark_treemap(M, I);\ + *H = X;\ + X->parent = (tchunkptr)H;\ + X->fd = X->bk = X;\ + }\ + else {\ + tchunkptr T = *H;\ + size_t K = S << leftshift_for_tree_index(I);\ + for (;;) {\ + if (chunksize(T) != S) {\ + tchunkptr* C = &(T->child[(K >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]);\ + K <<= 1;\ + if (*C != 0)\ + T = *C;\ + else if (RTCHECK(ok_address(M, C))) {\ + *C = X;\ + X->parent = T;\ + X->fd = X->bk = X;\ + break;\ + }\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + break;\ + }\ + }\ + else {\ + tchunkptr F = T->fd;\ + if (RTCHECK(ok_address(M, T) && ok_address(M, F))) {\ + T->fd = F->bk = X;\ + X->fd = F;\ + X->bk = T;\ + X->parent = 0;\ + break;\ + }\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + break;\ + }\ + }\ + }\ + }\ +} + +/* + Unlink steps: + + 1. If x is a chained node, unlink it from its same-sized fd/bk links + and choose its bk node as its replacement. + 2. If x was the last node of its size, but not a leaf node, it must + be replaced with a leaf node (not merely one with an open left or + right), to make sure that lefts and rights of descendents + correspond properly to bit masks. We use the rightmost descendent + of x. We could use any other leaf, but this is easy to locate and + tends to counteract removal of leftmosts elsewhere, and so keeps + paths shorter than minimally guaranteed. This doesn't loop much + because on average a node in a tree is near the bottom. + 3. If x is the base of a chain (i.e., has parent links) relink + x's parent and children to x's replacement (or null if none). 
+*/ + +#define unlink_large_chunk(M, X) {\ + tchunkptr XP = X->parent;\ + tchunkptr R;\ + if (X->bk != X) {\ + tchunkptr F = X->fd;\ + R = X->bk;\ + if (RTCHECK(ok_address(M, F) && F->bk == X && R->fd == X)) {\ + F->bk = R;\ + R->fd = F;\ + }\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + }\ + else {\ + tchunkptr* RP;\ + if (((R = *(RP = &(X->child[1]))) != 0) ||\ + ((R = *(RP = &(X->child[0]))) != 0)) {\ + tchunkptr* CP;\ + while ((*(CP = &(R->child[1])) != 0) ||\ + (*(CP = &(R->child[0])) != 0)) {\ + R = *(RP = CP);\ + }\ + if (RTCHECK(ok_address(M, RP)))\ + *RP = 0;\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + }\ + }\ + if (XP != 0) {\ + tbinptr* H = treebin_at(M, X->index);\ + if (X == *H) {\ + if ((*H = R) == 0) \ + clear_treemap(M, X->index);\ + }\ + else if (RTCHECK(ok_address(M, XP))) {\ + if (XP->child[0] == X) \ + XP->child[0] = R;\ + else \ + XP->child[1] = R;\ + }\ + else\ + CORRUPTION_ERROR_ACTION(M);\ + if (R != 0) {\ + if (RTCHECK(ok_address(M, R))) {\ + tchunkptr C0, C1;\ + R->parent = XP;\ + if ((C0 = X->child[0]) != 0) {\ + if (RTCHECK(ok_address(M, C0))) {\ + R->child[0] = C0;\ + C0->parent = R;\ + }\ + else\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + if ((C1 = X->child[1]) != 0) {\ + if (RTCHECK(ok_address(M, C1))) {\ + R->child[1] = C1;\ + C1->parent = R;\ + }\ + else\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + }\ + else\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + }\ +} + +/* Relays to large vs small bin operations */ + +#define insert_chunk(M, P, S)\ + if (is_small(S)) insert_small_chunk(M, P, S)\ + else { tchunkptr TP = (tchunkptr)(P); insert_large_chunk(M, TP, S); } + +#define unlink_chunk(M, P, S)\ + if (is_small(S)) unlink_small_chunk(M, P, S)\ + else { tchunkptr TP = (tchunkptr)(P); unlink_large_chunk(M, TP); } + + +/* Relays to internal calls to malloc/free from realloc, memalign etc */ + +#if ONLY_MSPACES +#define internal_malloc(m, b) mspace_malloc(m, b) +#define internal_free(m, mem) mspace_free(m,mem); +#else /* ONLY_MSPACES */ +#if MSPACES 
+#define internal_malloc(m, b)\ + ((m == gm)? dlmalloc(b) : mspace_malloc(m, b)) +#define internal_free(m, mem)\ + if (m == gm) dlfree(mem); else mspace_free(m,mem); +#else /* MSPACES */ +#define internal_malloc(m, b) dlmalloc(b) +#define internal_free(m, mem) dlfree(mem) +#endif /* MSPACES */ +#endif /* ONLY_MSPACES */ + +/* ----------------------- Direct-mmapping chunks ----------------------- */ + +/* + Directly mmapped chunks are set up with an offset to the start of + the mmapped region stored in the prev_foot field of the chunk. This + allows reconstruction of the required argument to MUNMAP when freed, + and also allows adjustment of the returned chunk to meet alignment + requirements (especially in memalign). +*/ + +/* Malloc using mmap */ +static void* mmap_alloc(mstate m, size_t nb) { + size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); + if (m->footprint_limit != 0) { + size_t fp = m->footprint + mmsize; + if (fp <= m->footprint || fp > m->footprint_limit) + return 0; + } + if (mmsize > nb) { /* Check for wrap around 0 */ + char* mm = (char*)(CALL_DIRECT_MMAP(mmsize)); + if (mm != CMFAIL) { + size_t offset = align_offset(chunk2mem(mm)); + size_t psize = mmsize - offset - MMAP_FOOT_PAD; + mchunkptr p = (mchunkptr)(mm + offset); + p->prev_foot = offset; + p->head = psize; + mark_inuse_foot(m, p, psize); + chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD; + chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0; + + if (m->least_addr == 0 || mm < m->least_addr) + m->least_addr = mm; + if ((m->footprint += mmsize) > m->max_footprint) + m->max_footprint = m->footprint; + assert(is_aligned(chunk2mem(p))); + check_mmapped_chunk(m, p); + return chunk2mem(p); + } + } + return 0; +} + +/* Realloc using mmap */ +static mchunkptr mmap_resize(mstate m, mchunkptr oldp, size_t nb, int flags) { + size_t oldsize = chunksize(oldp); + (void)flags; /* placate people compiling -Wunused */ + if (is_small(nb)) /* Can't shrink mmap regions below small size */ + 
return 0; + /* Keep old chunk if big enough but not too big */ + if (oldsize >= nb + SIZE_T_SIZE && + (oldsize - nb) <= (mparams.granularity << 1)) + return oldp; + else { + size_t offset = oldp->prev_foot; + size_t oldmmsize = oldsize + offset + MMAP_FOOT_PAD; + size_t newmmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); + char* cp = (char*)CALL_MREMAP((char*)oldp - offset, + oldmmsize, newmmsize, flags); + if (cp != CMFAIL) { + mchunkptr newp = (mchunkptr)(cp + offset); + size_t psize = newmmsize - offset - MMAP_FOOT_PAD; + newp->head = psize; + mark_inuse_foot(m, newp, psize); + chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD; + chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0; + + if (cp < m->least_addr) + m->least_addr = cp; + if ((m->footprint += newmmsize - oldmmsize) > m->max_footprint) + m->max_footprint = m->footprint; + check_mmapped_chunk(m, newp); + return newp; + } + } + return 0; +} + + +/* -------------------------- mspace management -------------------------- */ + +/* Initialize top chunk and its size */ +static void init_top(mstate m, mchunkptr p, size_t psize) { + /* Ensure alignment */ + size_t offset = align_offset(chunk2mem(p)); + p = (mchunkptr)((char*)p + offset); + psize -= offset; + + m->top = p; + m->topsize = psize; + p->head = psize | PINUSE_BIT; + /* set size of fake trailing chunk holding overhead space only once */ + chunk_plus_offset(p, psize)->head = TOP_FOOT_SIZE; + m->trim_check = mparams.trim_threshold; /* reset on each update */ +} + +/* Initialize bins for a new mstate that is otherwise zeroed out */ +static void init_bins(mstate m) { + /* Establish circular links for smallbins */ + bindex_t i; + for (i = 0; i < NSMALLBINS; ++i) { + sbinptr bin = smallbin_at(m,i); + bin->fd = bin->bk = bin; + } +} + +#if PROCEED_ON_ERROR + +/* default corruption action */ +static void reset_on_error(mstate m) { + int i; + ++malloc_corruption_error_count; + /* Reinitialize fields to forget about all memory */ + 
m->smallmap = m->treemap = 0; + m->dvsize = m->topsize = 0; + m->seg.base = 0; + m->seg.size = 0; + m->seg.next = 0; + m->top = m->dv = 0; + for (i = 0; i < NTREEBINS; ++i) + *treebin_at(m, i) = 0; + init_bins(m); +} +#endif /* PROCEED_ON_ERROR */ + +/* Allocate chunk and prepend remainder with chunk in successor base. */ +static void* prepend_alloc(mstate m, char* newbase, char* oldbase, + size_t nb) { + mchunkptr p = align_as_chunk(newbase); + mchunkptr oldfirst = align_as_chunk(oldbase); + size_t psize = (char*)oldfirst - (char*)p; + mchunkptr q = chunk_plus_offset(p, nb); + size_t qsize = psize - nb; + set_size_and_pinuse_of_inuse_chunk(m, p, nb); + + assert((char*)oldfirst > (char*)q); + assert(pinuse(oldfirst)); + assert(qsize >= MIN_CHUNK_SIZE); + + /* consolidate remainder with first chunk of old base */ + if (oldfirst == m->top) { + size_t tsize = m->topsize += qsize; + m->top = q; + q->head = tsize | PINUSE_BIT; + check_top_chunk(m, q); + } + else if (oldfirst == m->dv) { + size_t dsize = m->dvsize += qsize; + m->dv = q; + set_size_and_pinuse_of_free_chunk(q, dsize); + } + else { + if (!is_inuse(oldfirst)) { + size_t nsize = chunksize(oldfirst); + unlink_chunk(m, oldfirst, nsize); + oldfirst = chunk_plus_offset(oldfirst, nsize); + qsize += nsize; + } + set_free_with_pinuse(q, qsize, oldfirst); + insert_chunk(m, q, qsize); + check_free_chunk(m, q); + } + + check_malloced_chunk(m, chunk2mem(p), nb); + return chunk2mem(p); +} + +/* Add a segment to hold a new noncontiguous region */ +static void add_segment(mstate m, char* tbase, size_t tsize, flag_t mmapped) { + /* Determine locations and sizes of segment, fenceposts, old top */ + char* old_top = (char*)m->top; + msegmentptr oldsp = segment_holding(m, old_top); + char* old_end = oldsp->base + oldsp->size; + size_t ssize = pad_request(sizeof(struct malloc_segment)); + char* rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK); + size_t offset = align_offset(chunk2mem(rawsp)); + char* asp = rawsp 
+ offset; + char* csp = (asp < (old_top + MIN_CHUNK_SIZE))? old_top : asp; + mchunkptr sp = (mchunkptr)csp; + msegmentptr ss = (msegmentptr)(chunk2mem(sp)); + mchunkptr tnext = chunk_plus_offset(sp, ssize); + mchunkptr p = tnext; + int nfences = 0; + + /* reset top to new space */ + init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE); + + /* Set up segment record */ + assert(is_aligned(ss)); + set_size_and_pinuse_of_inuse_chunk(m, sp, ssize); + *ss = m->seg; /* Push current record */ + m->seg.base = tbase; + m->seg.size = tsize; + m->seg.sflags = mmapped; + m->seg.next = ss; + + /* Insert trailing fenceposts */ + for (;;) { + mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE); + p->head = FENCEPOST_HEAD; + ++nfences; + if ((char*)(&(nextp->head)) < old_end) + p = nextp; + else + break; + } + assert(nfences >= 2); + + /* Insert the rest of old top into a bin as an ordinary free chunk */ + if (csp != old_top) { + mchunkptr q = (mchunkptr)old_top; + size_t psize = csp - old_top; + mchunkptr tn = chunk_plus_offset(q, psize); + set_free_with_pinuse(q, psize, tn); + insert_chunk(m, q, psize); + } + + check_top_chunk(m, m->top); +} + +/* -------------------------- System allocation -------------------------- */ + +/* Get memory from system using MORECORE or MMAP */ +static void* sys_alloc(mstate m, size_t nb) { + char* tbase = CMFAIL; + size_t tsize = 0; + flag_t mmap_flag = 0; + size_t asize; /* allocation size */ + + ensure_initialization(); + + /* Directly map large chunks, but only if already initialized */ + if (use_mmap(m) && nb >= mparams.mmap_threshold && m->topsize != 0) { + void* mem = mmap_alloc(m, nb); + if (mem != 0) + return mem; + } + + asize = granularity_align(nb + SYS_ALLOC_PADDING); + if (asize <= nb) + return 0; /* wraparound */ + if (m->footprint_limit != 0) { + size_t fp = m->footprint + asize; + if (fp <= m->footprint || fp > m->footprint_limit) + return 0; + } + + /* + Try getting memory in any of three ways (in most-preferred to + 
least-preferred order): + 1. A call to MORECORE that can normally contiguously extend memory. + (disabled if not MORECORE_CONTIGUOUS or not HAVE_MORECORE or + or main space is mmapped or a previous contiguous call failed) + 2. A call to MMAP new space (disabled if not HAVE_MMAP). + Note that under the default settings, if MORECORE is unable to + fulfill a request, and HAVE_MMAP is true, then mmap is + used as a noncontiguous system allocator. This is a useful backup + strategy for systems with holes in address spaces -- in this case + sbrk cannot contiguously expand the heap, but mmap may be able to + find space. + 3. A call to MORECORE that cannot usually contiguously extend memory. + (disabled if not HAVE_MORECORE) + + In all cases, we need to request enough bytes from system to ensure + we can malloc nb bytes upon success, so pad with enough space for + top_foot, plus alignment-pad to make sure we don't lose bytes if + not on boundary, and round this up to a granularity unit. + */ + + if (MORECORE_CONTIGUOUS && !use_noncontiguous(m)) { + char* br = CMFAIL; + size_t ssize = asize; /* sbrk call size */ + msegmentptr ss = (m->top == 0)? 0 : segment_holding(m, (char*)m->top); + ACQUIRE_MALLOC_GLOBAL_LOCK(); + + if (ss == 0) { /* First time through or recovery */ + char* base = (char*)CALL_MORECORE(0); + if (base != CMFAIL) { + size_t fp; + /* Adjust to end on a page boundary */ + if (!is_page_aligned(base)) + ssize += (page_align((size_t)base) - (size_t)base); + fp = m->footprint + ssize; /* recheck limits */ + if (ssize > nb && ssize < HALF_MAX_SIZE_T && + (m->footprint_limit == 0 || + (fp > m->footprint && fp <= m->footprint_limit)) && + (br = (char*)(CALL_MORECORE(ssize))) == base) { + tbase = base; + tsize = ssize; + } + } + } + else { + /* Subtract out existing available top space from MORECORE request. 
*/ + ssize = granularity_align(nb - m->topsize + SYS_ALLOC_PADDING); + /* Use mem here only if it did continuously extend old space */ + if (ssize < HALF_MAX_SIZE_T && + (br = (char*)(CALL_MORECORE(ssize))) == ss->base+ss->size) { + tbase = br; + tsize = ssize; + } + } + + if (tbase == CMFAIL) { /* Cope with partial failure */ + if (br != CMFAIL) { /* Try to use/extend the space we did get */ + if (ssize < HALF_MAX_SIZE_T && + ssize < nb + SYS_ALLOC_PADDING) { + size_t esize = granularity_align(nb + SYS_ALLOC_PADDING - ssize); + if (esize < HALF_MAX_SIZE_T) { + char* end = (char*)CALL_MORECORE(esize); + if (end != CMFAIL) + ssize += esize; + else { /* Can't use; try to release */ + (void) CALL_MORECORE(-ssize); + br = CMFAIL; + } + } + } + } + if (br != CMFAIL) { /* Use the space we did get */ + tbase = br; + tsize = ssize; + } + else + disable_contiguous(m); /* Don't try contiguous path in the future */ + } + + RELEASE_MALLOC_GLOBAL_LOCK(); + } + + if (HAVE_MMAP && tbase == CMFAIL) { /* Try MMAP */ + char* mp = (char*)(CALL_MMAP(asize)); + if (mp != CMFAIL) { + tbase = mp; + tsize = asize; + mmap_flag = USE_MMAP_BIT; + } + } + + if (HAVE_MORECORE && tbase == CMFAIL) { /* Try noncontiguous MORECORE */ + if (asize < HALF_MAX_SIZE_T) { + char* br = CMFAIL; + char* end = CMFAIL; + ACQUIRE_MALLOC_GLOBAL_LOCK(); + br = (char*)(CALL_MORECORE(asize)); + end = (char*)(CALL_MORECORE(0)); + RELEASE_MALLOC_GLOBAL_LOCK(); + if (br != CMFAIL && end != CMFAIL && br < end) { + size_t ssize = end - br; + if (ssize > nb + TOP_FOOT_SIZE) { + tbase = br; + tsize = ssize; + } + } + } + } + + if (tbase != CMFAIL) { + + if ((m->footprint += tsize) > m->max_footprint) + m->max_footprint = m->footprint; + + if (!is_initialized(m)) { /* first-time initialization */ + if (m->least_addr == 0 || tbase < m->least_addr) + m->least_addr = tbase; + m->seg.base = tbase; + m->seg.size = tsize; + m->seg.sflags = mmap_flag; + m->magic = mparams.magic; + m->release_checks = MAX_RELEASE_CHECK_RATE; + 
init_bins(m); +#if !ONLY_MSPACES + if (is_global(m)) + init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE); + else +#endif + { + /* Offset top by embedded malloc_state */ + mchunkptr mn = next_chunk(mem2chunk(m)); + init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) -TOP_FOOT_SIZE); + } + } + + else { + /* Try to merge with an existing segment */ + msegmentptr sp = &m->seg; + /* Only consider most recent segment if traversal suppressed */ + while (sp != 0 && tbase != sp->base + sp->size) + sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next; + if (sp != 0 && + !is_extern_segment(sp) && + (sp->sflags & USE_MMAP_BIT) == mmap_flag && + segment_holds(sp, m->top)) { /* append */ + sp->size += tsize; + init_top(m, m->top, m->topsize + tsize); + } + else { + if (tbase < m->least_addr) + m->least_addr = tbase; + sp = &m->seg; + while (sp != 0 && sp->base != tbase + tsize) + sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next; + if (sp != 0 && + !is_extern_segment(sp) && + (sp->sflags & USE_MMAP_BIT) == mmap_flag) { + char* oldbase = sp->base; + sp->base = tbase; + sp->size += tsize; + return prepend_alloc(m, tbase, oldbase, nb); + } + else + add_segment(m, tbase, tsize, mmap_flag); + } + } + + if (nb < m->topsize) { /* Allocate from new or extended top space */ + size_t rsize = m->topsize -= nb; + mchunkptr p = m->top; + mchunkptr r = m->top = chunk_plus_offset(p, nb); + r->head = rsize | PINUSE_BIT; + set_size_and_pinuse_of_inuse_chunk(m, p, nb); + check_top_chunk(m, m->top); + check_malloced_chunk(m, chunk2mem(p), nb); + return chunk2mem(p); + } + } + + MALLOC_FAILURE_ACTION; + return 0; +} + +/* ----------------------- system deallocation -------------------------- */ + +/* Unmap and unlink any mmapped segments that don't contain used chunks */ +static size_t release_unused_segments(mstate m) { + size_t released = 0; + int nsegs = 0; + msegmentptr pred = &m->seg; + msegmentptr sp = pred->next; + while (sp != 0) { + char* base = sp->base; + size_t size = sp->size; + msegmentptr next = 
sp->next; + ++nsegs; + if (is_mmapped_segment(sp) && !is_extern_segment(sp)) { + mchunkptr p = align_as_chunk(base); + size_t psize = chunksize(p); + /* Can unmap if first chunk holds entire segment and not pinned */ + if (!is_inuse(p) && (char*)p + psize >= base + size - TOP_FOOT_SIZE) { + tchunkptr tp = (tchunkptr)p; + assert(segment_holds(sp, (char*)sp)); + if (p == m->dv) { + m->dv = 0; + m->dvsize = 0; + } + else { + unlink_large_chunk(m, tp); + } + if (CALL_MUNMAP(base, size) == 0) { + released += size; + m->footprint -= size; + /* unlink obsoleted record */ + sp = pred; + sp->next = next; + } + else { /* back out if cannot unmap */ + insert_large_chunk(m, tp, psize); + } + } + } + if (NO_SEGMENT_TRAVERSAL) /* scan only first segment */ + break; + pred = sp; + sp = next; + } + /* Reset check counter */ + m->release_checks = (((size_t) nsegs > (size_t) MAX_RELEASE_CHECK_RATE)? + (size_t) nsegs : (size_t) MAX_RELEASE_CHECK_RATE); + return released; +} + +static int sys_trim(mstate m, size_t pad) { + size_t released = 0; + ensure_initialization(); + if (pad < MAX_REQUEST && is_initialized(m)) { + pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */ + + if (m->topsize > pad) { + /* Shrink top space in granularity-size units, keeping at least one */ + size_t unit = mparams.granularity; + size_t extra = ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit - + SIZE_T_ONE) * unit; + msegmentptr sp = segment_holding(m, (char*)m->top); + + if (!is_extern_segment(sp)) { + if (is_mmapped_segment(sp)) { + if (HAVE_MMAP && + sp->size >= extra && + !has_segment_link(m, sp)) { /* can't shrink if pinned */ + size_t newsize = sp->size - extra; + (void)newsize; /* placate people compiling -Wunused-variable */ + /* Prefer mremap, fall back to munmap */ + if ((CALL_MREMAP(sp->base, sp->size, newsize, 0) != MFAIL) || + (CALL_MUNMAP(sp->base + newsize, extra) == 0)) { + released = extra; + } + } + } + else if (HAVE_MORECORE) { + if (extra >= HALF_MAX_SIZE_T) /* Avoid 
wrapping negative */ + extra = (HALF_MAX_SIZE_T) + SIZE_T_ONE - unit; + ACQUIRE_MALLOC_GLOBAL_LOCK(); + { + /* Make sure end of memory is where we last set it. */ + char* old_br = (char*)(CALL_MORECORE(0)); + if (old_br == sp->base + sp->size) { + char* rel_br = (char*)(CALL_MORECORE(-extra)); + char* new_br = (char*)(CALL_MORECORE(0)); + if (rel_br != CMFAIL && new_br < old_br) + released = old_br - new_br; + } + } + RELEASE_MALLOC_GLOBAL_LOCK(); + } + } + + if (released != 0) { + sp->size -= released; + m->footprint -= released; + init_top(m, m->top, m->topsize - released); + check_top_chunk(m, m->top); + } + } + + /* Unmap any unused mmapped segments */ + if (HAVE_MMAP) + released += release_unused_segments(m); + + /* On failure, disable autotrim to avoid repeated failed future calls */ + if (released == 0 && m->topsize > m->trim_check) + m->trim_check = MAX_SIZE_T; + } + + return (released != 0)? 1 : 0; +} + +/* Consolidate and bin a chunk. Differs from exported versions + of free mainly in that the chunk need not be marked as inuse. 
+*/ +static void dispose_chunk(mstate m, mchunkptr p, size_t psize) { + mchunkptr next = chunk_plus_offset(p, psize); + if (!pinuse(p)) { + mchunkptr prev; + size_t prevsize = p->prev_foot; + if (is_mmapped(p)) { + psize += prevsize + MMAP_FOOT_PAD; + if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) + m->footprint -= psize; + return; + } + prev = chunk_minus_offset(p, prevsize); + psize += prevsize; + p = prev; + if (RTCHECK(ok_address(m, prev))) { /* consolidate backward */ + if (p != m->dv) { + unlink_chunk(m, p, prevsize); + } + else if ((next->head & INUSE_BITS) == INUSE_BITS) { + m->dvsize = psize; + set_free_with_pinuse(p, psize, next); + return; + } + } + else { + CORRUPTION_ERROR_ACTION(m); + return; + } + } + if (RTCHECK(ok_address(m, next))) { + if (!cinuse(next)) { /* consolidate forward */ + if (next == m->top) { + size_t tsize = m->topsize += psize; + m->top = p; + p->head = tsize | PINUSE_BIT; + if (p == m->dv) { + m->dv = 0; + m->dvsize = 0; + } + return; + } + else if (next == m->dv) { + size_t dsize = m->dvsize += psize; + m->dv = p; + set_size_and_pinuse_of_free_chunk(p, dsize); + return; + } + else { + size_t nsize = chunksize(next); + psize += nsize; + unlink_chunk(m, next, nsize); + set_size_and_pinuse_of_free_chunk(p, psize); + if (p == m->dv) { + m->dvsize = psize; + return; + } + } + } + else { + set_free_with_pinuse(p, psize, next); + } + insert_chunk(m, p, psize); + } + else { + CORRUPTION_ERROR_ACTION(m); + } +} + +/* ---------------------------- malloc --------------------------- */ + +/* allocate a large request from the best fitting chunk in a treebin */ +static void* tmalloc_large(mstate m, size_t nb) { + tchunkptr v = 0; + size_t rsize = -nb; /* Unsigned negation */ + tchunkptr t; + bindex_t idx; + compute_tree_index(nb, idx); + if ((t = *treebin_at(m, idx)) != 0) { + /* Traverse tree for this bin looking for node with size == nb */ + size_t sizebits = nb << leftshift_for_tree_index(idx); + tchunkptr rst = 0; /* The deepest untaken 
right subtree */ + for (;;) { + tchunkptr rt; + size_t trem = chunksize(t) - nb; + if (trem < rsize) { + v = t; + if ((rsize = trem) == 0) + break; + } + rt = t->child[1]; + t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]; + if (rt != 0 && rt != t) + rst = rt; + if (t == 0) { + t = rst; /* set t to least subtree holding sizes > nb */ + break; + } + sizebits <<= 1; + } + } + if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */ + binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap; + if (leftbits != 0) { + bindex_t i; + binmap_t leastbit = least_bit(leftbits); + compute_bit2idx(leastbit, i); + t = *treebin_at(m, i); + } + } + + while (t != 0) { /* find smallest of tree or subtree */ + size_t trem = chunksize(t) - nb; + if (trem < rsize) { + rsize = trem; + v = t; + } + t = leftmost_child(t); + } + + /* If dv is a better fit, return 0 so malloc will use it */ + if (v != 0 && rsize < (size_t)(m->dvsize - nb)) { + if (RTCHECK(ok_address(m, v))) { /* split */ + mchunkptr r = chunk_plus_offset(v, nb); + assert(chunksize(v) == rsize + nb); + if (RTCHECK(ok_next(v, r))) { + unlink_large_chunk(m, v); + if (rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(m, v, (rsize + nb)); + else { + set_size_and_pinuse_of_inuse_chunk(m, v, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + insert_chunk(m, r, rsize); + } + return chunk2mem(v); + } + } + CORRUPTION_ERROR_ACTION(m); + } + return 0; +} + +/* allocate a small request from the best fitting chunk in a treebin */ +static void* tmalloc_small(mstate m, size_t nb) { + tchunkptr t, v; + size_t rsize; + bindex_t i; + binmap_t leastbit = least_bit(m->treemap); + compute_bit2idx(leastbit, i); + v = t = *treebin_at(m, i); + rsize = chunksize(t) - nb; + + while ((t = leftmost_child(t)) != 0) { + size_t trem = chunksize(t) - nb; + if (trem < rsize) { + rsize = trem; + v = t; + } + } + + if (RTCHECK(ok_address(m, v))) { + mchunkptr r = chunk_plus_offset(v, nb); + assert(chunksize(v) == rsize + nb); + if 
(RTCHECK(ok_next(v, r))) { + unlink_large_chunk(m, v); + if (rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(m, v, (rsize + nb)); + else { + set_size_and_pinuse_of_inuse_chunk(m, v, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + replace_dv(m, r, rsize); + } + return chunk2mem(v); + } + } + + CORRUPTION_ERROR_ACTION(m); + return 0; +} + +#if !ONLY_MSPACES + +void* dlmalloc(size_t bytes) { + /* + Basic algorithm: + If a small request (< 256 bytes minus per-chunk overhead): + 1. If one exists, use a remainderless chunk in associated smallbin. + (Remainderless means that there are too few excess bytes to + represent as a chunk.) + 2. If it is big enough, use the dv chunk, which is normally the + chunk adjacent to the one used for the most recent small request. + 3. If one exists, split the smallest available chunk in a bin, + saving remainder in dv. + 4. If it is big enough, use the top chunk. + 5. If available, get memory from system and use it + Otherwise, for a large request: + 1. Find the smallest available binned chunk that fits, and use it + if it is better fitting than dv chunk, splitting if necessary. + 2. If better fitting than any binned chunk, use the dv chunk. + 3. If it is big enough, use the top chunk. + 4. If request size >= mmap threshold, try to directly mmap this chunk. + 5. If available, get memory from system and use it + + The ugly goto's here ensure that postaction occurs along all paths. + */ + +#if USE_LOCKS + ensure_initialization(); /* initialize in sys_alloc if not using locks */ +#endif + + if (!PREACTION(gm)) { + void* mem; + size_t nb; + if (bytes <= MAX_SMALL_REQUEST) { + bindex_t idx; + binmap_t smallbits; + nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes); + idx = small_index(nb); + smallbits = gm->smallmap >> idx; + + if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. 
*/ + mchunkptr b, p; + idx += ~smallbits & 1; /* Uses next bin if idx empty */ + b = smallbin_at(gm, idx); + p = b->fd; + assert(chunksize(p) == small_index2size(idx)); + unlink_first_small_chunk(gm, b, p, idx); + set_inuse_and_pinuse(gm, p, small_index2size(idx)); + mem = chunk2mem(p); + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + + else if (nb > gm->dvsize) { + if (smallbits != 0) { /* Use chunk in next nonempty smallbin */ + mchunkptr b, p, r; + size_t rsize; + bindex_t i; + binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx)); + binmap_t leastbit = least_bit(leftbits); + compute_bit2idx(leastbit, i); + b = smallbin_at(gm, i); + p = b->fd; + assert(chunksize(p) == small_index2size(i)); + unlink_first_small_chunk(gm, b, p, i); + rsize = small_index2size(i) - nb; + /* Fit here cannot be remainderless if 4byte sizes */ + if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(gm, p, small_index2size(i)); + else { + set_size_and_pinuse_of_inuse_chunk(gm, p, nb); + r = chunk_plus_offset(p, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + replace_dv(gm, r, rsize); + } + mem = chunk2mem(p); + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + + else if (gm->treemap != 0 && (mem = tmalloc_small(gm, nb)) != 0) { + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + } + } + else if (bytes >= MAX_REQUEST) + nb = MAX_SIZE_T; /* Too big to allocate. 
Force failure (in sys alloc) */ + else { + nb = pad_request(bytes); + if (gm->treemap != 0 && (mem = tmalloc_large(gm, nb)) != 0) { + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + } + + if (nb <= gm->dvsize) { + size_t rsize = gm->dvsize - nb; + mchunkptr p = gm->dv; + if (rsize >= MIN_CHUNK_SIZE) { /* split dv */ + mchunkptr r = gm->dv = chunk_plus_offset(p, nb); + gm->dvsize = rsize; + set_size_and_pinuse_of_free_chunk(r, rsize); + set_size_and_pinuse_of_inuse_chunk(gm, p, nb); + } + else { /* exhaust dv */ + size_t dvs = gm->dvsize; + gm->dvsize = 0; + gm->dv = 0; + set_inuse_and_pinuse(gm, p, dvs); + } + mem = chunk2mem(p); + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + + else if (nb < gm->topsize) { /* Split top */ + size_t rsize = gm->topsize -= nb; + mchunkptr p = gm->top; + mchunkptr r = gm->top = chunk_plus_offset(p, nb); + r->head = rsize | PINUSE_BIT; + set_size_and_pinuse_of_inuse_chunk(gm, p, nb); + mem = chunk2mem(p); + check_top_chunk(gm, gm->top); + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + + mem = sys_alloc(gm, nb); + + postaction: + POSTACTION(gm); + return mem; + } + + return 0; +} + +/* ---------------------------- free --------------------------- */ + +void dlfree(void* mem) { + /* + Consolidate freed chunks with preceeding or succeeding bordering + free chunks, if they exist, and then place in a bin. Intermixed + with special cases for top, dv, mmapped chunks, and usage errors. 
+ */ + + if (mem != 0) { + mchunkptr p = mem2chunk(mem); +#if FOOTERS + mstate fm = get_mstate_for(p); + if (!ok_magic(fm)) { + USAGE_ERROR_ACTION(fm, p); + return; + } +#else /* FOOTERS */ +#define fm gm +#endif /* FOOTERS */ + if (!PREACTION(fm)) { + check_inuse_chunk(fm, p); + if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) { + size_t psize = chunksize(p); + mchunkptr next = chunk_plus_offset(p, psize); + if (!pinuse(p)) { + size_t prevsize = p->prev_foot; + if (is_mmapped(p)) { + psize += prevsize + MMAP_FOOT_PAD; + if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) + fm->footprint -= psize; + goto postaction; + } + else { + mchunkptr prev = chunk_minus_offset(p, prevsize); + psize += prevsize; + p = prev; + if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */ + if (p != fm->dv) { + unlink_chunk(fm, p, prevsize); + } + else if ((next->head & INUSE_BITS) == INUSE_BITS) { + fm->dvsize = psize; + set_free_with_pinuse(p, psize, next); + goto postaction; + } + } + else + goto erroraction; + } + } + + if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) { + if (!cinuse(next)) { /* consolidate forward */ + if (next == fm->top) { + size_t tsize = fm->topsize += psize; + fm->top = p; + p->head = tsize | PINUSE_BIT; + if (p == fm->dv) { + fm->dv = 0; + fm->dvsize = 0; + } + if (should_trim(fm, tsize)) + sys_trim(fm, 0); + goto postaction; + } + else if (next == fm->dv) { + size_t dsize = fm->dvsize += psize; + fm->dv = p; + set_size_and_pinuse_of_free_chunk(p, dsize); + goto postaction; + } + else { + size_t nsize = chunksize(next); + psize += nsize; + unlink_chunk(fm, next, nsize); + set_size_and_pinuse_of_free_chunk(p, psize); + if (p == fm->dv) { + fm->dvsize = psize; + goto postaction; + } + } + } + else + set_free_with_pinuse(p, psize, next); + + if (is_small(psize)) { + insert_small_chunk(fm, p, psize); + check_free_chunk(fm, p); + } + else { + tchunkptr tp = (tchunkptr)p; + insert_large_chunk(fm, tp, psize); + check_free_chunk(fm, p); + if 
(--fm->release_checks == 0) + release_unused_segments(fm); + } + goto postaction; + } + } + erroraction: + USAGE_ERROR_ACTION(fm, p); + postaction: + POSTACTION(fm); + } + } +#if !FOOTERS +#undef fm +#endif /* FOOTERS */ +} + +void* dlcalloc(size_t n_elements, size_t elem_size) { + void* mem; + size_t req = 0; + if (n_elements != 0) { + req = n_elements * elem_size; + if (((n_elements | elem_size) & ~(size_t)0xffff) && + (req / n_elements != elem_size)) + req = MAX_SIZE_T; /* force downstream failure on overflow */ + } + mem = dlmalloc(req); + if (mem != 0 && calloc_must_clear(mem2chunk(mem))) + memset(mem, 0, req); + return mem; +} + +#endif /* !ONLY_MSPACES */ + +/* ------------ Internal support for realloc, memalign, etc -------------- */ + +/* Try to realloc; only in-place unless can_move true */ +static mchunkptr try_realloc_chunk(mstate m, mchunkptr p, size_t nb, + int can_move) { + mchunkptr newp = 0; + size_t oldsize = chunksize(p); + mchunkptr next = chunk_plus_offset(p, oldsize); + if (RTCHECK(ok_address(m, p) && ok_inuse(p) && + ok_next(p, next) && ok_pinuse(next))) { + if (is_mmapped(p)) { + newp = mmap_resize(m, p, nb, can_move); + } + else if (oldsize >= nb) { /* already big enough */ + size_t rsize = oldsize - nb; + if (rsize >= MIN_CHUNK_SIZE) { /* split off remainder */ + mchunkptr r = chunk_plus_offset(p, nb); + set_inuse(m, p, nb); + set_inuse(m, r, rsize); + dispose_chunk(m, r, rsize); + } + newp = p; + } + else if (next == m->top) { /* extend into top */ + if (oldsize + m->topsize > nb) { + size_t newsize = oldsize + m->topsize; + size_t newtopsize = newsize - nb; + mchunkptr newtop = chunk_plus_offset(p, nb); + set_inuse(m, p, nb); + newtop->head = newtopsize |PINUSE_BIT; + m->top = newtop; + m->topsize = newtopsize; + newp = p; + } + } + else if (next == m->dv) { /* extend into dv */ + size_t dvs = m->dvsize; + if (oldsize + dvs >= nb) { + size_t dsize = oldsize + dvs - nb; + if (dsize >= MIN_CHUNK_SIZE) { + mchunkptr r = chunk_plus_offset(p, 
nb); + mchunkptr n = chunk_plus_offset(r, dsize); + set_inuse(m, p, nb); + set_size_and_pinuse_of_free_chunk(r, dsize); + clear_pinuse(n); + m->dvsize = dsize; + m->dv = r; + } + else { /* exhaust dv */ + size_t newsize = oldsize + dvs; + set_inuse(m, p, newsize); + m->dvsize = 0; + m->dv = 0; + } + newp = p; + } + } + else if (!cinuse(next)) { /* extend into next free chunk */ + size_t nextsize = chunksize(next); + if (oldsize + nextsize >= nb) { + size_t rsize = oldsize + nextsize - nb; + unlink_chunk(m, next, nextsize); + if (rsize < MIN_CHUNK_SIZE) { + size_t newsize = oldsize + nextsize; + set_inuse(m, p, newsize); + } + else { + mchunkptr r = chunk_plus_offset(p, nb); + set_inuse(m, p, nb); + set_inuse(m, r, rsize); + dispose_chunk(m, r, rsize); + } + newp = p; + } + } + } + else { + USAGE_ERROR_ACTION(m, chunk2mem(p)); + } + return newp; +} + +static void* internal_memalign(mstate m, size_t alignment, size_t bytes) { + void* mem = 0; + if (alignment < MIN_CHUNK_SIZE) /* must be at least a minimum chunk size */ + alignment = MIN_CHUNK_SIZE; + if ((alignment & (alignment-SIZE_T_ONE)) != 0) {/* Ensure a power of 2 */ + size_t a = MALLOC_ALIGNMENT << 1; + while (a < alignment) a <<= 1; + alignment = a; + } + if (bytes >= MAX_REQUEST - alignment) { + if (m != 0) { /* Test isn't needed but avoids compiler warning */ + MALLOC_FAILURE_ACTION; + } + } + else { + size_t nb = request2size(bytes); + size_t req = nb + alignment + MIN_CHUNK_SIZE - CHUNK_OVERHEAD; + mem = internal_malloc(m, req); + if (mem != 0) { + mchunkptr p = mem2chunk(mem); + if (PREACTION(m)) + return 0; + if ((((size_t)(mem)) & (alignment - 1)) != 0) { /* misaligned */ + /* + Find an aligned spot inside chunk. Since we need to give + back leading space in a chunk of at least MIN_CHUNK_SIZE, if + the first calculation places us at a spot with less than + MIN_CHUNK_SIZE leader, we can move to the next aligned spot. + We've allocated enough total room so that this is always + possible. 
+ */ + char* br = (char*)mem2chunk((size_t)(((size_t)((char*)mem + alignment - + SIZE_T_ONE)) & + -alignment)); + char* pos = ((size_t)(br - (char*)(p)) >= MIN_CHUNK_SIZE)? + br : br+alignment; + mchunkptr newp = (mchunkptr)pos; + size_t leadsize = pos - (char*)(p); + size_t newsize = chunksize(p) - leadsize; + + if (is_mmapped(p)) { /* For mmapped chunks, just adjust offset */ + newp->prev_foot = p->prev_foot + leadsize; + newp->head = newsize; + } + else { /* Otherwise, give back leader, use the rest */ + set_inuse(m, newp, newsize); + set_inuse(m, p, leadsize); + dispose_chunk(m, p, leadsize); + } + p = newp; + } + + /* Give back spare room at the end */ + if (!is_mmapped(p)) { + size_t size = chunksize(p); + if (size > nb + MIN_CHUNK_SIZE) { + size_t remainder_size = size - nb; + mchunkptr remainder = chunk_plus_offset(p, nb); + set_inuse(m, p, nb); + set_inuse(m, remainder, remainder_size); + dispose_chunk(m, remainder, remainder_size); + } + } + + mem = chunk2mem(p); + assert (chunksize(p) >= nb); + assert(((size_t)mem & (alignment - 1)) == 0); + check_inuse_chunk(m, p); + POSTACTION(m); + } + } + return mem; +} + +/* + Common support for independent_X routines, handling + all of the combinations that can result. 
+ The opts arg has: + bit 0 set if all elements are same size (using sizes[0]) + bit 1 set if elements should be zeroed +*/ +static void** ialloc(mstate m, + size_t n_elements, + size_t* sizes, + int opts, + void* chunks[]) { + + size_t element_size; /* chunksize of each element, if all same */ + size_t contents_size; /* total size of elements */ + size_t array_size; /* request size of pointer array */ + void* mem; /* malloced aggregate space */ + mchunkptr p; /* corresponding chunk */ + size_t remainder_size; /* remaining bytes while splitting */ + void** marray; /* either "chunks" or malloced ptr array */ + mchunkptr array_chunk; /* chunk for malloced ptr array */ + flag_t was_enabled; /* to disable mmap */ + size_t size; + size_t i; + + ensure_initialization(); + /* compute array length, if needed */ + if (chunks != 0) { + if (n_elements == 0) + return chunks; /* nothing to do */ + marray = chunks; + array_size = 0; + } + else { + /* if empty req, must still return chunk representing empty array */ + if (n_elements == 0) + return (void**)internal_malloc(m, 0); + marray = 0; + array_size = request2size(n_elements * (sizeof(void*))); + } + + /* compute total element size */ + if (opts & 0x1) { /* all-same-size */ + element_size = request2size(*sizes); + contents_size = n_elements * element_size; + } + else { /* add up all the sizes */ + element_size = 0; + contents_size = 0; + for (i = 0; i != n_elements; ++i) + contents_size += request2size(sizes[i]); + } + + size = contents_size + array_size; + + /* + Allocate the aggregate chunk. First disable direct-mmapping so + malloc won't use it, since we would not be able to later + free/realloc space internal to a segregated mmap region. 
+ */ + was_enabled = use_mmap(m); + disable_mmap(m); + mem = internal_malloc(m, size - CHUNK_OVERHEAD); + if (was_enabled) + enable_mmap(m); + if (mem == 0) + return 0; + + if (PREACTION(m)) return 0; + p = mem2chunk(mem); + remainder_size = chunksize(p); + + assert(!is_mmapped(p)); + + if (opts & 0x2) { /* optionally clear the elements */ + memset((size_t*)mem, 0, remainder_size - SIZE_T_SIZE - array_size); + } + + /* If not provided, allocate the pointer array as final part of chunk */ + if (marray == 0) { + size_t array_chunk_size; + array_chunk = chunk_plus_offset(p, contents_size); + array_chunk_size = remainder_size - contents_size; + marray = (void**) (chunk2mem(array_chunk)); + set_size_and_pinuse_of_inuse_chunk(m, array_chunk, array_chunk_size); + remainder_size = contents_size; + } + + /* split out elements */ + for (i = 0; ; ++i) { + marray[i] = chunk2mem(p); + if (i != n_elements-1) { + if (element_size != 0) + size = element_size; + else + size = request2size(sizes[i]); + remainder_size -= size; + set_size_and_pinuse_of_inuse_chunk(m, p, size); + p = chunk_plus_offset(p, size); + } + else { /* the final element absorbs any overallocation slop */ + set_size_and_pinuse_of_inuse_chunk(m, p, remainder_size); + break; + } + } + +#if DEBUG + if (marray != chunks) { + /* final element must have exactly exhausted chunk */ + if (element_size != 0) { + assert(remainder_size == element_size); + } + else { + assert(remainder_size == request2size(sizes[i])); + } + check_inuse_chunk(m, mem2chunk(marray)); + } + for (i = 0; i != n_elements; ++i) + check_inuse_chunk(m, mem2chunk(marray[i])); + +#endif /* DEBUG */ + + POSTACTION(m); + return marray; +} + +/* Try to free all pointers in the given array. 
+ Note: this could be made faster, by delaying consolidation, + at the price of disabling some user integrity checks, We + still optimize some consolidations by combining adjacent + chunks before freeing, which will occur often if allocated + with ialloc or the array is sorted. +*/ +static size_t internal_bulk_free(mstate m, void* array[], size_t nelem) { + size_t unfreed = 0; + if (!PREACTION(m)) { + void** a; + void** fence = &(array[nelem]); + for (a = array; a != fence; ++a) { + void* mem = *a; + if (mem != 0) { + mchunkptr p = mem2chunk(mem); + size_t psize = chunksize(p); +#if FOOTERS + if (get_mstate_for(p) != m) { + ++unfreed; + continue; + } +#endif + check_inuse_chunk(m, p); + *a = 0; + if (RTCHECK(ok_address(m, p) && ok_inuse(p))) { + void ** b = a + 1; /* try to merge with next chunk */ + mchunkptr next = next_chunk(p); + if (b != fence && *b == chunk2mem(next)) { + size_t newsize = chunksize(next) + psize; + set_inuse(m, p, newsize); + *b = chunk2mem(p); + } + else + dispose_chunk(m, p, psize); + } + else { + CORRUPTION_ERROR_ACTION(m); + break; + } + } + } + if (should_trim(m, m->topsize)) + sys_trim(m, 0); + POSTACTION(m); + } + return unfreed; +} + +/* Traversal */ +#if MALLOC_INSPECT_ALL +static void internal_inspect_all(mstate m, + void(*handler)(void *start, + void *end, + size_t used_bytes, + void* callback_arg), + void* arg) { + if (is_initialized(m)) { + mchunkptr top = m->top; + msegmentptr s; + for (s = &m->seg; s != 0; s = s->next) { + mchunkptr q = align_as_chunk(s->base); + while (segment_holds(s, q) && q->head != FENCEPOST_HEAD) { + mchunkptr next = next_chunk(q); + size_t sz = chunksize(q); + size_t used; + void* start; + if (is_inuse(q)) { + used = sz - CHUNK_OVERHEAD; /* must not be mmapped */ + start = chunk2mem(q); + } + else { + used = 0; + if (is_small(sz)) { /* offset by possible bookkeeping */ + start = (void*)((char*)q + sizeof(struct malloc_chunk)); + } + else { + start = (void*)((char*)q + sizeof(struct malloc_tree_chunk)); + 
} + } + if (start < (void*)next) /* skip if all space is bookkeeping */ + handler(start, next, used, arg); + if (q == top) + break; + q = next; + } + } + } +} +#endif /* MALLOC_INSPECT_ALL */ + +/* ------------------ Exported realloc, memalign, etc -------------------- */ + +#if !ONLY_MSPACES + +void* dlrealloc(void* oldmem, size_t bytes) { + void* mem = 0; + if (oldmem == 0) { + mem = dlmalloc(bytes); + } + else if (bytes >= MAX_REQUEST) { + MALLOC_FAILURE_ACTION; + } +#ifdef REALLOC_ZERO_BYTES_FREES + else if (bytes == 0) { + dlfree(oldmem); + } +#endif /* REALLOC_ZERO_BYTES_FREES */ + else { + size_t nb = request2size(bytes); + mchunkptr oldp = mem2chunk(oldmem); +#if ! FOOTERS + mstate m = gm; +#else /* FOOTERS */ + mstate m = get_mstate_for(oldp); + if (!ok_magic(m)) { + USAGE_ERROR_ACTION(m, oldmem); + return 0; + } +#endif /* FOOTERS */ + if (!PREACTION(m)) { + mchunkptr newp = try_realloc_chunk(m, oldp, nb, 1); + POSTACTION(m); + if (newp != 0) { + check_inuse_chunk(m, newp); + mem = chunk2mem(newp); + } + else { + mem = internal_malloc(m, bytes); + if (mem != 0) { + size_t oc = chunksize(oldp) - overhead_for(oldp); + memcpy(mem, oldmem, (oc < bytes)? oc : bytes); + internal_free(m, oldmem); + } + } + } + } + return mem; +} + +void* dlrealloc_in_place(void* oldmem, size_t bytes) { + void* mem = 0; + if (oldmem != 0) { + if (bytes >= MAX_REQUEST) { + MALLOC_FAILURE_ACTION; + } + else { + size_t nb = request2size(bytes); + mchunkptr oldp = mem2chunk(oldmem); +#if ! 
FOOTERS + mstate m = gm; +#else /* FOOTERS */ + mstate m = get_mstate_for(oldp); + if (!ok_magic(m)) { + USAGE_ERROR_ACTION(m, oldmem); + return 0; + } +#endif /* FOOTERS */ + if (!PREACTION(m)) { + mchunkptr newp = try_realloc_chunk(m, oldp, nb, 0); + POSTACTION(m); + if (newp == oldp) { + check_inuse_chunk(m, newp); + mem = oldmem; + } + } + } + } + return mem; +} + +void* dlmemalign(size_t alignment, size_t bytes) { + if (alignment <= MALLOC_ALIGNMENT) { + return dlmalloc(bytes); + } + return internal_memalign(gm, alignment, bytes); +} + +int dlposix_memalign(void** pp, size_t alignment, size_t bytes) { + void* mem = 0; + if (alignment == MALLOC_ALIGNMENT) + mem = dlmalloc(bytes); + else { + size_t d = alignment / sizeof(void*); + size_t r = alignment % sizeof(void*); + if (r != 0 || d == 0 || (d & (d-SIZE_T_ONE)) != 0) + return EINVAL; + else if (bytes <= MAX_REQUEST - alignment) { + if (alignment < MIN_CHUNK_SIZE) + alignment = MIN_CHUNK_SIZE; + mem = internal_memalign(gm, alignment, bytes); + } + } + if (mem == 0) + return ENOMEM; + else { + *pp = mem; + return 0; + } +} + +void* dlvalloc(size_t bytes) { + size_t pagesz; + ensure_initialization(); + pagesz = mparams.page_size; + return dlmemalign(pagesz, bytes); +} + +void* dlpvalloc(size_t bytes) { + size_t pagesz; + ensure_initialization(); + pagesz = mparams.page_size; + return dlmemalign(pagesz, (bytes + pagesz - SIZE_T_ONE) & ~(pagesz - SIZE_T_ONE)); +} + +void** dlindependent_calloc(size_t n_elements, size_t elem_size, + void* chunks[]) { + size_t sz = elem_size; /* serves as 1-element array */ + return ialloc(gm, n_elements, &sz, 3, chunks); +} + +void** dlindependent_comalloc(size_t n_elements, size_t sizes[], + void* chunks[]) { + return ialloc(gm, n_elements, sizes, 0, chunks); +} + +size_t dlbulk_free(void* array[], size_t nelem) { + return internal_bulk_free(gm, array, nelem); +} + +#if MALLOC_INSPECT_ALL +void dlmalloc_inspect_all(void(*handler)(void *start, + void *end, + size_t used_bytes, + 
void* callback_arg), + void* arg) { + ensure_initialization(); + if (!PREACTION(gm)) { + internal_inspect_all(gm, handler, arg); + POSTACTION(gm); + } +} +#endif /* MALLOC_INSPECT_ALL */ + +int dlmalloc_trim(size_t pad) { + int result = 0; + ensure_initialization(); + if (!PREACTION(gm)) { + result = sys_trim(gm, pad); + POSTACTION(gm); + } + return result; +} + +size_t dlmalloc_footprint(void) { + return gm->footprint; +} + +size_t dlmalloc_max_footprint(void) { + return gm->max_footprint; +} + +size_t dlmalloc_footprint_limit(void) { + size_t maf = gm->footprint_limit; + return maf == 0 ? MAX_SIZE_T : maf; +} + +size_t dlmalloc_set_footprint_limit(size_t bytes) { + size_t result; /* invert sense of 0 */ + if (bytes == 0) + result = granularity_align(1); /* Use minimal size */ + if (bytes == MAX_SIZE_T) + result = 0; /* disable */ + else + result = granularity_align(bytes); + return gm->footprint_limit = result; +} + +#if !NO_MALLINFO +struct mallinfo dlmallinfo(void) { + return internal_mallinfo(gm); +} +#endif /* NO_MALLINFO */ + +#if !NO_MALLOC_STATS +void dlmalloc_stats() { + internal_malloc_stats(gm); +} +#endif /* NO_MALLOC_STATS */ + +int dlmallopt(int param_number, int value) { + return change_mparam(param_number, value); +} + +size_t dlmalloc_usable_size(void* mem) { + if (mem != 0) { + mchunkptr p = mem2chunk(mem); + if (is_inuse(p)) + return chunksize(p) - overhead_for(p); + } + return 0; +} + +#endif /* !ONLY_MSPACES */ + +/* ----------------------------- user mspaces ---------------------------- */ + +#if MSPACES + +static mstate init_user_mstate(char* tbase, size_t tsize) { + size_t msize = pad_request(sizeof(struct malloc_state)); + mchunkptr mn; + mchunkptr msp = align_as_chunk(tbase); + mstate m = (mstate)(chunk2mem(msp)); + memset(m, 0, msize); + (void)INITIAL_LOCK(&m->mutex); + msp->head = (msize|INUSE_BITS); + m->seg.base = m->least_addr = tbase; + m->seg.size = m->footprint = m->max_footprint = tsize; + m->magic = mparams.magic; + 
m->release_checks = MAX_RELEASE_CHECK_RATE; + m->mflags = mparams.default_mflags; + m->extp = 0; + m->exts = 0; + disable_contiguous(m); + init_bins(m); + mn = next_chunk(mem2chunk(m)); + init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) - TOP_FOOT_SIZE); + check_top_chunk(m, m->top); + return m; +} + +mspace create_mspace(size_t capacity, int locked) { + mstate m = 0; + size_t msize; + ensure_initialization(); + msize = pad_request(sizeof(struct malloc_state)); + if (capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) { + size_t rs = ((capacity == 0)? mparams.granularity : + (capacity + TOP_FOOT_SIZE + msize)); + size_t tsize = granularity_align(rs); + char* tbase = (char*)(CALL_MMAP(tsize)); + if (tbase != CMFAIL) { + m = init_user_mstate(tbase, tsize); + m->seg.sflags = USE_MMAP_BIT; + set_lock(m, locked); + } + } + return (mspace)m; +} + +mspace create_mspace_with_base(void* base, size_t capacity, int locked) { + mstate m = 0; + size_t msize; + ensure_initialization(); + msize = pad_request(sizeof(struct malloc_state)); + if (capacity > msize + TOP_FOOT_SIZE && + capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) { + m = init_user_mstate((char*)base, capacity); + m->seg.sflags = EXTERN_BIT; + set_lock(m, locked); + } + return (mspace)m; +} + +int mspace_track_large_chunks(mspace msp, int enable) { + int ret = 0; + mstate ms = (mstate)msp; + if (!PREACTION(ms)) { + if (!use_mmap(ms)) { + ret = 1; + } + if (!enable) { + enable_mmap(ms); + } else { + disable_mmap(ms); + } + POSTACTION(ms); + } + return ret; +} + +size_t destroy_mspace(mspace msp) { + size_t freed = 0; + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + msegmentptr sp = &ms->seg; + (void)DESTROY_LOCK(&ms->mutex); /* destroy before unmapped */ + while (sp != 0) { + char* base = sp->base; + size_t size = sp->size; + flag_t flag = sp->sflags; + (void)base; /* placate people compiling -Wunused-variable */ + sp = sp->next; + if ((flag & USE_MMAP_BIT) && !(flag & 
EXTERN_BIT) && + CALL_MUNMAP(base, size) == 0) + freed += size; + } + } + else { + USAGE_ERROR_ACTION(ms,ms); + } + return freed; +} + +/* + mspace versions of routines are near-clones of the global + versions. This is not so nice but better than the alternatives. +*/ + +void* mspace_malloc(mspace msp, size_t bytes) { + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + if (!PREACTION(ms)) { + void* mem; + size_t nb; + if (bytes <= MAX_SMALL_REQUEST) { + bindex_t idx; + binmap_t smallbits; + nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes); + idx = small_index(nb); + smallbits = ms->smallmap >> idx; + + if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */ + mchunkptr b, p; + idx += ~smallbits & 1; /* Uses next bin if idx empty */ + b = smallbin_at(ms, idx); + p = b->fd; + assert(chunksize(p) == small_index2size(idx)); + unlink_first_small_chunk(ms, b, p, idx); + set_inuse_and_pinuse(ms, p, small_index2size(idx)); + mem = chunk2mem(p); + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + + else if (nb > ms->dvsize) { + if (smallbits != 0) { /* Use chunk in next nonempty smallbin */ + mchunkptr b, p, r; + size_t rsize; + bindex_t i; + binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx)); + binmap_t leastbit = least_bit(leftbits); + compute_bit2idx(leastbit, i); + b = smallbin_at(ms, i); + p = b->fd; + assert(chunksize(p) == small_index2size(i)); + unlink_first_small_chunk(ms, b, p, i); + rsize = small_index2size(i) - nb; + /* Fit here cannot be remainderless if 4byte sizes */ + if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(ms, p, small_index2size(i)); + else { + set_size_and_pinuse_of_inuse_chunk(ms, p, nb); + r = chunk_plus_offset(p, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + replace_dv(ms, r, rsize); + } + mem = chunk2mem(p); + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + + else if (ms->treemap != 0 && (mem = 
tmalloc_small(ms, nb)) != 0) { + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + } + } + else if (bytes >= MAX_REQUEST) + nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */ + else { + nb = pad_request(bytes); + if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) { + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + } + + if (nb <= ms->dvsize) { + size_t rsize = ms->dvsize - nb; + mchunkptr p = ms->dv; + if (rsize >= MIN_CHUNK_SIZE) { /* split dv */ + mchunkptr r = ms->dv = chunk_plus_offset(p, nb); + ms->dvsize = rsize; + set_size_and_pinuse_of_free_chunk(r, rsize); + set_size_and_pinuse_of_inuse_chunk(ms, p, nb); + } + else { /* exhaust dv */ + size_t dvs = ms->dvsize; + ms->dvsize = 0; + ms->dv = 0; + set_inuse_and_pinuse(ms, p, dvs); + } + mem = chunk2mem(p); + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + + else if (nb < ms->topsize) { /* Split top */ + size_t rsize = ms->topsize -= nb; + mchunkptr p = ms->top; + mchunkptr r = ms->top = chunk_plus_offset(p, nb); + r->head = rsize | PINUSE_BIT; + set_size_and_pinuse_of_inuse_chunk(ms, p, nb); + mem = chunk2mem(p); + check_top_chunk(ms, ms->top); + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + + mem = sys_alloc(ms, nb); + + postaction: + POSTACTION(ms); + return mem; + } + + return 0; +} + +void mspace_free(mspace msp, void* mem) { + if (mem != 0) { + mchunkptr p = mem2chunk(mem); +#if FOOTERS + mstate fm = get_mstate_for(p); + (void)msp; /* placate people compiling -Wunused */ +#else /* FOOTERS */ + mstate fm = (mstate)msp; +#endif /* FOOTERS */ + if (!ok_magic(fm)) { + USAGE_ERROR_ACTION(fm, p); + return; + } + if (!PREACTION(fm)) { + check_inuse_chunk(fm, p); + if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) { + size_t psize = chunksize(p); + mchunkptr next = chunk_plus_offset(p, psize); + if (!pinuse(p)) { + size_t prevsize = p->prev_foot; + if (is_mmapped(p)) { + psize += prevsize + MMAP_FOOT_PAD; + if (CALL_MUNMAP((char*)p - 
prevsize, psize) == 0) + fm->footprint -= psize; + goto postaction; + } + else { + mchunkptr prev = chunk_minus_offset(p, prevsize); + psize += prevsize; + p = prev; + if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */ + if (p != fm->dv) { + unlink_chunk(fm, p, prevsize); + } + else if ((next->head & INUSE_BITS) == INUSE_BITS) { + fm->dvsize = psize; + set_free_with_pinuse(p, psize, next); + goto postaction; + } + } + else + goto erroraction; + } + } + + if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) { + if (!cinuse(next)) { /* consolidate forward */ + if (next == fm->top) { + size_t tsize = fm->topsize += psize; + fm->top = p; + p->head = tsize | PINUSE_BIT; + if (p == fm->dv) { + fm->dv = 0; + fm->dvsize = 0; + } + if (should_trim(fm, tsize)) + sys_trim(fm, 0); + goto postaction; + } + else if (next == fm->dv) { + size_t dsize = fm->dvsize += psize; + fm->dv = p; + set_size_and_pinuse_of_free_chunk(p, dsize); + goto postaction; + } + else { + size_t nsize = chunksize(next); + psize += nsize; + unlink_chunk(fm, next, nsize); + set_size_and_pinuse_of_free_chunk(p, psize); + if (p == fm->dv) { + fm->dvsize = psize; + goto postaction; + } + } + } + else + set_free_with_pinuse(p, psize, next); + + if (is_small(psize)) { + insert_small_chunk(fm, p, psize); + check_free_chunk(fm, p); + } + else { + tchunkptr tp = (tchunkptr)p; + insert_large_chunk(fm, tp, psize); + check_free_chunk(fm, p); + if (--fm->release_checks == 0) + release_unused_segments(fm); + } + goto postaction; + } + } + erroraction: + USAGE_ERROR_ACTION(fm, p); + postaction: + POSTACTION(fm); + } + } +} + +void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size) { + void* mem; + size_t req = 0; + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + if (n_elements != 0) { + req = n_elements * elem_size; + if (((n_elements | elem_size) & ~(size_t)0xffff) && + (req / n_elements != elem_size)) + req = MAX_SIZE_T; /* force downstream failure 
on overflow */ + } + mem = internal_malloc(ms, req); + if (mem != 0 && calloc_must_clear(mem2chunk(mem))) + memset(mem, 0, req); + return mem; +} + +void* mspace_realloc(mspace msp, void* oldmem, size_t bytes) { + void* mem = 0; + if (oldmem == 0) { + mem = mspace_malloc(msp, bytes); + } + else if (bytes >= MAX_REQUEST) { + MALLOC_FAILURE_ACTION; + } +#ifdef REALLOC_ZERO_BYTES_FREES + else if (bytes == 0) { + mspace_free(msp, oldmem); + } +#endif /* REALLOC_ZERO_BYTES_FREES */ + else { + size_t nb = request2size(bytes); + mchunkptr oldp = mem2chunk(oldmem); +#if ! FOOTERS + mstate m = (mstate)msp; +#else /* FOOTERS */ + mstate m = get_mstate_for(oldp); + if (!ok_magic(m)) { + USAGE_ERROR_ACTION(m, oldmem); + return 0; + } +#endif /* FOOTERS */ + if (!PREACTION(m)) { + mchunkptr newp = try_realloc_chunk(m, oldp, nb, 1); + POSTACTION(m); + if (newp != 0) { + check_inuse_chunk(m, newp); + mem = chunk2mem(newp); + } + else { + mem = mspace_malloc(m, bytes); + if (mem != 0) { + size_t oc = chunksize(oldp) - overhead_for(oldp); + memcpy(mem, oldmem, (oc < bytes)? oc : bytes); + mspace_free(m, oldmem); + } + } + } + } + return mem; +} + +void* mspace_realloc_in_place(mspace msp, void* oldmem, size_t bytes) { + void* mem = 0; + if (oldmem != 0) { + if (bytes >= MAX_REQUEST) { + MALLOC_FAILURE_ACTION; + } + else { + size_t nb = request2size(bytes); + mchunkptr oldp = mem2chunk(oldmem); +#if ! 
FOOTERS + mstate m = (mstate)msp; +#else /* FOOTERS */ + mstate m = get_mstate_for(oldp); + (void)msp; /* placate people compiling -Wunused */ + if (!ok_magic(m)) { + USAGE_ERROR_ACTION(m, oldmem); + return 0; + } +#endif /* FOOTERS */ + if (!PREACTION(m)) { + mchunkptr newp = try_realloc_chunk(m, oldp, nb, 0); + POSTACTION(m); + if (newp == oldp) { + check_inuse_chunk(m, newp); + mem = oldmem; + } + } + } + } + return mem; +} + +void* mspace_memalign(mspace msp, size_t alignment, size_t bytes) { + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + if (alignment <= MALLOC_ALIGNMENT) + return mspace_malloc(msp, bytes); + return internal_memalign(ms, alignment, bytes); +} + +void** mspace_independent_calloc(mspace msp, size_t n_elements, + size_t elem_size, void* chunks[]) { + size_t sz = elem_size; /* serves as 1-element array */ + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + return ialloc(ms, n_elements, &sz, 3, chunks); +} + +void** mspace_independent_comalloc(mspace msp, size_t n_elements, + size_t sizes[], void* chunks[]) { + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + return ialloc(ms, n_elements, sizes, 0, chunks); +} + +size_t mspace_bulk_free(mspace msp, void* array[], size_t nelem) { + return internal_bulk_free((mstate)msp, array, nelem); +} + +#if MALLOC_INSPECT_ALL +void mspace_inspect_all(mspace msp, + void(*handler)(void *start, + void *end, + size_t used_bytes, + void* callback_arg), + void* arg) { + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + if (!PREACTION(ms)) { + internal_inspect_all(ms, handler, arg); + POSTACTION(ms); + } + } + else { + USAGE_ERROR_ACTION(ms,ms); + } +} +#endif /* MALLOC_INSPECT_ALL */ + +int mspace_trim(mspace msp, size_t pad) { + int result = 0; + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + if (!PREACTION(ms)) { + result = sys_trim(ms, pad); + POSTACTION(ms); + } + } + else { 
+ USAGE_ERROR_ACTION(ms,ms); + } + return result; +} + +#if !NO_MALLOC_STATS +void mspace_malloc_stats(mspace msp) { + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + internal_malloc_stats(ms); + } + else { + USAGE_ERROR_ACTION(ms,ms); + } +} +#endif /* NO_MALLOC_STATS */ + +size_t mspace_footprint(mspace msp) { + size_t result = 0; + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + result = ms->footprint; + } + else { + USAGE_ERROR_ACTION(ms,ms); + } + return result; +} + +size_t mspace_max_footprint(mspace msp) { + size_t result = 0; + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + result = ms->max_footprint; + } + else { + USAGE_ERROR_ACTION(ms,ms); + } + return result; +} + +size_t mspace_footprint_limit(mspace msp) { + size_t result = 0; + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + size_t maf = ms->footprint_limit; + result = (maf == 0) ? MAX_SIZE_T : maf; + } + else { + USAGE_ERROR_ACTION(ms,ms); + } + return result; +} + +size_t mspace_set_footprint_limit(mspace msp, size_t bytes) { + size_t result = 0; + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + if (bytes == 0) + result = granularity_align(1); /* Use minimal size */ + if (bytes == MAX_SIZE_T) + result = 0; /* disable */ + else + result = granularity_align(bytes); + ms->footprint_limit = result; + } + else { + USAGE_ERROR_ACTION(ms,ms); + } + return result; +} + +#if !NO_MALLINFO +struct mallinfo mspace_mallinfo(mspace msp) { + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + } + return internal_mallinfo(ms); +} +#endif /* NO_MALLINFO */ + +size_t mspace_usable_size(const void* mem) { + if (mem != 0) { + mchunkptr p = mem2chunk(mem); + if (is_inuse(p)) + return chunksize(p) - overhead_for(p); + } + return 0; +} + +int mspace_mallopt(int param_number, int value) { + return change_mparam(param_number, value); +} + +#endif /* MSPACES */ + + +/* -------------------- Alternative MORECORE functions ------------------- */ + +/* + Guidelines for creating a custom version 
of MORECORE: + + * For best performance, MORECORE should allocate in multiples of pagesize. + * MORECORE may allocate more memory than requested. (Or even less, + but this will usually result in a malloc failure.) + * MORECORE must not allocate memory when given argument zero, but + instead return one past the end address of memory from previous + nonzero call. + * For best performance, consecutive calls to MORECORE with positive + arguments should return increasing addresses, indicating that + space has been contiguously extended. + * Even though consecutive calls to MORECORE need not return contiguous + addresses, it must be OK for malloc'ed chunks to span multiple + regions in those cases where they do happen to be contiguous. + * MORECORE need not handle negative arguments -- it may instead + just return MFAIL when given negative arguments. + Negative arguments are always multiples of pagesize. MORECORE + must not misinterpret negative args as large positive unsigned + args. You can suppress all such calls from even occurring by defining + MORECORE_CANNOT_TRIM, + + As an example alternative MORECORE, here is a custom allocator + kindly contributed for pre-OSX macOS. It uses virtually but not + necessarily physically contiguous non-paged memory (locked in, + present and won't get swapped out). You can use it by uncommenting + this section, adding some #includes, and setting up the appropriate + defines above: + + #define MORECORE osMoreCore + + There is also a shutdown routine that should somehow be called for + cleanup upon program exit. 
+ + #define MAX_POOL_ENTRIES 100 + #define MINIMUM_MORECORE_SIZE (64 * 1024U) + static int next_os_pool; + void *our_os_pools[MAX_POOL_ENTRIES]; + + void *osMoreCore(int size) + { + void *ptr = 0; + static void *sbrk_top = 0; + + if (size > 0) + { + if (size < MINIMUM_MORECORE_SIZE) + size = MINIMUM_MORECORE_SIZE; + if (CurrentExecutionLevel() == kTaskLevel) + ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0); + if (ptr == 0) + { + return (void *) MFAIL; + } + // save ptrs so they can be freed during cleanup + our_os_pools[next_os_pool] = ptr; + next_os_pool++; + ptr = (void *) ((((size_t) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK); + sbrk_top = (char *) ptr + size; + return ptr; + } + else if (size < 0) + { + // we don't currently support shrink behavior + return (void *) MFAIL; + } + else + { + return sbrk_top; + } + } + + // cleanup any allocated memory pools + // called as last thing before shutting down driver + + void osCleanupMem(void) + { + void **ptr; + + for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++) + if (*ptr) + { + PoolDeallocate(*ptr); + *ptr = 0; + } + } + +*/ + + +/* ----------------------------------------------------------------------- +History: + v2.8.6 Wed Aug 29 06:57:58 2012 Doug Lea + * fix bad comparison in dlposix_memalign + * don't reuse adjusted asize in sys_alloc + * add LOCK_AT_FORK -- thanks to Kirill Artamonov for the suggestion + * reduce compiler warnings -- thanks to all who reported/suggested these + + v2.8.5 Sun May 22 10:26:02 2011 Doug Lea (dl at gee) + * Always perform unlink checks unless INSECURE + * Add posix_memalign. + * Improve realloc to expand in more cases; expose realloc_in_place. + Thanks to Peter Buhr for the suggestion. + * Add footprint_limit, inspect_all, bulk_free. Thanks + to Barry Hayes and others for the suggestions. + * Internal refactorings to avoid calls while holding locks + * Use non-reentrant locks by default. Thanks to Roland McGrath + for the suggestion. 
+ * Small fixes to mspace_destroy, reset_on_error. + * Various configuration extensions/changes. Thanks + to all who contributed these. + + V2.8.4a Thu Apr 28 14:39:43 2011 (dl at gee.cs.oswego.edu) + * Update Creative Commons URL + + V2.8.4 Wed May 27 09:56:23 2009 Doug Lea (dl at gee) + * Use zeros instead of prev foot for is_mmapped + * Add mspace_track_large_chunks; thanks to Jean Brouwers + * Fix set_inuse in internal_realloc; thanks to Jean Brouwers + * Fix insufficient sys_alloc padding when using 16byte alignment + * Fix bad error check in mspace_footprint + * Adaptations for ptmalloc; thanks to Wolfram Gloger. + * Reentrant spin locks; thanks to Earl Chew and others + * Win32 improvements; thanks to Niall Douglas and Earl Chew + * Add NO_SEGMENT_TRAVERSAL and MAX_RELEASE_CHECK_RATE options + * Extension hook in malloc_state + * Various small adjustments to reduce warnings on some compilers + * Various configuration extensions/changes for more platforms. Thanks + to all who contributed these. + + V2.8.3 Thu Sep 22 11:16:32 2005 Doug Lea (dl at gee) + * Add max_footprint functions + * Ensure all appropriate literals are size_t + * Fix conditional compilation problem for some #define settings + * Avoid concatenating segments with the one provided + in create_mspace_with_base + * Rename some variables to avoid compiler shadowing warnings + * Use explicit lock initialization. + * Better handling of sbrk interference. + * Simplify and fix segment insertion, trimming and mspace_destroy + * Reinstate REALLOC_ZERO_BYTES_FREES option from 2.7.x + * Thanks especially to Dennis Flanagan for help on these. + + V2.8.2 Sun Jun 12 16:01:10 2005 Doug Lea (dl at gee) + * Fix memalign brace error. 
+ + V2.8.1 Wed Jun 8 16:11:46 2005 Doug Lea (dl at gee) + * Fix improper #endif nesting in C++ + * Add explicit casts needed for C++ + + V2.8.0 Mon May 30 14:09:02 2005 Doug Lea (dl at gee) + * Use trees for large bins + * Support mspaces + * Use segments to unify sbrk-based and mmap-based system allocation, + removing need for emulation on most platforms without sbrk. + * Default safety checks + * Optional footer checks. Thanks to William Robertson for the idea. + * Internal code refactoring + * Incorporate suggestions and platform-specific changes. + Thanks to Dennis Flanagan, Colin Plumb, Niall Douglas, + Aaron Bachmann, Emery Berger, and others. + * Speed up non-fastbin processing enough to remove fastbins. + * Remove useless cfree() to avoid conflicts with other apps. + * Remove internal memcpy, memset. Compilers handle builtins better. + * Remove some options that no one ever used and rename others. + + V2.7.2 Sat Aug 17 09:07:30 2002 Doug Lea (dl at gee) + * Fix malloc_state bitmap array misdeclaration + + V2.7.1 Thu Jul 25 10:58:03 2002 Doug Lea (dl at gee) + * Allow tuning of FIRST_SORTED_BIN_SIZE + * Use PTR_UINT as type for all ptr->int casts. Thanks to John Belmonte. + * Better detection and support for non-contiguousness of MORECORE. + Thanks to Andreas Mueller, Conal Walsh, and Wolfram Gloger + * Bypass most of malloc if no frees. Thanks To Emery Berger. + * Fix freeing of old top non-contiguous chunk im sysmalloc. + * Raised default trim and map thresholds to 256K. + * Fix mmap-related #defines. Thanks to Lubos Lunak. + * Fix copy macros; added LACKS_FCNTL_H. Thanks to Neal Walfield. + * Branch-free bin calculation + * Default trim and mmap thresholds now 256K. + + V2.7.0 Sun Mar 11 14:14:06 2001 Doug Lea (dl at gee) + * Introduce independent_comalloc and independent_calloc. + Thanks to Michael Pachos for motivation and help. + * Make optional .h file available + * Allow > 2GB requests on 32bit systems. 
+ * new WIN32 sbrk, mmap, munmap, lock code from <Walter@GeNeSys-e.de>. + Thanks also to Andreas Mueller <a.mueller at paradatec.de>, + and Anonymous. + * Allow override of MALLOC_ALIGNMENT (Thanks to Ruud Waij for + helping test this.) + * memalign: check alignment arg + * realloc: don't try to shift chunks backwards, since this + leads to more fragmentation in some programs and doesn't + seem to help in any others. + * Collect all cases in malloc requiring system memory into sysmalloc + * Use mmap as backup to sbrk + * Place all internal state in malloc_state + * Introduce fastbins (although similar to 2.5.1) + * Many minor tunings and cosmetic improvements + * Introduce USE_PUBLIC_MALLOC_WRAPPERS, USE_MALLOC_LOCK + * Introduce MALLOC_FAILURE_ACTION, MORECORE_CONTIGUOUS + Thanks to Tony E. Bennett <tbennett@nvidia.com> and others. + * Include errno.h to support default failure action. + + V2.6.6 Sun Dec 5 07:42:19 1999 Doug Lea (dl at gee) + * return null for negative arguments + * Added Several WIN32 cleanups from Martin C. Fong <mcfong at yahoo.com> + * Add 'LACKS_SYS_PARAM_H' for those systems without 'sys/param.h' + (e.g. WIN32 platforms) + * Cleanup header file inclusion for WIN32 platforms + * Cleanup code to avoid Microsoft Visual C++ compiler complaints + * Add 'USE_DL_PREFIX' to quickly allow co-existence with existing + memory allocation routines + * Set 'malloc_getpagesize' for WIN32 platforms (needs more work) + * Use 'assert' rather than 'ASSERT' in WIN32 code to conform to + usage of 'assert' in non-WIN32 code + * Improve WIN32 'sbrk()' emulation's 'findRegion()' routine to + avoid infinite loop + * Always call 'fREe()' rather than 'free()' + + V2.6.5 Wed Jun 17 15:57:31 1998 Doug Lea (dl at gee) + * Fixed ordering problem with boundary-stamping + + V2.6.3 Sun May 19 08:17:58 1996 Doug Lea (dl at gee) + * Added pvalloc, as recommended by H.J. 
Liu + * Added 64bit pointer support mainly from Wolfram Gloger + * Added anonymously donated WIN32 sbrk emulation + * Malloc, calloc, getpagesize: add optimizations from Raymond Nijssen + * malloc_extend_top: fix mask error that caused wastage after + foreign sbrks + * Add linux mremap support code from HJ Liu + + V2.6.2 Tue Dec 5 06:52:55 1995 Doug Lea (dl at gee) + * Integrated most documentation with the code. + * Add support for mmap, with help from + Wolfram Gloger (Gloger@lrz.uni-muenchen.de). + * Use last_remainder in more cases. + * Pack bins using idea from colin@nyx10.cs.du.edu + * Use ordered bins instead of best-fit threshold + * Eliminate block-local decls to simplify tracing and debugging. + * Support another case of realloc via move into top + * Fix error occurring when initial sbrk_base not word-aligned. + * Rely on page size for units instead of SBRK_UNIT to + avoid surprises about sbrk alignment conventions. + * Add mallinfo, mallopt. Thanks to Raymond Nijssen + (raymond@es.ele.tue.nl) for the suggestion. + * Add `pad' argument to malloc_trim and top_pad mallopt parameter. + * More precautions for cases where other routines call sbrk, + courtesy of Wolfram Gloger (Gloger@lrz.uni-muenchen.de). + * Added macros etc., allowing use in linux libc from + H.J. Lu (hjl@gnu.ai.mit.edu) + * Inverted this history list + + V2.6.1 Sat Dec 2 14:10:57 1995 Doug Lea (dl at gee) + * Re-tuned and fixed to behave more nicely with V2.6.0 changes. + * Removed all preallocation code since under current scheme + the work required to undo bad preallocations exceeds + the work saved in good cases for most test programs. + * No longer use return list or unconsolidated bins since + no scheme using them consistently outperforms those that don't + given above changes. + * Use best fit for very large chunks to prevent some worst-cases. + * Added some support for debugging + + V2.6.0 Sat Nov 4 07:05:23 1995 Doug Lea (dl at gee) + * Removed footers when chunks are in use. 
Thanks to + Paul Wilson (wilson@cs.texas.edu) for the suggestion. + + V2.5.4 Wed Nov 1 07:54:51 1995 Doug Lea (dl at gee) + * Added malloc_trim, with help from Wolfram Gloger + (wmglo@Dent.MED.Uni-Muenchen.DE). + + V2.5.3 Tue Apr 26 10:16:01 1994 Doug Lea (dl at g) + + V2.5.2 Tue Apr 5 16:20:40 1994 Doug Lea (dl at g) + * realloc: try to expand in both directions + * malloc: swap order of clean-bin strategy; + * realloc: only conditionally expand backwards + * Try not to scavenge used bins + * Use bin counts as a guide to preallocation + * Occasionally bin return list chunks in first scan + * Add a few optimizations from colin@nyx10.cs.du.edu + + V2.5.1 Sat Aug 14 15:40:43 1993 Doug Lea (dl at g) + * faster bin computation & slightly different binning + * merged all consolidations to one part of malloc proper + (eliminating old malloc_find_space & malloc_clean_bin) + * Scan 2 returns chunks (not just 1) + * Propagate failure in realloc if malloc returns 0 + * Add stuff to allow compilation on non-ANSI compilers + from kpv@research.att.com + + V2.5 Sat Aug 7 07:41:59 1993 Doug Lea (dl at g.oswego.edu) + * removed potential for odd address access in prev_chunk + * removed dependency on getpagesize.h + * misc cosmetics and a bit more internal documentation + * anticosmetics: mangled names in macros to evade debugger strangeness + * tested on sparc, hp-700, dec-mips, rs6000 + with gcc & native cc (hp, dec only) allowing + Detlefs & Zorn comparison study (in SIGPLAN Notices.) + + Trial version Fri Aug 28 13:14:29 1992 Doug Lea (dl at g.oswego.edu) + * Based loosely on libg++-1.2X malloc. (It retains some of the overall + structure of old version, but most details differ.) 
+ +*/ diff --git a/tools/src/dlmalloc/malloc_config.h b/tools/src/dlmalloc/malloc_config.h new file mode 100644 index 0000000..7616baf --- /dev/null +++ b/tools/src/dlmalloc/malloc_config.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: MIT */ + +#include <string.h> + +#include "../heapblock.h" +#include "../utils.h" + +#define HAVE_MORECORE 1 +#define HAVE_MMAP 0 +#define MORECORE sbrk +// This is optimal; dlmalloc copes with other users of sbrk/MORECORE gracefully, and heapblock +// guarantees contiguous returns if called consecutively. +#define MORECORE_CONTIGUOUS 1 +#define MALLOC_ALIGNMENT 16 +#define ABORT panic("dlmalloc: internal error\n") +#define NO_MALLINFO 1 +#define NO_MALLOC_STATS 1 +#define malloc_getpagesize 16384 +#define LACKS_FCNTL_H 1 +#define LACKS_SYS_MMAN_H 1 +#define LACKS_SYS_PARAM_H 1 +#define LACKS_SYS_TYPES_H 1 +#define LACKS_STRINGS_H 1 +#define LACKS_STRING_H 1 +#define LACKS_STDLIB_H 1 +#define LACKS_SCHED_H 1 +#define LACKS_TIME_H 1 +#define LACKS_UNISTD_H 1 +#define MALLOC_FAILURE_ACTION panic("dlmalloc: out of memory\n"); + +static void *sbrk(intptr_t inc) +{ + if (inc < 0) + return (void *)-1; + + return heapblock_alloc(inc); +} diff --git a/tools/src/exception.c b/tools/src/exception.c new file mode 100644 index 0000000..e849456 --- /dev/null +++ b/tools/src/exception.c @@ -0,0 +1,388 @@ +/* SPDX-License-Identifier: MIT */ + +#include "exception.h" +#include "aic.h" +#include "aic_regs.h" +#include "cpu_regs.h" +#include "gxf.h" +#include "iodev.h" +#include "memory.h" +#include "uart.h" +#include "utils.h" + +#define EL0_STACK_SIZE 0x4000 + +u8 el0_stack[EL0_STACK_SIZE] ALIGNED(64); +void *el0_stack_base = (void *)(u64)(&el0_stack[EL0_STACK_SIZE]); + +extern char _vectors_start[0]; +extern char _el1_vectors_start[0]; + +volatile enum exc_guard_t exc_guard = GUARD_OFF; +volatile int exc_count = 0; + +void el0_ret(void); +void el1_ret(void); + +static char *m_table[0x10] = { + [0x00] = "EL0t", // + [0x04] = "EL1t", // + [0x05] = 
"EL1h", // + [0x08] = "EL2t", // + [0x09] = "EL2h", // +}; + +static char *gl_m_table[0x10] = { + [0x00] = "GL0t", // + [0x04] = "GL1t", // + [0x05] = "GL1h", // + [0x08] = "GL2t", // + [0x09] = "GL2h", // +}; + +static char *ec_table[0x40] = { + [0x00] = "unknown", + [0x01] = "wf*", + [0x03] = "c15 mcr/mrc", + [0x04] = "c15 mcrr/mrrc", + [0x05] = "c14 mcr/mrc", + [0x06] = "ldc/stc", + [0x07] = "FP off", + [0x08] = "VMRS access", + [0x09] = "PAC off", + [0x0a] = "ld/st64b", + [0x0c] = "c14 mrrc", + [0x0d] = "branch target", + [0x0e] = "illegal state", + [0x11] = "svc in a32", + [0x12] = "hvc in a32", + [0x13] = "smc in a32", + [0x15] = "svc in a64", + [0x16] = "hvc in a64", + [0x17] = "smc in a64", + [0x18] = "other mcr/mrc/sys", + [0x19] = "SVE off", + [0x1a] = "eret", + [0x1c] = "PAC failure", + [0x20] = "instruction abort (lower)", + [0x21] = "instruction abort (current)", + [0x22] = "pc misaligned", + [0x24] = "data abort (lower)", + [0x25] = "data abort (current)", + [0x26] = "sp misaligned", + [0x28] = "FP exception (a32)", + [0x2c] = "FP exception (a64)", + [0x2f] = "SError", + [0x30] = "BP (lower)", + [0x31] = "BP (current)", + [0x32] = "step (lower)", + [0x33] = "step (current)", + [0x34] = "watchpoint (lower)", + [0x35] = "watchpoint (current)", + [0x38] = "bkpt (a32)", + [0x3a] = "vector catch (a32)", + [0x3c] = "brk (a64)", +}; + +static const char *get_exception_source(u64 spsr) +{ + u64 aspsr = in_gl12() ? 
mrs(SYS_IMP_APL_ASPSR_GL1) : 0; + const char *m_desc = NULL; + + if (aspsr & 1) + m_desc = gl_m_table[spsr & 0xf]; + else + m_desc = m_table[spsr & 0xf]; + + if (!m_desc) + m_desc = "?"; + + return m_desc; +} + +static const char *get_exception_level(void) +{ + u64 lvl = mrs(CurrentEL); + + if (in_gl12()) { + if (lvl == 0x04) + return "GL1"; + else if (lvl == 0x08) + return "GL2"; + } else { + if (lvl == 0x04) + return "EL1"; + else if (lvl == 0x08) + return "EL2"; + } + + return "?"; +} + +void exception_initialize(void) +{ + msr(VBAR_EL1, _vectors_start); + + // Clear FIQ sources + msr(CNTP_CTL_EL0, 7L); + msr(CNTV_CTL_EL0, 7L); + if (in_el2()) { + msr(CNTP_CTL_EL02, 7L); + msr(CNTV_CTL_EL02, 7L); + } + reg_clr(SYS_IMP_APL_PMCR0, PMCR0_IACT | PMCR0_IMODE_MASK); + reg_clr(SYS_IMP_APL_UPMCR0, UPMCR0_IMODE_MASK); + msr(SYS_IMP_APL_IPI_SR_EL1, IPI_SR_PENDING); + + if (is_primary_core()) + msr(DAIF, 0 << 6); // Enable SError, IRQ and FIQ + else + msr(DAIF, 3 << 6); // Disable IRQ and FIQ + + if (in_el2()) { + // Set up a sane HCR_EL2 + msr(HCR_EL2, (BIT(41) | // API + BIT(40) | // APK + BIT(37) | // TEA + BIT(34) | // E2H + BIT(31) | // RW + BIT(27) | // TGE + BIT(5) | // AMO + BIT(4) | // IMO + BIT(3)); // FMO + ); + // Set up exception forwarding from EL1 + msr(VBAR_EL12, _el1_vectors_start); + sysop("isb"); + } +} + +void exception_shutdown(void) +{ + msr(DAIF, 7 << 6); // Disable SError, IRQ and FIQ +} + +void print_regs(u64 *regs, int el12) +{ + bool in_gl; + u64 sp = ((u64)(regs)) + 256; + + in_gl = in_gl12(); + + u64 spsr = in_gl ? mrs(SYS_IMP_APL_SPSR_GL1) : (el12 ? 
mrs(SPSR_EL12) : mrs(SPSR_EL1)); + + printf("Exception taken from %s\n", get_exception_source(spsr)); + printf("Running in %s\n", get_exception_level()); + printf("MPIDR: 0x%lx\n", mrs(MPIDR_EL1)); + printf("Registers: (@%p)\n", regs); + printf(" x0-x3: %016lx %016lx %016lx %016lx\n", regs[0], regs[1], regs[2], regs[3]); + printf(" x4-x7: %016lx %016lx %016lx %016lx\n", regs[4], regs[5], regs[6], regs[7]); + printf(" x8-x11: %016lx %016lx %016lx %016lx\n", regs[8], regs[9], regs[10], regs[11]); + printf("x12-x15: %016lx %016lx %016lx %016lx\n", regs[12], regs[13], regs[14], regs[15]); + printf("x16-x19: %016lx %016lx %016lx %016lx\n", regs[16], regs[17], regs[18], regs[19]); + printf("x20-x23: %016lx %016lx %016lx %016lx\n", regs[20], regs[21], regs[22], regs[23]); + printf("x24-x27: %016lx %016lx %016lx %016lx\n", regs[24], regs[25], regs[26], regs[27]); + printf("x28-x30: %016lx %016lx %016lx\n", regs[28], regs[29], regs[30]); + + u64 elr = in_gl ? mrs(SYS_IMP_APL_ELR_GL1) : (el12 ? mrs(ELR_EL12) : mrs(ELR_EL1)); + u64 esr = in_gl ? mrs(SYS_IMP_APL_ESR_GL1) : (el12 ? mrs(ESR_EL12) : mrs(ESR_EL1)); + u64 far = in_gl ? mrs(SYS_IMP_APL_FAR_GL1) : (el12 ? mrs(FAR_EL12) : mrs(FAR_EL1)); + + printf("PC: 0x%lx (rel: 0x%lx)\n", elr, elr - (u64)_base); + printf("SP: 0x%lx\n", sp); + printf("SPSR: 0x%lx\n", spsr); + if (in_gl12()) { + printf("ASPSR: 0x%lx\n", mrs(SYS_IMP_APL_ASPSR_GL1)); + } + printf("FAR: 0x%lx\n", far); + + const char *ec_desc = ec_table[(esr >> 26) & 0x3f]; + printf("ESR: 0x%lx (%s)\n", esr, ec_desc ? 
ec_desc : "?"); + + u64 sts = mrs(SYS_IMP_APL_L2C_ERR_STS); + printf("L2C_ERR_STS: 0x%lx\n", sts); + printf("L2C_ERR_ADR: 0x%lx\n", mrs(SYS_IMP_APL_L2C_ERR_ADR)); + printf("L2C_ERR_INF: 0x%lx\n", mrs(SYS_IMP_APL_L2C_ERR_INF)); + msr(SYS_IMP_APL_L2C_ERR_STS, sts); + + if (is_ecore()) { + printf("E_LSU_ERR_STS: 0x%lx\n", mrs(SYS_IMP_APL_E_LSU_ERR_STS)); + printf("E_FED_ERR_STS: 0x%lx\n", mrs(SYS_IMP_APL_E_FED_ERR_STS)); + printf("E_MMU_ERR_STS: 0x%lx\n", mrs(SYS_IMP_APL_E_MMU_ERR_STS)); + } else { + printf("LSU_ERR_STS: 0x%lx\n", mrs(SYS_IMP_APL_LSU_ERR_STS)); + printf("FED_ERR_STS: 0x%lx\n", mrs(SYS_IMP_APL_FED_ERR_STS)); + printf("MMU_ERR_STS: 0x%lx\n", mrs(SYS_IMP_APL_MMU_ERR_STS)); + } +} + +void exc_sync(u64 *regs) +{ + u32 insn; + int el12 = 0; + bool in_gl = in_gl12(); + + u64 spsr = in_gl ? mrs(SYS_IMP_APL_SPSR_GL1) : mrs(SPSR_EL1); + u64 esr = in_gl ? mrs(SYS_IMP_APL_ESR_GL1) : mrs(ESR_EL1); + u64 elr = in_gl ? mrs(SYS_IMP_APL_ELR_GL1) : mrs(ELR_EL1); + + if ((spsr & 0xf) == 0 && ((esr >> 26) & 0x3f) == 0x3c) { + // On clean EL0 return, let the normal exception return + // path take us back to the return thunk. + msr(SPSR_EL1, 0x09); // EL2h + msr(ELR_EL1, el0_ret); + return; + } + + if (in_el2() && !in_gl12() && (spsr & 0xf) == 5 && ((esr >> 26) & 0x3f) == 0x16) { + // Hypercall + u32 imm = mrs(ESR_EL2) & 0xffff; + switch (imm) { + case 0: + // On clean EL1 return, let the normal exception return + // path take us back to the return thunk. + msr(SPSR_EL2, 0x09); // EL2h + msr(ELR_EL2, el1_ret); + return; + case 0x10 ... 
0x1f: + if (!(exc_guard & GUARD_SILENT)) + printf("EL1 Exception: 0x%x\n", imm); + // Short-circuit the hypercall and handle the EL1 exception + el12 = 1; + msr(SPSR_EL2, mrs(SPSR_EL12)); + msr(ELR_EL2, mrs(ELR_EL12)); + break; + default: + printf("Unknown HVC: 0x%x\n", imm); + break; + } + } else { + if (!(exc_guard & GUARD_SILENT)) + printf("Exception: SYNC\n"); + } + + sysop("isb"); + sysop("dsb sy"); + + if (!(exc_guard & GUARD_SILENT)) + print_regs(regs, el12); + + u64 l2c_err_sts = mrs(SYS_IMP_APL_L2C_ERR_STS); + msr(SYS_IMP_APL_L2C_ERR_STS, l2c_err_sts); // Clear the L2C_ERR flag bits + + switch (exc_guard & GUARD_TYPE_MASK) { + case GUARD_SKIP: + elr += 4; + break; + case GUARD_MARK: + // Assuming this is a load or store, dest reg is in low bits + insn = read32(elr); + regs[insn & 0x1f] = 0xacce5515abad1dea; + elr += 4; + break; + case GUARD_RETURN: + regs[0] = 0xacce5515abad1dea; + elr = regs[30]; + exc_guard = GUARD_OFF; + break; + case GUARD_OFF: + default: + printf("Unhandled exception, rebooting...\n"); + flush_and_reboot(); + } + + exc_count++; + + if (!(exc_guard & GUARD_SILENT)) + printf("Recovering from exception (ELR=0x%lx)\n", elr); + if (in_gl) + msr(SYS_IMP_APL_ELR_GL1, elr); + else + msr(ELR_EL1, elr); + + sysop("isb"); + sysop("dsb sy"); +} + +void exc_irq(u64 *regs) +{ + u32 reason = aic_ack(); + + printf("Exception: IRQ (from %s) die: %lu type: %lu num: %lu mpidr: %lx\n", + get_exception_source(0), FIELD_GET(AIC_EVENT_DIE, reason), + FIELD_GET(AIC_EVENT_TYPE, reason), FIELD_GET(AIC_EVENT_NUM, reason), mrs(MPIDR_EL1)); + + UNUSED(regs); + // print_regs(regs); +} + +void exc_fiq(u64 *regs) +{ + printf("Exception: FIQ (from %s)\n", get_exception_source(0)); + + u64 reg = mrs(CNTP_CTL_EL0); + if (reg == 0x5) { + printf(" PHYS timer IRQ, masking\n"); + msr(CNTP_CTL_EL0, 7L); + } + + reg = mrs(CNTV_CTL_EL0); + if (reg == 0x5) { + printf(" VIRT timer IRQ, masking\n"); + msr(CNTV_CTL_EL0, 7L); + } + + if (in_el2()) { + reg = mrs(CNTP_CTL_EL02); + 
if (reg == 0x5) { + printf(" PHYS EL02 timer IRQ, masking\n"); + msr(CNTP_CTL_EL02, 7L); + } + reg = mrs(CNTV_CTL_EL02); + if (reg == 0x5) { + printf(" VIRT EL02 timer IRQ, masking\n"); + msr(CNTV_CTL_EL02, 7L); + } + } + + reg = mrs(SYS_IMP_APL_PMCR0); + if ((reg & (PMCR0_IMODE_MASK | PMCR0_IACT)) == (PMCR0_IMODE_FIQ | PMCR0_IACT)) { + printf(" PMC IRQ, masking\n"); + reg_clr(SYS_IMP_APL_PMCR0, PMCR0_IACT | PMCR0_IMODE_MASK); + } + reg = mrs(SYS_IMP_APL_UPMCR0); + if ((reg & UPMCR0_IMODE_MASK) == UPMCR0_IMODE_FIQ && (mrs(SYS_IMP_APL_UPMSR) & UPMSR_IACT)) { + printf(" UPMC IRQ, masking\n"); + reg_clr(SYS_IMP_APL_UPMCR0, UPMCR0_IMODE_MASK); + } + + if (mrs(SYS_IMP_APL_IPI_SR_EL1) & IPI_SR_PENDING) { + printf(" Fast IPI IRQ, clearing\n"); + msr(SYS_IMP_APL_IPI_SR_EL1, IPI_SR_PENDING); + } + + UNUSED(regs); + // print_regs(regs); +} + +void exc_serr(u64 *regs) +{ + if (!(exc_guard & GUARD_SILENT)) + printf("Exception: SError\n"); + + sysop("dsb sy"); + sysop("isb"); + + if (!(exc_guard & GUARD_SILENT)) + print_regs(regs, 0); + + if ((exc_guard & GUARD_TYPE_MASK) == GUARD_OFF) { + printf("Unhandled exception, rebooting...\n"); + flush_and_reboot(); + } + + exc_count++; + + sysop("dsb sy"); + sysop("isb"); +} diff --git a/tools/src/exception.h b/tools/src/exception.h new file mode 100644 index 0000000..786f38f --- /dev/null +++ b/tools/src/exception.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef __EXCEPTION_H__ +#define __EXCEPTION_H__ + +#define SIZEOF_EXC_INFO (64 * 8) + +#ifndef __ASSEMBLER__ + +#include <assert.h> +#include <stdint.h> + +#include "types.h" + +enum exc_guard_t { + GUARD_OFF = 0, + GUARD_SKIP, + GUARD_MARK, + GUARD_RETURN, + GUARD_TYPE_MASK = 0xff, + GUARD_SILENT = 0x100, +}; + +struct exc_info { + u64 regs[32]; + u64 spsr; + u64 elr; + u64 esr; + u64 far; + u64 afsr1; + u64 sp[3]; + u64 cpu_id; + u64 mpidr; + u64 elr_phys; + u64 far_phys; + u64 sp_phys; + void *extra; +}; +static_assert(sizeof(struct exc_info) <= SIZEOF_EXC_INFO, 
"Please increase SIZEOF_EXC_INFO"); +static_assert((sizeof(struct exc_info) & 15) == 0, "SIZEOF_EXC_INFO must be a multiple of 16"); + +extern volatile enum exc_guard_t exc_guard; +extern volatile int exc_count; + +void exception_initialize(void); +void exception_shutdown(void); + +void print_regs(u64 *regs, int el12); + +uint64_t el0_call(void *func, uint64_t a, uint64_t b, uint64_t c, uint64_t d); +uint64_t el1_call(void *func, uint64_t a, uint64_t b, uint64_t c, uint64_t d); + +#endif + +#endif diff --git a/tools/src/exception_asm.S b/tools/src/exception_asm.S new file mode 100644 index 0000000..8c8d01f --- /dev/null +++ b/tools/src/exception_asm.S @@ -0,0 +1,231 @@ +/* SPDX-License-Identifier: MIT */ + +#include "exception.h" +#include "memory.h" + +.globl exc_sync +.globl exc_irq +.globl exc_fiq +.globl exc_serr +.globl _vectors_start +.globl el0_stack + +.globl _v_sp0_sync +.type _v_sp0_sync, @function +_v_sp0_sync: + msr pan, #0 + sub sp, sp, #(SIZEOF_EXC_INFO - 32 * 8) + str x30, [sp, #-16]! + bl _exc_entry + bl exc_sync + + b _exc_return + +.globl _v_sp0_irq +.type _v_sp0_irq, @function +_v_sp0_irq: + msr pan, #0 + sub sp, sp, #(SIZEOF_EXC_INFO - 32 * 8) + str x30, [sp, #-16]! + bl _exc_entry + bl exc_irq + + b _exc_return + +.globl _v_sp0_fiq +.type _v_sp0_fiq, @function +_v_sp0_fiq: + msr pan, #0 + sub sp, sp, #(SIZEOF_EXC_INFO - 32 * 8) + str x30, [sp, #-16]! + bl _exc_entry + bl exc_fiq + + b _exc_return + +.globl _v_sp0_serr +.type _v_sp0_serr, @function +_v_sp0_serr: + msr pan, #0 + sub sp, sp, #(SIZEOF_EXC_INFO - 32 * 8) + str x30, [sp, #-16]! + bl _exc_entry + bl exc_serr + + b _exc_return + +.globl _exc_entry +.type _exc_entry, @function +_exc_entry: + stp x28, x29, [sp, #-16]! + stp x26, x27, [sp, #-16]! + stp x24, x25, [sp, #-16]! + stp x22, x23, [sp, #-16]! + stp x20, x21, [sp, #-16]! + stp x18, x19, [sp, #-16]! + stp x16, x17, [sp, #-16]! + stp x14, x15, [sp, #-16]! + stp x12, x13, [sp, #-16]! + stp x10, x11, [sp, #-16]! 
+ stp x8, x9, [sp, #-16]! + stp x6, x7, [sp, #-16]! + stp x4, x5, [sp, #-16]! + stp x2, x3, [sp, #-16]! + stp x0, x1, [sp, #-16]! + + mov x0, sp + ret + +.globl _exc_return +.type _exc_return, @function +_exc_return: + ldp x0, x1, [sp], #16 + ldp x2, x3, [sp], #16 + ldp x4, x5, [sp], #16 + ldp x6, x7, [sp], #16 + ldp x8, x9, [sp], #16 + ldp x10, x11, [sp], #16 + ldp x12, x13, [sp], #16 + ldp x14, x15, [sp], #16 + ldp x16, x17, [sp], #16 + ldr x18, [sp], #8 + add sp, sp, #88 + ldr x30, [sp], #16 + + add sp, sp, #(SIZEOF_EXC_INFO - 32 * 8) + + eret + +.globl el0_call +.type el0_call, @function +el0_call: + str x30, [sp, #-16]! + + // Disable EL1 + mrs x5, hcr_el2 + orr x5, x5, #(1 << 27) + msr hcr_el2, x5 + isb + + mrs x5, daif + msr daifclr, 3 + msr spsr_el1, x5 + + ldr x5, =_el0_thunk + msr elr_el1, x5 + + mov x5, #REGION_RWX_EL0 + orr x0, x0, x5 + + ldr x5, =el0_stack_base + ldr x5, [x5] + mov x6, #REGION_RW_EL0 + orr x5, x5, x6 + msr spsel, #0 + mov sp, x5 + + eret + +_el0_thunk: + mov x5, x0 + mov x0, x1 + mov x1, x2 + mov x2, x3 + mov x3, x4 + + blr x5 + + brk 0 + .long 0 + +.globl el0_ret +.type el0_ret, @function +el0_ret: + ldr x30, [sp], #16 + ret + +.globl el1_call +.type el1_call, @function +el1_call: + str x30, [sp, #-16]! + + // Enable EL1, but only if not already done. 
+ // this check is here because writes to hcr_el2 are only possible from GL2 + // if that mode has been enabled + mrs x5, hcr_el2 + bic x6, x5, #(1 << 27) + cmp x5, x6 + beq 1f + msr hcr_el2, x6 + isb + + 1: mrs x5, daif + msr daifclr, 3 + mov x6, #5 + orr x5, x5, x6 // EL1h + msr spsr_el2, x5 + + ldr x5, =_el1_thunk + msr elr_el2, x5 + + ldr x5, =el0_stack_base + ldr x5, [x5] + msr sp_el1, x5 + + eret + +_el1_thunk: + mov x5, x0 + mov x0, x1 + mov x1, x2 + mov x2, x3 + mov x3, x4 + + blr x5 + + hvc 0 + .long 0 + +.globl el1_ret +.type el1_ret, @function +el1_ret: + ldr x30, [sp], #16 + ret + +.align 11 +.globl _el1_vectors_start +_el1_vectors_start: + hvc 0x10 + .align 7 + hvc 0x11 + .align 7 + hvc 0x12 + .align 7 + hvc 0x13 + .align 7 + + hvc 0x14 + .align 7 + hvc 0x15 + .align 7 + hvc 0x16 + .align 7 + hvc 0x17 + .align 7 + + hvc 0x18 + .align 7 + hvc 0x19 + .align 7 + hvc 0x1a + .align 7 + hvc 0x1b + .align 7 + + hvc 0x1c + .align 7 + hvc 0x1d + .align 7 + hvc 0x1e + .align 7 + hvc 0x1f diff --git a/tools/src/fb.c b/tools/src/fb.c new file mode 100644 index 0000000..4c3e8b5 --- /dev/null +++ b/tools/src/fb.c @@ -0,0 +1,415 @@ +/* SPDX-License-Identifier: MIT */ + +#include "fb.h" +#include "assert.h" +#include "iodev.h" +#include "malloc.h" +#include "memory.h" +#include "string.h" +#include "types.h" +#include "utils.h" +#include "xnuboot.h" + +#define FB_DEPTH_MASK 0xff + +fb_t fb; + +struct image { + u32 *ptr; + u32 width; + u32 height; +}; + +static struct { + struct { + u8 *ptr; + u32 width; + u32 height; + } font; + + struct { + u32 row; + u32 col; + + u32 max_row; + u32 max_col; + } cursor; + + struct { + u32 rows; + u32 cols; + } margin; + + bool initialized; + bool active; +} console; + +extern u8 _binary_build_bootlogo_128_bin_start[]; +extern u8 _binary_build_bootlogo_256_bin_start[]; + +extern u8 _binary_build_font_bin_start[]; +extern u8 _binary_build_font_retina_bin_start[]; + +const struct image logo_128 = { + .ptr = (void 
*)_binary_build_bootlogo_128_bin_start, + .width = 128, + .height = 128, +}; + +const struct image logo_256 = { + .ptr = (void *)_binary_build_bootlogo_256_bin_start, + .width = 256, + .height = 256, +}; + +const struct image *logo; +struct image orig_logo; + +void fb_update(void) +{ + memcpy128(fb.hwptr, fb.ptr, fb.size); +} + +static void fb_clear_font_row(u32 row) +{ + const u32 row_size = (console.margin.cols + console.cursor.max_col) * console.font.width * 4; + const u32 ystart = (console.margin.rows + row) * console.font.height * fb.stride; + + for (u32 y = 0; y < console.font.height; ++y) + memset32(fb.ptr + ystart + y * fb.stride, 0, row_size); +} + +static void fb_move_font_row(u32 dst, u32 src) +{ + const u32 row_size = (console.margin.cols + console.cursor.max_col) * console.font.width * 4; + u32 ysrc = (console.margin.rows + src) * console.font.height; + u32 ydst = (console.margin.rows + dst) * console.font.height; + + ysrc *= fb.stride; + ydst *= fb.stride; + + for (u32 y = 0; y < console.font.height; ++y) + memcpy32(fb.ptr + ydst + y * fb.stride, fb.ptr + ysrc + y * fb.stride, row_size); + + fb_clear_font_row(src); +} + +static inline u32 rgb2pixel_30(rgb_t c) +{ + return (c.b << 2) | (c.g << 12) | (c.r << 22); +} + +static inline rgb_t pixel2rgb_30(u32 c) +{ + return (rgb_t){(c >> 22) & 0xff, (c >> 12) & 0xff, c >> 2}; +} + +static inline void fb_set_pixel(u32 x, u32 y, rgb_t c) +{ + fb.ptr[x + y * fb.stride] = rgb2pixel_30(c); +} + +static inline rgb_t fb_get_pixel(u32 x, u32 y) +{ + return pixel2rgb_30(fb.ptr[x + y * fb.stride]); +} + +void fb_blit(u32 x, u32 y, u32 w, u32 h, void *data, u32 stride, pix_fmt_t pix_fmt) +{ + u8 *p = data; + + for (u32 i = 0; i < h; i++) { + for (u32 j = 0; j < w; j++) { + rgb_t color; + switch (pix_fmt) { + default: + case PIX_FMT_XRGB: + color.r = p[(j + i * stride) * 4]; + color.g = p[(j + i * stride) * 4 + 1]; + color.b = p[(j + i * stride) * 4 + 2]; + break; + case PIX_FMT_XBGR: + color.r = p[(j + i * stride) * 4 
+ 2]; + color.g = p[(j + i * stride) * 4 + 1]; + color.b = p[(j + i * stride) * 4]; + break; + } + fb_set_pixel(x + j, y + i, color); + } + } + fb_update(); +} + +void fb_unblit(u32 x, u32 y, u32 w, u32 h, void *data, u32 stride) +{ + u8 *p = data; + + for (u32 i = 0; i < h; i++) { + for (u32 j = 0; j < w; j++) { + rgb_t color = fb_get_pixel(x + j, y + i); + p[(j + i * stride) * 4] = color.r; + p[(j + i * stride) * 4 + 1] = color.g; + p[(j + i * stride) * 4 + 2] = color.b; + p[(j + i * stride) * 4 + 3] = 0xff; + } + } +} + +void fb_fill(u32 x, u32 y, u32 w, u32 h, rgb_t color) +{ + u32 c = rgb2pixel_30(color); + for (u32 i = 0; i < h; i++) + memset32(&fb.ptr[x + (y + i) * fb.stride], c, w * 4); + fb_update(); +} + +void fb_clear(rgb_t color) +{ + u32 c = rgb2pixel_30(color); + memset32(fb.ptr, c, fb.stride * fb.height * 4); + fb_update(); +} + +void fb_blit_image(u32 x, u32 y, const struct image *img) +{ + fb_blit(x, y, img->width, img->height, img->ptr, img->width, PIX_FMT_XRGB); +} + +void fb_unblit_image(u32 x, u32 y, struct image *img) +{ + fb_unblit(x, y, img->width, img->height, img->ptr, img->width); +} + +void fb_blit_logo(const struct image *logo) +{ + fb_blit_image((fb.width - logo->width) / 2, (fb.height - logo->height) / 2, logo); +} + +void fb_display_logo(void) +{ + printf("fb: display logo\n"); + fb_blit_logo(logo); +} + +void fb_restore_logo(void) +{ + if (!orig_logo.ptr) + return; + fb_blit_logo(&orig_logo); +} + +void fb_improve_logo(void) +{ + const u8 magic[] = "BY;iX2gK0b89P9P*Qa"; + u8 *p = (void *)orig_logo.ptr; + + if (!p || p[orig_logo.width * (orig_logo.height + 1) * 2] <= 250) + return; + + for (u32 i = 0; i < orig_logo.height; i++) { + const u8 *c = &magic[min((max(i * 128 / orig_logo.height, 41) - 41) / 11, 5) * 3]; + for (u32 j = 0; j < (orig_logo.width * 4); j++, p++) + *p = (*p * (c[(j - (j >> 2)) % 3] - 42)) / 63; + } +} + +static inline rgb_t font_get_pixel(u8 c, u32 x, u32 y) +{ + c -= 0x20; + u8 v = + console.font.ptr[c * 
console.font.width * console.font.height + y * console.font.width + x]; + + rgb_t col = {.r = v, .g = v, .b = v}; + return col; +} + +static void fb_putbyte(u8 c) +{ + u32 x = (console.margin.cols + console.cursor.col) * console.font.width; + u32 y = (console.margin.rows + console.cursor.row) * console.font.height; + + for (u32 i = 0; i < console.font.height; i++) + for (u32 j = 0; j < console.font.width; j++) + fb_set_pixel(x + j, y + i, font_get_pixel(c, j, i)); +} + +static void fb_putchar(u8 c) +{ + if (c == '\r') { + console.cursor.col = 0; + } else if (c == '\n') { + console.cursor.row++; + console.cursor.col = 0; + } else if (c >= 0x20 && c < 0x7f) { + fb_putbyte(c); + console.cursor.col++; + } else { + fb_putbyte('?'); + console.cursor.col++; + } + + if (console.cursor.col == console.cursor.max_col) { + console.cursor.row++; + console.cursor.col = 0; + } + + if (console.cursor.row == console.cursor.max_row) + fb_console_scroll(1); +} + +void fb_console_scroll(u32 n) +{ + u32 row = 0; + n = min(n, console.cursor.row); + for (; row < console.cursor.max_row - n; ++row) + fb_move_font_row(row, row + n); + for (; row < console.cursor.max_row; ++row) + fb_clear_font_row(row); + console.cursor.row -= n; +} + +void fb_console_reserve_lines(u32 n) +{ + if ((console.cursor.max_row - console.cursor.row) <= n) + fb_console_scroll(1 + n - (console.cursor.max_row - console.cursor.row)); + fb_update(); +} + +ssize_t fb_console_write(const char *bfr, size_t len) +{ + ssize_t wrote = 0; + + if (!console.initialized || !console.active) + return 0; + + while (len--) { + fb_putchar(*bfr++); + wrote++; + } + + fb_update(); + + return wrote; +} + +static bool fb_console_iodev_can_write(void *opaque) +{ + UNUSED(opaque); + return console.initialized && console.active; +} + +static ssize_t fb_console_iodev_write(void *opaque, const void *buf, size_t len) +{ + UNUSED(opaque); + return fb_console_write(buf, len); +} + +const struct iodev_ops iodev_fb_ops = { + .can_write = 
fb_console_iodev_can_write, + .write = fb_console_iodev_write, +}; + +struct iodev iodev_fb = { + .ops = &iodev_fb_ops, + .usage = USAGE_CONSOLE, + .lock = SPINLOCK_INIT, +}; + +static void fb_clear_console(void) +{ + for (u32 row = 0; row < console.cursor.max_row; ++row) + fb_clear_font_row(row); + + console.cursor.col = 0; + console.cursor.row = 0; + fb_update(); +} + +void fb_init(bool clear) +{ + fb.hwptr = (void *)cur_boot_args.video.base; + fb.stride = cur_boot_args.video.stride / 4; + fb.width = cur_boot_args.video.width; + fb.height = cur_boot_args.video.height; + fb.depth = cur_boot_args.video.depth & FB_DEPTH_MASK; + fb.size = cur_boot_args.video.stride * cur_boot_args.video.height; + printf("fb init: %dx%d (%d) [s=%d] @%p\n", fb.width, fb.height, fb.depth, fb.stride, fb.hwptr); + + mmu_add_mapping(cur_boot_args.video.base, cur_boot_args.video.base, ALIGN_UP(fb.size, 0x4000), + MAIR_IDX_NORMAL_NC, PERM_RW); + + fb.ptr = malloc(fb.size); + memcpy(fb.ptr, fb.hwptr, fb.size); + + if (cur_boot_args.video.depth & FB_DEPTH_FLAG_RETINA) { + logo = &logo_256; + console.font.ptr = _binary_build_font_retina_bin_start; + console.font.width = 16; + console.font.height = 32; + } else { + logo = &logo_128; + console.font.ptr = _binary_build_font_bin_start; + console.font.width = 8; + console.font.height = 16; + } + + if (!orig_logo.ptr) { + orig_logo = *logo; + orig_logo.ptr = malloc(orig_logo.width * orig_logo.height * 4); + fb_unblit_image((fb.width - orig_logo.width) / 2, (fb.height - orig_logo.height) / 2, + &orig_logo); + } + + if (clear) + memset32(fb.ptr, 0, fb.size); + + console.margin.rows = 2; + console.margin.cols = 4; + console.cursor.col = 0; + console.cursor.row = 0; + + console.cursor.max_row = (fb.height / console.font.height) - 2 * console.margin.rows; + console.cursor.max_col = + ((fb.width - logo->width) / 2) / console.font.width - 2 * console.margin.cols; + + console.initialized = true; + console.active = false; + + fb_clear_console(); + + 
printf("fb console: max rows %d, max cols %d\n", console.cursor.max_row, + console.cursor.max_col); +} + +void fb_set_active(bool active) +{ + console.active = active; + if (active) + iodev_console_kick(); +} + +void fb_shutdown(bool restore_logo) +{ + if (!console.initialized) + return; + + console.active = false; + console.initialized = false; + fb_clear_console(); + if (restore_logo) { + fb_restore_logo(); + free(orig_logo.ptr); + orig_logo.ptr = NULL; + } + free(fb.ptr); +} + +void fb_reinit(void) +{ + if (!console.initialized) + return; + + fb_shutdown(false); + fb_init(true); + fb_display_logo(); +} diff --git a/tools/src/fb.h b/tools/src/fb.h new file mode 100644 index 0000000..2bfd406 --- /dev/null +++ b/tools/src/fb.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef FB_H +#define FB_H + +#include "types.h" + +#define FB_DEPTH_FLAG_RETINA 0x10000 + +typedef struct { + u32 *ptr; /* pointer to the start of the framebuffer */ + u32 *hwptr; /* pointer to the start of the real framebuffer */ + u32 stride; /* framebuffer stride divided by four (i.e. stride in pixels) */ + u32 depth; /* framebuffer depth (i.e. 
bits per pixel) */ + u32 width; /* width of the framebuffer in pixels */ + u32 height; /* height of the framebuffer in pixels */ + u32 size; /* size of the framebuffer in bytes */ +} fb_t; + +typedef struct { + u8 r; + u8 g; + u8 b; +} rgb_t; + +typedef enum { + PIX_FMT_XRGB, + PIX_FMT_XBGR, +} pix_fmt_t; + +extern fb_t fb; + +static inline rgb_t int2rgb(u32 c) +{ + return (rgb_t){c >> 16, c >> 8, c}; +} + +void fb_init(bool clear); +void fb_shutdown(bool restore_logo); +void fb_reinit(void); +void fb_update(void); +void fb_set_active(bool active); + +void fb_blit(u32 x, u32 y, u32 w, u32 h, void *data, u32 stride, pix_fmt_t format); +void fb_unblit(u32 x, u32 y, u32 w, u32 h, void *data, u32 stride); +void fb_fill(u32 x, u32 y, u32 w, u32 h, rgb_t color); +void fb_clear(rgb_t color); + +void fb_display_logo(void); +void fb_restore_logo(void); +void fb_improve_logo(void); + +void fb_console_scroll(u32 n); +void fb_console_reserve_lines(u32 n); +ssize_t fb_console_write(const char *bfr, size_t len); + +#endif diff --git a/tools/src/firmware.c b/tools/src/firmware.c new file mode 100644 index 0000000..ca5f108 --- /dev/null +++ b/tools/src/firmware.c @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: MIT */ + +#include "firmware.h" +#include "adt.h" +#include "string.h" +#include "types.h" +#include "utils.h" + +#include "libfdt/libfdt.h" +#include "libfdt/libfdt_env.h" + +struct fw_version_info os_firmware; +struct fw_version_info system_firmware; + +const struct fw_version_info fw_versions[NUM_FW_VERSIONS] = { + [V_UNKNOWN] = {V_UNKNOWN, "unknown", {0}, 1, "unknown"}, + [V12_1] = {V12_1, "12.1", {12, 1, 0}, 3, "iBoot-7429.61.2"}, + [V12_2] = {V12_2, "12.2", {12, 2, 0}, 3, "iBoot-7429.81.3"}, + [V12_3] = {V12_3, "12.3", {12, 3, 0}, 3, "iBoot-7459.101.2"}, + [V12_3_1] = {V12_3_1, "12.3.1", {12, 3, 1}, 3, "iBoot-7459.101.3"}, + [V12_4] = {V12_4, "12.4", {12, 4, 0}, 3, "iBoot-7459.121.3"}, + [V12_5] = {V12_5, "12.5", {12, 5, 0}, 3, "iBoot-7459.141.1"}, + // Same as 12.5 + // 
{V12_6, "12.6", {12, 6, 0}, 3, "iBoot-7459.141.1"}, + [V13_0B4] = {V13_0B4, "13.0 beta4", {12, 99, 4}, 3, "iBoot-8419.0.151.0.1"}, + [V13_0] = {V13_0, "13.0", {13, 0, 0}, 3, "iBoot-8419.41.10"}, +}; + +int firmware_set_fdt(void *fdt, int node, const char *prop, const struct fw_version_info *ver) +{ + fdt32_t data[ARRAY_SIZE(ver->num)]; + + for (size_t i = 0; i < ver->num_length; i++) { + data[i] = cpu_to_fdt32(ver->num[i]); + } + + return fdt_setprop(fdt, node, prop, data, ver->num_length * sizeof(u32)); +} + +static void detect_firmware(struct fw_version_info *info, const char *ver) +{ + for (size_t i = 0; i < ARRAY_SIZE(fw_versions); i++) { + if (!strcmp(fw_versions[i].iboot, ver)) { + *info = fw_versions[i]; + return; + } + } + + *info = fw_versions[V_UNKNOWN]; + info->iboot = ver; +} + +int firmware_init(void) +{ + int node = adt_path_offset(adt, "/chosen"); + + if (node < 0) { + printf("ADT: no /chosen found\n"); + return -1; + } + + u32 len; + const char *p = adt_getprop(adt, node, "firmware-version", &len); + if (p && len && p[len - 1] == 0) { + detect_firmware(&os_firmware, p); + printf("OS FW version: %s (%s)\n", os_firmware.string, os_firmware.iboot); + } else { + printf("ADT: failed to find firmware-version\n"); + return -1; + } + + p = adt_getprop(adt, node, "system-firmware-version", &len); + if (p && len && p[len - 1] == 0) { + detect_firmware(&system_firmware, p); + printf("System FW version: %s (%s)\n", system_firmware.string, system_firmware.iboot); + } else { + printf("ADT: failed to find system-firmware-version\n"); + return -1; + } + + return 0; +} diff --git a/tools/src/firmware.h b/tools/src/firmware.h new file mode 100644 index 0000000..1a3375b --- /dev/null +++ b/tools/src/firmware.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef __FIRMWARE_H__ +#define __FIRMWARE_H__ + +#include "types.h" + +enum fw_version { + V_UNKNOWN, + V12_1, + V12_2, + V12_3, + V12_3_1, + V12_4, + V12_5, + // V12_6, + V13_0B4, + V13_0, + 
NUM_FW_VERSIONS, +}; + +struct fw_version_info { + enum fw_version version; + const char *string; + u32 num[4]; + size_t num_length; + const char *iboot; +}; + +extern struct fw_version_info os_firmware; +extern struct fw_version_info system_firmware; +extern const struct fw_version_info fw_versions[NUM_FW_VERSIONS]; + +int firmware_init(void); +int firmware_set_fdt(void *fdt, int node, const char *prop, const struct fw_version_info *ver); + +#endif diff --git a/tools/src/gxf.c b/tools/src/gxf.c new file mode 100644 index 0000000..7b751c5 --- /dev/null +++ b/tools/src/gxf.c @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: MIT */ + +#include "cpu_regs.h" +#include "exception.h" +#include "gxf.h" +#include "malloc.h" +#include "memory.h" +#include "smp.h" +#include "uart.h" +#include "utils.h" + +uint64_t gxf_enter(void *func, uint64_t a, uint64_t b, uint64_t c, uint64_t d); + +void _gxf_init(void *gl2_stack, void *gl1_stack); + +u8 *gl1_stack[MAX_CPUS]; +u8 *gl2_stack[MAX_CPUS]; + +void gxf_init(void) +{ + int cpu = smp_id(); + + if (!gl2_stack[cpu]) + gl2_stack[cpu] = memalign(0x4000, GL_STACK_SIZE); + if (in_el2() && !gl1_stack[cpu]) + gl1_stack[cpu] = memalign(0x4000, GL_STACK_SIZE); + + _gxf_init(gl2_stack[cpu], gl1_stack[cpu]); +} + +bool gxf_enabled(void) +{ + if (!(mrs(SYS_IMP_APL_SPRR_CONFIG_EL1) & SPRR_CONFIG_EN)) + return false; + + return (mrs(SYS_IMP_APL_GXF_CONFIG_EL1) & GXF_CONFIG_EN); +} + +bool in_gl12(void) +{ + if (!gxf_enabled()) + return false; + + return (mrs(SYS_IMP_APL_GXF_STATUS_EL1) & GXF_STATUS_GUARDED); +} + +static uint64_t gl_call(void *func, uint64_t a, uint64_t b, uint64_t c, uint64_t d) +{ + // disable the MMU first since enabling SPRR will change the meaning of all + // pagetable permission bits and also prevent us from having rwx pages + u64 sprr_state = mrs(SYS_IMP_APL_SPRR_CONFIG_EL1); + if (!(sprr_state & SPRR_CONFIG_EN)) + reg_set_sync(SYS_IMP_APL_SPRR_CONFIG_EL1, sprr_state | SPRR_CONFIG_EN); + + u64 gxf_state = 
mrs(SYS_IMP_APL_GXF_CONFIG_EL1); + if (!(gxf_state & GXF_CONFIG_EN)) + reg_set_sync(SYS_IMP_APL_GXF_CONFIG_EL1, gxf_state | GXF_CONFIG_EN); + + uint64_t ret = gxf_enter(func, a, b, c, d); + + if (!(gxf_state & GXF_CONFIG_EN)) + msr_sync(SYS_IMP_APL_GXF_CONFIG_EL1, gxf_state); + if (!(sprr_state & SPRR_CONFIG_EN)) + msr_sync(SYS_IMP_APL_SPRR_CONFIG_EL1, sprr_state); + + return ret; +} + +uint64_t gl2_call(void *func, uint64_t a, uint64_t b, uint64_t c, uint64_t d) +{ + if (mrs(CurrentEL) != 0x8) + return -1; + return gl_call(func, a, b, c, d); +} + +struct gl_call_argv { + void *func; + uint64_t a, b, c, d; +}; + +static uint64_t gl_call_wrapper(struct gl_call_argv *args) +{ + return gl_call(args->func, args->a, args->b, args->c, args->d); +} + +uint64_t gl1_call(void *func, uint64_t a, uint64_t b, uint64_t c, uint64_t d) +{ + if (mrs(CurrentEL) == 0x4) + return gl_call(func, a, b, c, d); + + struct gl_call_argv args; + args.func = func; + args.a = a; + args.b = b; + args.c = c; + args.d = d; + + // enable EL1 here since once GXF has been enabled HCR_EL2 writes are only possible from GL2 + if (mrs(HCR_EL2) & HCR_TGE) + reg_clr(HCR_EL2, HCR_TGE); + + u64 sprr_state = mrs(SYS_IMP_APL_SPRR_CONFIG_EL1) & SPRR_CONFIG_EN; + reg_set_sync(SYS_IMP_APL_SPRR_CONFIG_EL1, SPRR_CONFIG_EN); + + u64 gxf_state = mrs(SYS_IMP_APL_GXF_CONFIG_EL1) & GXF_CONFIG_EN; + reg_set_sync(SYS_IMP_APL_GXF_CONFIG_EL1, GXF_CONFIG_EN); + + uint64_t ret = el1_call(gl_call_wrapper, (uint64_t)&args, 0, 0, 0); + + msr_sync(SYS_IMP_APL_GXF_CONFIG_EL1, gxf_state); + msr_sync(SYS_IMP_APL_SPRR_CONFIG_EL1, sprr_state); + + return ret; +} diff --git a/tools/src/gxf.h b/tools/src/gxf.h new file mode 100644 index 0000000..9d1f22b --- /dev/null +++ b/tools/src/gxf.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef __GXF_H__ +#define __GXF_H__ + +#include "types.h" + +#define GL_STACK_SIZE 0x10000 + +#ifndef __ASSEMBLER__ + +bool gxf_enabled(void); +bool in_gl12(void); + +void gxf_init(void); + 
+uint64_t gl1_call(void *func, uint64_t a, uint64_t b, uint64_t c, uint64_t d); +uint64_t gl2_call(void *func, uint64_t a, uint64_t b, uint64_t c, uint64_t d); + +#endif + +#endif diff --git a/tools/src/gxf_asm.S b/tools/src/gxf_asm.S new file mode 100644 index 0000000..6b6405f --- /dev/null +++ b/tools/src/gxf_asm.S @@ -0,0 +1,246 @@ +/* SPDX-License-Identifier: MIT */ + +#include "gxf.h" +#include "cpu_regs.h" +#include "exception.h" + +#define genter .long 0x00201420 +#define gexit .long 0x00201400 + +.global _gxf_init +.type _gxf_init, @function +_gxf_init: + str x30, [sp, #-16]! + mov x5, x0 + mov x6, x1 + mov x0, 1 + msr SYS_IMP_APL_SPRR_CONFIG_EL1, x0 + isb + msr SYS_IMP_APL_GXF_CONFIG_EL1, x0 + isb + ldr x0, =_gxf_setup + msr SYS_IMP_APL_GXF_ENTER_EL1, x0 + isb + genter + msr SYS_IMP_APL_GXF_CONFIG_EL1, xzr + isb + msr SYS_IMP_APL_SPRR_CONFIG_EL1, xzr + isb + ldr x30, [sp], #16 + ret + +.globl gxf_enter +.type gxf_enter, @function +gxf_enter: + genter + ret + +_gxf_setup: + mov sp, x5 + ldr x1, =_gxf_vectors + ldr x2, =_gxf_exc_sync + ldr x3, =_gxf_entry + msr SYS_IMP_APL_VBAR_GL1, x1 + msr SYS_IMP_APL_GXF_ABORT_EL1, x2 + msr SYS_IMP_APL_GXF_ENTER_EL1, x3 + + mrs x4, CurrentEL + cmp x4, #8 + bne 1f + + msr SYS_IMP_APL_SP_GL12, x6 + msr SYS_IMP_APL_VBAR_GL12, x1 + msr SYS_IMP_APL_GXF_ABORT_EL12, x2 + msr SYS_IMP_APL_GXF_ENTER_EL12, x3 + +1: + isb + gexit + +_gxf_entry: + stp x29, x30, [sp, #-16]! + stp x23, x24, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x19, x20, [sp, #-16]! 
+ + // these registers would be overwritten by each exception happening in GL1/2 + // but we need them to gexit correctly again + mrs x20, SYS_IMP_APL_SPSR_GL1 + mrs x21, SYS_IMP_APL_ASPSR_GL1 + mrs x22, SYS_IMP_APL_ESR_GL1 + mrs x23, SYS_IMP_APL_ELR_GL1 + mrs x24, SYS_IMP_APL_FAR_GL1 + + mov x5, x0 + mov x0, x1 + mov x1, x2 + mov x2, x3 + mov x3, x4 + + blr x5 + + msr SYS_IMP_APL_SPSR_GL1, x20 + msr SYS_IMP_APL_ASPSR_GL1, x21 + msr SYS_IMP_APL_ESR_GL1, x22 + msr SYS_IMP_APL_ELR_GL1, x23 + msr SYS_IMP_APL_FAR_GL1, x24 + + ldp x19, x20, [sp], #16 + ldp x21, x22, [sp], #16 + ldp x23, x24, [sp], #16 + ldp x29, x30, [sp], #16 + + isb + gexit + +.align 11 +_gxf_vectors: + mov x9, '0' + b _gxf_exc_unk + .align 7 + mov x9, '1' + b _gxf_exc_unk + .align 7 + mov x9, '2' + b _gxf_exc_unk + .align 7 + mov x9, '3' + b _gxf_exc_unk + .align 7 + b _gxf_exc_sync + .align 7 + mov x9, '5' + b _gxf_exc_unk + .align 7 + mov x9, '6' + b _gxf_exc_unk + .align 7 + b _gxf_serr + .align 7 + b _gxf_exc_sync + .align 7 + mov x9, '9' + b _gxf_exc_unk + .align 7 + mov x9, 'a' + b _gxf_exc_unk + .align 7 + b _gxf_serr + .align 7 + mov x9, 'c' + b _gxf_exc_unk + .align 7 + mov x9, 'd' + b _gxf_exc_unk + .align 7 + mov x9, 'e' + b _gxf_exc_unk + .align 7 + mov x9, 'f' + b _gxf_exc_unk + .align 7 + +_gxf_exc_sync: + msr pan, #0 + sub sp, sp, #(SIZEOF_EXC_INFO - 32 * 8) + str x30, [sp, #-16]! + bl _gxf_exc_entry + bl exc_sync + b _gxf_exc_return + +_gxf_serr: + msr pan, #0 + sub sp, sp, #(SIZEOF_EXC_INFO - 32 * 8) + str x30, [sp, #-16]! + bl _gxf_exc_entry + bl exc_serr + b _gxf_exc_return + +_gxf_exc_entry: + stp x28, x29, [sp, #-16]! + stp x26, x27, [sp, #-16]! + stp x24, x25, [sp, #-16]! + stp x22, x23, [sp, #-16]! + stp x20, x21, [sp, #-16]! + stp x18, x19, [sp, #-16]! + stp x16, x17, [sp, #-16]! + stp x14, x15, [sp, #-16]! + stp x12, x13, [sp, #-16]! + stp x10, x11, [sp, #-16]! + stp x8, x9, [sp, #-16]! + stp x6, x7, [sp, #-16]! + stp x4, x5, [sp, #-16]! + stp x2, x3, [sp, #-16]! 
+ stp x0, x1, [sp, #-16]! + + mov x0, sp + + mrs x1, SYS_IMP_APL_SPSR_GL1 + msr SPSR_EL1, x1 + mrs x1, SYS_IMP_APL_ELR_GL1 + msr ELR_EL1, x1 + mrs x1, SYS_IMP_APL_ESR_GL1 + msr ESR_EL1, x1 + mrs x1, SYS_IMP_APL_FAR_GL1 + msr FAR_EL1, x1 + + ret + +_gxf_exc_return: + mrs x0, SPSR_EL1 + msr SYS_IMP_APL_SPSR_GL1, x0 + mrs x0, ELR_EL1 + msr SYS_IMP_APL_ELR_GL1, x0 + + ldp x0, x1, [sp], #16 + ldp x2, x3, [sp], #16 + ldp x4, x5, [sp], #16 + ldp x6, x7, [sp], #16 + ldp x8, x9, [sp], #16 + ldp x10, x11, [sp], #16 + ldp x12, x13, [sp], #16 + ldp x14, x15, [sp], #16 + ldp x16, x17, [sp], #16 + ldp x18, x19, [sp], #16 + ldp x20, x21, [sp], #16 + ldp x22, x23, [sp], #16 + ldp x24, x25, [sp], #16 + ldp x26, x27, [sp], #16 + ldp x28, x29, [sp], #16 + ldr x30, [sp], #16 + + add sp, sp, #(SIZEOF_EXC_INFO - 32 * 8) + + isb + + gexit + +_gxf_exc_unk: + msr pan, #0 + mov w0, 0xd /* '\r', clang compat */ + bl debug_putc + mov w0, '\n' + bl debug_putc + mov w0, '!' + bl debug_putc + mov w0, 'G' + bl debug_putc + mov w0, 'L' + bl debug_putc + mov w0, 'E' + bl debug_putc + mov w0, 'X' + bl debug_putc + mov w0, 'C' + bl debug_putc + mov w0, ':' + bl debug_putc + mov w0, w9 + bl debug_putc + mov w0, '!' + bl debug_putc + mov w0, 0xd /* '\r', clang compat */ + bl debug_putc + mov w0, '\n' + bl debug_putc + b reboot diff --git a/tools/src/heapblock.c b/tools/src/heapblock.c new file mode 100644 index 0000000..5f07f44 --- /dev/null +++ b/tools/src/heapblock.c @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: MIT */ + +#include "heapblock.h" +#include "assert.h" +#include "types.h" +#include "utils.h" +#include "xnuboot.h" + +/* + * This is a non-freeing allocator, used as a backend for malloc and for uncompressing data. + * + * Allocating 0 bytes is allowed, and guarantees "infinite" (until the end of RAM) space is + * available at the returned pointer as long as no other malloc/heapblock calls occur, which is + * useful as a buffer for unknown-length uncompressed data. 
A subsequent call with a size will then + * actually reserve the block. + */ + +static void *heap_base; + +void heapblock_init(void) +{ + void *top_of_kernel_data = (void *)cur_boot_args.top_of_kernel_data; + + heap_base = top_of_kernel_data; + heapblock_alloc(0); // align base + + printf("Heap base: %p\n", heap_base); +} + +void *heapblock_alloc(size_t size) +{ + return heapblock_alloc_aligned(size, 64); +} + +void *heapblock_alloc_aligned(size_t size, size_t align) +{ + assert((align & (align - 1)) == 0); + assert(heap_base); + + uintptr_t block = (((uintptr_t)heap_base) + align - 1) & ~(align - 1); + heap_base = (void *)(block + size); + + return (void *)block; +} diff --git a/tools/src/heapblock.h b/tools/src/heapblock.h new file mode 100644 index 0000000..d67411d --- /dev/null +++ b/tools/src/heapblock.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef HEAPBLOCK_H +#define HEAPBLOCK_H + +#include "types.h" + +void heapblock_init(void); + +void *heapblock_alloc(size_t size); +void *heapblock_alloc_aligned(size_t size, size_t align); + +#endif diff --git a/tools/src/hv.c b/tools/src/hv.c new file mode 100644 index 0000000..be01692 --- /dev/null +++ b/tools/src/hv.c @@ -0,0 +1,329 @@ +/* SPDX-License-Identifier: MIT */ + +#include "hv.h" +#include "assert.h" +#include "cpu_regs.h" +#include "display.h" +#include "gxf.h" +#include "memory.h" +#include "pcie.h" +#include "smp.h" +#include "string.h" +#include "usb.h" +#include "utils.h" + +#define HV_TICK_RATE 1000 + +DECLARE_SPINLOCK(bhl); + +void hv_enter_guest(u64 x0, u64 x1, u64 x2, u64 x3, void *entry); +void hv_exit_guest(void) __attribute__((noreturn)); + +extern char _hv_vectors_start[0]; + +u64 hv_tick_interval; + +int hv_pinned_cpu; +int hv_want_cpu; + +static bool hv_should_exit; +bool hv_started_cpus[MAX_CPUS]; +u32 hv_cpus_in_guest; +u64 hv_saved_sp[MAX_CPUS]; + +struct hv_secondary_info_t { + uint64_t hcr; + uint64_t hacr; + uint64_t vtcr, vttbr; + uint64_t mdcr; + uint64_t mdscr; + 
uint64_t amx_ctl; + uint64_t apvmkeylo, apvmkeyhi, apsts; + uint64_t actlr_el2; + uint64_t actlr_el1; + uint64_t cnthctl; + uint64_t sprr_config; + uint64_t gxf_config; +}; + +static struct hv_secondary_info_t hv_secondary_info; + +void hv_init(void) +{ + pcie_shutdown(); + // Make sure we wake up DCP if we put it to sleep, just quiesce it to match ADT + if (display_is_external && display_start_dcp() >= 0) + display_shutdown(DCP_QUIESCED); + // reenable hpm interrupts for the guest for unused iodevs + usb_hpm_restore_irqs(0); + smp_start_secondaries(); + smp_set_wfe_mode(true); + hv_wdt_init(); + + // Enable physical timer for EL1 + msr(CNTHCTL_EL2, CNTHCTL_EL1PTEN | CNTHCTL_EL1PCTEN); + + hv_pt_init(); + + // Configure hypervisor defaults + hv_write_hcr(HCR_API | // Allow PAuth instructions + HCR_APK | // Allow PAuth key registers + HCR_TEA | // Trap external aborts + HCR_E2H | // VHE mode (forced) + HCR_RW | // AArch64 guest + HCR_AMO | // Trap SError exceptions + HCR_VM); // Enable stage 2 translation + + // No guest vectors initially + msr(VBAR_EL12, 0); + + // Compute tick interval + hv_tick_interval = mrs(CNTFRQ_EL0) / HV_TICK_RATE; + + sysop("dsb ishst"); + sysop("tlbi alle1is"); + sysop("dsb ish"); + sysop("isb"); +} + +static void hv_set_gxf_vbar(void) +{ + msr(SYS_IMP_APL_VBAR_GL1, _hv_vectors_start); +} + +void hv_start(void *entry, u64 regs[4]) +{ + hv_should_exit = false; + memset(hv_started_cpus, 0, sizeof(hv_started_cpus)); + hv_started_cpus[0] = 1; + + msr(VBAR_EL1, _hv_vectors_start); + + if (gxf_enabled()) + gl2_call(hv_set_gxf_vbar, 0, 0, 0, 0); + + hv_secondary_info.hcr = mrs(HCR_EL2); + hv_secondary_info.hacr = mrs(HACR_EL2); + hv_secondary_info.vtcr = mrs(VTCR_EL2); + hv_secondary_info.vttbr = mrs(VTTBR_EL2); + hv_secondary_info.mdcr = mrs(MDCR_EL2); + hv_secondary_info.mdscr = mrs(MDSCR_EL1); + hv_secondary_info.amx_ctl = mrs(SYS_IMP_APL_AMX_CTL_EL2); + hv_secondary_info.apvmkeylo = mrs(SYS_IMP_APL_APVMKEYLO_EL2); + 
hv_secondary_info.apvmkeyhi = mrs(SYS_IMP_APL_APVMKEYHI_EL2); + hv_secondary_info.apsts = mrs(SYS_IMP_APL_APSTS_EL12); + hv_secondary_info.actlr_el2 = mrs(ACTLR_EL2); + hv_secondary_info.actlr_el1 = mrs(SYS_IMP_APL_ACTLR_EL12); + hv_secondary_info.cnthctl = mrs(CNTHCTL_EL2); + hv_secondary_info.sprr_config = mrs(SYS_IMP_APL_SPRR_CONFIG_EL1); + hv_secondary_info.gxf_config = mrs(SYS_IMP_APL_GXF_CONFIG_EL1); + + hv_arm_tick(); + hv_pinned_cpu = -1; + hv_want_cpu = -1; + hv_cpus_in_guest = 1; + + hv_enter_guest(regs[0], regs[1], regs[2], regs[3], entry); + + __atomic_sub_fetch(&hv_cpus_in_guest, 1, __ATOMIC_ACQUIRE); + spin_lock(&bhl); + + hv_wdt_stop(); + + hv_should_exit = true; + printf("HV: Exiting hypervisor (main CPU)\n"); + + for (int i = 0; i < MAX_CPUS; i++) { + if (hv_started_cpus[i]) { + printf("HV: Waiting for CPU %d to exit\n", i); + spin_unlock(&bhl); + smp_wait(i); + spin_lock(&bhl); + hv_started_cpus[i] = false; + } + } + + printf("HV: All CPUs exited\n"); + spin_unlock(&bhl); +} + +static void hv_init_secondary(struct hv_secondary_info_t *info) +{ + gxf_init(); + + msr(VBAR_EL1, _hv_vectors_start); + + msr(HCR_EL2, info->hcr); + msr(HACR_EL2, info->hacr); + msr(VTCR_EL2, info->vtcr); + msr(VTTBR_EL2, info->vttbr); + msr(MDCR_EL2, info->mdcr); + msr(MDSCR_EL1, info->mdscr); + msr(SYS_IMP_APL_AMX_CTL_EL2, info->amx_ctl); + msr(SYS_IMP_APL_APVMKEYLO_EL2, info->apvmkeylo); + msr(SYS_IMP_APL_APVMKEYHI_EL2, info->apvmkeyhi); + msr(SYS_IMP_APL_APSTS_EL12, info->apsts); + msr(ACTLR_EL2, info->actlr_el2); + msr(SYS_IMP_APL_ACTLR_EL12, info->actlr_el1); + msr(CNTHCTL_EL2, info->cnthctl); + msr(SYS_IMP_APL_SPRR_CONFIG_EL1, info->sprr_config); + msr(SYS_IMP_APL_GXF_CONFIG_EL1, info->gxf_config); + + if (gxf_enabled()) + gl2_call(hv_set_gxf_vbar, 0, 0, 0, 0); + + hv_arm_tick(); +} + +static void hv_enter_secondary(void *entry, u64 regs[4]) +{ + hv_enter_guest(regs[0], regs[1], regs[2], regs[3], entry); + + spin_lock(&bhl); + + hv_should_exit = true; + printf("HV: 
Exiting from CPU %d\n", smp_id()); + + __atomic_sub_fetch(&hv_cpus_in_guest, 1, __ATOMIC_ACQUIRE); + + spin_unlock(&bhl); +} + +void hv_start_secondary(int cpu, void *entry, u64 regs[4]) +{ + printf("HV: Initializing secondary %d\n", cpu); + iodev_console_flush(); + + mmu_init_secondary(cpu); + iodev_console_flush(); + smp_call4(cpu, hv_init_secondary, (u64)&hv_secondary_info, 0, 0, 0); + smp_wait(cpu); + iodev_console_flush(); + + printf("HV: Entering guest secondary %d at %p\n", cpu, entry); + hv_started_cpus[cpu] = true; + __atomic_add_fetch(&hv_cpus_in_guest, 1, __ATOMIC_ACQUIRE); + + iodev_console_flush(); + smp_call4(cpu, hv_enter_secondary, (u64)entry, (u64)regs, 0, 0); +} + +void hv_rendezvous(void) +{ + if (!__atomic_load_n(&hv_cpus_in_guest, __ATOMIC_ACQUIRE)) + return; + + /* IPI all CPUs. This might result in spurious IPIs to the guest... */ + for (int i = 0; i < MAX_CPUS; i++) { + if (i != smp_id() && hv_started_cpus[i]) { + smp_send_ipi(i); + } + } + while (__atomic_load_n(&hv_cpus_in_guest, __ATOMIC_ACQUIRE)) + ; +} + +bool hv_switch_cpu(int cpu) +{ + if (cpu > MAX_CPUS || cpu < 0 || !hv_started_cpus[cpu]) { + printf("HV: CPU #%d is inactive or invalid\n", cpu); + return false; + } + printf("HV: switching to CPU #%d\n", cpu); + hv_want_cpu = cpu; + hv_rendezvous(); + return true; +} + +void hv_pin_cpu(int cpu) +{ + hv_pinned_cpu = cpu; +} + +void hv_write_hcr(u64 val) +{ + if (gxf_enabled() && !in_gl12()) + gl2_call(hv_write_hcr, val, 0, 0, 0); + else + msr(HCR_EL2, val); +} + +u64 hv_get_spsr(void) +{ + if (in_gl12()) + return mrs(SYS_IMP_APL_SPSR_GL1); + else + return mrs(SPSR_EL2); +} + +void hv_set_spsr(u64 val) +{ + if (in_gl12()) + return msr(SYS_IMP_APL_SPSR_GL1, val); + else + return msr(SPSR_EL2, val); +} + +u64 hv_get_esr(void) +{ + if (in_gl12()) + return mrs(SYS_IMP_APL_ESR_GL1); + else + return mrs(ESR_EL2); +} + +u64 hv_get_far(void) +{ + if (in_gl12()) + return mrs(SYS_IMP_APL_FAR_GL1); + else + return mrs(FAR_EL2); +} + +u64 
hv_get_afsr1(void) +{ + if (in_gl12()) + return mrs(SYS_IMP_APL_AFSR1_GL1); + else + return mrs(AFSR1_EL2); +} + +u64 hv_get_elr(void) +{ + if (in_gl12()) + return mrs(SYS_IMP_APL_ELR_GL1); + else + return mrs(ELR_EL2); +} + +void hv_set_elr(u64 val) +{ + if (in_gl12()) + return msr(SYS_IMP_APL_ELR_GL1, val); + else + return msr(ELR_EL2, val); +} + +void hv_arm_tick(void) +{ + msr(CNTP_TVAL_EL0, hv_tick_interval); + msr(CNTP_CTL_EL0, CNTx_CTL_ENABLE); +} + +void hv_maybe_exit(void) +{ + if (hv_should_exit) { + hv_exit_guest(); + } +} + +void hv_tick(struct exc_info *ctx) +{ + hv_wdt_pet(); + iodev_handle_events(uartproxy_iodev); + if (iodev_can_read(uartproxy_iodev)) { + if (hv_pinned_cpu == -1 || hv_pinned_cpu == smp_id()) + hv_exc_proxy(ctx, START_HV, HV_USER_INTERRUPT, NULL); + } + hv_vuart_poll(); +} diff --git a/tools/src/hv.h b/tools/src/hv.h new file mode 100644 index 0000000..c91a444 --- /dev/null +++ b/tools/src/hv.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef HV_H +#define HV_H + +#include "exception.h" +#include "iodev.h" +#include "types.h" +#include "uartproxy.h" + +typedef bool(hv_hook_t)(struct exc_info *ctx, u64 addr, u64 *val, bool write, int width); + +#define MMIO_EVT_ATTR GENMASK(31, 24) +#define MMIO_EVT_CPU GENMASK(23, 16) +#define MMIO_EVT_SH GENMASK(15, 14) +#define MMIO_EVT_MULTI BIT(6) +#define MMIO_EVT_WRITE BIT(5) +#define MMIO_EVT_WIDTH GENMASK(4, 0) + +struct hv_evt_mmiotrace { + u32 flags; + u32 reserved; + u64 pc; + u64 addr; + u64 data; +}; + +struct hv_evt_irqtrace { + u32 flags; + u16 type; + u16 num; +}; + +#define HV_MAX_RW_SIZE 64 +#define HV_MAX_RW_WORDS (HV_MAX_RW_SIZE >> 3) + +struct hv_vm_proxy_hook_data { + u32 flags; + u32 id; + u64 addr; + u64 data[HV_MAX_RW_WORDS]; +}; + +typedef enum _hv_entry_type { + HV_HOOK_VM = 1, + HV_VTIMER, + HV_USER_INTERRUPT, + HV_WDT_BARK, + HV_CPU_SWITCH, + HV_VIRTIO, +} hv_entry_type; + +/* VM */ +void hv_pt_init(void); +int hv_map(u64 from, u64 to, u64 size, u64 incr); 
+int hv_unmap(u64 from, u64 size); +int hv_map_hw(u64 from, u64 to, u64 size); +int hv_map_sw(u64 from, u64 to, u64 size); +int hv_map_hook(u64 from, hv_hook_t *hook, u64 size); +u64 hv_translate(u64 addr, bool s1only, bool w, u64 *par_out); +u64 hv_pt_walk(u64 addr); +bool hv_handle_dabort(struct exc_info *ctx); +bool hv_pa_write(struct exc_info *ctx, u64 addr, u64 *val, int width); +bool hv_pa_read(struct exc_info *ctx, u64 addr, u64 *val, int width); +bool hv_pa_rw(struct exc_info *ctx, u64 addr, u64 *val, bool write, int width); + +/* AIC events through tracing the MMIO event address */ +bool hv_trace_irq(u32 type, u32 num, u32 count, u32 flags); + +/* Virtual peripherals */ +void hv_vuart_poll(void); +void hv_map_vuart(u64 base, int irq, iodev_id_t iodev); +struct virtio_conf; +void hv_map_virtio(u64 base, struct virtio_conf *conf); +void virtio_put_buffer(u64 base, int qu, u32 id, u32 len); + +/* Exceptions */ +void hv_exc_proxy(struct exc_info *ctx, uartproxy_boot_reason_t reason, u32 type, void *extra); +void hv_set_time_stealing(bool enabled, bool reset); + +/* WDT */ +void hv_wdt_pet(void); +void hv_wdt_suspend(void); +void hv_wdt_resume(void); +void hv_wdt_init(void); +void hv_wdt_start(int cpu); +void hv_wdt_stop(void); +void hv_wdt_breadcrumb(char c); + +/* Utilities */ +void hv_write_hcr(u64 val); +u64 hv_get_spsr(void); +void hv_set_spsr(u64 val); +u64 hv_get_esr(void); +u64 hv_get_far(void); +u64 hv_get_elr(void); +u64 hv_get_afsr1(void); +void hv_set_elr(u64 val); + +/* HV main */ +void hv_init(void); +void hv_start(void *entry, u64 regs[4]); +void hv_start_secondary(int cpu, void *entry, u64 regs[4]); +void hv_rendezvous(void); +bool hv_switch_cpu(int cpu); +void hv_pin_cpu(int cpu); +void hv_arm_tick(void); +void hv_rearm(void); +void hv_maybe_exit(void); +void hv_tick(struct exc_info *ctx); + +#endif diff --git a/tools/src/hv_aic.c b/tools/src/hv_aic.c new file mode 100644 index 0000000..cc5406a --- /dev/null +++ b/tools/src/hv_aic.c @@ -0,0 
+1,95 @@ +/* SPDX-License-Identifier: MIT */ + +#include "adt.h" +#include "aic.h" +#include "aic_regs.h" +#include "hv.h" +#include "uartproxy.h" +#include "utils.h" + +#define IRQTRACE_IRQ BIT(0) + +static u32 trace_hw_num[AIC_MAX_DIES][AIC_MAX_HW_NUM / 32]; + +static void emit_irqtrace(u16 die, u16 type, u16 num) +{ + struct hv_evt_irqtrace evt = { + .flags = IRQTRACE_IRQ, + .type = type, + .num = die * aic->max_irq + num, + }; + + hv_wdt_suspend(); + uartproxy_send_event(EVT_IRQTRACE, &evt, sizeof(evt)); + hv_wdt_resume(); +} + +static bool trace_aic_event(struct exc_info *ctx, u64 addr, u64 *val, bool write, int width) +{ + if (!hv_pa_rw(ctx, addr, val, write, width)) + return false; + + if (addr != (aic->base + aic->regs.event) || write || width != 2) { + return true; + } + + u16 die = FIELD_GET(AIC_EVENT_DIE, *val); + u16 type = FIELD_GET(AIC_EVENT_TYPE, *val); + u16 num = FIELD_GET(AIC_EVENT_NUM, *val); + + if (die > AIC_MAX_DIES) + return true; + + switch (type) { + case AIC_EVENT_TYPE_HW: + if (trace_hw_num[die][num / 32] & BIT(num & 31)) { + emit_irqtrace(die, type, num); + } + break; + default: + // ignore + break; + } + + return true; +} + +bool hv_trace_irq(u32 type, u32 num, u32 count, u32 flags) +{ + dprintf("HV: hv_trace_irq type: %u start: %u num: %u flags: 0x%x\n", type, num, count, flags); + if (type == AIC_EVENT_TYPE_HW) { + u32 die = num / aic->max_irq; + num %= AIC_MAX_HW_NUM; + if (die >= aic->max_irq || num >= AIC_MAX_HW_NUM || count > AIC_MAX_HW_NUM - num) { + printf("HV: invalid IRQ range: (%u, %u) for die %u\n", num, num + count, die); + return false; + } + for (u32 n = num; n < num + count; n++) { + switch (flags) { + case IRQTRACE_IRQ: + trace_hw_num[die][n / 32] |= BIT(n & 31); + break; + default: + trace_hw_num[die][n / 32] &= ~(BIT(n & 31)); + break; + } + } + } else { + printf("HV: not handling AIC event type: 0x%02x num: %u\n", type, num); + return false; + } + + if (!aic) { + printf("HV: AIC not initialized\n"); + return false; + 
} + + static bool hooked = false; + + if (aic && !hooked) { + hv_map_hook(aic->base, trace_aic_event, aic->regs.reg_size); + hooked = true; + } + + return true; +} diff --git a/tools/src/hv_asm.S b/tools/src/hv_asm.S new file mode 100644 index 0000000..634eb09 --- /dev/null +++ b/tools/src/hv_asm.S @@ -0,0 +1,196 @@ +/* spDx-License-Identifier: MIT */ + +#include "exception.h" + +.align 11 +.globl _hv_vectors_start +_hv_vectors_start: + + /* EL2 with SP_EL0 */ + mov x9, '0' + b cpu_reset + .align 7 + mov x9, '1' + b exc_unk + .align 7 + mov x9, '2' + b exc_unk + .align 7 + mov x9, '3' + b exc_unk + .align 7 + + /* EL2 with SP_EL2 */ + b _v_sp0_sync + .align 7 + b _v_sp0_irq + .align 7 + b _v_sp0_fiq + .align 7 + b _v_sp0_serr + .align 7 + + /* EL1/0 64-bit */ + b _v_hv_sync + .align 7 + b _v_hv_irq + .align 7 + b _v_hv_fiq + .align 7 + b _v_hv_serr + .align 7 + + /* EL1/0 32-bit */ + mov x9, 'p' + b exc_unk + .align 7 + mov x9, 'q' + b exc_unk + .align 7 + mov x9, 'r' + b exc_unk + .align 7 + mov x9, 's' + b exc_unk + .align 7 + +.globl _hv_entry +.type _hv_entry, @function +_hv_entry: + stp x28, x29, [sp, #-16]! + stp x26, x27, [sp, #-16]! + stp x24, x25, [sp, #-16]! + stp x22, x23, [sp, #-16]! + stp x20, x21, [sp, #-16]! + stp x18, x19, [sp, #-16]! + stp x16, x17, [sp, #-16]! + stp x14, x15, [sp, #-16]! + stp x12, x13, [sp, #-16]! + stp x10, x11, [sp, #-16]! + stp x8, x9, [sp, #-16]! + stp x6, x7, [sp, #-16]! + stp x4, x5, [sp, #-16]! + stp x2, x3, [sp, #-16]! + stp x0, x1, [sp, #-16]! 
+ + dsb sy + isb + + mov x0, sp + ret + +.globl _hv_return +.type _hv_return, @function +_hv_return: + ldp x0, x1, [sp], #16 + ldp x2, x3, [sp], #16 + ldp x4, x5, [sp], #16 + ldp x6, x7, [sp], #16 + ldp x8, x9, [sp], #16 + ldp x10, x11, [sp], #16 + ldp x12, x13, [sp], #16 + ldp x14, x15, [sp], #16 + ldp x16, x17, [sp], #16 + ldp x18, x19, [sp], #16 + ldp x20, x21, [sp], #16 + ldp x22, x23, [sp], #16 + ldp x24, x25, [sp], #16 + ldp x26, x27, [sp], #16 + ldp x28, x29, [sp], #16 + ldr x30, [sp], #16 + + add sp, sp, #(SIZEOF_EXC_INFO - 32 * 8) + + eret + +.globl _v_hv_sync +.type _v_hv_sync, @function +_v_hv_sync: + msr pan, #0 + sub sp, sp, #(SIZEOF_EXC_INFO - 32 * 8) + str x30, [sp, #-16]! + bl _hv_entry + bl hv_exc_sync + + b _hv_return + +.globl _v_hv_irq +.type _v_hv_irq, @function +_v_hv_irq: + msr pan, #0 + sub sp, sp, #(SIZEOF_EXC_INFO - 32 * 8) + str x30, [sp, #-16]! + bl _hv_entry + bl hv_exc_irq + + b _hv_return + +.globl _v_hv_fiq +.type _v_hv_fiq, @function +_v_hv_fiq: + msr pan, #0 + sub sp, sp, #(SIZEOF_EXC_INFO - 32 * 8) + str x30, [sp, #-16]! + bl _hv_entry + bl hv_exc_fiq + + b _hv_return + +.globl _v_hv_serr +.type _v_hv_serr, @function +_v_hv_serr: + msr pan, #0 + sub sp, sp, #(SIZEOF_EXC_INFO - 32 * 8) + str x30, [sp, #-16]! + bl _hv_entry + bl hv_exc_serr + + b _hv_return + +.extern hv_saved_sp + +.globl hv_enter_guest +.type hv_enter_guest, @function +hv_enter_guest: + stp x29, x30, [sp, #-16]! + stp x27, x28, [sp, #-16]! + stp x25, x26, [sp, #-16]! + stp x23, x24, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x19, x20, [sp, #-16]! + str x18, [sp, #-16]! 
+ + mrs x7, tpidr_el2 + ldr x6, =hv_saved_sp + mov x5, sp + str x5, [x6, x7, LSL #3] + + mrs x5, daif + mov x6, #5 + orr x5, x5, x6 // EL1h + msr spsr_el2, x5 + + msr elr_el2, x4 + mov x5, #0 + msr sp_el0, x5 + msr sp_el1, x5 + + eret + +.globl hv_exit_guest +.type hv_exit_guest, @function +hv_exit_guest: + mrs x7, tpidr_el2 + ldr x6, =hv_saved_sp + ldr x5, [x6, x7, LSL #3] + mov sp, x5 + + ldr x18, [sp], #16 + ldp x19, x20, [sp], #16 + ldp x21, x22, [sp], #16 + ldp x23, x24, [sp], #16 + ldp x25, x26, [sp], #16 + ldp x27, x28, [sp], #16 + ldp x29, x30, [sp], #16 + + ret diff --git a/tools/src/hv_exc.c b/tools/src/hv_exc.c new file mode 100644 index 0000000..fc750c1 --- /dev/null +++ b/tools/src/hv_exc.c @@ -0,0 +1,515 @@ +/* SPDX-License-Identifier: MIT */ + +#include "hv.h" +#include "assert.h" +#include "cpu_regs.h" +#include "exception.h" +#include "smp.h" +#include "string.h" +#include "uart.h" +#include "uartproxy.h" + +#define TIME_ACCOUNTING + +extern spinlock_t bhl; + +#define _SYSREG_ISS(_1, _2, op0, op1, CRn, CRm, op2) \ + (((op0) << ESR_ISS_MSR_OP0_SHIFT) | ((op1) << ESR_ISS_MSR_OP1_SHIFT) | \ + ((CRn) << ESR_ISS_MSR_CRn_SHIFT) | ((CRm) << ESR_ISS_MSR_CRm_SHIFT) | \ + ((op2) << ESR_ISS_MSR_OP2_SHIFT)) +#define SYSREG_ISS(...) 
_SYSREG_ISS(__VA_ARGS__) + +#define PERCPU(x) pcpu[mrs(TPIDR_EL2)].x + +struct hv_pcpu_data { + u32 ipi_queued; + u32 ipi_pending; + u32 pmc_pending; + u64 pmc_irq_mode; + u64 exc_entry_pmcr0_cnt; +} ALIGNED(64); + +struct hv_pcpu_data pcpu[MAX_CPUS]; + +void hv_exit_guest(void) __attribute__((noreturn)); + +static u64 stolen_time = 0; +static u64 exc_entry_time; + +extern u32 hv_cpus_in_guest; +extern int hv_pinned_cpu; +extern int hv_want_cpu; + +static bool time_stealing = true; + +static void _hv_exc_proxy(struct exc_info *ctx, uartproxy_boot_reason_t reason, u32 type, + void *extra) +{ + int from_el = FIELD_GET(SPSR_M, ctx->spsr) >> 2; + + hv_wdt_breadcrumb('P'); + + /* + * Get all the CPUs into the HV before running the proxy, to make sure they all exit to + * the guest with a consistent time offset. + */ + if (time_stealing) + hv_rendezvous(); + + u64 entry_time = mrs(CNTPCT_EL0); + + ctx->elr_phys = hv_translate(ctx->elr, false, false, NULL); + ctx->far_phys = hv_translate(ctx->far, false, false, NULL); + ctx->sp_phys = hv_translate(from_el == 0 ? 
ctx->sp[0] : ctx->sp[1], false, false, NULL); + ctx->extra = extra; + + struct uartproxy_msg_start start = { + .reason = reason, + .code = type, + .info = ctx, + }; + + hv_wdt_suspend(); + int ret = uartproxy_run(&start); + hv_wdt_resume(); + + switch (ret) { + case EXC_RET_HANDLED: + hv_wdt_breadcrumb('p'); + if (time_stealing) { + u64 lost = mrs(CNTPCT_EL0) - entry_time; + stolen_time += lost; + } + break; + case EXC_EXIT_GUEST: + hv_rendezvous(); + spin_unlock(&bhl); + hv_exit_guest(); // does not return + default: + printf("Guest exception not handled, rebooting.\n"); + print_regs(ctx->regs, 0); + flush_and_reboot(); // does not return + } +} + +static void hv_maybe_switch_cpu(struct exc_info *ctx, uartproxy_boot_reason_t reason, u32 type, + void *extra) +{ + while (hv_want_cpu != -1) { + if (hv_want_cpu == smp_id()) { + hv_want_cpu = -1; + _hv_exc_proxy(ctx, reason, type, extra); + } else { + // Unlock the HV so the target CPU can get into the proxy + spin_unlock(&bhl); + while (hv_want_cpu != -1) + sysop("dmb sy"); + spin_lock(&bhl); + } + } +} + +void hv_exc_proxy(struct exc_info *ctx, uartproxy_boot_reason_t reason, u32 type, void *extra) +{ + /* + * Wait while another CPU is pinned or being switched to. + * If a CPU switch is requested, handle it before actually handling the + * exception. We still tell the host the real reason code, though. + */ + while ((hv_pinned_cpu != -1 && hv_pinned_cpu != smp_id()) || hv_want_cpu != -1) { + if (hv_want_cpu == smp_id()) { + hv_want_cpu = -1; + _hv_exc_proxy(ctx, reason, type, extra); + } else { + // Unlock the HV so the target CPU can get into the proxy + spin_unlock(&bhl); + while ((hv_pinned_cpu != -1 && hv_pinned_cpu != smp_id()) || hv_want_cpu != -1) + sysop("dmb sy"); + spin_lock(&bhl); + } + } + + /* Handle the actual exception */ + _hv_exc_proxy(ctx, reason, type, extra); + + /* + * If as part of handling this exception we want to switch CPUs, handle it without returning + * to the guest. 
+ */ + hv_maybe_switch_cpu(ctx, reason, type, extra); +} + +void hv_set_time_stealing(bool enabled, bool reset) +{ + time_stealing = enabled; + if (reset) + stolen_time = 0; +} + +static void hv_update_fiq(void) +{ + u64 hcr = mrs(HCR_EL2); + bool fiq_pending = false; + + if (mrs(CNTP_CTL_EL02) == (CNTx_CTL_ISTATUS | CNTx_CTL_ENABLE)) { + fiq_pending = true; + reg_clr(SYS_IMP_APL_VM_TMR_FIQ_ENA_EL2, VM_TMR_FIQ_ENA_ENA_P); + } else { + reg_set(SYS_IMP_APL_VM_TMR_FIQ_ENA_EL2, VM_TMR_FIQ_ENA_ENA_P); + } + + if (mrs(CNTV_CTL_EL02) == (CNTx_CTL_ISTATUS | CNTx_CTL_ENABLE)) { + fiq_pending = true; + reg_clr(SYS_IMP_APL_VM_TMR_FIQ_ENA_EL2, VM_TMR_FIQ_ENA_ENA_V); + } else { + reg_set(SYS_IMP_APL_VM_TMR_FIQ_ENA_EL2, VM_TMR_FIQ_ENA_ENA_V); + } + + fiq_pending |= PERCPU(ipi_pending) || PERCPU(pmc_pending); + + sysop("isb"); + + if ((hcr & HCR_VF) && !fiq_pending) { + hv_write_hcr(hcr & ~HCR_VF); + } else if (!(hcr & HCR_VF) && fiq_pending) { + hv_write_hcr(hcr | HCR_VF); + } +} + +#define SYSREG_MAP(sr, to) \ + case SYSREG_ISS(sr): \ + if (is_read) \ + regs[rt] = _mrs(sr_tkn(to)); \ + else \ + _msr(sr_tkn(to), regs[rt]); \ + return true; + +#define SYSREG_PASS(sr) \ + case SYSREG_ISS(sr): \ + if (is_read) \ + regs[rt] = _mrs(sr_tkn(sr)); \ + else \ + _msr(sr_tkn(sr), regs[rt]); \ + return true; + +static bool hv_handle_msr(struct exc_info *ctx, u64 iss) +{ + u64 reg = iss & (ESR_ISS_MSR_OP0 | ESR_ISS_MSR_OP2 | ESR_ISS_MSR_OP1 | ESR_ISS_MSR_CRn | + ESR_ISS_MSR_CRm); + u64 rt = FIELD_GET(ESR_ISS_MSR_Rt, iss); + bool is_read = iss & ESR_ISS_MSR_DIR; + + u64 *regs = ctx->regs; + + regs[31] = 0; + + switch (reg) { + /* Some kind of timer */ + SYSREG_PASS(sys_reg(3, 7, 15, 1, 1)); + SYSREG_PASS(sys_reg(3, 7, 15, 3, 1)); + /* Spammy stuff seen on t600x p-cores */ + SYSREG_PASS(sys_reg(3, 2, 15, 12, 0)); + SYSREG_PASS(sys_reg(3, 2, 15, 13, 0)); + SYSREG_PASS(sys_reg(3, 2, 15, 14, 0)); + SYSREG_PASS(sys_reg(3, 2, 15, 15, 0)); + SYSREG_PASS(sys_reg(3, 1, 15, 7, 0)); + 
        SYSREG_PASS(sys_reg(3, 1, 15, 8, 0));
        SYSREG_PASS(sys_reg(3, 1, 15, 9, 0));
        SYSREG_PASS(sys_reg(3, 1, 15, 10, 0));
        /* Noisy traps */
        SYSREG_MAP(SYS_ACTLR_EL1, SYS_IMP_APL_ACTLR_EL12)
        SYSREG_PASS(SYS_IMP_APL_HID4)
        SYSREG_PASS(SYS_IMP_APL_EHID4)
        /* We don't normally trap these, but if we do, they're noisy */
        SYSREG_PASS(SYS_IMP_APL_GXF_STATUS_EL1)
        SYSREG_PASS(SYS_IMP_APL_CNTVCT_ALIAS_EL0)
        SYSREG_PASS(SYS_IMP_APL_TPIDR_GL1)
        SYSREG_MAP(SYS_IMP_APL_SPSR_GL1, SYS_IMP_APL_SPSR_GL12)
        SYSREG_MAP(SYS_IMP_APL_ASPSR_GL1, SYS_IMP_APL_ASPSR_GL12)
        SYSREG_MAP(SYS_IMP_APL_ELR_GL1, SYS_IMP_APL_ELR_GL12)
        SYSREG_MAP(SYS_IMP_APL_ESR_GL1, SYS_IMP_APL_ESR_GL12)
        SYSREG_MAP(SYS_IMP_APL_SPRR_PERM_EL1, SYS_IMP_APL_SPRR_PERM_EL12)
        SYSREG_MAP(SYS_IMP_APL_APCTL_EL1, SYS_IMP_APL_APCTL_EL12)
        SYSREG_MAP(SYS_IMP_APL_AMX_CTL_EL1, SYS_IMP_APL_AMX_CTL_EL12)
        /* FIXME: Might be wrong */
        SYSREG_PASS(sys_reg(3, 4, 15, 1, 3))
        /* pass through PMU handling */
        SYSREG_PASS(SYS_IMP_APL_PMCR1)
        SYSREG_PASS(SYS_IMP_APL_PMCR2)
        SYSREG_PASS(SYS_IMP_APL_PMCR3)
        SYSREG_PASS(SYS_IMP_APL_PMCR4)
        SYSREG_PASS(SYS_IMP_APL_PMESR0)
        SYSREG_PASS(SYS_IMP_APL_PMESR1)
        SYSREG_PASS(SYS_IMP_APL_PMSR)
#ifndef DEBUG_PMU_IRQ
        SYSREG_PASS(SYS_IMP_APL_PMC0)
#endif
        SYSREG_PASS(SYS_IMP_APL_PMC1)
        SYSREG_PASS(SYS_IMP_APL_PMC2)
        SYSREG_PASS(SYS_IMP_APL_PMC3)
        SYSREG_PASS(SYS_IMP_APL_PMC4)
        SYSREG_PASS(SYS_IMP_APL_PMC5)
        SYSREG_PASS(SYS_IMP_APL_PMC6)
        SYSREG_PASS(SYS_IMP_APL_PMC7)
        SYSREG_PASS(SYS_IMP_APL_PMC8)
        SYSREG_PASS(SYS_IMP_APL_PMC9)

        /* Outer Sharable TLB maintenance instructions */
        SYSREG_PASS(sys_reg(1, 0, 8, 1, 0)) // TLBI VMALLE1OS
        SYSREG_PASS(sys_reg(1, 0, 8, 1, 1)) // TLBI VAE1OS
        SYSREG_PASS(sys_reg(1, 0, 8, 1, 2)) // TLBI ASIDE1OS
        SYSREG_PASS(sys_reg(1, 0, 8, 5, 1)) // TLBI RVAE1OS

        /*
         * Handle this one here because m1n1/Linux (will) use it for explicit cpuidle.
         * We can pass it through; going into deep sleep doesn't break the HV since we
         * don't do any wfis that assume otherwise in m1n1. However, don't let macOS
         * disable WFI ret (when going into systemwide sleep), since that breaks things.
         */
        case SYSREG_ISS(SYS_IMP_APL_CYC_OVRD):
            if (is_read) {
                regs[rt] = mrs(SYS_IMP_APL_CYC_OVRD);
            } else {
                // Strip the WFI-return-disable bit before forwarding the write
                msr(SYS_IMP_APL_CYC_OVRD, regs[rt] & ~CYC_OVRD_DISABLE_WFI_RET);
                if (regs[rt] & CYC_OVRD_DISABLE_WFI_RET)
                    printf("msr(SYS_IMP_APL_CYC_OVRD, 0x%08lx): Filtered WFI RET disable\n",
                           regs[rt]);
            }
            return true;
        /* clang-format off */

        /* IPI handling */
        SYSREG_PASS(SYS_IMP_APL_IPI_CR_EL1)
        /* clang-format on */
        case SYSREG_ISS(SYS_IMP_APL_IPI_RR_LOCAL_EL1): {
            assert(!is_read);
            // Target CPU within the local cluster: low byte from the write,
            // cluster bits from our own MPIDR
            u64 mpidr = (regs[rt] & 0xff) | (mrs(MPIDR_EL1) & 0xffff00);
            msr(SYS_IMP_APL_IPI_RR_LOCAL_EL1, regs[rt]);
            for (int i = 0; i < MAX_CPUS; i++)
                if (mpidr == smp_get_mpidr(i))
                    pcpu[i].ipi_queued = true;
            return true;
        }
        case SYSREG_ISS(SYS_IMP_APL_IPI_RR_GLOBAL_EL1):
            assert(!is_read);
            // Cross-cluster IPI: CPU in bits [7:0], cluster in bits [23:16]
            u64 mpidr = (regs[rt] & 0xff) | ((regs[rt] & 0xff0000) >> 8);
            msr(SYS_IMP_APL_IPI_RR_GLOBAL_EL1, regs[rt]);
            for (int i = 0; i < MAX_CPUS; i++) {
                if (mpidr == (smp_get_mpidr(i) & 0xffff))
                    pcpu[i].ipi_queued = true;
            }
            return true;
        case SYSREG_ISS(SYS_IMP_APL_IPI_SR_EL1):
            if (is_read)
                regs[rt] = PERCPU(ipi_pending) ? IPI_SR_PENDING : 0;
            else if (regs[rt] & IPI_SR_PENDING)
                PERCPU(ipi_pending) = false;
            return true;
        /* shadow the interrupt mode and state flag */
        case SYSREG_ISS(SYS_IMP_APL_PMCR0):
            if (is_read) {
                u64 val = (mrs(SYS_IMP_APL_PMCR0) & ~PMCR0_IMODE_MASK) | PERCPU(pmc_irq_mode);
                regs[rt] =
                    val | (PERCPU(pmc_pending) ?
PMCR0_IACT : 0) | PERCPU(exc_entry_pmcr0_cnt); + } else { + PERCPU(pmc_pending) = !!(regs[rt] & PMCR0_IACT); + PERCPU(pmc_irq_mode) = regs[rt] & PMCR0_IMODE_MASK; + PERCPU(exc_entry_pmcr0_cnt) = regs[rt] & PMCR0_CNT_MASK; + msr(SYS_IMP_APL_PMCR0, regs[rt] & ~PERCPU(exc_entry_pmcr0_cnt)); + } + return true; +#ifdef DEBUG_PMU_IRQ + case SYSREG_ISS(SYS_IMP_APL_PMC0): + if (is_read) { + regs[rt] = mrs(SYS_IMP_APL_PMC0); + } else { + msr(SYS_IMP_APL_PMC0, regs[rt]); + printf("msr(SYS_IMP_APL_PMC0, 0x%04lx_%08lx)\n", regs[rt] >> 32, + regs[rt] & 0xFFFFFFFF); + } + return true; +#endif + /* M1RACLES reg, handle here due to silly 12.0 "mitigation" */ + case SYSREG_ISS(sys_reg(3, 5, 15, 10, 1)): + if (is_read) + regs[rt] = 0; + return true; + } + + return false; +} + +static void hv_exc_entry(struct exc_info *ctx) +{ + ctx->spsr = hv_get_spsr(); + ctx->elr = hv_get_elr(); + ctx->esr = hv_get_esr(); + ctx->far = hv_get_far(); + ctx->afsr1 = hv_get_afsr1(); + ctx->sp[0] = mrs(SP_EL0); + ctx->sp[1] = mrs(SP_EL1); + ctx->sp[2] = (u64)ctx; + ctx->cpu_id = smp_id(); + ctx->mpidr = mrs(MPIDR_EL1); + + sysop("isb"); + + // Enable SErrors in the HV, but only if not already pending + if (!(mrs(ISR_EL1) & 0x100)) + sysop("msr daifclr, 4"); + + __atomic_sub_fetch(&hv_cpus_in_guest, 1, __ATOMIC_ACQUIRE); + spin_lock(&bhl); + hv_wdt_breadcrumb('X'); + exc_entry_time = mrs(CNTPCT_EL0); + /* disable PMU counters in the hypervisor */ + u64 pmcr0 = mrs(SYS_IMP_APL_PMCR0); + PERCPU(exc_entry_pmcr0_cnt) = pmcr0 & PMCR0_CNT_MASK; + msr(SYS_IMP_APL_PMCR0, pmcr0 & ~PMCR0_CNT_MASK); +} + +static void hv_exc_exit(struct exc_info *ctx) +{ + hv_wdt_breadcrumb('x'); + hv_update_fiq(); + /* reenable PMU counters */ + reg_set(SYS_IMP_APL_PMCR0, PERCPU(exc_entry_pmcr0_cnt)); + msr(CNTVOFF_EL2, stolen_time); + spin_unlock(&bhl); + __atomic_add_fetch(&hv_cpus_in_guest, 1, __ATOMIC_ACQUIRE); + + hv_set_spsr(ctx->spsr); + hv_set_elr(ctx->elr); + msr(SP_EL0, ctx->sp[0]); + msr(SP_EL1, ctx->sp[1]); +} + +void 
hv_exc_sync(struct exc_info *ctx) +{ + hv_wdt_breadcrumb('S'); + hv_exc_entry(ctx); + bool handled = false; + u32 ec = FIELD_GET(ESR_EC, ctx->esr); + + switch (ec) { + case ESR_EC_DABORT_LOWER: + hv_wdt_breadcrumb('D'); + handled = hv_handle_dabort(ctx); + break; + case ESR_EC_MSR: + hv_wdt_breadcrumb('M'); + handled = hv_handle_msr(ctx, FIELD_GET(ESR_ISS, ctx->esr)); + break; + case ESR_EC_IMPDEF: + hv_wdt_breadcrumb('A'); + switch (FIELD_GET(ESR_ISS, ctx->esr)) { + case ESR_ISS_IMPDEF_MSR: + handled = hv_handle_msr(ctx, ctx->afsr1); + break; + } + break; + } + + if (handled) { + hv_wdt_breadcrumb('+'); + ctx->elr += 4; + } else { + hv_wdt_breadcrumb('-'); + // VM code can forward a nested SError exception here + if (FIELD_GET(ESR_EC, ctx->esr) == ESR_EC_SERROR) + hv_exc_proxy(ctx, START_EXCEPTION_LOWER, EXC_SERROR, NULL); + else + hv_exc_proxy(ctx, START_EXCEPTION_LOWER, EXC_SYNC, NULL); + } + + hv_exc_exit(ctx); + hv_wdt_breadcrumb('s'); +} + +void hv_exc_irq(struct exc_info *ctx) +{ + hv_wdt_breadcrumb('I'); + hv_exc_entry(ctx); + hv_exc_proxy(ctx, START_EXCEPTION_LOWER, EXC_IRQ, NULL); + hv_exc_exit(ctx); + hv_wdt_breadcrumb('i'); +} + +void hv_exc_fiq(struct exc_info *ctx) +{ + bool tick = false; + + hv_maybe_exit(); + + if (mrs(CNTP_CTL_EL0) == (CNTx_CTL_ISTATUS | CNTx_CTL_ENABLE)) { + msr(CNTP_CTL_EL0, CNTx_CTL_ISTATUS | CNTx_CTL_IMASK | CNTx_CTL_ENABLE); + tick = true; + } + + int interruptible_cpu = hv_pinned_cpu; + if (interruptible_cpu == -1) + interruptible_cpu = 0; + + if (smp_id() != interruptible_cpu && !(mrs(ISR_EL1) & 0x40) && hv_want_cpu == -1) { + // Non-interruptible CPU and it was just a timer tick (or spurious), so just update FIQs + hv_update_fiq(); + hv_arm_tick(); + return; + } + + // Slow (single threaded) path + hv_wdt_breadcrumb('F'); + hv_exc_entry(ctx); + + // Only poll for HV events in the interruptible CPU + if (tick) { + if (smp_id() == interruptible_cpu) + hv_tick(ctx); + hv_arm_tick(); + } + + if (mrs(CNTV_CTL_EL0) == 
(CNTx_CTL_ISTATUS | CNTx_CTL_ENABLE)) { + msr(CNTV_CTL_EL0, CNTx_CTL_ISTATUS | CNTx_CTL_IMASK | CNTx_CTL_ENABLE); + hv_exc_proxy(ctx, START_HV, HV_VTIMER, NULL); + } + + u64 reg = mrs(SYS_IMP_APL_PMCR0); + if ((reg & (PMCR0_IMODE_MASK | PMCR0_IACT)) == (PMCR0_IMODE_FIQ | PMCR0_IACT)) { +#ifdef DEBUG_PMU_IRQ + printf("[FIQ] PMC IRQ, masking and delivering to the guest\n"); +#endif + reg_clr(SYS_IMP_APL_PMCR0, PMCR0_IACT | PMCR0_IMODE_MASK); + PERCPU(pmc_pending) = true; + } + + reg = mrs(SYS_IMP_APL_UPMCR0); + if ((reg & UPMCR0_IMODE_MASK) == UPMCR0_IMODE_FIQ && (mrs(SYS_IMP_APL_UPMSR) & UPMSR_IACT)) { + printf("[FIQ] UPMC IRQ, masking"); + reg_clr(SYS_IMP_APL_UPMCR0, UPMCR0_IMODE_MASK); + hv_exc_proxy(ctx, START_EXCEPTION_LOWER, EXC_FIQ, NULL); + } + + if (mrs(SYS_IMP_APL_IPI_SR_EL1) & IPI_SR_PENDING) { + if (PERCPU(ipi_queued)) { + PERCPU(ipi_pending) = true; + PERCPU(ipi_queued) = false; + } + msr(SYS_IMP_APL_IPI_SR_EL1, IPI_SR_PENDING); + sysop("isb"); + } + + hv_maybe_switch_cpu(ctx, START_HV, HV_CPU_SWITCH, NULL); + + // Handles guest timers + hv_exc_exit(ctx); + hv_wdt_breadcrumb('f'); +} + +void hv_exc_serr(struct exc_info *ctx) +{ + hv_wdt_breadcrumb('E'); + hv_exc_entry(ctx); + hv_exc_proxy(ctx, START_EXCEPTION_LOWER, EXC_SERROR, NULL); + hv_exc_exit(ctx); + hv_wdt_breadcrumb('e'); +} diff --git a/tools/src/hv_virtio.c b/tools/src/hv_virtio.c new file mode 100644 index 0000000..abe4582 --- /dev/null +++ b/tools/src/hv_virtio.c @@ -0,0 +1,308 @@ +/* SPDX-License-Identifier: MIT */ + +#include "hv.h" +#include "aic.h" +#include "iodev.h" +#include "malloc.h" + +#define MAGIC 0x000 +#define VERSION 0x004 +#define DEVID 0x008 +#define VENDID 0x00c +#define FEAT_HOST 0x010 +#define FEAT_HOST_SEL 0x014 +#define FEAT_GUEST 0x020 +#define FEAT_GUEST_SEL 0x024 + +#define QSEL 0x030 +#define QMAX 0x034 +#define QSIZE 0x038 +#define QREADY 0x044 +#define QNOTIFY 0x050 + +#define QDESC 0x080 +#define QGUESTAREA 0x090 +#define QHOSTAREA 0x0a0 + +#define IRQ_STATUS 
0x060 +#define USED_BUFFER BIT(0) +#define CFG_CHANGE BIT(1) +#define IRQ_ACK 0x064 +#define DEV_STATUS 0x070 + +#define DESC_NEXT BIT(0) +#define DESC_WRITE BIT(1) + +struct availring { + u16 flags; + u16 idx; + u16 ring[]; +}; + +struct usedring { + u16 flags; + u16 idx; + struct { + u32 id; + u32 len; + } ring[]; +}; + +struct desc { + u64 addr; + u32 len; + u16 flags; + u16 id; +}; + +struct virtio_q { + struct virtio_dev *host; + int idx; + u32 max; + u32 size; + bool ready; + struct desc *desc; + + u16 avail_seen; + struct availring *avail; + struct usedring *used; + + u64 area_regs[(QHOSTAREA + 8 - QDESC) / 4]; +}; + +struct virtio_conf { + s32 irq; + u32 devid; + u64 feats; + u32 num_qus; + void *config; + u64 config_len; + u8 verbose; +} PACKED; + +struct virtio_dev { + struct virtio_dev *next; + u64 base; + int irq; + int num_qus; + u32 devid; + u64 feats; + uint8_t *config; + size_t config_len; + bool verbose; + + u32 feat_host_sel; + u32 status; + u32 irqstatus; + + struct virtio_q *currq; + struct virtio_q qs[]; +}; + +static struct virtio_dev *devlist; + +static void notify_avail(struct exc_info *ctx, struct virtio_q *q, int idx) +{ + struct desc *d = &q->desc[idx]; + struct { + u64 devbase; + u16 qu; + u16 idx; + u32 pad; + u64 descbase; + } PACKED info = { + q->host->base, q->idx, idx, 0, (u64)q->desc, + }; + + if (q->host->verbose) + printf("virtio @ %lx: available %s buffer at %lx, size %x, flags %x\n", q->host->base, + (d->flags & DESC_WRITE) ? 
"device" : "driver", d->addr, d->len, d->flags); + + hv_exc_proxy(ctx, START_HV, HV_VIRTIO, &info); +} + +static void notify_buffers(struct exc_info *ctx, struct virtio_dev *dev, u32 qidx) +{ + struct virtio_q *q = &dev->qs[qidx]; + struct availring *avail = q->avail; + + if (qidx >= (u32)dev->num_qus) + return; + + for (; avail->idx != q->avail_seen; q->avail_seen++) + notify_avail(ctx, q, avail->ring[q->avail_seen % q->size]); +} + +static struct virtio_dev *dev_by_base(u64 base) +{ + struct virtio_dev *dev; + + for (dev = devlist; dev; dev = dev->next) + if (dev->base == base) + break; + + return dev; +} + +void virtio_put_buffer(u64 base, int qu, u32 id, u32 len) +{ + struct virtio_dev *dev = dev_by_base(base); + struct virtio_q *q; + struct usedring *used; + + if (!dev) { + printf("virtio_put_buffer: no device at %lx\n", base); + return; + } + + q = &dev->qs[qu]; + used = q->used; + + used->ring[used->idx % q->size].id = id; + used->ring[used->idx % q->size].len = len; + used->idx++; + + dev->irqstatus |= USED_BUFFER; + aic_set_sw(dev->irq, true); +} + +static bool handle_virtio(struct exc_info *ctx, u64 addr, u64 *val, bool write, int width) +{ + struct virtio_dev *dev; + struct virtio_q *q; + UNUSED(ctx); + UNUSED(width); + + dev = dev_by_base(addr & ~0xfff); + if (!dev) + return false; + + addr &= 0xfff; + + if (write) { + if (dev->verbose) + printf("virtio @ %lx: W 0x%lx <- 0x%lx (%d)\n", dev->base, addr, *val, width); + + switch (addr) { + case DEV_STATUS: + dev->status = *val; + break; + case QSEL: + if (((int)*val) <= dev->num_qus) + dev->currq = &dev->qs[*val]; + else + dev->currq = NULL; + break; + case QNOTIFY: + notify_buffers(ctx, dev, *val); + break; + case FEAT_HOST_SEL: + dev->feat_host_sel = *val; + break; + case IRQ_ACK: + dev->irqstatus &= ~(*val); + if (!dev->irqstatus) + aic_set_sw(dev->irq, false); + break; + } + + q = dev->currq; + if (!q) + return true; + + switch (addr) { + case QSIZE: + q->size = *val; + break; + case QREADY: + 
q->ready = *val & 1; + break; + case QDESC ... QHOSTAREA + 4: + addr -= QDESC; + addr /= 4; + q->area_regs[addr] = *val; + + q->desc = (void *)(q->area_regs[1] << 32 | q->area_regs[0]); + q->avail = (void *)(q->area_regs[5] << 32 | q->area_regs[4]); + q->used = (void *)(q->area_regs[9] << 32 | q->area_regs[8]); + break; + } + } else { + switch (addr) { + case MAGIC: + *val = 0x74726976; + break; + case VERSION: + *val = 2; + break; + case DEVID: + *val = dev->devid; + break; + case DEV_STATUS: + *val = dev->status; + break; + case FEAT_HOST: + *val = dev->feats >> (dev->feat_host_sel * 32); + break; + case IRQ_STATUS: + *val = dev->irqstatus; + break; + case 0x100 ... 0x1000: + if (addr - 0x100 < dev->config_len) + *val = dev->config[addr - 0x100]; + else + *val = 0; + break; + default: + q = dev->currq; + if (!q) { + *val = 0; + goto rdone; + } + } + + switch (addr) { + case QMAX: + *val = q->max; + break; + case QREADY: + *val = q->ready; + break; + } + rdone: + if (dev->verbose) + printf("virtio @ %lx: R 0x%lx -> 0x%lx (%d)\n", dev->base, addr, *val, width); + }; + + return true; +} + +void hv_map_virtio(u64 base, struct virtio_conf *conf) +{ + struct virtio_dev *dev; + int i; + + dev = malloc(sizeof(*dev) + sizeof(struct virtio_q) * conf->num_qus); + dev->num_qus = conf->num_qus; + dev->base = base; + dev->irq = conf->irq; + dev->devid = conf->devid; + dev->currq = NULL; + dev->feats = conf->feats | BIT(32); /* always set: VIRTIO_F_VERSION_1 */ + dev->config = conf->config; + dev->config_len = conf->config_len; + dev->verbose = conf->verbose; + for (i = 0; i < dev->num_qus; i++) { + dev->qs[i].host = dev; + dev->qs[i].idx = i; + dev->qs[i].max = 256; + dev->qs[i].avail_seen = 0; + dev->qs[i].ready = 0; + } + + if (devlist) + dev->next = devlist; + devlist = dev; + + hv_map_hook(base, handle_virtio, 0x1000); +} diff --git a/tools/src/hv_vm.c b/tools/src/hv_vm.c new file mode 100644 index 0000000..671ef70 --- /dev/null +++ b/tools/src/hv_vm.c @@ -0,0 +1,1278 @@ 
+/* SPDX-License-Identifier: MIT */ + +// #define DEBUG + +#include "hv.h" +#include "assert.h" +#include "cpu_regs.h" +#include "exception.h" +#include "iodev.h" +#include "malloc.h" +#include "smp.h" +#include "string.h" +#include "types.h" +#include "uartproxy.h" +#include "utils.h" + +#define PAGE_SIZE 0x4000 +#define CACHE_LINE_SIZE 64 +#define CACHE_LINE_LOG2 6 + +#define PTE_ACCESS BIT(10) +#define PTE_SH_NS (0b11L << 8) +#define PTE_S2AP_RW (0b11L << 6) +#define PTE_MEMATTR_UNCHANGED (0b1111L << 2) + +#define PTE_ATTRIBUTES (PTE_ACCESS | PTE_SH_NS | PTE_S2AP_RW | PTE_MEMATTR_UNCHANGED) + +#define PTE_LOWER_ATTRIBUTES GENMASK(13, 2) + +#define PTE_VALID BIT(0) +#define PTE_TYPE BIT(1) +#define PTE_BLOCK 0 +#define PTE_TABLE 1 +#define PTE_PAGE 1 + +#define VADDR_L4_INDEX_BITS 12 +#define VADDR_L3_INDEX_BITS 11 +#define VADDR_L2_INDEX_BITS 11 +#define VADDR_L1_INDEX_BITS 8 + +#define VADDR_L4_OFFSET_BITS 2 +#define VADDR_L3_OFFSET_BITS 14 +#define VADDR_L2_OFFSET_BITS 25 +#define VADDR_L1_OFFSET_BITS 36 + +#define VADDR_L2_ALIGN_MASK GENMASK(VADDR_L2_OFFSET_BITS - 1, VADDR_L3_OFFSET_BITS) +#define VADDR_L3_ALIGN_MASK GENMASK(VADDR_L3_OFFSET_BITS - 1, VADDR_L4_OFFSET_BITS) +#define PTE_TARGET_MASK GENMASK(49, VADDR_L3_OFFSET_BITS) +#define PTE_TARGET_MASK_L4 GENMASK(49, VADDR_L4_OFFSET_BITS) + +#define ENTRIES_PER_L1_TABLE BIT(VADDR_L1_INDEX_BITS) +#define ENTRIES_PER_L2_TABLE BIT(VADDR_L2_INDEX_BITS) +#define ENTRIES_PER_L3_TABLE BIT(VADDR_L3_INDEX_BITS) +#define ENTRIES_PER_L4_TABLE BIT(VADDR_L4_INDEX_BITS) + +#define SPTE_TRACE_READ BIT(63) +#define SPTE_TRACE_WRITE BIT(62) +#define SPTE_TRACE_UNBUF BIT(61) +#define SPTE_TYPE GENMASK(52, 50) +#define SPTE_MAP 0 +#define SPTE_HOOK 1 +#define SPTE_PROXY_HOOK_R 2 +#define SPTE_PROXY_HOOK_W 3 +#define SPTE_PROXY_HOOK_RW 4 + +#define IS_HW(pte) ((pte) && pte & PTE_VALID) +#define IS_SW(pte) ((pte) && !(pte & PTE_VALID)) + +#define L1_IS_TABLE(pte) ((pte) && FIELD_GET(PTE_TYPE, pte) == PTE_TABLE) + +#define 
L2_IS_TABLE(pte) ((pte) && FIELD_GET(PTE_TYPE, pte) == PTE_TABLE) +#define L2_IS_NOT_TABLE(pte) ((pte) && !L2_IS_TABLE(pte)) +#define L2_IS_HW_BLOCK(pte) (IS_HW(pte) && FIELD_GET(PTE_TYPE, pte) == PTE_BLOCK) +#define L2_IS_SW_BLOCK(pte) \ + (IS_SW(pte) && FIELD_GET(PTE_TYPE, pte) == PTE_BLOCK && FIELD_GET(SPTE_TYPE, pte) == SPTE_MAP) +#define L3_IS_TABLE(pte) (IS_SW(pte) && FIELD_GET(PTE_TYPE, pte) == PTE_TABLE) +#define L3_IS_NOT_TABLE(pte) ((pte) && !L3_IS_TABLE(pte)) +#define L3_IS_HW_BLOCK(pte) (IS_HW(pte) && FIELD_GET(PTE_TYPE, pte) == PTE_PAGE) +#define L3_IS_SW_BLOCK(pte) \ + (IS_SW(pte) && FIELD_GET(PTE_TYPE, pte) == PTE_BLOCK && FIELD_GET(SPTE_TYPE, pte) == SPTE_MAP) + +uint64_t vaddr_bits; + +/* + * We use 16KB page tables for stage 2 translation, and a 64GB (36-bit) guest + * PA size, which results in the following virtual address space: + * + * [L2 index] [L3 index] [page offset] + * 11 bits 11 bits 14 bits + * + * 32MB L2 mappings look like this: + * [L2 index] [page offset] + * 11 bits 25 bits + * + * We implement sub-page granularity mappings for software MMIO hooks, which behave + * as an additional page table level used only by software. This works like this: + * + * [L2 index] [L3 index] [L4 index] [Word offset] + * 11 bits 11 bits 12 bits 2 bits + * + * Thus, L4 sub-page tables are twice the size. + * + * We use invalid mappings (PTE_VALID == 0) to represent mmiotrace descriptors, but + * otherwise the page table format is the same. The PTE_TYPE bit is weird, as 0 means + * block but 1 means both table (at L<3) and page (at L3). For mmiotrace, this is + * pushed to L4. + * + * On SoCs with more than 36-bit PA sizes there is an additional L1 translation level, + * but no blocks or software mappings are allowed there. This level can have up to 8 bits + * at this time. 
+ */ + +static u64 *hv_Ltop; + +void hv_pt_init(void) +{ + const uint64_t pa_bits[] = {32, 36, 40, 42, 44, 48, 52}; + uint64_t pa_range = FIELD_GET(ID_AA64MMFR0_PARange, mrs(ID_AA64MMFR0_EL1)); + + vaddr_bits = min(44, pa_bits[pa_range]); + + printf("HV: Initializing for %ld-bit PA range\n", vaddr_bits); + + hv_Ltop = memalign(PAGE_SIZE, sizeof(u64) * ENTRIES_PER_L2_TABLE); + memset(hv_Ltop, 0, sizeof(u64) * ENTRIES_PER_L2_TABLE); + + u64 sl0 = vaddr_bits > 36 ? 2 : 1; + + msr(VTCR_EL2, FIELD_PREP(VTCR_PS, pa_range) | // Full PA size + FIELD_PREP(VTCR_TG0, 2) | // 16KB page size + FIELD_PREP(VTCR_SH0, 3) | // PTWs Inner Sharable + FIELD_PREP(VTCR_ORGN0, 1) | // PTWs Cacheable + FIELD_PREP(VTCR_IRGN0, 1) | // PTWs Cacheable + FIELD_PREP(VTCR_SL0, sl0) | // Start level + FIELD_PREP(VTCR_T0SZ, 64 - vaddr_bits)); // Translation region == PA + + msr(VTTBR_EL2, hv_Ltop); +} + +static u64 *hv_pt_get_l2(u64 from) +{ + u64 l1idx = from >> VADDR_L1_OFFSET_BITS; + + if (vaddr_bits <= 36) { + assert(l1idx == 0); + return hv_Ltop; + } + + u64 l1d = hv_Ltop[l1idx]; + + if (L1_IS_TABLE(l1d)) + return (u64 *)(l1d & PTE_TARGET_MASK); + + u64 *l2 = (u64 *)memalign(PAGE_SIZE, ENTRIES_PER_L2_TABLE * sizeof(u64)); + memset64(l2, 0, ENTRIES_PER_L2_TABLE * sizeof(u64)); + + l1d = ((u64)l2) | FIELD_PREP(PTE_TYPE, PTE_TABLE) | PTE_VALID; + hv_Ltop[l1idx] = l1d; + return l2; +} + +static void hv_pt_free_l3(u64 *l3) +{ + if (!l3) + return; + + for (u64 idx = 0; idx < ENTRIES_PER_L3_TABLE; idx++) + if (IS_SW(l3[idx]) && FIELD_GET(PTE_TYPE, l3[idx]) == PTE_TABLE) + free((void *)(l3[idx] & PTE_TARGET_MASK)); + free(l3); +} + +static void hv_pt_map_l2(u64 from, u64 to, u64 size, u64 incr) +{ + assert((from & MASK(VADDR_L2_OFFSET_BITS)) == 0); + assert(IS_SW(to) || (to & PTE_TARGET_MASK & MASK(VADDR_L2_OFFSET_BITS)) == 0); + assert((size & MASK(VADDR_L2_OFFSET_BITS)) == 0); + + to |= FIELD_PREP(PTE_TYPE, PTE_BLOCK); + + for (; size; size -= BIT(VADDR_L2_OFFSET_BITS)) { + u64 *l2 = 
hv_pt_get_l2(from); + u64 idx = (from >> VADDR_L2_OFFSET_BITS) & MASK(VADDR_L2_INDEX_BITS); + + if (L2_IS_TABLE(l2[idx])) + hv_pt_free_l3((u64 *)(l2[idx] & PTE_TARGET_MASK)); + + l2[idx] = to; + from += BIT(VADDR_L2_OFFSET_BITS); + to += incr * BIT(VADDR_L2_OFFSET_BITS); + } +} + +static u64 *hv_pt_get_l3(u64 from) +{ + u64 *l2 = hv_pt_get_l2(from); + u64 l2idx = (from >> VADDR_L2_OFFSET_BITS) & MASK(VADDR_L2_INDEX_BITS); + u64 l2d = l2[l2idx]; + + if (L2_IS_TABLE(l2d)) + return (u64 *)(l2d & PTE_TARGET_MASK); + + u64 *l3 = (u64 *)memalign(PAGE_SIZE, ENTRIES_PER_L3_TABLE * sizeof(u64)); + if (l2d) { + u64 incr = 0; + u64 l3d = l2d; + if (IS_HW(l2d)) { + l3d &= ~PTE_TYPE; + l3d |= FIELD_PREP(PTE_TYPE, PTE_PAGE); + incr = BIT(VADDR_L3_OFFSET_BITS); + } else if (IS_SW(l2d) && FIELD_GET(SPTE_TYPE, l3d) == SPTE_MAP) { + incr = BIT(VADDR_L3_OFFSET_BITS); + } + for (u64 idx = 0; idx < ENTRIES_PER_L3_TABLE; idx++, l3d += incr) + l3[idx] = l3d; + } else { + memset64(l3, 0, ENTRIES_PER_L3_TABLE * sizeof(u64)); + } + + l2d = ((u64)l3) | FIELD_PREP(PTE_TYPE, PTE_TABLE) | PTE_VALID; + l2[l2idx] = l2d; + return l3; +} + +static void hv_pt_map_l3(u64 from, u64 to, u64 size, u64 incr) +{ + assert((from & MASK(VADDR_L3_OFFSET_BITS)) == 0); + assert(IS_SW(to) || (to & PTE_TARGET_MASK & MASK(VADDR_L3_OFFSET_BITS)) == 0); + assert((size & MASK(VADDR_L3_OFFSET_BITS)) == 0); + + if (IS_HW(to)) + to |= FIELD_PREP(PTE_TYPE, PTE_PAGE); + else + to |= FIELD_PREP(PTE_TYPE, PTE_BLOCK); + + for (; size; size -= BIT(VADDR_L3_OFFSET_BITS)) { + u64 idx = (from >> VADDR_L3_OFFSET_BITS) & MASK(VADDR_L3_INDEX_BITS); + u64 *l3 = hv_pt_get_l3(from); + + if (L3_IS_TABLE(l3[idx])) + free((void *)(l3[idx] & PTE_TARGET_MASK)); + + l3[idx] = to; + from += BIT(VADDR_L3_OFFSET_BITS); + to += incr * BIT(VADDR_L3_OFFSET_BITS); + } +} + +static u64 *hv_pt_get_l4(u64 from) +{ + u64 *l3 = hv_pt_get_l3(from); + u64 l3idx = (from >> VADDR_L3_OFFSET_BITS) & MASK(VADDR_L3_INDEX_BITS); + u64 l3d = l3[l3idx]; + + if 
(L3_IS_TABLE(l3d)) { + return (u64 *)(l3d & PTE_TARGET_MASK); + } + + if (IS_HW(l3d)) { + assert(FIELD_GET(PTE_TYPE, l3d) == PTE_PAGE); + l3d &= PTE_TARGET_MASK; + l3d |= FIELD_PREP(PTE_TYPE, PTE_BLOCK) | FIELD_PREP(SPTE_TYPE, SPTE_MAP); + } + + u64 *l4 = (u64 *)memalign(PAGE_SIZE, ENTRIES_PER_L4_TABLE * sizeof(u64)); + if (l3d) { + u64 incr = 0; + u64 l4d = l3d; + l4d &= ~PTE_TYPE; + l4d |= FIELD_PREP(PTE_TYPE, PTE_PAGE); + if (FIELD_GET(SPTE_TYPE, l4d) == SPTE_MAP) + incr = BIT(VADDR_L4_OFFSET_BITS); + for (u64 idx = 0; idx < ENTRIES_PER_L4_TABLE; idx++, l4d += incr) + l4[idx] = l4d; + } else { + memset64(l4, 0, ENTRIES_PER_L4_TABLE * sizeof(u64)); + } + + l3d = ((u64)l4) | FIELD_PREP(PTE_TYPE, PTE_TABLE); + l3[l3idx] = l3d; + return l4; +} + +static void hv_pt_map_l4(u64 from, u64 to, u64 size, u64 incr) +{ + assert((from & MASK(VADDR_L4_OFFSET_BITS)) == 0); + assert((size & MASK(VADDR_L4_OFFSET_BITS)) == 0); + + assert(!IS_HW(to)); + + if (IS_SW(to)) + to |= FIELD_PREP(PTE_TYPE, PTE_PAGE); + + for (; size; size -= BIT(VADDR_L4_OFFSET_BITS)) { + u64 idx = (from >> VADDR_L4_OFFSET_BITS) & MASK(VADDR_L4_INDEX_BITS); + u64 *l4 = hv_pt_get_l4(from); + + l4[idx] = to; + from += BIT(VADDR_L4_OFFSET_BITS); + to += incr * BIT(VADDR_L4_OFFSET_BITS); + } +} + +int hv_map(u64 from, u64 to, u64 size, u64 incr) +{ + u64 chunk; + bool hw = IS_HW(to); + + if (from & MASK(VADDR_L4_OFFSET_BITS) || size & MASK(VADDR_L4_OFFSET_BITS)) + return -1; + + if (hw && (from & MASK(VADDR_L3_OFFSET_BITS) || size & MASK(VADDR_L3_OFFSET_BITS))) { + printf("HV: cannot use L4 pages with HW mappings (0x%lx -> 0x%lx)\n", from, to); + return -1; + } + + // L4 mappings to boundary + chunk = min(size, ALIGN_UP(from, BIT(VADDR_L3_OFFSET_BITS)) - from); + if (chunk) { + assert(!hw); + hv_pt_map_l4(from, to, chunk, incr); + from += chunk; + to += incr * chunk; + size -= chunk; + } + + // L3 mappings to boundary + chunk = ALIGN_DOWN(min(size, ALIGN_UP(from, BIT(VADDR_L2_OFFSET_BITS)) - from), + 
BIT(VADDR_L3_OFFSET_BITS)); + if (chunk) { + hv_pt_map_l3(from, to, chunk, incr); + from += chunk; + to += incr * chunk; + size -= chunk; + } + + // L2 mappings + chunk = ALIGN_DOWN(size, BIT(VADDR_L2_OFFSET_BITS)); + if (chunk && (!hw || (to & VADDR_L2_ALIGN_MASK) == 0)) { + hv_pt_map_l2(from, to, chunk, incr); + from += chunk; + to += incr * chunk; + size -= chunk; + } + + // L3 mappings to end + chunk = ALIGN_DOWN(size, BIT(VADDR_L3_OFFSET_BITS)); + if (chunk) { + hv_pt_map_l3(from, to, chunk, incr); + from += chunk; + to += incr * chunk; + size -= chunk; + } + + // L4 mappings to end + if (size) { + assert(!hw); + hv_pt_map_l4(from, to, size, incr); + } + + return 0; +} + +int hv_unmap(u64 from, u64 size) +{ + return hv_map(from, 0, size, 0); +} + +int hv_map_hw(u64 from, u64 to, u64 size) +{ + return hv_map(from, to | PTE_ATTRIBUTES | PTE_VALID, size, 1); +} + +int hv_map_sw(u64 from, u64 to, u64 size) +{ + return hv_map(from, to | FIELD_PREP(SPTE_TYPE, SPTE_MAP), size, 1); +} + +int hv_map_hook(u64 from, hv_hook_t *hook, u64 size) +{ + return hv_map(from, ((u64)hook) | FIELD_PREP(SPTE_TYPE, SPTE_HOOK), size, 0); +} + +u64 hv_translate(u64 addr, bool s1, bool w, u64 *par_out) +{ + if (!(mrs(SCTLR_EL12) & SCTLR_M)) + return addr; // MMU off + + u64 el = FIELD_GET(SPSR_M, hv_get_spsr()) >> 2; + u64 save = mrs(PAR_EL1); + + if (w) { + if (s1) { + if (el == 0) + asm("at s1e0w, %0" : : "r"(addr)); + else + asm("at s1e1w, %0" : : "r"(addr)); + } else { + if (el == 0) + asm("at s12e0w, %0" : : "r"(addr)); + else + asm("at s12e1w, %0" : : "r"(addr)); + } + } else { + if (s1) { + if (el == 0) + asm("at s1e0r, %0" : : "r"(addr)); + else + asm("at s1e1r, %0" : : "r"(addr)); + } else { + if (el == 0) + asm("at s12e0r, %0" : : "r"(addr)); + else + asm("at s12e1r, %0" : : "r"(addr)); + } + } + + u64 par = mrs(PAR_EL1); + if (par_out) + *par_out = par; + msr(PAR_EL1, save); + + if (par & PAR_F) { + dprintf("hv_translate(0x%lx, %d, %d): fault 0x%lx\n", addr, s1, w, par); + 
return 0; // fault + } else { + return (par & PAR_PA) | (addr & 0xfff); + } +} + +u64 hv_pt_walk(u64 addr) +{ + dprintf("hv_pt_walk(0x%lx)\n", addr); + + u64 idx = addr >> VADDR_L1_OFFSET_BITS; + u64 *l2; + if (vaddr_bits > 36) { + assert(idx < ENTRIES_PER_L1_TABLE); + + u64 l1d = hv_Ltop[idx]; + + dprintf(" l1d = 0x%lx\n", l2d); + + if (!L1_IS_TABLE(l1d)) { + dprintf(" result: 0x%lx\n", l1d); + return l1d; + } + l2 = (u64 *)(l1d & PTE_TARGET_MASK); + } else { + assert(idx == 0); + l2 = hv_Ltop; + } + + idx = (addr >> VADDR_L2_OFFSET_BITS) & MASK(VADDR_L2_INDEX_BITS); + u64 l2d = l2[idx]; + dprintf(" l2d = 0x%lx\n", l2d); + + if (!L2_IS_TABLE(l2d)) { + if (L2_IS_SW_BLOCK(l2d)) + l2d += addr & (VADDR_L2_ALIGN_MASK | VADDR_L3_ALIGN_MASK); + if (L2_IS_HW_BLOCK(l2d)) { + l2d &= ~PTE_LOWER_ATTRIBUTES; + l2d |= addr & (VADDR_L2_ALIGN_MASK | VADDR_L3_ALIGN_MASK); + } + + dprintf(" result: 0x%lx\n", l2d); + return l2d; + } + + idx = (addr >> VADDR_L3_OFFSET_BITS) & MASK(VADDR_L3_INDEX_BITS); + u64 l3d = ((u64 *)(l2d & PTE_TARGET_MASK))[idx]; + dprintf(" l3d = 0x%lx\n", l3d); + + if (!L3_IS_TABLE(l3d)) { + if (L3_IS_SW_BLOCK(l3d)) + l3d += addr & VADDR_L3_ALIGN_MASK; + if (L3_IS_HW_BLOCK(l3d)) { + l3d &= ~PTE_LOWER_ATTRIBUTES; + l3d |= addr & VADDR_L3_ALIGN_MASK; + } + dprintf(" result: 0x%lx\n", l3d); + return l3d; + } + + idx = (addr >> VADDR_L4_OFFSET_BITS) & MASK(VADDR_L4_INDEX_BITS); + dprintf(" l4 idx = 0x%lx\n", idx); + u64 l4d = ((u64 *)(l3d & PTE_TARGET_MASK))[idx]; + dprintf(" l4d = 0x%lx\n", l4d); + return l4d; +} + +#define CHECK_RN \ + if (Rn == 31) \ + return false +#define DECODE_OK \ + if (!val) \ + return true + +#define EXT(n, b) (((s32)(((u32)(n)) << (32 - (b)))) >> (32 - (b))) + +union simd_reg { + u64 d[2]; + u32 s[4]; + u16 h[8]; + u8 b[16]; +}; + +static bool emulate_load(struct exc_info *ctx, u32 insn, u64 *val, u64 *width, u64 *vaddr) +{ + u64 Rt = insn & 0x1f; + u64 Rn = (insn >> 5) & 0x1f; + u64 imm12 = EXT((insn >> 10) & 0xfff, 12); + u64 imm9 = 
EXT((insn >> 12) & 0x1ff, 9); + u64 imm7 = EXT((insn >> 15) & 0x7f, 7); + u64 *regs = ctx->regs; + + union simd_reg simd[32]; + + *width = insn >> 30; + + if (val) + dprintf("emulate_load(%p, 0x%08x, 0x%08lx, %ld\n", regs, insn, *val, *width); + + if ((insn & 0x3fe00400) == 0x38400400) { + // LDRx (immediate) Pre/Post-index + CHECK_RN; + DECODE_OK; + regs[Rn] += imm9; + regs[Rt] = *val; + } else if ((insn & 0x3fc00000) == 0x39400000) { + // LDRx (immediate) Unsigned offset + DECODE_OK; + regs[Rt] = *val; + } else if ((insn & 0x3fa00400) == 0x38800400) { + // LDRSx (immediate) Pre/Post-index + CHECK_RN; + DECODE_OK; + regs[Rn] += imm9; + regs[Rt] = (s64)EXT(*val, 8 << *width); + if (insn & (1 << 22)) + regs[Rt] &= 0xffffffff; + } else if ((insn & 0x3fa00000) == 0x39800000) { + // LDRSx (immediate) Unsigned offset + DECODE_OK; + regs[Rt] = (s64)EXT(*val, 8 << *width); + if (insn & (1 << 22)) + regs[Rt] &= 0xffffffff; + } else if ((insn & 0x3fe04c00) == 0x38604800) { + // LDRx (register) + DECODE_OK; + regs[Rt] = *val; + } else if ((insn & 0x3fa04c00) == 0x38a04800) { + // LDRSx (register) + DECODE_OK; + regs[Rt] = (s64)EXT(*val, 8 << *width); + if (insn & (1 << 22)) + regs[Rt] &= 0xffffffff; + } else if ((insn & 0x3fe00c00) == 0x38400000) { + // LDURx (unscaled) + DECODE_OK; + regs[Rt] = *val; + } else if ((insn & 0x3fa00c00) == 0x38a00000) { + // LDURSx (unscaled) + DECODE_OK; + regs[Rt] = (s64)EXT(*val, (8 << *width)); + if (insn & (1 << 22)) + regs[Rt] &= 0xffffffff; + } else if ((insn & 0xffc00000) == 0x29400000) { + // LDP (Signed offset, 32-bit) + *width = 3; + *vaddr = regs[Rn] + (imm7 * 4); + DECODE_OK; + u64 Rt2 = (insn >> 10) & 0x1f; + regs[Rt] = val[0] & 0xffffffff; + regs[Rt2] = val[0] >> 32; + } else if ((insn & 0xffc00000) == 0xa9400000) { + // LDP (Signed offset, 64-bit) + *width = 4; + *vaddr = regs[Rn] + (imm7 * 8); + DECODE_OK; + u64 Rt2 = (insn >> 10) & 0x1f; + regs[Rt] = val[0]; + regs[Rt2] = val[1]; + } else if ((insn & 0xfec00000) == 0xa8c00000) 
{ + // LDP (pre/post-increment, 64-bit) + *width = 4; + *vaddr = regs[Rn] + ((insn & BIT(24)) ? (imm7 * 8) : 0); + DECODE_OK; + regs[Rn] += imm7 * 8; + u64 Rt2 = (insn >> 10) & 0x1f; + regs[Rt] = val[0]; + regs[Rt2] = val[1]; + } else if ((insn & 0xfec00000) == 0xac400000) { + // LD[N]P (SIMD&FP, 128-bit) Signed offset + *width = 5; + *vaddr = regs[Rn] + (imm7 * 16); + DECODE_OK; + u64 Rt2 = (insn >> 10) & 0x1f; + get_simd_state(simd); + simd[Rt].d[0] = val[0]; + simd[Rt].d[1] = val[1]; + simd[Rt2].d[0] = val[2]; + simd[Rt2].d[1] = val[3]; + put_simd_state(simd); + } else if ((insn & 0x3fc00000) == 0x3d400000) { + // LDR (immediate, SIMD&FP) Unsigned offset + *vaddr = regs[Rn] + (imm12 << *width); + DECODE_OK; + get_simd_state(simd); + simd[Rt].d[0] = val[0]; + simd[Rt].d[1] = 0; + put_simd_state(simd); + } else if ((insn & 0xffc00000) == 0x3dc00000) { + // LDR (immediate, SIMD&FP) Unsigned offset, 128-bit + *width = 4; + *vaddr = regs[Rn] + (imm12 << *width); + DECODE_OK; + get_simd_state(simd); + simd[Rt].d[0] = val[0]; + simd[Rt].d[1] = val[1]; + put_simd_state(simd); + } else if ((insn & 0xffe00c00) == 0x3cc00000) { + // LDURx (unscaled, SIMD&FP, 128-bit) + *width = 4; + *vaddr = regs[Rn] + (imm9 << *width); + DECODE_OK; + get_simd_state(simd); + simd[Rt].d[0] = val[0]; + simd[Rt].d[1] = val[1]; + put_simd_state(simd); + } else if ((insn & 0x3fe00400) == 0x3c400400) { + // LDR (immediate, SIMD&FP) Pre/Post-index + CHECK_RN; + DECODE_OK; + regs[Rn] += imm9; + get_simd_state(simd); + simd[Rt].d[0] = val[0]; + simd[Rt].d[1] = 0; + put_simd_state(simd); + } else if ((insn & 0xffe00400) == 0x3cc00400) { + // LDR (immediate, SIMD&FP) Pre/Post-index, 128-bit + *width = 4; + CHECK_RN; + DECODE_OK; + regs[Rn] += imm9; + get_simd_state(simd); + simd[Rt].d[0] = val[0]; + simd[Rt].d[1] = val[1]; + put_simd_state(simd); + } else if ((insn & 0x3fe04c00) == 0x3c604800) { + // LDR (register, SIMD&FP) + DECODE_OK; + get_simd_state(simd); + simd[Rt].d[0] = val[0]; + 
                simd[Rt].d[1] = 0;
                put_simd_state(simd);
            } else if ((insn & 0xffe04c00) == 0x3ce04800) {
                // LDR (register, SIMD&FP), 128-bit
                *width = 4;
                DECODE_OK;
                get_simd_state(simd);
                simd[Rt].d[0] = val[0];
                simd[Rt].d[1] = val[1];
                put_simd_state(simd);
            } else if ((insn & 0xbffffc00) == 0x0d408400) {
                // LD1 (single structure) No offset, 64-bit
                *width = 3;
                DECODE_OK;
                u64 index = (insn >> 30) & 1;
                get_simd_state(simd);
                simd[Rt].d[index] = val[0];
                put_simd_state(simd);
            } else if ((insn & 0x3ffffc00) == 0x08dffc00) {
                // LDAR*
                DECODE_OK;
                regs[Rt] = *val;
            } else {
                return false;
            }
            return true;
        }

/*
 * Decode a faulting AArch64 store instruction and produce the data to be
 * written. Fills *val with the store payload, *width with log2(bytes), and
 * (for addressed forms that recompute it) *vaddr with the effective address.
 * Register writeback (pre/post-index) is applied to ctx->regs as a side
 * effect. Returns false if the instruction is not a supported store.
 */
static bool emulate_store(struct exc_info *ctx, u32 insn, u64 *val, u64 *width, u64 *vaddr)
{
    u64 Rt = insn & 0x1f;
    u64 Rn = (insn >> 5) & 0x1f;
    u64 imm9 = EXT((insn >> 12) & 0x1ff, 9);
    u64 imm7 = EXT((insn >> 15) & 0x7f, 7);
    u64 *regs = ctx->regs;

    union simd_reg simd[32];

    // Size field (bits 31:30) is log2 of the access width for the basic forms
    *width = insn >> 30;

    dprintf("emulate_store(%p, 0x%08x, ..., %ld) = ", regs, insn, *width);

    // x31 reads as zero in these encodings (XZR); clear the slot so reads of
    // regs[31] below yield 0
    regs[31] = 0;

    u64 mask = 0xffffffffffffffffUL;

    if (*width < 3)
        mask = (1UL << (8 << *width)) - 1;

    if ((insn & 0x3fe00400) == 0x38000400) {
        // STRx (immediate) Pre/Post-index
        CHECK_RN;
        regs[Rn] += imm9;
        *val = regs[Rt] & mask;
    } else if ((insn & 0x3fc00000) == 0x39000000) {
        // STRx (immediate) Unsigned offset
        *val = regs[Rt] & mask;
    } else if ((insn & 0x3fe04c00) == 0x38204800) {
        // STRx (register)
        *val = regs[Rt] & mask;
    } else if ((insn & 0xfec00000) == 0x28000000) {
        // ST[N]P (Signed offset, 32-bit) — both halves packed into one 64-bit write
        *vaddr = regs[Rn] + (imm7 * 4);
        u64 Rt2 = (insn >> 10) & 0x1f;
        val[0] = (regs[Rt] & 0xffffffff) | (regs[Rt2] << 32);
        *width = 3;
    } else if ((insn & 0xfec00000) == 0xa8000000) {
        // ST[N]P (Signed offset, 64-bit)
        *vaddr = regs[Rn] + (imm7 * 8);
        u64 Rt2 = (insn >> 10) & 0x1f;
        val[0] = regs[Rt];
        val[1] = regs[Rt2];
        *width = 4;
    } else if ((insn & 0xfec00000) == 0xa8800000) {
        // ST[N]P (immediate, 64-bit, pre/post-index); bit 24 selects pre-index
        CHECK_RN;
        *vaddr = regs[Rn] + ((insn & BIT(24)) ? (imm7 * 8) : 0);
        regs[Rn] += (imm7 * 8);
        u64 Rt2 = (insn >> 10) & 0x1f;
        val[0] = regs[Rt];
        val[1] = regs[Rt2];
        *width = 4;
    } else if ((insn & 0x3fc00000) == 0x3d000000) {
        // STR (immediate, SIMD&FP) Unsigned offset, 8..64-bit
        get_simd_state(simd);
        *val = simd[Rt].d[0];
    } else if ((insn & 0x3fe04c00) == 0x3c204800) {
        // STR (register, SIMD&FP) 8..64-bit
        get_simd_state(simd);
        *val = simd[Rt].d[0];
    } else if ((insn & 0xffe04c00) == 0x3ca04800) {
        // STR (register, SIMD&FP) 128-bit
        get_simd_state(simd);
        val[0] = simd[Rt].d[0];
        val[1] = simd[Rt].d[1];
        *width = 4;
    } else if ((insn & 0xffc00000) == 0x3d800000) {
        // STR (immediate, SIMD&FP) Unsigned offset, 128-bit
        get_simd_state(simd);
        val[0] = simd[Rt].d[0];
        val[1] = simd[Rt].d[1];
        *width = 4;
    } else if ((insn & 0xffe00000) == 0xbc000000) {
        // STUR (immediate, SIMD&FP) 32-bit
        get_simd_state(simd);
        val[0] = simd[Rt].s[0];
        *width = 2;
    } else if ((insn & 0xffe00000) == 0xfc000000) {
        // STUR (immediate, SIMD&FP) 64-bit
        get_simd_state(simd);
        val[0] = simd[Rt].d[0];
        *width = 3;
    } else if ((insn & 0xffe00000) == 0x3c800000) {
        // STUR (immediate, SIMD&FP) 128-bit
        get_simd_state(simd);
        val[0] = simd[Rt].d[0];
        val[1] = simd[Rt].d[1];
        *width = 4;
    } else if ((insn & 0xffc00000) == 0x2d000000) {
        // STP (SIMD&FP, 32-bit) Signed offset — two 32-bit lanes packed into one
        // 64-bit write (the old "128-bit" comment here was wrong)
        *vaddr = regs[Rn] + (imm7 * 4);
        u64 Rt2 = (insn >> 10) & 0x1f;
        get_simd_state(simd);
        val[0] = simd[Rt].s[0] | (((u64)simd[Rt2].s[0]) << 32);
        *width = 3;
    } else if ((insn & 0xffc00000) == 0xad000000) {
        // STP (SIMD&FP, 128-bit) Signed offset
        *vaddr = regs[Rn] + (imm7 * 16);
        u64 Rt2 = (insn >> 10) & 0x1f;
        get_simd_state(simd);
        val[0] = simd[Rt].d[0];
        val[1] = simd[Rt].d[1];
        val[2] = simd[Rt2].d[0];
        val[3] = simd[Rt2].d[1];
        *width = 5;
    } else if ((insn & 0x3fe00c00) == 0x38000000) {
        // STURx (unscaled)
        *val = regs[Rt] & mask;
    } else if ((insn & 0xffffffe0) == 0xd50b7420) {
        // DC ZVA — emulated as a cache-line-sized store of zeros
        *vaddr = regs[Rt];
        memset(val, 0, CACHE_LINE_SIZE);
        *width = CACHE_LINE_LOG2;
    } else if ((insn & 0x3ffffc00) == 0x089ffc00) {
        // STLR* (store-release)
        *val = regs[Rt] & mask;
    } else {
        return false;
    }

    dprintf("0x%lx\n", *width);

    return true;
}

/*
 * Send one MMIO trace event per 64-bit chunk of the access over the proxy
 * channel. Accesses wider than 8 bytes are flagged MMIO_EVT_MULTI and split.
 * The HV watchdog is suspended around the send so slow hosts don't trip it.
 */
static void emit_mmiotrace(u64 pc, u64 addr, u64 *data, u64 width, u64 flags, bool sync)
{
    struct hv_evt_mmiotrace evt = {
        .flags = flags | FIELD_PREP(MMIO_EVT_CPU, smp_id()),
        .pc = pc,
        .addr = addr,
    };

    if (width > 3)
        evt.flags |= FIELD_PREP(MMIO_EVT_WIDTH, 3) | MMIO_EVT_MULTI;
    else
        evt.flags |= FIELD_PREP(MMIO_EVT_WIDTH, width);

    for (int i = 0; i < (1 << width); i += 8) {
        evt.data = *data++;
        hv_wdt_suspend();
        uartproxy_send_event(EVT_MMIOTRACE, &evt, sizeof(evt));
        if (sync) {
            iodev_flush(uartproxy_iodev);
        }
        hv_wdt_resume();
        evt.addr += 8;
    }
}

/*
 * Perform a physical write of (1 << width) bytes, with SError/exception
 * guarding: a fault during the access is caught and reflected into ctx.
 * Widths 4..6 are issued as a sequence of 64-bit writes.
 */
bool hv_pa_write(struct exc_info *ctx, u64 addr, u64 *val, int width)
{
    sysop("dsb sy");
    exc_count = 0;
    exc_guard = GUARD_SKIP;
    switch (width) {
        case 0:
            write8(addr, val[0]);
            break;
        case 1:
            write16(addr, val[0]);
            break;
        case 2:
            write32(addr, val[0]);
            break;
        case 3:
            write64(addr, val[0]);
            break;
        case 4:
        case 5:
        case 6:
            for (u64 i = 0; i < (1UL << (width - 3)); i++)
                write64(addr + 8 * i, val[i]);
            break;
        default:
            dprintf("HV: unsupported write width %ld\n", width);
            exc_guard = GUARD_OFF;
            return false;
    }
    // Make sure we catch SErrors here
    sysop("dsb sy");
    sysop("isb");
    exc_guard = GUARD_OFF;
    if (exc_count) {
        printf("HV: Exception during write to 0x%lx (width: %d)\n", addr, width);
        // Update exception info with "real" cause
        ctx->esr = hv_get_esr();
        ctx->far = hv_get_far();
        return false;
    }
    return true;
}

/*
 * Perform a physical read of (1 << width) bytes with the same exception
 * guarding as hv_pa_write(). Widths 4 and 5 fill multiple val[] slots.
 */
bool hv_pa_read(struct exc_info *ctx, u64 addr, u64 *val, int width)
{
    sysop("dsb sy");
    exc_count = 0;
    exc_guard = GUARD_SKIP;
    switch (width) {
        case 0:
            val[0]
= read8(addr); + break; + case 1: + val[0] = read16(addr); + break; + case 2: + val[0] = read32(addr); + break; + case 3: + val[0] = read64(addr); + break; + case 4: + val[0] = read64(addr); + val[1] = read64(addr + 8); + break; + case 5: + val[0] = read64(addr); + val[1] = read64(addr + 8); + val[2] = read64(addr + 16); + val[3] = read64(addr + 24); + break; + default: + dprintf("HV: unsupported read width %ld\n", width); + exc_guard = GUARD_OFF; + return false; + } + sysop("dsb sy"); + exc_guard = GUARD_OFF; + if (exc_count) { + dprintf("HV: Exception during read from 0x%lx (width: %d)\n", addr, width); + // Update exception info with "real" cause + ctx->esr = hv_get_esr(); + ctx->far = hv_get_far(); + return false; + } + return true; +} + +bool hv_pa_rw(struct exc_info *ctx, u64 addr, u64 *val, bool write, int width) +{ + if (write) + return hv_pa_write(ctx, addr, val, width); + else + return hv_pa_read(ctx, addr, val, width); +} + +static bool hv_emulate_rw_aligned(struct exc_info *ctx, u64 pte, u64 vaddr, u64 ipa, u64 *val, + bool is_write, u64 width, u64 elr, u64 par) +{ + assert(pte); + assert(((ipa & 0x3fff) + (1 << width)) <= 0x4000); + + u64 target = pte & PTE_TARGET_MASK_L4; + u64 paddr = target | (vaddr & MASK(VADDR_L4_OFFSET_BITS)); + u64 flags = FIELD_PREP(MMIO_EVT_ATTR, FIELD_GET(PAR_ATTR, par)) | + FIELD_PREP(MMIO_EVT_SH, FIELD_GET(PAR_SH, par)); + + // For split ops, treat hardware mapped pages as SPTE_MAP + if (IS_HW(pte)) + pte = target | FIELD_PREP(PTE_TYPE, PTE_BLOCK) | FIELD_PREP(SPTE_TYPE, SPTE_MAP); + + if (is_write) { + // Write + hv_wdt_breadcrumb('3'); + + if (pte & SPTE_TRACE_WRITE) + emit_mmiotrace(elr, ipa, val, width, flags | MMIO_EVT_WRITE, pte & SPTE_TRACE_UNBUF); + + hv_wdt_breadcrumb('4'); + + switch (FIELD_GET(SPTE_TYPE, pte)) { + case SPTE_PROXY_HOOK_R: + paddr = ipa; + // fallthrough + case SPTE_MAP: + hv_wdt_breadcrumb('5'); + dprintf("HV: SPTE_MAP[W] @0x%lx 0x%lx -> 0x%lx (w=%d): 0x%lx\n", elr, ipa, paddr, + 1 << width, 
val[0]); + if (!hv_pa_write(ctx, paddr, val, width)) + return false; + break; + case SPTE_HOOK: { + hv_wdt_breadcrumb('6'); + hv_hook_t *hook = (hv_hook_t *)target; + if (!hook(ctx, ipa, val, true, width)) + return false; + dprintf("HV: SPTE_HOOK[W] @0x%lx 0x%lx -> 0x%lx (w=%d) @%p: 0x%lx\n", elr, far, ipa, + 1 << width, hook, wval); + break; + } + case SPTE_PROXY_HOOK_RW: + case SPTE_PROXY_HOOK_W: { + hv_wdt_breadcrumb('7'); + struct hv_vm_proxy_hook_data hook = { + .flags = FIELD_PREP(MMIO_EVT_WIDTH, width) | MMIO_EVT_WRITE | flags, + .id = FIELD_GET(PTE_TARGET_MASK_L4, pte), + .addr = ipa, + .data = {0}, + }; + memcpy(hook.data, val, 1 << width); + hv_exc_proxy(ctx, START_HV, HV_HOOK_VM, &hook); + break; + } + default: + printf("HV: invalid SPTE 0x%016lx for IPA 0x%lx\n", pte, ipa); + return false; + } + } else { + hv_wdt_breadcrumb('3'); + switch (FIELD_GET(SPTE_TYPE, pte)) { + case SPTE_PROXY_HOOK_W: + paddr = ipa; + // fallthrough + case SPTE_MAP: + hv_wdt_breadcrumb('4'); + if (!hv_pa_read(ctx, paddr, val, width)) + return false; + dprintf("HV: SPTE_MAP[R] @0x%lx 0x%lx -> 0x%lx (w=%d): 0x%lx\n", elr, ipa, paddr, + 1 << width, val[0]); + break; + case SPTE_HOOK: { + hv_wdt_breadcrumb('5'); + hv_hook_t *hook = (hv_hook_t *)target; + if (!hook(ctx, ipa, val, false, width)) + return false; + dprintf("HV: SPTE_HOOK[R] @0x%lx 0x%lx -> 0x%lx (w=%d) @%p: 0x%lx\n", elr, far, ipa, + 1 << width, hook, val); + break; + } + case SPTE_PROXY_HOOK_RW: + case SPTE_PROXY_HOOK_R: { + hv_wdt_breadcrumb('6'); + struct hv_vm_proxy_hook_data hook = { + .flags = FIELD_PREP(MMIO_EVT_WIDTH, width) | flags, + .id = FIELD_GET(PTE_TARGET_MASK_L4, pte), + .addr = ipa, + }; + hv_exc_proxy(ctx, START_HV, HV_HOOK_VM, &hook); + memcpy(val, hook.data, 1 << width); + break; + } + default: + printf("HV: invalid SPTE 0x%016lx for IPA 0x%lx\n", pte, ipa); + return false; + } + + hv_wdt_breadcrumb('7'); + if (pte & SPTE_TRACE_READ) + emit_mmiotrace(elr, ipa, val, width, flags, pte & 
SPTE_TRACE_UNBUF); + } + + hv_wdt_breadcrumb('*'); + + return true; +} + +static bool hv_emulate_rw(struct exc_info *ctx, u64 pte, u64 vaddr, u64 ipa, u8 *val, bool is_write, + u64 bytes, u64 elr, u64 par) +{ + u64 aval[HV_MAX_RW_WORDS]; + + bool advance = (IS_HW(pte) || (IS_SW(pte) && FIELD_GET(SPTE_TYPE, pte) == SPTE_MAP)) ? 1 : 0; + u64 off = 0; + u64 width; + + bool first = true; + + u64 left = bytes; + u64 paddr = (pte & PTE_TARGET_MASK_L4) | (vaddr & MASK(VADDR_L4_OFFSET_BITS)); + + while (left > 0) { + memset(aval, 0, sizeof(aval)); + + if (left >= 64 && (ipa & 63) == 0) + width = 6; + else if (left >= 32 && (ipa & 31) == 0) + width = 5; + else if (left >= 16 && (ipa & 15) == 0) + width = 4; + else if (left >= 8 && (ipa & 7) == 0) + width = 3; + else if (left >= 4 && (ipa & 3) == 0) + width = 2; + else if (left >= 2 && (ipa & 1) == 0) + width = 1; + else + width = 0; + + u64 chunk = 1 << width; + + /* + if (chunk != bytes) + printf("HV: Splitting unaligned %ld-byte %s: %ld bytes @ 0x%lx\n", bytes, + is_write ? 
"write" : "read", chunk, vaddr);
        */

        if (is_write)
            memcpy(aval, val + off, chunk);

        if (advance)
            pte = (paddr & PTE_TARGET_MASK_L4) | (pte & ~PTE_TARGET_MASK_L4);

        if (!hv_emulate_rw_aligned(ctx, pte, vaddr, ipa, aval, is_write, width, elr, par)) {
            if (!first)
                printf("HV: WARNING: Failed to emulate split op but part of it did commit!\n");
            return false;
        }

        if (!is_write)
            memcpy(val + off, aval, chunk);

        left -= chunk;
        off += chunk;

        ipa += chunk;
        vaddr += chunk;
        if (advance)
            paddr += chunk;

        first = 0;
    }

    return true;
}

/*
 * Top-level guest data-abort handler: translate the faulting VA, look up the
 * shadow PTE, decode the faulting instruction, perform the emulated access
 * (including the page-straddling case), and for loads run the decoder a
 * second time to write the fetched data back into the guest registers.
 * Returns false if the abort could not be handled.
 */
bool hv_handle_dabort(struct exc_info *ctx)
{
    hv_wdt_breadcrumb('0');
    u64 esr = hv_get_esr();
    bool is_write = esr & ESR_ISS_DABORT_WnR;

    u64 far = hv_get_far();
    u64 par;
    u64 ipa = hv_translate(far, true, is_write, &par);

    dprintf("hv_handle_abort(): stage 1 0x%0lx -> 0x%lx\n", far, ipa);

    if (!ipa) {
        printf("HV: stage 1 translation failed at VA 0x%0lx\n", far);
        return false;
    }

    if (ipa >= BIT(vaddr_bits)) {
        printf("hv_handle_abort(): IPA out of bounds: 0x%0lx -> 0x%lx\n", far, ipa);
        return false;
    }

    u64 pte = hv_pt_walk(ipa);

    if (!pte) {
        printf("HV: Unmapped IPA 0x%lx\n", ipa);
        return false;
    }

    if (IS_HW(pte)) {
        printf("HV: Data abort on mapped page (0x%lx -> 0x%lx)\n", far, pte);
        // Try again, this is usually a race
        ctx->elr -= 4;
        return true;
    }

    hv_wdt_breadcrumb('1');

    assert(IS_SW(pte));

    // Fetch the faulting instruction through a stage-1 translation of ELR
    u64 elr = ctx->elr;
    u64 elr_pa = hv_translate(elr, false, false, NULL);
    if (!elr_pa) {
        printf("HV: Failed to fetch instruction for data abort at 0x%lx\n", elr);
        return false;
    }

    u32 insn = read32(elr_pa);
    u64 width;

    hv_wdt_breadcrumb('2');

    u64 vaddr = far;

    u8 val[HV_MAX_RW_SIZE] ALIGNED(HV_MAX_RW_SIZE);
    memset(val, 0, sizeof(val));

    if (is_write) {
        hv_wdt_breadcrumb('W');

        if (!emulate_store(ctx, insn, (u64 *)val, &width, &vaddr)) {
            printf("HV: store not emulated: 0x%08x at 0x%lx\n", insn, ipa);
            return false;
        }
    } else {
        hv_wdt_breadcrumb('R');

        // First pass with val=NULL: decode only, to learn width and vaddr
        if (!emulate_load(ctx, insn, NULL, &width, &vaddr)) {
            printf("HV: load not emulated: 0x%08x at 0x%lx\n", insn, ipa);
            return false;
        }
    }

    /*
       Check for HW page-straddling conditions
       Right now we only support the case where the page boundary is exactly halfway
       through the read/write.
    */
    u64 bytes = 1 << width;
    u64 vaddrp0 = vaddr & ~MASK(VADDR_L3_OFFSET_BITS);
    u64 vaddrp1 = (vaddr + bytes - 1) & ~MASK(VADDR_L3_OFFSET_BITS);

    if (vaddrp0 == vaddrp1) {
        // Easy case, no page straddle
        if (far != vaddr) {
            printf("HV: faulted at 0x%lx, but expecting 0x%lx\n", far, vaddr);
            return false;
        }

        if (!hv_emulate_rw(ctx, pte, vaddr, ipa, val, is_write, bytes, elr, par))
            return false;
    } else {
        // Oops, we're straddling a page boundary
        // Treat it as two separate loads or stores

        assert(bytes > 1);
        hv_wdt_breadcrumb('s');

        u64 off = vaddrp1 - vaddr;

        // The fault may be reported for either half; translate the other one
        u64 vaddr2;
        const char *other;
        if (far == vaddr) {
            other = "upper";
            vaddr2 = vaddrp1;
        } else {
            if (far != vaddrp1) {
                printf("HV: faulted at 0x%lx, but expecting 0x%lx\n", far, vaddrp1);
                return false;
            }
            other = "lower";
            vaddr2 = vaddr;
        }

        u64 par2;
        u64 ipa2 = hv_translate(vaddr2, true, esr & ESR_ISS_DABORT_WnR, &par2);
        if (!ipa2) {
            printf("HV: %s half stage 1 translation failed at VA 0x%0lx\n", other, vaddr2);
            return false;
        }
        if (ipa2 >= BIT(vaddr_bits)) {
            printf("hv_handle_abort(): %s half IPA out of bounds: 0x%0lx -> 0x%lx\n", other, vaddr2,
                   ipa2);
            return false;
        }

        u64 pte2 = hv_pt_walk(ipa2);
        if (!pte2) {
            printf("HV: Unmapped %s half IPA 0x%lx\n", other, ipa2);
            return false;
        }

        hv_wdt_breadcrumb('S');

        printf("HV: Emulating %s straddling page boundary as two ops @ 0x%lx (%ld bytes)\n",
               is_write ? "write" : "read", vaddr, bytes);

        bool upper_ret;
        if (far == vaddr) {
            if (!hv_emulate_rw(ctx, pte, vaddr, ipa, val, is_write, off, elr, par))
                return false;
            upper_ret =
                hv_emulate_rw(ctx, pte2, vaddr2, ipa2, val + off, is_write, bytes - off, elr, par2);
        } else {
            if (!hv_emulate_rw(ctx, pte2, vaddr2, ipa2, val, is_write, off, elr, par2))
                return false;
            upper_ret =
                hv_emulate_rw(ctx, pte, vaddrp1, ipa, val + off, is_write, bytes - off, elr, par);
        }

        if (!upper_ret) {
            printf("HV: WARNING: Failed to emulate upper half but lower half did commit!\n");
            return false;
        }
    }

    if (vaddrp0 != vaddrp1) {
        printf("HV: Straddled r/w data:\n");
        hexdump(val, bytes);
    }

    hv_wdt_breadcrumb('8');
    // Second decode pass for loads: write the fetched data into guest regs
    if (!is_write && !emulate_load(ctx, insn, (u64 *)val, &width, &vaddr))
        return false;

    hv_wdt_breadcrumb('9');

    return true;
}
diff --git a/tools/src/hv_vuart.c b/tools/src/hv_vuart.c
new file mode 100644
index 0000000..595c031
--- /dev/null
+++ b/tools/src/hv_vuart.c
@@ -0,0 +1,125 @@
/* SPDX-License-Identifier: MIT */

#include "hv.h"
#include "aic.h"
#include "iodev.h"
#include "uart.h"
#include "uart_regs.h"
#include "usb.h"

// Virtual UART state: presented to the guest as a Samsung-style UART whose
// FIFOs are backed by the USB vuart iodev.
bool active = false;

u32 ucon = 0;    // guest-written control register
u32 utrstat = 0; // synthesized Tx/Rx status
u32 ufstat = 0;  // synthesized FIFO status

int vuart_irq = 0;

/*
 * Recompute the synthesized UTRSTAT/UFSTAT from the backing iodev state and
 * raise/lower the guest software interrupt accordingly. Tx is always reported
 * empty since writes are forwarded immediately.
 */
static void update_irq(void)
{
    ssize_t rx_queued;

    iodev_handle_events(IODEV_USB_VUART);

    utrstat |= UTRSTAT_TXBE | UTRSTAT_TXE;
    utrstat &= ~UTRSTAT_RXD;

    ufstat = 0;
    if ((rx_queued = iodev_can_read(IODEV_USB_VUART))) {
        utrstat |= UTRSTAT_RXD;
        // Report at most 15 queued bytes; beyond that, flag the FIFO as full
        if (rx_queued > 15)
            ufstat = FIELD_PREP(UFSTAT_RXCNT, 15) | UFSTAT_RXFULL;
        else
            ufstat = FIELD_PREP(UFSTAT_RXCNT, rx_queued);

        if (FIELD_GET(UCON_RXMODE, ucon) == UCON_MODE_IRQ && ucon & UCON_RXTO_ENA) {
            utrstat |= UTRSTAT_RXTO;
        }
    }

    if (FIELD_GET(UCON_TXMODE, ucon) == UCON_MODE_IRQ && ucon & UCON_TXTHRESH_ENA) {
        utrstat |= UTRSTAT_TXTHRESH;
    }

    if (vuart_irq) {
        uart_clear_irqs();
        if (utrstat & (UTRSTAT_TXTHRESH |
UTRSTAT_RXTHRESH | UTRSTAT_RXTO)) { + aic_set_sw(vuart_irq, true); + } else { + aic_set_sw(vuart_irq, false); + } + } + + // printf("HV: vuart UTRSTAT=0x%x UFSTAT=0x%x UCON=0x%x\n", utrstat, ufstat, ucon); +} + +static bool handle_vuart(struct exc_info *ctx, u64 addr, u64 *val, bool write, int width) +{ + UNUSED(ctx); + UNUSED(width); + + addr &= 0xfff; + + update_irq(); + + if (write) { + // printf("HV: vuart W 0x%lx <- 0x%lx (%d)\n", addr, *val, width); + switch (addr) { + case UCON: + ucon = *val; + break; + case UTXH: { + uint8_t b = *val; + if (iodev_can_write(IODEV_USB_VUART)) + iodev_write(IODEV_USB_VUART, &b, 1); + break; + } + case UTRSTAT: + utrstat &= ~(*val & (UTRSTAT_TXTHRESH | UTRSTAT_RXTHRESH | UTRSTAT_RXTO)); + break; + } + } else { + switch (addr) { + case UCON: + *val = ucon; + break; + case URXH: + if (iodev_can_read(IODEV_USB_VUART)) { + uint8_t c; + iodev_read(IODEV_USB_VUART, &c, 1); + *val = c; + } else { + *val = 0; + } + break; + case UTRSTAT: + *val = utrstat; + break; + case UFSTAT: + *val = ufstat; + break; + default: + *val = 0; + break; + } + // printf("HV: vuart R 0x%lx -> 0x%lx (%d)\n", addr, *val, width); + } + + return true; +} + +void hv_vuart_poll(void) +{ + if (!active) + return; + + update_irq(); +} + +void hv_map_vuart(u64 base, int irq, iodev_id_t iodev) +{ + hv_map_hook(base, handle_vuart, 0x1000); + usb_iodev_vuart_setup(iodev); + vuart_irq = irq; + active = true; +} diff --git a/tools/src/hv_wdt.c b/tools/src/hv_wdt.c new file mode 100644 index 0000000..6010412 --- /dev/null +++ b/tools/src/hv_wdt.c @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: MIT */ + +#include "hv.h" +#include "adt.h" +#include "smp.h" +#include "uart.h" +#include "utils.h" + +#define WDT_TIMEOUT 1 + +static bool hv_wdt_active = false; +static bool hv_wdt_enabled = false; +static volatile u64 hv_wdt_timestamp = 0; +static u64 hv_wdt_timeout = 0; +static volatile u64 hv_wdt_breadcrumbs; + +static int hv_wdt_cpu; + +static u64 cpu_dbg_base = 0; + +void 
hv_wdt_bark(void) +{ + u64 tmp = hv_wdt_breadcrumbs; + uart_puts("HV watchdog: bark!"); + + uart_printf("Breadcrumbs: "); + for (int i = 56; i >= 0; i -= 8) { + char c = (tmp >> i) & 0xff; + if (c) + uart_putchar(c); + } + uart_putchar('\n'); + + uart_puts("Attempting to enter proxy"); + + struct uartproxy_msg_start start = { + .reason = START_HV, + .code = HV_WDT_BARK, + }; + + uartproxy_run(&start); + reboot(); +} + +void hv_wdt_main(void) +{ + while (hv_wdt_active) { + if (hv_wdt_enabled) { + sysop("dmb ish"); + u64 timestamp = hv_wdt_timestamp; + sysop("isb"); + u64 now = mrs(CNTPCT_EL0); + sysop("isb"); + if ((now - timestamp) > hv_wdt_timeout) + hv_wdt_bark(); + } + + udelay(1000); + + sysop("dmb ish"); + } +} + +void hv_wdt_pet(void) +{ + hv_wdt_timestamp = mrs(CNTPCT_EL0); + sysop("dmb ish"); +} + +void hv_wdt_suspend(void) +{ + hv_wdt_enabled = false; + sysop("dsb ish"); +} + +void hv_wdt_resume(void) +{ + hv_wdt_pet(); + hv_wdt_enabled = true; + sysop("dsb ish"); +} + +void hv_wdt_breadcrumb(char c) +{ + u64 tmp = hv_wdt_breadcrumbs; + tmp <<= 8; + tmp |= c; + hv_wdt_breadcrumbs = tmp; + sysop("dmb ish"); +} + +void hv_wdt_init(void) +{ + int node = adt_path_offset(adt, "/cpus/cpu0"); + if (node < 0) { + printf("Error getting /cpus/cpu0 node\n"); + return; + } + + u64 reg[2]; + if (ADT_GETPROP_ARRAY(adt, node, "cpu-uttdbg-reg", reg) < 0) { + printf("Error getting cpu-uttdbg-reg property\n"); + return; + } + + cpu_dbg_base = reg[0]; +} + +void hv_wdt_start(int cpu) +{ + if (hv_wdt_active) + return; + + hv_wdt_cpu = cpu; + hv_wdt_breadcrumbs = 0; + hv_wdt_timeout = mrs(CNTFRQ_EL0) * WDT_TIMEOUT; + hv_wdt_pet(); + hv_wdt_active = true; + hv_wdt_enabled = true; + smp_call4(hv_wdt_cpu, hv_wdt_main, 0, 0, 0, 0); +} + +void hv_wdt_stop(void) +{ + if (!hv_wdt_active) + return; + + hv_wdt_active = false; + smp_wait(hv_wdt_cpu); +} diff --git a/tools/src/i2c.c b/tools/src/i2c.c new file mode 100644 index 0000000..942ef1e --- /dev/null +++ b/tools/src/i2c.c @@ -0,0 
+1,216 @@ +/* SPDX-License-Identifier: MIT */ + +#include "adt.h" +#include "i2c.h" +#include "malloc.h" +#include "pmgr.h" +#include "types.h" +#include "utils.h" + +#define PASEMI_FIFO_TX 0x00 +#define PASEMI_TX_FLAG_READ BIT(10) +#define PASEMI_TX_FLAG_STOP BIT(9) +#define PASEMI_TX_FLAG_START BIT(8) + +#define PASEMI_FIFO_RX 0x04 +#define PASEMI_RX_FLAG_EMPTY BIT(8) + +#define PASEMI_STATUS 0x14 +#define PASEMI_STATUS_XFER_BUSY BIT(28) +#define PASEMI_STATUS_XFER_ENDED BIT(27) + +#define PASEMI_CONTROL 0x1c +#define PASEMI_CONTROL_CLEAR_RX BIT(10) +#define PASEMI_CONTROL_CLEAR_TX BIT(9) + +struct i2c_dev { + uintptr_t base; +}; + +i2c_dev_t *i2c_init(const char *adt_node) +{ + int adt_path[8]; + int adt_offset; + adt_offset = adt_path_offset_trace(adt, adt_node, adt_path); + if (adt_offset < 0) { + printf("i2c: Error getting %s node\n", adt_node); + return NULL; + } + + u64 base; + if (adt_get_reg(adt, adt_path, "reg", 0, &base, NULL) < 0) { + printf("i2c: Error getting %s regs\n", adt_node); + return NULL; + } + + if (pmgr_adt_power_enable(adt_node)) { + printf("i2c: Error enabling power for %s\n", adt_node); + return NULL; + } + + i2c_dev_t *dev = malloc(sizeof(*dev)); + if (!dev) + return NULL; + + dev->base = base; + return dev; +} + +void i2c_shutdown(i2c_dev_t *dev) +{ + free(dev); +} + +static void i2c_clear_fifos(i2c_dev_t *dev) +{ + set32(dev->base + PASEMI_CONTROL, PASEMI_CONTROL_CLEAR_TX | PASEMI_CONTROL_CLEAR_RX); +} + +static void i2c_clear_status(i2c_dev_t *dev) +{ + write32(dev->base + PASEMI_STATUS, 0xffffffff); +} + +static void i2c_xfer_start_read(i2c_dev_t *dev, u8 addr, size_t len) +{ + write32(dev->base + PASEMI_FIFO_TX, PASEMI_TX_FLAG_START | (addr << 1) | 1); + write32(dev->base + PASEMI_FIFO_TX, PASEMI_TX_FLAG_READ | PASEMI_TX_FLAG_STOP | len); +} + +static size_t i2c_xfer_read(i2c_dev_t *dev, u8 *bfr, size_t len) +{ + for (size_t i = 0; i < len; ++i) { + u32 timeout = 5000; + u32 val; + + do { + val = read32(dev->base + PASEMI_FIFO_RX); 
+ if (!(val & PASEMI_RX_FLAG_EMPTY)) + break; + udelay(10); + } while (--timeout); + + if (val & PASEMI_RX_FLAG_EMPTY) { + printf("i2c: timeout while reading (got %lu, expected %lu bytes)\n", i, len); + return i; + } + + bfr[i] = val; + } + + return len; +} + +static int i2c_xfer_write(i2c_dev_t *dev, u8 addr, u32 start, u32 stop, const u8 *bfr, size_t len) +{ + if (start) + write32(dev->base + PASEMI_FIFO_TX, PASEMI_TX_FLAG_START | (addr << 1)); + + for (size_t i = 0; i < len; ++i) { + u32 data = bfr[i]; + if (i == (len - 1) && stop) + data |= PASEMI_TX_FLAG_STOP; + + write32(dev->base + PASEMI_FIFO_TX, data); + } + + if (!stop) + return 0; + + if (poll32(dev->base + PASEMI_STATUS, PASEMI_STATUS_XFER_BUSY, 0, 50000)) { + printf( + "i2c: timeout while waiting for PASEMI_STATUS_XFER_BUSY to clear after write xfer\n"); + return -1; + } + + return 0; +} + +int i2c_smbus_read(i2c_dev_t *dev, u8 addr, u8 reg, u8 *bfr, size_t len) +{ + int ret = -1; + + i2c_clear_fifos(dev); + i2c_clear_status(dev); + + if (i2c_xfer_write(dev, addr, 1, 0, ®, 1)) + goto err; + + i2c_xfer_start_read(dev, addr, len + 1); + u8 len_reply; + if (i2c_xfer_read(dev, &len_reply, 1) != 1) + goto err; + + if (len_reply < len) + printf("i2c: want to read %ld bytes from addr %d but can only read %d\n", len, addr, + len_reply); + if (len_reply > len) + printf("i2c: want to read %ld bytes from addr %d but device wants to send %d\n", len, addr, + len_reply); + + ret = i2c_xfer_read(dev, bfr, min(len, len_reply)); + +err: + if (poll32(dev->base + PASEMI_STATUS, PASEMI_STATUS_XFER_BUSY, 0, 50000)) { + printf("i2c: timeout while waiting for PASEMI_STATUS_XFER_BUSY to clear after read xfer\n"); + return -1; + } + + return ret; +} + +int i2c_smbus_write(i2c_dev_t *dev, u8 addr, u8 reg, const u8 *bfr, size_t len) +{ + i2c_clear_fifos(dev); + i2c_clear_status(dev); + + if (i2c_xfer_write(dev, addr, 1, 0, ®, 1)) + return -1; + + u8 len_send = len; + if (i2c_xfer_write(dev, addr, 0, 0, &len_send, 1)) + return 
-1; + if (i2c_xfer_write(dev, addr, 0, 1, bfr, len)) + return -1; + + return len_send; +} + +int i2c_smbus_read32(i2c_dev_t *dev, u8 addr, u8 reg, u32 *val) +{ + u8 bfr[4]; + if (i2c_smbus_read(dev, addr, reg, bfr, 4) != 4) + return -1; + + *val = (bfr[0]) | (bfr[1] << 8) | (bfr[2] << 16) | (bfr[3] << 24); + return 0; +} + +int i2c_smbus_read16(i2c_dev_t *dev, u8 addr, u8 reg, u16 *val) +{ + u8 bfr[2]; + if (i2c_smbus_read(dev, addr, reg, bfr, 2) != 2) + return -1; + + *val = (bfr[0]) | (bfr[1] << 8); + return 0; +} + +int i2c_smbus_write32(i2c_dev_t *dev, u8 addr, u8 reg, u32 val) +{ + u8 bfr[4]; + + bfr[0] = val; + bfr[1] = val >> 8; + bfr[2] = val >> 16; + bfr[3] = val >> 24; + + return i2c_smbus_write(dev, addr, reg, bfr, 4); +} + +int i2c_smbus_read8(i2c_dev_t *dev, u8 addr, u8 reg, u8 *val) +{ + if (i2c_smbus_read(dev, addr, reg, val, 1) != 1) + return -1; + return 0; +} diff --git a/tools/src/i2c.h b/tools/src/i2c.h new file mode 100644 index 0000000..cbfc119 --- /dev/null +++ b/tools/src/i2c.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef I2C_H +#define I2C_H + +#include "types.h" + +typedef struct i2c_dev i2c_dev_t; + +i2c_dev_t *i2c_init(const char *adt_node); +void i2c_shutdown(i2c_dev_t *dev); + +int i2c_smbus_read(i2c_dev_t *dev, u8 addr, u8 reg, u8 *bfr, size_t len); +int i2c_smbus_write(i2c_dev_t *dev, u8 addr, u8 reg, const u8 *bfr, size_t len); + +int i2c_smbus_read32(i2c_dev_t *dev, u8 addr, u8 reg, u32 *val); +int i2c_smbus_write32(i2c_dev_t *dev, u8 addr, u8 reg, u32 val); + +int i2c_smbus_read16(i2c_dev_t *dev, u8 addr, u8 reg, u16 *val); +int i2c_smbus_read8(i2c_dev_t *dev, u8 addr, u8 reg, u8 *val); + +#endif diff --git a/tools/src/iodev.c b/tools/src/iodev.c new file mode 100644 index 0000000..32b5831 --- /dev/null +++ b/tools/src/iodev.c @@ -0,0 +1,319 @@ +/* SPDX-License-Identifier: MIT */ + +// #define DEBUG_IODEV + +#include "iodev.h" +#include "memory.h" +#include "string.h" + +#ifdef DEBUG_IODEV +#define dprintf 
printf
#else
#define dprintf(...)                                                                               \
    do {                                                                                           \
    } while (0)
#endif

#define CONSOLE_BUFFER_SIZE 8192

extern struct iodev iodev_uart;
extern struct iodev iodev_fb;
extern struct iodev iodev_usb_vuart;

// Static devices; USB slots are registered dynamically at runtime
struct iodev *iodevs[IODEV_MAX] = {
    [IODEV_UART] = &iodev_uart,
    [IODEV_FB] = &iodev_fb,
    [IODEV_USB_VUART] = &iodev_usb_vuart,
};

// Shared console ring buffer: one write pointer, one read pointer per device
char con_buf[CONSOLE_BUFFER_SIZE];
size_t con_wp;
size_t con_rp[IODEV_MAX];

void iodev_register_device(iodev_id_t id, struct iodev *dev)
{
    if (id >= IODEV_MAX)
        return;
    iodevs[id] = dev;
}

// Only dynamically-registered (USB) slots can be unregistered.
struct iodev *iodev_unregister_device(iodev_id_t id)
{
    if (id < IODEV_USB0 || id >= IODEV_MAX)
        return NULL;

    struct iodev *dev = iodevs[id];
    iodevs[id] = NULL;
    return dev;
}

// NOTE: all ops below lock only when the MMU is up (spinlocks need it).

// Returns the number of bytes available to read, or 0.
ssize_t iodev_can_read(iodev_id_t id)
{
    if (!iodevs[id] || !iodevs[id]->ops->can_read)
        return 0;

    if (mmu_active())
        spin_lock(&iodevs[id]->lock);
    ssize_t ret = iodevs[id]->ops->can_read(iodevs[id]->opaque);
    if (mmu_active())
        spin_unlock(&iodevs[id]->lock);
    return ret;
}

bool iodev_can_write(iodev_id_t id)
{
    if (!iodevs[id] || !iodevs[id]->ops->can_write)
        return false;

    if (mmu_active())
        spin_lock(&iodevs[id]->lock);
    bool ret = iodevs[id]->ops->can_write(iodevs[id]->opaque);
    if (mmu_active())
        spin_unlock(&iodevs[id]->lock);
    return ret;
}

ssize_t iodev_read(iodev_id_t id, void *buf, size_t length)
{
    if (!iodevs[id] || !iodevs[id]->ops->read)
        return -1;

    if (mmu_active())
        spin_lock(&iodevs[id]->lock);
    ssize_t ret = iodevs[id]->ops->read(iodevs[id]->opaque, buf, length);
    if (mmu_active())
        spin_unlock(&iodevs[id]->lock);
    return ret;
}

ssize_t iodev_write(iodev_id_t id, const void *buf, size_t length)
{
    if (!iodevs[id] || !iodevs[id]->ops->write)
        return -1;

    if (mmu_active())
        spin_lock(&iodevs[id]->lock);
    ssize_t ret = iodevs[id]->ops->write(iodevs[id]->opaque, buf, length);
    if (mmu_active())
        spin_unlock(&iodevs[id]->lock);
    return ret;
}

// Queue without forcing transmission; falls back to a plain write if the
// device has no queue op.
ssize_t iodev_queue(iodev_id_t id, const void *buf, size_t length)
{
    if (!iodevs[id] || !iodevs[id]->ops->queue)
        return iodev_write(id, buf, length);

    if (mmu_active())
        spin_lock(&iodevs[id]->lock);
    ssize_t ret = iodevs[id]->ops->queue(iodevs[id]->opaque, buf, length);
    if (mmu_active())
        spin_unlock(&iodevs[id]->lock);
    return ret;
}

void iodev_flush(iodev_id_t id)
{
    if (!iodevs[id] || !iodevs[id]->ops->flush)
        return;

    if (mmu_active())
        spin_lock(&iodevs[id]->lock);
    iodevs[id]->ops->flush(iodevs[id]->opaque);
    if (mmu_active())
        spin_unlock(&iodevs[id]->lock);
}

void iodev_lock(iodev_id_t id)
{
    if (!iodevs[id])
        return;

    if (mmu_active())
        spin_lock(&iodevs[id]->lock);
}

void iodev_unlock(iodev_id_t id)
{
    if (!iodevs[id])
        return;

    if (mmu_active())
        spin_unlock(&iodevs[id]->lock);
}

// Reentrancy guard: prevents recursive console writes from device callbacks
int in_iodev = 0;

static DECLARE_SPINLOCK(console_lock);

/*
 * Write console output to every device flagged USAGE_CONSOLE, buffering
 * through con_buf so slow devices can catch up later. Reentrant calls and
 * pre-MMU secondary-core calls degrade to a raw, marker-prefixed UART write.
 * Calling with length == 0 just flushes buffered data.
 */
void iodev_console_write(const void *buf, size_t length)
{
    bool do_lock = mmu_active();

    if (!do_lock && !is_primary_core()) {
        // Pre-MMU secondary core: no locking possible, bypass buffering
        if (length && iodevs[IODEV_UART]->usage & USAGE_CONSOLE) {
            iodevs[IODEV_UART]->ops->write(iodevs[IODEV_UART]->opaque, "*", 1);
            iodevs[IODEV_UART]->ops->write(iodevs[IODEV_UART]->opaque, buf, length);
        }
        return;
    }

    if (do_lock)
        spin_lock(&console_lock);

    if (in_iodev) {
        // Reentrant call (e.g. printf from within a device op): raw UART only
        if (length && iodevs[IODEV_UART]->usage & USAGE_CONSOLE) {
            iodevs[IODEV_UART]->ops->write(iodevs[IODEV_UART]->opaque, "+", 1);
            iodevs[IODEV_UART]->ops->write(iodevs[IODEV_UART]->opaque, buf, length);
        }
        if (do_lock)
            spin_unlock(&console_lock);
        return;
    }
    in_iodev++;

    dprintf(" iodev_console_write() wp=%d\n", con_wp);
    for (iodev_id_t id = 0; id < IODEV_MAX; id++) {
        if (!iodevs[id])
            continue;

        if (!(iodevs[id]->usage & USAGE_CONSOLE)) {
            /* Drop buffer */
            con_rp[id] = con_wp + length;
            continue;
        }

        if (!iodev_can_write(id))
            continue;

        // If this device fell too far behind, drop the overwritten data
        if (con_wp > CONSOLE_BUFFER_SIZE)
            con_rp[id] = max(con_wp - CONSOLE_BUFFER_SIZE, con_rp[id]);

        dprintf(" rp=%d\n", con_rp[id]);
        // Flush existing buffer to device if possible
        while (con_rp[id] < con_wp) {
            size_t buf_rp = con_rp[id] % CONSOLE_BUFFER_SIZE;
            size_t block = min(con_wp - con_rp[id], CONSOLE_BUFFER_SIZE - buf_rp);

            dprintf(" write buf %d\n", block);
            ssize_t ret = iodev_write(id, &con_buf[buf_rp], block);

            if (ret <= 0)
                goto next_dev;

            con_rp[id] += ret;
        }

        const u8 *p = buf;
        size_t wrote = 0;

        // Write the current buffer
        while (wrote < length) {
            ssize_t ret = iodev_write(id, p, length - wrote);

            if (ret <= 0)
                goto next_dev;

            con_rp[id] += ret;
            wrote += ret;
            p += ret;
        }

    next_dev:;
    }

    // Update console buffer

    if (length > CONSOLE_BUFFER_SIZE) {
        // Only the last CONSOLE_BUFFER_SIZE bytes can be retained
        buf += (length - CONSOLE_BUFFER_SIZE);
        con_wp += (length - CONSOLE_BUFFER_SIZE);
        length = CONSOLE_BUFFER_SIZE;
    }

    while (length) {
        size_t buf_wp = con_wp % CONSOLE_BUFFER_SIZE;
        size_t block = min(length, CONSOLE_BUFFER_SIZE - buf_wp);
        memcpy(&con_buf[buf_wp], buf, block);
        buf += block;
        con_wp += block;
        length -= block;
    }

    in_iodev--;
    if (do_lock)
        spin_unlock(&console_lock);
}

/*
 * Run a device's event handler under the console lock (skipped when a console
 * write is already in progress), then kick the console to drain any buffered
 * output the events may have made writable.
 */
void iodev_handle_events(iodev_id_t id)
{
    bool do_lock = mmu_active();

    if (do_lock)
        spin_lock(&console_lock);

    if (in_iodev) {
        if (do_lock)
            spin_unlock(&console_lock);
        return;
    }

    in_iodev++;

    if (iodevs[id]->ops->handle_events)
        iodevs[id]->ops->handle_events(iodevs[id]->opaque);

    in_iodev--;

    if (iodev_can_write(id))
        iodev_console_write(NULL, 0);

    if (do_lock)
        spin_unlock(&console_lock);
}

// Drain buffered console data and poll events on all console devices.
void iodev_console_kick(void)
{
    iodev_console_write(NULL, 0);

    for (iodev_id_t id = 0; id < IODEV_MAX; id++) {
        if (!iodevs[id])
            continue;
        if (!(iodevs[id]->usage & USAGE_CONSOLE))
            continue;

        iodev_handle_events(id);
    }
}

// Flush the hardware side of every console device.
void iodev_console_flush(void)
{
    for (iodev_id_t id = 0; id < IODEV_MAX; id++) {
        if (!iodevs[id])
            continue;
        if (!(iodevs[id]->usage & USAGE_CONSOLE))
            continue;

        iodev_flush(id);
    }
}

void iodev_set_usage(iodev_id_t id, iodev_usage_t usage)
{
    if (iodevs[id])
        iodevs[id]->usage = usage;
}

iodev_usage_t iodev_get_usage(iodev_id_t id)
{
    if (iodevs[id])
        return iodevs[id]->usage;
    return 0;
}

void *iodev_get_opaque(iodev_id_t id)
{
    if (id >= IODEV_MAX || !iodevs[id])
        return NULL;

    return iodevs[id]->opaque;
}
diff --git a/tools/src/iodev.h b/tools/src/iodev.h
new file mode 100644
index 0000000..24187c7
--- /dev/null
+++ b/tools/src/iodev.h
@@ -0,0 +1,63 @@
/* SPDX-License-Identifier: MIT */

#ifndef IODEV_H
#define IODEV_H

#include "types.h"
#include "utils.h"

#define USB_IODEV_COUNT 8

typedef enum _iodev_id_t {
    IODEV_UART,
    IODEV_FB,
    IODEV_USB_VUART,
    IODEV_USB0,
    IODEV_MAX = IODEV_USB0 + USB_IODEV_COUNT,
} iodev_id_t;

typedef enum _iodev_usage_t {
    USAGE_CONSOLE = BIT(0),
    USAGE_UARTPROXY = BIT(1),
} iodev_usage_t;

// Per-device operations; any op pointer may be NULL (treated as unsupported)
struct iodev_ops {
    ssize_t (*can_read)(void *opaque);
    bool (*can_write)(void *opaque);
    ssize_t (*read)(void *opaque, void *buf, size_t length);
    ssize_t (*write)(void *opaque, const void *buf, size_t length);
    ssize_t (*queue)(void *opaque, const void *buf, size_t length);
    void (*flush)(void *opaque);
    void (*handle_events)(void *opaque);
};

struct iodev {
    const struct iodev_ops *ops;

    spinlock_t lock;
    iodev_usage_t usage;
    void *opaque;
};

void iodev_register_device(iodev_id_t id, struct iodev *dev);
struct iodev *iodev_unregister_device(iodev_id_t id);

ssize_t iodev_can_read(iodev_id_t id);
bool iodev_can_write(iodev_id_t id);
ssize_t iodev_read(iodev_id_t id, void *buf, size_t length);
ssize_t iodev_write(iodev_id_t id, const void *buf, size_t length);
ssize_t iodev_queue(iodev_id_t id, const void *buf, size_t length);
void iodev_flush(iodev_id_t id);
void iodev_handle_events(iodev_id_t id);
void iodev_lock(iodev_id_t id);
void iodev_unlock(iodev_id_t id);

void
iodev_console_write(const void *buf, size_t length); +void iodev_console_kick(void); +void iodev_console_flush(void); + +iodev_usage_t iodev_get_usage(iodev_id_t id); +void iodev_set_usage(iodev_id_t id, iodev_usage_t usage); +void *iodev_get_opaque(iodev_id_t id); + +#endif diff --git a/tools/src/iova.c b/tools/src/iova.c new file mode 100644 index 0000000..c3146cd --- /dev/null +++ b/tools/src/iova.c @@ -0,0 +1,233 @@ +/* SPDX-License-Identifier: MIT */ + +#include "iova.h" +#include "malloc.h" +#include "string.h" +#include "utils.h" + +struct iova_block { + u64 iova; + size_t sz; + struct iova_block *next; +}; + +struct iova_domain { + u64 base; + u64 limit; + struct iova_block *free_list; +}; + +iova_domain_t *iovad_init(u64 base, u64 limit) +{ + if (base != ALIGN_UP(base, SZ_32M)) { + printf("iovad_init: base it not is not aligned to SZ_32M\n"); + return NULL; + } + + iova_domain_t *iovad = malloc(sizeof(*iovad)); + if (!iovad) + return NULL; + + memset(iovad, 0, sizeof(*iovad)); + + struct iova_block *blk = malloc(sizeof(*blk)); + if (!blk) { + free(iovad); + return NULL; + } + + /* don't hand out NULL pointers */ + blk->iova = base; + blk->sz = limit - SZ_16K; + blk->next = NULL; + iovad->base = base; + iovad->limit = limit; + iovad->free_list = blk; + + return iovad; +} + +void iovad_shutdown(iova_domain_t *iovad, dart_dev_t *dart) +{ + struct iova_block *blk = iovad->free_list; + + while (blk != NULL) { + struct iova_block *blk_free = blk; + blk = blk->next; + + free(blk_free); + } + + if (dart) + for (u64 addr = iovad->base; addr < iovad->limit; addr += SZ_32M) + dart_free_l2(dart, addr); + + free(iovad); +} + +bool iova_reserve(iova_domain_t *iovad, u64 iova, size_t sz) +{ + iova = ALIGN_DOWN(iova, SZ_16K); + sz = ALIGN_UP(sz, SZ_16K); + + if (iova == 0) { + iova += SZ_16K; + sz -= SZ_16K; + } + if (sz == 0) + return true; + + if (!iovad->free_list) { + printf("iova_reserve: trying to reserve iova range but empty free list\n"); + return false; + } + + 
struct iova_block *blk = iovad->free_list; + struct iova_block *blk_prev = NULL; + while (blk != NULL) { + if (iova >= blk->iova && iova < (blk->iova + blk->sz)) { + if (iova + sz >= (blk->iova + blk->sz)) { + printf("iova_reserve: tried to reserve [%lx; +%lx] but block in free list has " + "range [%lx; +%lx]\n", + iova, sz, blk->iova, blk->sz); + return false; + } + + if (iova == blk->iova && sz == blk->sz) { + /* if the to-be-reserved range is present as a single block in the free list we just + * need to remove it */ + if (blk_prev) + blk_prev->next = blk->next; + else + iovad->free_list = NULL; + + free(blk); + return true; + } else if (iova == blk->iova) { + /* cut off the reserved range from the beginning */ + blk->iova += sz; + blk->sz -= sz; + return true; + } else if (iova + sz == blk->iova + blk->sz) { + /* cut off the reserved range from the end */ + blk->sz -= sz; + return true; + } else { + /* the to-be-reserved range is in the middle and we'll have to split this block */ + struct iova_block *blk_new = malloc(sizeof(*blk_new)); + if (!blk_new) { + printf("iova_reserve: out of memory.\n"); + return false; + } + + blk_new->iova = iova + sz; + blk_new->sz = blk->iova + blk->sz - blk_new->iova; + blk_new->next = blk->next; + blk->next = blk_new; + blk->sz = iova - blk->iova; + return true; + } + } + + blk_prev = blk; + blk = blk->next; + } + + printf("iova_reserve: tried to reserve [%lx; +%lx] but range is already used.\n", iova, sz); + return false; +} + +u64 iova_alloc(iova_domain_t *iovad, size_t sz) +{ + sz = ALIGN_UP(sz, SZ_16K); + + struct iova_block *blk_prev = NULL; + struct iova_block *blk = iovad->free_list; + while (blk != NULL) { + if (blk->sz == sz) { + u64 iova = blk->iova; + + if (blk_prev) + blk_prev->next = blk->next; + else + iovad->free_list = blk->next; + + free(blk); + return iova; + } else if (blk->sz > sz) { + u64 iova = blk->iova; + + blk->iova += sz; + blk->sz -= sz; + + return iova; + } + + blk_prev = blk; + blk = blk->next; + } + 
+ return 0; +} + +void iova_free(iova_domain_t *iovad, u64 iova, size_t sz) +{ + sz = ALIGN_UP(sz, SZ_16K); + + struct iova_block *blk_prev = NULL; + struct iova_block *blk = iovad->free_list; + + /* create a new free list if it's empty */ + if (!blk) { + blk = malloc(sizeof(*blk)); + if (!blk) + panic("out of memory in iovad_free"); + blk->iova = iova; + blk->sz = sz; + blk->next = NULL; + iovad->free_list = blk; + return; + } + + while (blk != NULL) { + if ((iova + sz) == blk->iova) { + /* extend the block at the beginning */ + blk->iova -= sz; + blk->sz += sz; + + /* if we have just extended the start of the free list we're already done */ + if (!blk_prev) + return; + + /* check if we can merge two blocks otherwise */ + if ((blk_prev->iova + blk_prev->sz) == blk->iova) { + blk_prev->sz += blk->sz; + blk_prev->next = blk->next; + free(blk); + } + + return; + } else if ((iova + sz) < blk->iova) { + /* create a new block */ + struct iova_block *blk_new = malloc(sizeof(*blk_new)); + if (!blk_new) + panic("iova_free: out of memory\n"); + + blk_new->iova = iova; + blk_new->sz = sz; + blk_new->next = blk; + + if (blk_prev) + blk_prev->next = blk_new; + else + iovad->free_list = blk_new; + + return; + } + + blk_prev = blk; + blk = blk->next; + } + + panic("iovad_free: corruption detected, unable to insert freed range\n"); +} diff --git a/tools/src/iova.h b/tools/src/iova.h new file mode 100644 index 0000000..1637be4 --- /dev/null +++ b/tools/src/iova.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef IOVA_H +#define IOVA_H + +#include "dart.h" +#include "types.h" + +typedef struct iova_domain iova_domain_t; + +iova_domain_t *iovad_init(u64 base, u64 limit); +void iovad_shutdown(iova_domain_t *iovad, dart_dev_t *dart); + +bool iova_reserve(iova_domain_t *iovad, u64 iova, size_t sz); +u64 iova_alloc(iova_domain_t *iovad, size_t sz); +void iova_free(iova_domain_t *iovad, u64 iova, size_t sz); + +#endif diff --git a/tools/src/kboot.c b/tools/src/kboot.c new 
file mode 100644 index 0000000..c56c0e7 --- /dev/null +++ b/tools/src/kboot.c @@ -0,0 +1,1937 @@ +/* SPDX-License-Identifier: MIT */ + +#include <stdint.h> + +#include "kboot.h" +#include "adt.h" +#include "assert.h" +#include "dapf.h" +#include "devicetree.h" +#include "exception.h" +#include "firmware.h" +#include "malloc.h" +#include "memory.h" +#include "pcie.h" +#include "pmgr.h" +#include "sep.h" +#include "smp.h" +#include "types.h" +#include "usb.h" +#include "utils.h" +#include "xnuboot.h" + +#include "libfdt/libfdt.h" + +#define MAX_CHOSEN_PARAMS 16 + +#define MAX_ATC_DEVS 8 +#define MAX_CIO_DEVS 8 + +#define MAX_DISP_MAPPINGS 8 + +static void *dt = NULL; +static int dt_bufsize = 0; +static void *initrd_start = NULL; +static size_t initrd_size = 0; +static char *chosen_params[MAX_CHOSEN_PARAMS][2]; + +extern const char *const m1n1_version; + +int dt_set_gpu(void *dt); + +#define DT_ALIGN 16384 + +#define bail(...) \ + do { \ + printf(__VA_ARGS__); \ + return -1; \ + } while (0) + +#define bail_cleanup(...) 
\ + do { \ + printf(__VA_ARGS__); \ + ret = -1; \ + goto err; \ + } while (0) + +void get_notchless_fb(u64 *fb_base, u64 *fb_height) +{ + *fb_base = cur_boot_args.video.base; + *fb_height = cur_boot_args.video.height; + + int node = adt_path_offset(adt, "/product"); + + if (node < 0) { + printf("FDT: /product node not found\n"); + return; + } + + u32 val; + + if (ADT_GETPROP(adt, node, "partially-occluded-display", &val) < 0 || !val) { + printf("FDT: No notch detected\n"); + return; + } + + u64 hfrac = cur_boot_args.video.height * 16 / cur_boot_args.video.width; + u64 new_height = cur_boot_args.video.width * hfrac / 16; + + if (new_height == cur_boot_args.video.height) { + printf("FDT: Notch detected, but display aspect is already 16:%lu?\n", hfrac); + return; + } + + u64 offset = cur_boot_args.video.height - new_height; + + printf("display: Hiding notch, %lux%lu -> %lux%lu (+%lu, 16:%lu)\n", cur_boot_args.video.width, + cur_boot_args.video.height, cur_boot_args.video.width, new_height, offset, hfrac); + + *fb_base += cur_boot_args.video.stride * offset; + *fb_height = new_height; +} + +static int dt_set_rng_seed_sep(int node) +{ + u64 kaslr_seed; + uint8_t rng_seed[128]; // same size used by Linux for kexec + + if (sep_get_random(&kaslr_seed, sizeof(kaslr_seed)) != sizeof(kaslr_seed)) + bail("SEP: couldn't get enough random bytes for KASLR seed"); + if (sep_get_random(rng_seed, sizeof(rng_seed)) != sizeof(rng_seed)) + bail("SEP: couldn't get enough random bytes for RNG seed"); + + if (fdt_setprop_u64(dt, node, "kaslr-seed", kaslr_seed)) + bail("FDT: couldn't set kaslr-seed\n"); + if (fdt_setprop(dt, node, "rng-seed", rng_seed, sizeof(rng_seed))) + bail("FDT: couldn't set rng-seed\n"); + + printf("FDT: Passing %ld bytes of KASLR seed and %ld bytes of random seed\n", + sizeof(kaslr_seed), sizeof(rng_seed)); + + return 0; +} + +static int dt_set_rng_seed_adt(int node) +{ + int anode = adt_path_offset(adt, "/chosen"); + + if (anode < 0) + bail("ADT: /chosen not 
found\n"); + + const uint8_t *random_seed; + u32 seed_length; + + random_seed = adt_getprop(adt, anode, "random-seed", &seed_length); + if (random_seed) { + printf("ADT: %d bytes of random seed available\n", seed_length); + + if (seed_length >= sizeof(u64)) { + u64 kaslr_seed; + + memcpy(&kaslr_seed, random_seed, sizeof(kaslr_seed)); + + // Ideally we would throw away the kaslr_seed part of random_seed + // and avoid reusing it. However, Linux wants 64 bytes of bootloader + // random seed to consider its CRNG initialized, which is exactly + // how much iBoot gives us. This probably doesn't matter, since + // that entropy is going to get shuffled together and Linux makes + // sure to clear the FDT randomness after using it anyway, but just + // in case let's mix in a few bits from our own KASLR base to make + // kaslr_seed unique. + + kaslr_seed ^= (u64)cur_boot_args.virt_base; + + if (fdt_setprop_u64(dt, node, "kaslr-seed", kaslr_seed)) + bail("FDT: couldn't set kaslr-seed\n"); + + printf("FDT: KASLR seed initialized\n"); + } else { + printf("ADT: not enough random data for kaslr-seed\n"); + } + + if (seed_length) { + if (fdt_setprop(dt, node, "rng-seed", random_seed, seed_length)) + bail("FDT: couldn't set rng-seed\n"); + + printf("FDT: Passing %d bytes of random seed\n", seed_length); + } + } else { + printf("ADT: no random-seed available!\n"); + } + + return 0; +} + +static int dt_set_chosen(void) +{ + + int node = fdt_path_offset(dt, "/chosen"); + if (node < 0) + bail("FDT: /chosen node not found in devtree\n"); + + for (int i = 0; i < MAX_CHOSEN_PARAMS; i++) { + if (!chosen_params[i][0]) + break; + + const char *name = chosen_params[i][0]; + const char *value = chosen_params[i][1]; + if (fdt_setprop(dt, node, name, value, strlen(value) + 1) < 0) + bail("FDT: couldn't set chosen.%s property\n", name); + printf("FDT: %s = '%s'\n", name, value); + } + + if (initrd_start && initrd_size) { + if (fdt_setprop_u64(dt, node, "linux,initrd-start", (u64)initrd_start)) + 
bail("FDT: couldn't set chosen.linux,initrd-start property\n"); + + u64 end = ((u64)initrd_start) + initrd_size; + if (fdt_setprop_u64(dt, node, "linux,initrd-end", end)) + bail("FDT: couldn't set chosen.linux,initrd-end property\n"); + + if (fdt_add_mem_rsv(dt, (u64)initrd_start, initrd_size)) + bail("FDT: couldn't add reservation for the initrd\n"); + + printf("FDT: initrd at %p size 0x%lx\n", initrd_start, initrd_size); + } + + if (cur_boot_args.video.base) { + int fb = fdt_path_offset(dt, "/chosen/framebuffer"); + if (fb < 0) + bail("FDT: /chosen node not found in devtree\n"); + + u64 fb_base, fb_height; + get_notchless_fb(&fb_base, &fb_height); + u64 fb_size = cur_boot_args.video.stride * fb_height; + u64 fbreg[2] = {cpu_to_fdt64(fb_base), cpu_to_fdt64(fb_size)}; + char fbname[32]; + + snprintf(fbname, sizeof(fbname), "framebuffer@%lx", fb_base); + + if (fdt_setprop(dt, fb, "reg", fbreg, sizeof(fbreg))) + bail("FDT: couldn't set framebuffer.reg property\n"); + + if (fdt_set_name(dt, fb, fbname)) + bail("FDT: couldn't set framebuffer name\n"); + + if (fdt_setprop_u32(dt, fb, "width", cur_boot_args.video.width)) + bail("FDT: couldn't set framebuffer width\n"); + + if (fdt_setprop_u32(dt, fb, "height", fb_height)) + bail("FDT: couldn't set framebuffer height\n"); + + if (fdt_setprop_u32(dt, fb, "stride", cur_boot_args.video.stride)) + bail("FDT: couldn't set framebuffer stride\n"); + + const char *format = NULL; + + switch (cur_boot_args.video.depth & 0xff) { + case 32: + format = "x8r8g8b8"; + break; + case 30: + format = "x2r10g10b10"; + break; + case 16: + format = "r5g6b5"; + break; + default: + printf("FDT: unsupported fb depth %lu, not enabling\n", cur_boot_args.video.depth); + return 0; // Do not error out, but don't set the FB + } + + if (fdt_setprop_string(dt, fb, "format", format)) + bail("FDT: couldn't set framebuffer format\n"); + + fdt_delprop(dt, fb, "status"); // may fail if it does not exist + + printf("FDT: %s base 0x%lx size 0x%lx\n", fbname, 
fb_base, fb_size); + + // We do not need to reserve the framebuffer, as it will be excluded from the usable RAM + // range already. + + // save notch height in the dcp node if present + if (cur_boot_args.video.height - fb_height) { + int dcp = fdt_path_offset(dt, "dcp"); + if (dcp >= 0) + if (fdt_appendprop_u32(dt, dcp, "apple,notch-height", + cur_boot_args.video.height - fb_height)) + printf("FDT: couldn't set apple,notch-height\n"); + } + } + node = fdt_path_offset(dt, "/chosen"); + if (node < 0) + bail("FDT: /chosen node not found in devtree\n"); + + int ipd = adt_path_offset(adt, "/arm-io/spi3/ipd"); + if (ipd < 0) + ipd = adt_path_offset(adt, "/arm-io/dockchannel-mtp/mtp-transport/keyboard"); + + if (ipd < 0) { + printf("ADT: no keyboard found\n"); + } else { + u32 len; + const u8 *kblang = adt_getprop(adt, ipd, "kblang-calibration", &len); + if (kblang && len >= 2) { + if (fdt_setprop_u32(dt, node, "asahi,kblang-code", kblang[1])) + bail("FDT: couldn't set asahi,kblang-code"); + } else { + printf("ADT: kblang-calibration not found, no keyboard layout\n"); + } + } + + if (fdt_setprop(dt, node, "asahi,iboot1-version", system_firmware.iboot, + strlen(system_firmware.iboot) + 1)) + bail("FDT: couldn't set asahi,iboot1-version"); + + if (fdt_setprop(dt, node, "asahi,system-fw-version", system_firmware.string, + strlen(system_firmware.string) + 1)) + bail("FDT: couldn't set asahi,system-fw-version"); + + if (fdt_setprop(dt, node, "asahi,iboot2-version", os_firmware.iboot, + strlen(os_firmware.iboot) + 1)) + bail("FDT: couldn't set asahi,iboot2-version"); + + if (fdt_setprop(dt, node, "asahi,os-fw-version", os_firmware.string, + strlen(os_firmware.string) + 1)) + bail("FDT: couldn't set asahi,os-fw-version"); + + if (fdt_setprop(dt, node, "asahi,m1n1-stage2-version", m1n1_version, strlen(m1n1_version) + 1)) + bail("FDT: couldn't set asahi,m1n1-stage2-version"); + + if (dt_set_rng_seed_sep(node)) + return dt_set_rng_seed_adt(node); + + return 0; +} + +static int 
dt_set_memory(void) +{ + int anode = adt_path_offset(adt, "/chosen"); + + if (anode < 0) + bail("ADT: /chosen not found\n"); + + u64 dram_base, dram_size; + + if (ADT_GETPROP(adt, anode, "dram-base", &dram_base) < 0) + bail("ADT: Failed to get dram-base\n"); + if (ADT_GETPROP(adt, anode, "dram-size", &dram_size) < 0) + bail("ADT: Failed to get dram-size\n"); + + // Tell the kernel our usable memory range. We cannot declare all of DRAM, and just reserve the + // bottom and top, because the kernel would still map it (and just not use it), which breaks + // ioremap (e.g. simplefb). + + u64 dram_min = cur_boot_args.phys_base; + u64 dram_max = cur_boot_args.phys_base + cur_boot_args.mem_size; + + printf("FDT: DRAM at 0x%lx size 0x%lx\n", dram_base, dram_size); + printf("FDT: Usable memory is 0x%lx..0x%lx (0x%lx)\n", dram_min, dram_max, dram_max - dram_min); + + u64 memreg[2] = {cpu_to_fdt64(dram_min), cpu_to_fdt64(dram_max - dram_min)}; + + int node = fdt_path_offset(dt, "/memory"); + if (node < 0) + bail("FDT: /memory node not found in devtree\n"); + + if (fdt_setprop(dt, node, "reg", memreg, sizeof(memreg))) + bail("FDT: couldn't set memory.reg property\n"); + + return 0; +} + +static int dt_set_serial_number(void) +{ + + int fdt_root = fdt_path_offset(dt, "/"); + int adt_root = adt_path_offset(adt, "/"); + + if (fdt_root < 0) + bail("FDT: could not open a handle to FDT root.\n"); + if (adt_root < 0) + bail("ADT: could not open a handle to ADT root.\n"); + + u32 sn_len; + const char *serial_number = adt_getprop(adt, adt_root, "serial-number", &sn_len); + if (fdt_setprop_string(dt, fdt_root, "serial-number", serial_number)) + bail("FDT: unable to set device serial number!\n"); + printf("FDT: reporting device serial number: %s\n", serial_number); + + return 0; +} + +static int dt_set_cpus(void) +{ + int ret = 0; + + int cpus = fdt_path_offset(dt, "/cpus"); + if (cpus < 0) + bail("FDT: /cpus node not found in devtree\n"); + + uint32_t *pruned_phandles = malloc(MAX_CPUS * 
sizeof(uint32_t)); + size_t pruned = 0; + if (!pruned_phandles) + bail("FDT: out of memory\n"); + + /* Prune CPU nodes */ + int node, cpu = 0; + for (node = fdt_first_subnode(dt, cpus); node >= 0;) { + const char *name = fdt_get_name(dt, node, NULL); + if (strncmp(name, "cpu@", 4)) + goto next_node; + + if (cpu > MAX_CPUS) + bail_cleanup("Maximum number of CPUs exceeded, consider increasing MAX_CPUS\n"); + + const fdt64_t *prop = fdt_getprop(dt, node, "reg", NULL); + if (!prop) + bail_cleanup("FDT: failed to get reg property of CPU\n"); + + u64 dt_mpidr = fdt64_ld(prop); + + if (dt_mpidr == (mrs(MPIDR_EL1) & 0xFFFFFF)) + goto next_cpu; + + if (!smp_is_alive(cpu)) { + printf("FDT: CPU %d is not alive, disabling...\n", cpu); + pruned_phandles[pruned++] = fdt_get_phandle(dt, node); + + int next = fdt_next_subnode(dt, node); + fdt_nop_node(dt, node); + cpu++; + node = next; + continue; + } + + u64 mpidr = smp_get_mpidr(cpu); + + if (dt_mpidr != mpidr) + bail_cleanup("FDT: DT CPU %d MPIDR mismatch: 0x%lx != 0x%lx\n", cpu, dt_mpidr, mpidr); + + u64 release_addr = smp_get_release_addr(cpu); + if (fdt_setprop_inplace_u64(dt, node, "cpu-release-addr", release_addr)) + bail_cleanup("FDT: couldn't set cpu-release-addr property\n"); + + printf("FDT: CPU %d MPIDR=0x%lx release-addr=0x%lx\n", cpu, mpidr, release_addr); + + next_cpu: + cpu++; + next_node: + node = fdt_next_subnode(dt, node); + } + + if ((node < 0) && (node != -FDT_ERR_NOTFOUND)) { + bail_cleanup("FDT: error iterating through CPUs\n"); + } + + /* Prune AIC PMU affinities */ + int aic = fdt_node_offset_by_compatible(dt, -1, "apple,aic"); + if (aic == -FDT_ERR_NOTFOUND) + aic = fdt_node_offset_by_compatible(dt, -1, "apple,aic2"); + if (aic < 0) + bail_cleanup("FDT: Failed to find AIC node\n"); + + int affinities = fdt_subnode_offset(dt, aic, "affinities"); + if (affinities < 0) { + printf("FDT: Failed to find AIC affinities node, ignoring...\n"); + } else { + int node; + for (node = fdt_first_subnode(dt, 
affinities); node >= 0; + node = fdt_next_subnode(dt, node)) { + int len; + const fdt32_t *phs = fdt_getprop(dt, node, "cpus", &len); + if (!phs) + bail_cleanup("FDT: Failed to find cpus property under AIC affinity\n"); + + fdt32_t *new_phs = malloc(len); + size_t index = 0; + size_t count = len / sizeof(fdt32_t); + + for (size_t i = 0; i < count; i++) { + uint32_t phandle = fdt32_ld(&phs[i]); + bool prune = false; + + for (size_t j = 0; j < pruned; j++) { + if (pruned_phandles[j] == phandle) { + prune = true; + break; + } + } + if (!prune) + new_phs[index++] = phs[i]; + } + + ret = fdt_setprop(dt, node, "cpus", new_phs, sizeof(fdt32_t) * index); + free(new_phs); + + if (ret < 0) + bail_cleanup("FDT: Failed to set cpus property under AIC affinity\n"); + + const char *name = fdt_get_name(dt, node, NULL); + printf("FDT: Pruned %ld/%ld CPU references in [AIC]/affinities/%s\n", count - index, + count, name); + } + + if ((node < 0) && (node != -FDT_ERR_NOTFOUND)) + bail_cleanup("FDT: Error iterating through affinity nodes\n"); + } + + /* Prune CPU-map */ + int cpu_map = fdt_path_offset(dt, "/cpus/cpu-map"); + if (cpu_map < 0) { + printf("FDT: /cpus/cpu-map node not found in devtree, ignoring...\n"); + free(pruned_phandles); + return 0; + } + + int cluster_idx = 0; + int cluster_node; + for (cluster_node = fdt_first_subnode(dt, cpu_map); cluster_node >= 0;) { + const char *name = fdt_get_name(dt, cluster_node, NULL); + int cpu_idx = 0; + + if (strncmp(name, "cluster", 7)) + goto next_cluster; + + int cpu_node; + for (cpu_node = fdt_first_subnode(dt, cluster_node); cpu_node >= 0;) { + const char *cpu_name = fdt_get_name(dt, cpu_node, NULL); + + if (strncmp(cpu_name, "core", 4)) + goto next_map_cpu; + + int len; + const fdt32_t *cpu_ph = fdt_getprop(dt, cpu_node, "cpu", &len); + + if (!cpu_ph || len != sizeof(*cpu_ph)) + bail_cleanup("FDT: Failed to get cpu prop for /cpus/cpu-map/%s/%s\n", name, + cpu_name); + + uint32_t phandle = fdt32_ld(cpu_ph); + bool prune = false; + 
for (size_t i = 0; i < pruned; i++) { + if (pruned_phandles[i] == phandle) { + prune = true; + break; + } + } + + if (prune) { + printf("FDT: Pruning /cpus/cpu-map/%s/%s\n", name, cpu_name); + + int next = fdt_next_subnode(dt, cpu_node); + fdt_nop_node(dt, cpu_node); + cpu_node = next; + continue; + } else { + char new_name[16]; + + snprintf(new_name, 16, "core%d", cpu_idx++); + fdt_set_name(dt, cpu_node, new_name); + } + next_map_cpu: + cpu_node = fdt_next_subnode(dt, cpu_node); + } + + if ((cpu_node < 0) && (cpu_node != -FDT_ERR_NOTFOUND)) + bail_cleanup("FDT: Error iterating through CPU nodes\n"); + + if (cpu_idx == 0) { + printf("FDT: Pruning /cpus/cpu-map/%s\n", name); + + int next = fdt_next_subnode(dt, cluster_node); + fdt_nop_node(dt, cluster_node); + cluster_node = next; + continue; + } else { + char new_name[16]; + + snprintf(new_name, 16, "cluster%d", cluster_idx++); + fdt_set_name(dt, cluster_node, new_name); + } + next_cluster: + cluster_node = fdt_next_subnode(dt, cluster_node); + } + + if ((cluster_node < 0) && (cluster_node != -FDT_ERR_NOTFOUND)) + bail_cleanup("FDT: Error iterating through CPU clusters\n"); + + return 0; + +err: + free(pruned_phandles); + return ret; +} + +static struct { + const char *alias; + const char *fdt_property; + bool swap; +} mac_address_devices[] = { + { + .alias = "bluetooth0", + .fdt_property = "local-bd-address", + .swap = true, + }, + { + .alias = "ethernet0", + .fdt_property = "local-mac-address", + }, + { + .alias = "wifi0", + .fdt_property = "local-mac-address", + }, +}; + +static int dt_set_mac_addresses(void) +{ + int anode = adt_path_offset(adt, "/chosen"); + + if (anode < 0) + bail("ADT: /chosen not found\n"); + + for (size_t i = 0; i < sizeof(mac_address_devices) / sizeof(*mac_address_devices); i++) { + char propname[32]; + snprintf(propname, sizeof(propname), "mac-address-%s", mac_address_devices[i].alias); + + uint8_t addr[6]; + if (ADT_GETPROP_ARRAY(adt, anode, propname, addr) < 0) + continue; + + if 
(mac_address_devices[i].swap) { + for (size_t i = 0; i < sizeof(addr) / 2; ++i) { + uint8_t tmp = addr[i]; + addr[i] = addr[sizeof(addr) - i - 1]; + addr[sizeof(addr) - i - 1] = tmp; + } + } + + const char *path = fdt_get_alias(dt, mac_address_devices[i].alias); + if (path == NULL) + continue; + + int node = fdt_path_offset(dt, path); + if (node < 0) + continue; + + fdt_setprop(dt, node, mac_address_devices[i].fdt_property, addr, sizeof(addr)); + } + + return 0; +} + +static int dt_set_bluetooth_cal(int anode, int node, const char *adt_name, const char *fdt_name) +{ + u32 len; + const u8 *cal_blob = adt_getprop(adt, anode, adt_name, &len); + + if (!cal_blob || !len) + bail("ADT: Failed to get %s", adt_name); + + fdt_setprop(dt, node, fdt_name, cal_blob, len); + return 0; +} + +static int dt_set_bluetooth(void) +{ + int ret; + int anode = adt_path_offset(adt, "/arm-io/bluetooth"); + + if (anode < 0) + bail("ADT: /arm-io/bluetooth not found\n"); + + const char *path = fdt_get_alias(dt, "bluetooth0"); + if (path == NULL) + return 0; + + int node = fdt_path_offset(dt, path); + if (node < 0) + return 0; + + ret = dt_set_bluetooth_cal(anode, node, "bluetooth-taurus-calibration-bf", + "brcm,taurus-bf-cal-blob"); + if (ret) + return ret; + + ret = dt_set_bluetooth_cal(anode, node, "bluetooth-taurus-calibration", "brcm,taurus-cal-blob"); + if (ret) + return ret; + + return 0; +} + +static int dt_set_multitouch(void) +{ + const char *path = fdt_get_alias(dt, "touchbar0"); + if (path == NULL) + return 0; + + int node = fdt_path_offset(dt, path); + if (node < 0) + bail("FDT: alias points at nonexistent node"); + + int anode = adt_path_offset(adt, "/arm-io/spi0/multi-touch"); + if (anode < 0) + bail("ADT /arm-io/spi0/multi-touch not found\n"); + + u32 len; + const u8 *cal_blob = adt_getprop(adt, anode, "multi-touch-calibration", &len); + if (!cal_blob || !len) + bail("ADT: Failed to get multi-touch-calibration"); + + fdt_setprop(dt, node, "apple,z2-cal-blob", cal_blob, len); + 
return 0; +} + +static int dt_set_wifi(void) +{ + int anode = adt_path_offset(adt, "/arm-io/wlan"); + + if (anode < 0) + bail("ADT: /arm-io/wlan not found\n"); + + uint8_t info[16]; + if (ADT_GETPROP_ARRAY(adt, anode, "wifi-antenna-sku-info", info) < 0) + bail("ADT: Failed to get wifi-antenna-sku-info"); + + const char *path = fdt_get_alias(dt, "wifi0"); + if (path == NULL) + return 0; + + int node = fdt_path_offset(dt, path); + if (node < 0) + return 0; + + char antenna[8]; + memcpy(antenna, &info[8], sizeof(antenna)); + fdt_setprop_string(dt, node, "apple,antenna-sku", antenna); + + u32 len; + const u8 *cal_blob = adt_getprop(adt, anode, "wifi-calibration-msf", &len); + + if (!cal_blob || !len) + bail("ADT: Failed to get wifi-calibration-msf"); + + fdt_setprop(dt, node, "brcm,cal-blob", cal_blob, len); + + return 0; +} + +static void dt_set_uboot_dm_preloc(int node) +{ + // Tell U-Boot to bind this node early + fdt_setprop_empty(dt, node, "u-boot,dm-pre-reloc"); + fdt_setprop_empty(dt, node, "bootph-all"); + + // Make sure the power domains are bound early as well + int pds_size; + const fdt32_t *pds = fdt_getprop(dt, node, "power-domains", &pds_size); + if (!pds) + return; + + fdt32_t *phandles = malloc(pds_size); + if (!phandles) { + printf("FDT: out of memory\n"); + return; + } + memcpy(phandles, pds, pds_size); + + for (int i = 0; i < pds_size / 4; i++) { + node = fdt_node_offset_by_phandle(dt, fdt32_ld(&phandles[i])); + if (node < 0) + continue; + dt_set_uboot_dm_preloc(node); + + // restore node offset after DT update + node = fdt_node_offset_by_phandle(dt, fdt32_ld(&phandles[i])); + if (node < 0) + continue; + + // And make sure the PMGR node is bound early too + node = fdt_parent_offset(dt, node); + if (node < 0) + continue; + dt_set_uboot_dm_preloc(node); + } + + free(phandles); +} + +static int dt_set_uboot(void) +{ + // Make sure that U-Boot can initialize the serial port in its + // pre-relocation phase by marking its node and the nodes of the + // 
power domains it depends on with a "u-boot,dm-pre-reloc" + // property. + + const char *path = fdt_get_alias(dt, "serial0"); + if (path == NULL) + return 0; + + int node = fdt_path_offset(dt, path); + if (node < 0) + return 0; + + dt_set_uboot_dm_preloc(node); + return 0; +} + +struct atc_tunable { + u32 offset : 24; + u32 size : 8; + u32 mask; + u32 value; +} PACKED; +static_assert(sizeof(struct atc_tunable) == 12, "Invalid atc_tunable size"); + +struct adt_tunable_info { + const char *adt_name; + const char *fdt_name; + size_t reg_offset; + size_t reg_size; + bool required; +}; + +static const struct adt_tunable_info atc_tunables[] = { + /* global tunables applied after power on or reset */ + {"tunable_ATC0AXI2AF", "apple,tunable-axi2af", 0x0, 0x4000, true}, + {"tunable_ATC_FABRIC", "apple,tunable-common", 0x45000, 0x4000, true}, + {"tunable_AUS_CMN_TOP", "apple,tunable-common", 0x800, 0x4000, true}, + {"tunable_AUS_CMN_SHM", "apple,tunable-common", 0xa00, 0x4000, true}, + {"tunable_AUSPLL_CORE", "apple,tunable-common", 0x2200, 0x4000, true}, + {"tunable_AUSPLL_TOP", "apple,tunable-common", 0x2000, 0x4000, true}, + {"tunable_CIO3PLL_CORE", "apple,tunable-common", 0x2a00, 0x4000, true}, + {"tunable_CIO3PLL_TOP", "apple,tunable-common", 0x2800, 0x4000, true}, + {"tunable_CIO_CIO3PLL_TOP", "apple,tunable-common", 0x2800, 0x4000, false}, + {"tunable_USB_ACIOPHY_TOP", "apple,tunable-common", 0x0, 0x4000, true}, + /* lane-specific tunables applied after a cable is connected */ + {"tunable_DP_LN0_AUSPMA_TX_TOP", "apple,tunable-lane0-dp", 0xc000, 0x1000, true}, + {"tunable_DP_LN1_AUSPMA_TX_TOP", "apple,tunable-lane1-dp", 0x13000, 0x1000, true}, + {"tunable_USB_LN0_AUSPMA_RX_TOP", "apple,tunable-lane0-usb", 0x9000, 0x1000, true}, + {"tunable_USB_LN0_AUSPMA_RX_EQ", "apple,tunable-lane0-usb", 0xa000, 0x1000, true}, + {"tunable_USB_LN0_AUSPMA_RX_SHM", "apple,tunable-lane0-usb", 0xb000, 0x1000, true}, + {"tunable_USB_LN0_AUSPMA_TX_TOP", "apple,tunable-lane0-usb", 0xc000, 
0x1000, true}, + {"tunable_USB_LN1_AUSPMA_RX_TOP", "apple,tunable-lane1-usb", 0x10000, 0x1000, true}, + {"tunable_USB_LN1_AUSPMA_RX_EQ", "apple,tunable-lane1-usb", 0x11000, 0x1000, true}, + {"tunable_USB_LN1_AUSPMA_RX_SHM", "apple,tunable-lane1-usb", 0x12000, 0x1000, true}, + {"tunable_USB_LN1_AUSPMA_TX_TOP", "apple,tunable-lane1-usb", 0x13000, 0x1000, true}, + {"tunable_CIO_LN0_AUSPMA_RX_TOP", "apple,tunable-lane0-cio", 0x9000, 0x1000, true}, + {"tunable_CIO_LN0_AUSPMA_RX_EQ", "apple,tunable-lane0-cio", 0xa000, 0x1000, true}, + {"tunable_CIO_LN0_AUSPMA_RX_SHM", "apple,tunable-lane0-cio", 0xb000, 0x1000, true}, + {"tunable_CIO_LN0_AUSPMA_TX_TOP", "apple,tunable-lane0-cio", 0xc000, 0x1000, true}, + {"tunable_CIO_LN1_AUSPMA_RX_TOP", "apple,tunable-lane1-cio", 0x10000, 0x1000, true}, + {"tunable_CIO_LN1_AUSPMA_RX_EQ", "apple,tunable-lane1-cio", 0x11000, 0x1000, true}, + {"tunable_CIO_LN1_AUSPMA_RX_SHM", "apple,tunable-lane1-cio", 0x12000, 0x1000, true}, + {"tunable_CIO_LN1_AUSPMA_TX_TOP", "apple,tunable-lane1-cio", 0x13000, 0x1000, true}, +}; + +static int dt_append_atc_tunable(int adt_node, int fdt_node, + const struct adt_tunable_info *tunable_info) +{ + u32 tunables_len; + const struct atc_tunable *tunable_adt = + adt_getprop(adt, adt_node, tunable_info->adt_name, &tunables_len); + + if (!tunable_adt) { + printf("ADT: tunable %s not found\n", tunable_info->adt_name); + + if (tunable_info->required) + return -1; + else + return 0; + } + + if (tunables_len % sizeof(*tunable_adt)) { + printf("ADT: tunable %s with invalid length %d\n", tunable_info->adt_name, tunables_len); + return -1; + } + + u32 n_tunables = tunables_len / sizeof(*tunable_adt); + for (size_t j = 0; j < n_tunables; j++) { + const struct atc_tunable *tunable = &tunable_adt[j]; + + if (tunable->size != 32) { + printf("kboot: ATC tunable has invalid size %d\n", tunable->size); + return -1; + } + + if (tunable->offset % (tunable->size / 8)) { + printf("kboot: ATC tunable has unaligned offset %x\n", 
tunable->offset); + return -1; + } + + if (tunable->offset + (tunable->size / 8) > tunable_info->reg_size) { + printf("kboot: ATC tunable has invalid offset %x\n", tunable->offset); + return -1; + } + + if (fdt_appendprop_u32(dt, fdt_node, tunable_info->fdt_name, + tunable->offset + tunable_info->reg_offset) < 0) + return -1; + if (fdt_appendprop_u32(dt, fdt_node, tunable_info->fdt_name, tunable->mask) < 0) + return -1; + if (fdt_appendprop_u32(dt, fdt_node, tunable_info->fdt_name, tunable->value) < 0) + return -1; + } + + return 0; +} + +static void dt_copy_atc_tunables(const char *adt_path, const char *dt_alias) +{ + int ret; + + int adt_node = adt_path_offset(adt, adt_path); + if (adt_node < 0) + return; + + const char *fdt_path = fdt_get_alias(dt, dt_alias); + if (fdt_path == NULL) { + printf("FDT: Unable to find alias %s\n", dt_alias); + return; + } + + int fdt_node = fdt_path_offset(dt, fdt_path); + if (fdt_node < 0) { + printf("FDT: Unable to find path %s for alias %s\n", fdt_path, dt_alias); + return; + } + + for (size_t i = 0; i < sizeof(atc_tunables) / sizeof(*atc_tunables); ++i) { + ret = dt_append_atc_tunable(adt_node, fdt_node, &atc_tunables[i]); + if (ret) + goto cleanup; + } + + return; + +cleanup: + /* + * USB3 and Thunderbolt won't work if something went wrong. Clean up to make + * sure we don't leave half-filled properties around so that we can at least + * try to boot with USB2 support only. 
+ */ + for (size_t i = 0; i < sizeof(atc_tunables) / sizeof(*atc_tunables); ++i) + fdt_delprop(dt, fdt_node, atc_tunables[i].fdt_name); + + printf("FDT: Unable to setup ATC tunables for %s - USB3/Thunderbolt will not work\n", adt_path); +} + +static int dt_set_atc_tunables(void) +{ + char adt_path[32]; + char fdt_alias[32]; + + for (int i = 0; i < MAX_ATC_DEVS; ++i) { + memset(adt_path, 0, sizeof(adt_path)); + snprintf(adt_path, sizeof(adt_path), "/arm-io/atc-phy%d", i); + + memset(fdt_alias, 0, sizeof(adt_path)); + snprintf(fdt_alias, sizeof(fdt_alias), "atcphy%d", i); + + dt_copy_atc_tunables(adt_path, fdt_alias); + } + + return 0; +} + +static const struct adt_tunable_info acio_tunables[] = { + /* NHI tunables */ + {"hi_up_tx_desc_fabric_tunables", "apple,tunable-nhi", 0xf0000, 0x4000, true}, + {"hi_up_tx_data_fabric_tunables", "apple,tunable-nhi", 0xec000, 0x4000, true}, + {"hi_up_rx_desc_fabric_tunables", "apple,tunable-nhi", 0xe8000, 0x4000, true}, + {"hi_up_wr_fabric_tunables", "apple,tunable-nhi", 0xf4000, 0x4000, true}, + {"hi_up_merge_fabric_tunables", "apple,tunable-nhi", 0xf8000, 0x4000, true}, + {"hi_dn_merge_fabric_tunables", "apple,tunable-nhi", 0xfc000, 0x4000, true}, + {"fw_int_ctl_management_tunables", "apple,tunable-nhi", 0x4000, 0x4000, true}, + /* M3 tunables */ + {"top_tunables", "apple,tunable-m3", 0x0, 0x4000, true}, + {"hbw_fabric_tunables", "apple,tunable-m3", 0x4000, 0x4000, true}, + {"lbw_fabric_tunables", "apple,tunable-m3", 0x8000, 0x4000, true}, + /* PCIe adapter tunables */ + {"pcie_adapter_regs_tunables", "apple,tunable-pcie-adapter", 0x0, 0x4000, true}, +}; + +struct acio_tunable { + u32 offset; + u32 size; + u64 mask; + u64 value; +} PACKED; +static_assert(sizeof(struct acio_tunable) == 24, "Invalid acio_tunable size"); + +/* + * This is *almost* identical to dt_append_atc_tunable except for the different + * tunable struct and that tunable->size is in bytes instead of bits. 
+ * If only C had generics that aren't macros :-( + */ +static int dt_append_acio_tunable(int adt_node, int fdt_node, + const struct adt_tunable_info *tunable_info) +{ + u32 tunables_len; + const struct acio_tunable *tunable_adt = + adt_getprop(adt, adt_node, tunable_info->adt_name, &tunables_len); + + if (!tunable_adt) { + printf("ADT: tunable %s not found\n", tunable_info->adt_name); + + if (tunable_info->required) + return -1; + else + return 0; + } + + if (tunables_len % sizeof(*tunable_adt)) { + printf("ADT: tunable %s with invalid length %d\n", tunable_info->adt_name, tunables_len); + return -1; + } + + u32 n_tunables = tunables_len / sizeof(*tunable_adt); + for (size_t j = 0; j < n_tunables; j++) { + const struct acio_tunable *tunable = &tunable_adt[j]; + + if (tunable->size != 4) { + printf("kboot: ACIO tunable has invalid size %d\n", tunable->size); + return -1; + } + + if (tunable->offset % tunable->size) { + printf("kboot: ACIO tunable has unaligned offset %x\n", tunable->offset); + return -1; + } + + if (tunable->offset + tunable->size > tunable_info->reg_size) { + printf("kboot: ACIO tunable has invalid offset %x\n", tunable->offset); + return -1; + } + + if (fdt_appendprop_u32(dt, fdt_node, tunable_info->fdt_name, + tunable->offset + tunable_info->reg_offset) < 0) + return -1; + if (fdt_appendprop_u32(dt, fdt_node, tunable_info->fdt_name, tunable->mask) < 0) + return -1; + if (fdt_appendprop_u32(dt, fdt_node, tunable_info->fdt_name, tunable->value) < 0) + return -1; + } + + return 0; +} + +static int dt_copy_acio_tunables(const char *adt_path, const char *dt_alias) +{ + int ret; + int adt_node = adt_path_offset(adt, adt_path); + if (adt_node < 0) + return -1; + + const char *fdt_path = fdt_get_alias(dt, dt_alias); + if (fdt_path == NULL) + bail("FDT: Unable to find alias %s\n", dt_alias); + + int fdt_node = fdt_path_offset(dt, fdt_path); + if (fdt_node < 0) + bail("FDT: Unable to find path %s for alias %s\n", fdt_path, dt_alias); + + u32 drom_len; + 
const u8 *drom_blob = adt_getprop(adt, adt_node, "thunderbolt-drom", &drom_len); + if (!drom_blob || !drom_len) + bail("ADT: Failed to get thunderbolt-drom"); + + fdt_setprop(dt, fdt_node, "apple,thunderbolt-drom", drom_blob, drom_len); + for (size_t i = 0; i < sizeof(acio_tunables) / sizeof(*acio_tunables); ++i) { + ret = dt_append_acio_tunable(adt_node, fdt_node, &acio_tunables[i]); + if (ret) + bail_cleanup("ADT: unable to convert '%s' tunable", acio_tunables[i].adt_name); + } + + return 0; + +err: + fdt_delprop(dt, fdt_node, "apple,thunderbolt-drom"); + fdt_delprop(dt, fdt_node, "apple,tunable-nhi"); + fdt_delprop(dt, fdt_node, "apple,tunable-m3"); + fdt_delprop(dt, fdt_node, "apple,tunable-pcie-adapter"); + + return -1; +} + +static int dt_set_acio_tunables(void) +{ + char adt_path[32]; + char fdt_alias[32]; + + for (int i = 0; i < MAX_CIO_DEVS; ++i) { + memset(adt_path, 0, sizeof(adt_path)); + snprintf(adt_path, sizeof(adt_path), "/arm-io/acio%d", i); + + memset(fdt_alias, 0, sizeof(adt_path)); + snprintf(fdt_alias, sizeof(fdt_alias), "acio%d", i); + + dt_copy_acio_tunables(adt_path, fdt_alias); + } + + return 0; +} + +static int dt_get_iommu_node(int node, u32 num) +{ + int len; + assert(num < 32); + const void *prop = fdt_getprop(dt, node, "iommus", &len); + if (!prop || len < 0 || (u32)len < 8 * (num + 1)) { + printf("FDT: unexpected 'iommus' prop / len %d\n", len); + return -FDT_ERR_NOTFOUND; + } + + const fdt32_t *iommus = prop; + uint32_t phandle = fdt32_ld(&iommus[num * 2]); + + return fdt_node_offset_by_phandle(dt, phandle); +} + +static dart_dev_t *dt_init_dart_by_node(int node, u32 num) +{ + int len; + assert(num < 32); + const void *prop = fdt_getprop(dt, node, "iommus", &len); + if (!prop || len < 0 || (u32)len < 8 * (num + 1)) { + printf("FDT: unexpected 'iommus' prop / len %d\n", len); + return NULL; + } + + const fdt32_t *iommus = prop; + u32 iommu_phandle = fdt32_ld(&iommus[num * 2]); + u32 iommu_stream = fdt32_ld(&iommus[num * 2 + 1]); + + 
printf("FDT: iommu phande:%u stream:%u\n", iommu_phandle, iommu_stream); + + return dart_init_fdt(dt, iommu_phandle, iommu_stream, true); +} + +static u64 dart_get_mapping(dart_dev_t *dart, const char *path, u64 paddr, size_t size) +{ + u64 iova = dart_search(dart, (void *)paddr); + if (DART_IS_ERR(iova)) { + printf("ADT: %s paddr: 0x%lx is not mapped\n", path, paddr); + return iova; + } + + u64 pend = (u64)dart_translate(dart, iova + size - 1); + if (pend != (paddr + size - 1)) { + printf("ADT: %s is not continuously mapped: 0x%lx\n", path, pend); + return DART_PTR_ERR; + } + + return iova; +} + +static int dt_device_set_reserved_mem(int node, dart_dev_t *dart, const char *name, + uint32_t phandle, u64 paddr, u64 size) +{ + int ret; + + u64 iova = dart_get_mapping(dart, name, paddr, size); + if (DART_IS_ERR(iova)) + bail("ADT: no mapping found for '%s' 0x%012lx iova:0x%08lx)\n", name, paddr, iova); + + ret = fdt_appendprop_u32(dt, node, "iommu-addresses", phandle); + if (ret != 0) + bail("DT: could not append phandle '%s.compatible' property: %d\n", name, ret); + + ret = fdt_appendprop_u64(dt, node, "iommu-addresses", iova); + if (ret != 0) + bail("DT: could not append iova to '%s.iommu-addresses' property: %d\n", name, ret); + + ret = fdt_appendprop_u64(dt, node, "iommu-addresses", size); + if (ret != 0) + bail("DT: could not append size to '%s.iommu-addresses' property: %d\n", name, ret); + + return 0; +} + +static int dt_get_or_add_reserved_mem(const char *node_name, const char *compat, u64 paddr, + size_t size) +{ + int ret; + int resv_node = fdt_path_offset(dt, "/reserved-memory"); + if (resv_node < 0) + bail("DT: '/reserved-memory' not found\n"); + + int node = fdt_subnode_offset(dt, resv_node, node_name); + if (node >= 0) + return node; + + node = fdt_add_subnode(dt, resv_node, node_name); + if (node < 0) + bail("DT: failed to add node '%s' to '/reserved-memory'\n", node_name); + + uint32_t phandle; + ret = fdt_generate_phandle(dt, &phandle); + if (ret) + 
bail("DT: failed to generate phandle: %d\n", ret); + + ret = fdt_setprop_u32(dt, node, "phandle", phandle); + if (ret != 0) + bail("DT: couldn't set '%s.phandle' property: %d\n", node_name, ret); + + u64 reg[2] = {cpu_to_fdt64(paddr), cpu_to_fdt64(size)}; + ret = fdt_setprop(dt, node, "reg", reg, sizeof(reg)); + if (ret != 0) + bail("DT: couldn't set '%s.reg' property: %d\n", node_name, ret); + + ret = fdt_setprop_string(dt, node, "compatible", compat); + if (ret != 0) + bail("DT: couldn't set '%s.compatible' property: %d\n", node_name, ret); + + ret = fdt_setprop_empty(dt, node, "no-map"); + if (ret != 0) + bail("DT: couldn't set '%s.no-map' property: %d\n", node_name, ret); + + return node; +} + +static int dt_device_add_mem_region(const char *alias, uint32_t phandle, const char *name) +{ + int ret; + int dev_node = fdt_path_offset(dt, alias); + if (dev_node < 0) + bail("DT: failed to get node for alias '%s'\n", alias); + + ret = fdt_appendprop_u32(dt, dev_node, "memory-region", phandle); + if (ret != 0) + bail("DT: failed to append to 'memory-region' property\n"); + + dev_node = fdt_path_offset(dt, alias); + if (dev_node < 0) + bail("DT: failed to update node for alias '%s'\n", alias); + + ret = fdt_appendprop_string(dt, dev_node, "memory-region-names", name); + if (ret != 0) + bail("DT: failed to append to 'memory-region-names' property\n"); + + return 0; +} + +static int dt_set_dcp_firmware(const char *alias) +{ + const char *path = fdt_get_alias(dt, alias); + + if (!path) + return 0; + + int node = fdt_path_offset(dt, path); + if (node < 0) + return 0; + + if (firmware_set_fdt(dt, node, "apple,firmware-version", &os_firmware) < 0) + bail("FDT: Could not set apple,firmware-version for %s\n", path); + + const struct fw_version_info *compat; + + switch (os_firmware.version) { + case V12_3_1: + case V12_4: + compat = &fw_versions[V12_3]; + break; + default: + compat = &os_firmware; + break; + } + + if (firmware_set_fdt(dt, node, "apple,firmware-compat", compat) < 
0) + bail("FDT: Could not set apple,firmware-compat for %s\n", path); + + return 0; +} + +struct disp_mapping { + char region_adt[24]; + char mem_fdt[24]; + bool map_dcp; + bool map_disp; + bool map_piodma; +}; + +struct mem_region { + u64 paddr; + u64 size; +}; + +static int dt_add_reserved_regions(const char *dcp_alias, const char *disp_alias, + const char *piodma_alias, const char *compat, + struct disp_mapping *maps, struct mem_region *region, + u32 num_maps) +{ + int ret = 0; + dart_dev_t *dart_dcp = NULL, *dart_disp = NULL, *dart_piodma = NULL; + uint32_t dcp_phandle = 0, disp_phandle = 0, piodma_phandle = 0; + + /* Check for display device aliases, if one is missing assume it is an old DT + * without display nodes and return without error. + * Otherwise init each dart and retrieve the node's phandle. + */ + if (dcp_alias) { + int dcp_node = fdt_path_offset(dt, dcp_alias); + if (dcp_node < 0) { + printf("DT: could not resolve '%s' alias\n", dcp_alias); + goto err; // cleanup + } + dart_dcp = dt_init_dart_by_node(dcp_node, 0); + if (!dart_dcp) + bail_cleanup("DT: failed to init DART for '%s'\n", dcp_alias); + dcp_phandle = fdt_get_phandle(dt, dcp_node); + } + + if (disp_alias) { + int disp_node = fdt_path_offset(dt, disp_alias); + if (disp_node < 0) { + printf("DT: could not resolve '%s' alias\n", disp_alias); + goto err; // cleanup + } + dart_disp = dt_init_dart_by_node(disp_node, 0); + if (!dart_disp) + bail_cleanup("DT: failed to init DART for '%s'\n", disp_alias); + disp_phandle = fdt_get_phandle(dt, disp_node); + } + + if (piodma_alias) { + int piodma_node = fdt_path_offset(dt, piodma_alias); + if (piodma_node < 0) { + printf("DT: could not resolve '%s' alias\n", piodma_alias); + goto err; // cleanup + } + + dart_piodma = dt_init_dart_by_node(piodma_node, 0); + if (!dart_piodma) + bail_cleanup("DT: failed to init DART for '%s'\n", piodma_alias); + piodma_phandle = fdt_get_phandle(dt, piodma_node); + } + + for (unsigned i = 0; i < num_maps; i++) { + const 
char *name = maps[i].mem_fdt; + char node_name[64]; + + snprintf(node_name, sizeof(node_name), "%s@%lx", name, region[i].paddr); + int mem_node = + dt_get_or_add_reserved_mem(node_name, compat, region[i].paddr, region[i].size); + if (mem_node < 0) + goto err; + + uint32_t mem_phandle = fdt_get_phandle(dt, mem_node); + + if (maps[i].map_dcp && dart_dcp) { + ret = dt_device_set_reserved_mem(mem_node, dart_dcp, node_name, dcp_phandle, + region[i].paddr, region[i].size); + if (ret != 0) + goto err; + } + if (maps[i].map_disp && dart_disp) { + ret = dt_device_set_reserved_mem(mem_node, dart_disp, node_name, disp_phandle, + region[i].paddr, region[i].size); + if (ret != 0) + goto err; + } + if (maps[i].map_piodma && dart_piodma) { + ret = dt_device_set_reserved_mem(mem_node, dart_piodma, node_name, piodma_phandle, + region[i].paddr, region[i].size); + if (ret != 0) + goto err; + } + + /* modify device nodes after filling /reserved-memory to avoid + * reloading mem_node's offset */ + if (maps[i].map_dcp && dcp_alias) { + ret = dt_device_add_mem_region(dcp_alias, mem_phandle, maps[i].mem_fdt); + if (ret < 0) + goto err; + } + if (maps[i].map_disp && disp_alias) { + ret = dt_device_add_mem_region(disp_alias, mem_phandle, maps[i].mem_fdt); + if (ret < 0) + goto err; + } + if (maps[i].map_piodma && piodma_alias) { + ret = dt_device_add_mem_region(piodma_alias, mem_phandle, maps[i].mem_fdt); + if (ret < 0) + goto err; + } + } + + /* enable dart-disp0, it is disabled in device tree to avoid resetting + * it and breaking display scanout when booting with old m1n1 which + * does not lock dart-disp0. 
+ */ + if (disp_alias) { + int disp_node = fdt_path_offset(dt, disp_alias); + + int dart_disp0 = dt_get_iommu_node(disp_node, 0); + if (dart_disp0 < 0) + bail_cleanup("DT: failed to find 'dart-disp0'\n"); + + if (fdt_setprop_string(dt, dart_disp0, "status", "okay") < 0) + bail_cleanup("DT: failed to enable 'dart-disp0'\n"); + } +err: + if (dart_dcp) + dart_shutdown(dart_dcp); + if (dart_disp) + dart_shutdown(dart_disp); + if (dart_piodma) + dart_shutdown(dart_piodma); + + return ret; +} + +static int dt_carveout_reserved_regions(const char *dcp_alias, const char *disp_alias, + const char *piodma_alias, struct disp_mapping *maps, + u32 num_maps) +{ + int ret = 0; + + struct mem_region region[MAX_DISP_MAPPINGS]; + + assert(num_maps <= MAX_DISP_MAPPINGS); + + // return early if dcp_alias does not exists + if (!fdt_get_alias(dt, dcp_alias)) + return 0; + + ret = dt_set_dcp_firmware(dcp_alias); + if (ret) + return ret; + + int node = adt_path_offset(adt, "/chosen/carveout-memory-map"); + if (node < 0) + bail("ADT: '/chosen/carveout-memory-map' not found\n"); + + /* read physical addresses of reserved memory regions */ + /* do this up front to avoid errors after modifying the DT */ + for (unsigned i = 0; i < num_maps; i++) { + + int ret; + u64 phys_map[2]; + struct disp_mapping *map = &maps[i]; + const char *name = map->region_adt; + + ret = ADT_GETPROP_ARRAY(adt, node, name, phys_map); + if (ret != sizeof(phys_map)) + bail("ADT: could not get carveout memory '%s'\n", name); + if (!phys_map[0] || !phys_map[1]) + bail("ADT: carveout memory '%s'\n", name); + + region[i].paddr = phys_map[0]; + region[i].size = phys_map[1]; + } + + return dt_add_reserved_regions(dcp_alias, disp_alias, piodma_alias, "apple,asc-mem", maps, + region, num_maps); +} + +static struct disp_mapping disp_reserved_regions_vram[] = { + // boot framebuffer, mapped to dart-disp0 sid 0 and dart-dcp sid 0/5 + {"vram", "framebuffer", true, true, false}, +}; + +static int dt_vram_reserved_region(const char 
*dcp_alias, const char *disp_alias) +{ + int ret = 0; + int adt_path[4]; + struct mem_region region; + + // return early if dcp_alias does not exists + if (!fdt_get_alias(dt, dcp_alias)) + return 0; + + int node = adt_path_offset_trace(adt, "/vram", adt_path); + + if (node < 0) + bail("ADT: '/vram' not found\n"); + + int pp = 0; + while (adt_path[pp]) + pp++; + adt_path[pp + 1] = 0; + + ret = adt_get_reg(adt, adt_path, "reg", 0, ®ion.paddr, ®ion.size); + if (ret < 0) + bail("ADT: failed to read /vram/reg\n"); + + return dt_add_reserved_regions(dcp_alias, disp_alias, NULL, "framebuffer", + disp_reserved_regions_vram, ®ion, 1); +} + +static struct disp_mapping disp_reserved_regions_t8103[] = { + {"region-id-50", "dcp_data", true, false, false}, + {"region-id-57", "region57", true, false, false}, + // The 2 following regions are mapped in dart-dcp sid 0 and dart-disp0 sid 0 and 4 + {"region-id-94", "region94", true, true, false}, + {"region-id-95", "region95", true, false, true}, +}; + +static struct disp_mapping dcpext_reserved_regions_t8103[] = { + {"region-id-73", "dcpext_data", true, false, false}, + {"region-id-74", "region74", true, false, false}, +}; + +static struct disp_mapping disp_reserved_regions_t8112[] = { + {"region-id-49", "dcp_txt", true, false, false}, + {"region-id-50", "dcp_data", true, false, false}, + {"region-id-57", "region57", true, false, false}, + // The 2 following regions are mapped in dart-dcp sid 5 and dart-disp0 sid 0 and 4 + {"region-id-94", "region94", true, true, false}, + {"region-id-95", "region95", true, false, true}, +}; + +static struct disp_mapping dcpext_reserved_regions_t8112[] = { + {"region-id-49", "dcp_txt", true, false, false}, + {"region-id-73", "dcpext_data", true, false, false}, + {"region-id-74", "region74", true, false, false}, +}; + +static struct disp_mapping disp_reserved_regions_t600x[] = { + {"region-id-50", "dcp_data", true, false, false}, + {"region-id-57", "region57", true, false, false}, + // The 2 following 
regions are mapped in dart-dcp sid 0 and dart-disp0 sid 0 and 4 + {"region-id-94", "region94", true, true, false}, + {"region-id-95", "region95", true, false, true}, + // used on M1 Pro/Max/Ultra, mapped to dcp and disp0 + {"region-id-157", "region157", true, true, false}, +}; + +#define MAX_DCPEXT 8 + +static struct disp_mapping dcpext_reserved_regions_t600x[MAX_DCPEXT][2] = { + { + {"region-id-73", "dcpext0_data", true, false, false}, + {"region-id-74", "", true, false, false}, + }, + { + {"region-id-88", "dcpext1_data", true, false, false}, + {"region-id-89", "region89", true, false, false}, + }, + { + {"region-id-111", "dcpext2_data", true, false, false}, + {"region-id-112", "region112", true, false, false}, + }, + { + {"region-id-119", "dcpext3_data", true, false, false}, + {"region-id-120", "region120", true, false, false}, + }, + { + {"region-id-127", "dcpext4_data", true, false, false}, + {"region-id-128", "region128", true, false, false}, + }, + { + {"region-id-135", "dcpext5_data", true, false, false}, + {"region-id-136", "region136", true, false, false}, + }, + { + {"region-id-143", "dcpext6_data", true, false, false}, + {"region-id-144", "region144", true, false, false}, + }, + { + {"region-id-151", "dcpext7_data", true, false, false}, + {"region-id-152", "region152", true, false, false}, + }, +}; + +#define ARRAY_SIZE(s) (sizeof(s) / sizeof((s)[0])) + +static int dt_set_display(void) +{ + /* lock dart-disp0 to prevent old software from resetting it */ + dart_lock_adt("/arm-io/dart-disp0", 0); + + /* Add "/reserved-memory" nodes with iommu mapping and link them to their + * devices. The memory is already excluded from useable RAM so these nodes + * are only required to inform the OS about the existing mappings. + * Required for disp0, dcp and all dcpext. + * Checks for dcp* / disp*_piodma / disp* aliases and fails silently if + * they are missing. 
*/ + + int ret = 0; + + if (!fdt_node_check_compatible(dt, 0, "apple,t8103")) { + ret = dt_carveout_reserved_regions("dcp", "disp0", "disp0_piodma", + disp_reserved_regions_t8103, + ARRAY_SIZE(disp_reserved_regions_t8103)); + if (ret) + return ret; + + ret = dt_carveout_reserved_regions("dcpext", NULL, NULL, dcpext_reserved_regions_t8103, + ARRAY_SIZE(dcpext_reserved_regions_t8103)); + } else if (!fdt_node_check_compatible(dt, 0, "apple,t8112")) { + ret = dt_carveout_reserved_regions("dcp", "disp0", "disp0_piodma", + disp_reserved_regions_t8112, + ARRAY_SIZE(disp_reserved_regions_t8112)); + if (ret) + return ret; + + ret = dt_carveout_reserved_regions("dcpext", NULL, NULL, dcpext_reserved_regions_t8112, + ARRAY_SIZE(dcpext_reserved_regions_t8112)); + } else if (!fdt_node_check_compatible(dt, 0, "apple,t6000") || + !fdt_node_check_compatible(dt, 0, "apple,t6001") || + !fdt_node_check_compatible(dt, 0, "apple,t6002")) { + ret = dt_carveout_reserved_regions("dcp", "disp0", "disp0_piodma", + disp_reserved_regions_t600x, + ARRAY_SIZE(disp_reserved_regions_t600x)); + if (ret) + return ret; + + for (int n = 0; n < MAX_DCPEXT && ret == 0; n++) { + char dcpext_alias[16]; + + snprintf(dcpext_alias, sizeof(dcpext_alias), "dcpext%d", n); + ret = dt_carveout_reserved_regions(dcpext_alias, NULL, NULL, + dcpext_reserved_regions_t600x[n], + ARRAY_SIZE(dcpext_reserved_regions_t600x[n])); + } + } else { + printf("DT: unknown compatible, skip display reserved-memory setup\n"); + return 0; + } + if (ret) + return ret; + + return dt_vram_reserved_region("dcp", "disp0"); +} + +static int dt_disable_missing_devs(const char *adt_prefix, const char *dt_prefix, int max_devs) +{ + int ret = -1; + int adt_prefix_len = strlen(adt_prefix); + int dt_prefix_len = strlen(dt_prefix); + + int acnt = 0, phcnt = 0; + u64 *addrs = malloc(max_devs * sizeof(u64)); + u32 *phandles = malloc(max_devs * sizeof(u32) * 4); // Allow up to 4 extra nodes per device + if (!addrs || !phandles) + bail_cleanup("FDT: 
out of memory\n"); + + int path[8]; + int node = adt_path_offset_trace(adt, "/arm-io", path); + if (node < 0) + bail_cleanup("ADT: /arm-io not found\n"); + + int pp = 0; + while (path[pp]) + pp++; + path[pp + 1] = 0; + + u32 die_count; + if (ADT_GETPROP(adt, node, "die-count", &die_count) < 0) { + die_count = 1; + } + if (die_count > 8) { + printf("ADT: limiting die-count from %u to 8\n", die_count); + die_count = 8; + } + + /* Find ADT registers */ + ADT_FOREACH_CHILD(adt, node) + { + const char *name = adt_get_name(adt, node); + if (strncmp(name, adt_prefix, adt_prefix_len)) + continue; + + path[pp] = node; + if (adt_get_reg(adt, path, "reg", 0, &addrs[acnt++], NULL) < 0) + bail_cleanup("Error getting /arm-io/%s regs\n", name); + } + + for (u32 die = 0; die < die_count; ++die) { + char path[32] = "/soc"; + + if (die_count > 1) { + // pre-linux submission multi-die path + // can probably removed the next time someone read this comment. + snprintf(path, sizeof(path), "/soc/die%u", die); + int die_node = fdt_path_offset(dt, path); + if (die_node < 0) { + /* this should use aliases for the soc nodes */ + u64 die_unit_addr = die * PMGR_DIE_OFFSET + 0x200000000; + snprintf(path, sizeof(path), "/soc@%lx", die_unit_addr); + } + } + + int soc = fdt_path_offset(dt, path); + if (soc < 0) + bail("FDT: %s node not found in devtree\n", path); + + // parse ranges for address translation + struct dt_ranges_tbl ranges[DT_MAX_RANGES] = {0}; + dt_parse_ranges(dt, soc, ranges); + + /* Disable primary devices */ + fdt_for_each_subnode(node, dt, soc) + { + const char *name = fdt_get_name(dt, node, NULL); + if (strncmp(name, dt_prefix, dt_prefix_len)) + continue; + + const fdt64_t *reg = fdt_getprop(dt, node, "reg", NULL); + if (!reg) + bail_cleanup("FDT: failed to get reg property of %s\n", name); + + u64 addr = dt_translate(ranges, reg); + + int i; + for (i = 0; i < acnt; i++) + if (addrs[i] == addr) + break; + if (i < acnt) + continue; + + int iommus_size; + const fdt32_t *iommus = 
fdt_getprop(dt, node, "iommus", &iommus_size); + if (iommus) { + if (iommus_size & 7 || iommus_size > 4 * 8) { + printf("FDT: bad iommus property for %s/%s\n", path, name); + } else { + for (int i = 0; i < iommus_size / 8; i++) + phandles[phcnt++] = fdt32_ld(&iommus[i * 2]); + } + } + + int phys_size; + const fdt32_t *phys = fdt_getprop(dt, node, "phys", &phys_size); + if (phys) { + if (phys_size & 7 || phys_size > 4 * 8) { + printf("FDT: bad phys property for %s/%s\n", path, name); + } else { + for (int i = 0; i < phys_size / 8; i++) + phandles[phcnt++] = fdt32_ld(&phys[i * 2]); + } + } + + const char *status = fdt_getprop(dt, node, "status", NULL); + if (!status || strcmp(status, "disabled")) { + printf("FDT: Disabling missing device %s/%s\n", path, name); + + if (fdt_setprop_string(dt, node, "status", "disabled") < 0) + bail_cleanup("FDT: failed to set status property of %s/%s\n", path, name); + } + } + + /* Disable secondary devices */ + fdt_for_each_subnode(node, dt, soc) + { + const char *name = fdt_get_name(dt, node, NULL); + u32 phandle = fdt_get_phandle(dt, node); + + for (int i = 0; i < phcnt; i++) { + if (phandles[i] != phandle) + continue; + + const char *status = fdt_getprop(dt, node, "status", NULL); + if (status && !strcmp(status, "disabled")) + continue; + + printf("FDT: Disabling secondary device %s/%s\n", path, name); + + if (fdt_setprop_string(dt, node, "status", "disabled") < 0) + bail_cleanup("FDT: failed to set status property of %s/%s\n", path, name); + break; + } + } + } + + ret = 0; +err: + free(phandles); + free(addrs); + + return ret; +} + +static int dt_transfer_virtios(void) +{ + int path[3]; + path[0] = adt_path_offset(adt, "/arm-io/"); + if (path[0] < 0) + bail("ADT: /arm-io not found\n"); + + int aic = fdt_node_offset_by_compatible(dt, -1, "apple,aic"); + if (aic == -FDT_ERR_NOTFOUND) + aic = fdt_node_offset_by_compatible(dt, -1, "apple,aic2"); + if (aic < 0) + bail("FDT: failed to find AIC node\n"); + + u32 aic_phandle = 
fdt_get_phandle(dt, aic); + const fdt32_t *ic_prop = fdt_getprop(dt, aic, "#interrupt-cells", NULL); + u32 intcells = 0; + if (ic_prop) + intcells = fdt32_ld(ic_prop); + if (intcells < 3 || intcells > 4) + bail("FDT: bad '#interrupt-cells' on AIC node (%d)\n", intcells); + + for (u32 i = 0; i < 16; i++) { + char name[16], fullname[32]; + snprintf(name, sizeof(name) - 1, "virtio%d", i); + + path[1] = adt_subnode_offset(adt, path[0], name); + if (path[1] < 0) + break; + path[2] = 0; + + u64 addr, size; + if (adt_get_reg(adt, path, "reg", 0, &addr, &size) < 0) + bail("ADT: error getting /arm-io/%s regs\n", name); + + u32 irq; + ADT_GETPROP(adt, path[1], "interrupts", &irq); + + snprintf(fullname, sizeof(fullname) - 1, "virtio@%lx", addr); + printf("FDT: Adding %s found in ADT\n", name); + + int fnode = fdt_add_subnode(dt, 0, fullname); + if (fnode < 0) + bail("FDT: failed to create %s\n", fullname); + + if (fdt_setprop_string(dt, fnode, "compatible", "virtio,mmio")) + bail("FDT: couldn't set %s.compatible\n", fullname); + + fdt64_t reg[2]; + fdt64_st(reg + 0, addr); + fdt64_st(reg + 1, size); + if (fdt_setprop(dt, fnode, "reg", reg, sizeof(reg))) + bail("FDT: couldn't set %s.reg\n", fullname); + + if (fdt_setprop_u32(dt, fnode, "interrupt-parent", aic_phandle)) + bail("FDT: couldn't set %s.interrupt-parent\n", fullname); + + fdt32_t intprop[4]; + fdt32_st(intprop + 0, 0); // AIC_IRQ + fdt32_st(intprop + 1, 0); + fdt32_st(intprop + intcells - 2, irq); + fdt32_st(intprop + intcells - 1, 4); // IRQ_TYPE_LEVEL_HIGH + if (fdt_setprop(dt, fnode, "interrupts", intprop, 4 * intcells)) + bail("FDT: couldn't set %s.interrupts\n", fullname); + } + + return 0; +} + +void kboot_set_initrd(void *start, size_t size) +{ + initrd_start = start; + initrd_size = size; +} + +int kboot_set_chosen(const char *name, const char *value) +{ + int i = 0; + + if (!name) + return -1; + + for (i = 0; i < MAX_CHOSEN_PARAMS; i++) { + if (!chosen_params[i][0]) { + chosen_params[i][0] = 
malloc(strlen(name) + 1); + strcpy(chosen_params[i][0], name); + break; + } + + if (!strcmp(name, chosen_params[i][0])) { + free(chosen_params[i][1]); + chosen_params[i][1] = NULL; + break; + } + } + + if (i >= MAX_CHOSEN_PARAMS) + return -1; + + if (value) { + chosen_params[i][1] = malloc(strlen(value) + 1); + strcpy(chosen_params[i][1], value); + } + + return i; +} + +int kboot_prepare_dt(void *fdt) +{ + if (dt) { + free(dt); + dt = NULL; + } + + dt_bufsize = fdt_totalsize(fdt); + assert(dt_bufsize); + + dt_bufsize += 64 * 1024; // Add 64K of buffer for modifications + dt = memalign(DT_ALIGN, dt_bufsize); + + if (fdt_open_into(fdt, dt, dt_bufsize) < 0) + bail("FDT: fdt_open_into() failed\n"); + + if (fdt_add_mem_rsv(dt, (u64)dt, dt_bufsize)) + bail("FDT: couldn't add reservation for the devtree\n"); + + if (fdt_add_mem_rsv(dt, (u64)_base, ((u64)_end) - ((u64)_base))) + bail("FDT: couldn't add reservation for m1n1\n"); + + if (dt_set_chosen()) + return -1; + if (dt_set_serial_number()) + return -1; + if (dt_set_memory()) + return -1; + if (dt_set_cpus()) + return -1; + if (dt_set_mac_addresses()) + return -1; + if (dt_set_wifi()) + return -1; + if (dt_set_bluetooth()) + return -1; + if (dt_set_uboot()) + return -1; + if (dt_set_atc_tunables()) + return -1; + if (dt_set_acio_tunables()) + return -1; + if (dt_set_display()) + return -1; + if (dt_set_gpu(dt)) + return -1; + if (dt_set_multitouch()) + return -1; + if (dt_disable_missing_devs("usb-drd", "usb@", 8)) + return -1; + if (dt_disable_missing_devs("i2c", "i2c@", 8)) + return -1; +#ifndef RELEASE + if (dt_transfer_virtios()) + return 1; +#endif + + if (fdt_pack(dt)) + bail("FDT: fdt_pack() failed\n"); + + printf("FDT prepared at %p\n", dt); + + return 0; +} + +int kboot_boot(void *kernel) +{ + usb_init(); + pcie_init(); + dapf_init_all(); + + printf("Setting SMP mode to WFE...\n"); + smp_set_wfe_mode(true); + printf("Preparing to boot kernel at %p with fdt at %p\n", kernel, dt); + + next_stage.entry = kernel; 
+ next_stage.args[0] = (u64)dt; + next_stage.args[1] = 0; + next_stage.args[2] = 0; + next_stage.args[3] = 0; + next_stage.args[4] = 0; + next_stage.restore_logo = false; + + return 0; +} diff --git a/tools/src/kboot.h b/tools/src/kboot.h new file mode 100644 index 0000000..44a8740 --- /dev/null +++ b/tools/src/kboot.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef KBOOT_H +#define KBOOT_H + +#include "types.h" + +struct kernel_header { + u32 code[2]; /* Executable code */ + u64 text_offset; /* Image load offset, little endian */ + u64 image_size; /* Effective Image size, little endian */ + u64 flags; /* kernel flags, little endian */ + u64 res2; /* reserved */ + u64 res3; /* reserved */ + u64 res4; /* reserved */ + u32 magic; /* Magic number, little endian, "ARM\x64" */ + u32 res5; /* reserved (used for PE COFF offset) */ +}; + +void kboot_set_initrd(void *start, size_t size); +int kboot_set_chosen(const char *name, const char *value); +int kboot_prepare_dt(void *fdt); +int kboot_boot(void *kernel); + +#endif diff --git a/tools/src/kboot_gpu.c b/tools/src/kboot_gpu.c new file mode 100644 index 0000000..54e6d03 --- /dev/null +++ b/tools/src/kboot_gpu.c @@ -0,0 +1,452 @@ +/* SPDX-License-Identifier: MIT */ + +#include "kboot.h" +#include "adt.h" +#include "assert.h" +#include "firmware.h" +#include "math.h" +#include "pmgr.h" +#include "soc.h" +#include "utils.h" + +#include "libfdt/libfdt.h" + +#define bail(...) 
\ + do { \ + printf(__VA_ARGS__); \ + return -1; \ + } while (0) + +#define MAX_PSTATES 16 +#define MAX_CLUSTERS 8 + +struct perf_state { + u32 freq; + u32 volt; +}; + +static int get_core_counts(u32 *count, u32 nclusters, u32 ncores) +{ + u64 base; + pmgr_adt_power_enable("/arm-io/sgx"); + + int adt_sgx_path[8]; + if (adt_path_offset_trace(adt, "/arm-io/sgx", adt_sgx_path) < 0) + bail("ADT: GPU: Failed to get sgx\n"); + + if (adt_get_reg(adt, adt_sgx_path, "reg", 0, &base, NULL) < 0) + bail("ADT: GPU: Failed to get sgx reg 0\n"); + + u32 cores_lo = read32(base + 0xd01500); + u32 cores_hi = read32(base + 0xd01514); + + u64 cores = (((u64)cores_hi) << 32) | cores_lo; + + for (u32 i = 0; i < nclusters; i++) { + count[i] = __builtin_popcount(cores & MASK(ncores)); + cores >>= ncores; + } + + return 0; +} + +static void adjust_leakage(float *val, u32 clusters, u32 *cores, u32 max, float uncore_fraction) +{ + for (u32 i = 0; i < clusters; i++) { + float uncore = val[i] * uncore_fraction; + float core = val[i] - uncore; + + val[i] = uncore + (cores[i] / (float)max) * core; + } +} + +static void load_fuses(float *out, u32 count, u64 base, u32 start, u32 width, float scale, + float offset, bool flip) +{ + for (u32 i = 0; i < count; i++) { + base += (start / 32) * 4; + start &= 31; + + u32 low = read32(base); + u32 high = read32(base + 4); + u32 val = (((((u64)high) << 32) | low) >> start) & MASK(width); + + float fval = (float)val * scale + offset; + + if (flip) + out[count - i - 1] = fval; + else + out[i] = fval; + + start += width; + } +} + +static u32 t8103_pwr_scale[] = {0, 63, 80, 108, 150, 198, 210}; + +static int calc_power_t8103(u32 count, u32 table_count, const struct perf_state *core, + const struct perf_state *sram, u32 *max_pwr, float *core_leak, + float *sram_leak) +{ + UNUSED(sram); + UNUSED(core_leak); + UNUSED(sram_leak); + u32 *pwr_scale; + u32 pwr_scale_count; + u32 core_count; + u32 max_cores; + + switch (chip_id) { + case T8103: + pwr_scale = 
t8103_pwr_scale; + pwr_scale_count = ARRAY_SIZE(t8103_pwr_scale); + max_cores = 8; + break; + default: + bail("ADT: GPU: Unsupported chip\n"); + } + + if (get_core_counts(&core_count, 1, max_cores)) + return -1; + + if (table_count != 1) + bail("ADT: GPU: expected 1 perf state table but got %d\n", table_count); + + if (count != pwr_scale_count) + bail("ADT: GPU: expected %d perf states but got %d\n", pwr_scale_count, count); + + for (u32 i = 0; i < pwr_scale_count; i++) + max_pwr[i] = (u32)core[i].volt * (u32)pwr_scale[i] * 100; + + core_leak[0] = 1000.0; + sram_leak[0] = 45.0; + + adjust_leakage(core_leak, 1, &core_count, max_cores, 0.12); + adjust_leakage(sram_leak, 1, &core_count, max_cores, 0.2); + + return 0; +} + +static int calc_power_t600x(u32 count, u32 table_count, const struct perf_state *core, + const struct perf_state *sram, u32 *max_pwr, float *core_leak, + float *sram_leak) +{ + float s_sram, k_sram, s_core, k_core; + float dk_core, dk_sram; + float imax = 1000; + + u32 nclusters = 0; + u32 ncores = 0; + u32 core_count[MAX_CLUSTERS]; + + bool simple_exps = false; + bool adjust_leakages = true; + + switch (chip_id) { + case T6002: + nclusters += 4; + load_fuses(core_leak + 4, 4, 0x22922bc1b8, 25, 13, 2, 2, true); + load_fuses(sram_leak + 4, 4, 0x22922bc1cc, 4, 9, 1, 1, true); + // fallthrough + case T6001: + nclusters += 2; + case T6000: + nclusters += 2; + load_fuses(core_leak + 0, min(4, nclusters), 0x2922bc1b8, 25, 13, 2, 2, false); + load_fuses(sram_leak + 0, min(4, nclusters), 0x2922bc1cc, 4, 9, 1, 1, false); + + s_sram = 4.3547606; + k_sram = 0.024927923; + // macOS difference: macOS uses a misbehaved piecewise function here + // Since it's obviously wrong, let's just use only the first component + s_core = 1.48461742; + k_core = 0.39013552; + dk_core = 8.558; + dk_sram = 0.05; + + ncores = 8; + adjust_leakages = true; + imax = 26.0; + break; + case T8112: + nclusters = 1; + load_fuses(core_leak, 1, 0x23d2c84dc, 30, 13, 2, 2, false); + 
load_fuses(sram_leak, 1, 0x23d2c84b0, 15, 9, 1, 1, false); + + s_sram = 3.61619841; + k_sram = 0.0529281; + // macOS difference: macOS uses a misbehaved piecewise function here + // Since it's obviously wrong, let's just use only the first component + s_core = 1.21356187; + k_core = 0.43328839; + dk_core = 9.83196; + dk_sram = 0.07828; + + simple_exps = true; + ncores = 10; + adjust_leakages = false; // pre-adjusted? + imax = 24.0; + break; + } + + if (get_core_counts(core_count, nclusters, ncores)) + return -1; + + printf("FDT: GPU: Core counts: "); + for (u32 i = 0; i < nclusters; i++) { + printf("%d ", core_count[i]); + } + printf("\n"); + + if (adjust_leakages) { + adjust_leakage(core_leak, nclusters, core_count, ncores, 0.0825); + adjust_leakage(sram_leak, nclusters, core_count, ncores, 0.2247); + } + + if (table_count != nclusters) + bail("ADT: GPU: expected %d perf state tables but got %d\n", nclusters, table_count); + + max_pwr[0] = 0; + + for (u32 i = 1; i < count; i++) { + u32 total_mw = 0; + + for (u32 j = 0; j < nclusters; j++) { + // macOS difference: macOS truncates Hz to integer MHz before doing this math. + // That's probably wrong, so let's not do that. 
+ + float mw = 0; + size_t idx = j * count + i; + + mw += sram[idx].volt / 1000.f * sram_leak[j] * k_sram * + expf(sram[idx].volt / 1000.f * s_sram); + mw += core[idx].volt / 1000.f * core_leak[j] * k_core * + expf(core[idx].volt / 1000.f * s_core); + + float sbase = sram[idx].volt / 750.f; + float sram_v_p; + if (simple_exps) + sram_v_p = sbase * sbase; // v ^ 2 + else + sram_v_p = sbase * sbase * sbase; // v ^ 3 + mw += dk_sram * (sram[idx].freq / 1000000.f) * sram_v_p; + + float cbase = core[idx].volt / 750.f; + float core_v_p; + if (simple_exps || core[idx].volt < 750) + core_v_p = cbase * cbase; // v ^ 2 + else + core_v_p = cbase * cbase * cbase; // v ^ 3 + mw += dk_core * (core[idx].freq / 1000000.f) * core_v_p; + + if (mw > imax * core[idx].volt) + mw = imax * core[idx].volt; + + total_mw += mw; + } + + max_pwr[i] = total_mw * 1000; + } + + return 0; +} + +static int dt_set_region(void *dt, int sgx, const char *name, const char *path) +{ + u64 base, size; + char prop[64]; + + snprintf(prop, sizeof(prop), "%s-base", name); + if (ADT_GETPROP(adt, sgx, prop, &base) < 0 || !base) + bail("ADT: GPU: failed to find %s property\n", prop); + + snprintf(prop, sizeof(prop), "%s-size", name); + if (ADT_GETPROP(adt, sgx, prop, &size) < 0 || !base) + bail("ADT: GPU: failed to find %s property\n", prop); + + int node = fdt_path_offset(dt, path); + if (node < 0) + bail("FDT: GPU: failed to find %s node\n", path); + + fdt64_t reg[2]; + + fdt64_st(®[0], base); + fdt64_st(®[1], size); + + if (fdt_setprop_inplace(dt, node, "reg", reg, sizeof(reg))) + bail("FDT: GPU: failed to set reg prop for %s\n", path); + + return 0; +} + +int fdt_set_float_array(void *dt, int node, const char *name, float *val, int count) +{ + fdt32_t data[MAX_CLUSTERS]; + + if (count > MAX_CLUSTERS) + bail("FDT: GPU: fdt_set_float_array() with too many values\n"); + + memcpy(data, val, sizeof(float) * count); + for (int i = 0; i < count; i++) { + data[i] = cpu_to_fdt32(data[i]); + } + + if 
(fdt_setprop_inplace(dt, node, name, data, sizeof(u32) * count)) + bail("FDT: GPU: Failed to set %s\n", name); + + return 0; +} + +int dt_set_gpu(void *dt) +{ + int (*calc_power)(u32 count, u32 table_count, const struct perf_state *perf, + const struct perf_state *sram, u32 *max_pwr, float *core_leak, + float *sram_leak); + + printf("FDT: GPU: Initializing GPU info\n"); + + switch (chip_id) { + case T8103: + calc_power = calc_power_t8103; + break; + case T6000: + case T6001: + case T6002: + case T8112: + calc_power = calc_power_t600x; + break; + default: + printf("ADT: GPU: unsupported chip!\n"); + return 0; + } + + int gpu = fdt_path_offset(dt, "gpu"); + if (gpu < 0) { + printf("FDT: GPU: gpu alias not found in device tree\n"); + return 0; + } + + int len; + const fdt32_t *opps_ph = fdt_getprop(dt, gpu, "operating-points-v2", &len); + if (!opps_ph || len != 4) + bail("FDT: GPU: operating-points-v2 not found\n"); + + int opps = fdt_node_offset_by_phandle(dt, fdt32_ld(opps_ph)); + if (opps < 0) + bail("FDT: GPU: node for phandle %u not found\n", fdt32_ld(opps_ph)); + + int sgx = adt_path_offset(adt, "/arm-io/sgx"); + if (sgx < 0) + bail("ADT: GPU: /arm-io/sgx node not found\n"); + + u32 perf_state_count; + if (ADT_GETPROP(adt, sgx, "perf-state-count", &perf_state_count) < 0 || !perf_state_count) + bail("ADT: GPU: missing perf-state-count\n"); + + u32 perf_state_table_count; + if (ADT_GETPROP(adt, sgx, "perf-state-table-count", &perf_state_table_count) < 0 || + !perf_state_table_count) + bail("ADT: GPU: missing perf-state-table-count\n"); + + if (perf_state_count > MAX_PSTATES) + bail("ADT: GPU: perf-state-count too large\n"); + + if (perf_state_table_count > MAX_CLUSTERS) + bail("ADT: GPU: perf-state-table-count too large\n"); + + u32 perf_states_len; + const struct perf_state *perf_states, *perf_states_sram; + + perf_states = adt_getprop(adt, sgx, "perf-states", &perf_states_len); + if (!perf_states || + perf_states_len != sizeof(*perf_states) * perf_state_count * 
perf_state_table_count) + bail("ADT: GPU: invalid perf-states length\n"); + + perf_states_sram = adt_getprop(adt, sgx, "perf-states-sram", &perf_states_len); + if (perf_states_sram && + perf_states_len != sizeof(*perf_states) * perf_state_count * perf_state_table_count) + bail("ADT: GPU: invalid perf-states-sram length\n"); + + u32 max_pwr[MAX_PSTATES]; + float core_leak[MAX_CLUSTERS]; + float sram_leak[MAX_CLUSTERS]; + + if (calc_power(perf_state_count, perf_state_table_count, perf_states, perf_states_sram, max_pwr, + core_leak, sram_leak)) + return -1; + + printf("FDT: GPU: Max power table: "); + for (u32 i = 0; i < perf_state_count; i++) { + printf("%d ", max_pwr[i]); + } + printf("\nFDT: GPU: Core leakage table: "); + for (u32 i = 0; i < perf_state_table_count; i++) { + printf("%d.%03d ", (int)core_leak[i], ((int)(core_leak[i] * 1000) % 1000)); + } + printf("\nFDT: GPU: SRAM leakage table: "); + for (u32 i = 0; i < perf_state_table_count; i++) { + printf("%d.%03d ", (int)sram_leak[i], ((int)(sram_leak[i] * 1000) % 1000)); + } + printf("\n"); + + if (fdt_set_float_array(dt, gpu, "apple,core-leak-coef", core_leak, perf_state_table_count)) + return -1; + + if (fdt_set_float_array(dt, gpu, "apple,sram-leak-coef", sram_leak, perf_state_table_count)) + return -1; + + if (firmware_set_fdt(dt, gpu, "apple,firmware-version", &os_firmware)) + return -1; + + const struct fw_version_info *compat; + + switch (os_firmware.version) { + case V12_3_1: + compat = &fw_versions[V12_3]; + break; + default: + compat = &os_firmware; + break; + } + + if (firmware_set_fdt(dt, gpu, "apple,firmware-compat", compat)) + return -1; + + u32 i = 0; + int opp; + fdt_for_each_subnode(opp, dt, opps) + { + fdt32_t volts[MAX_CLUSTERS]; + + for (u32 j = 0; j < perf_state_table_count; j++) { + volts[j] = cpu_to_fdt32(perf_states[i + j * perf_state_count].volt * 1000); + } + + if (i >= perf_state_count) + bail("FDT: GPU: Expected %d operating points, but found more\n", perf_state_count); + + if 
(fdt_setprop_inplace(dt, opp, "opp-microvolt", &volts, + sizeof(u32) * perf_state_table_count)) + bail("FDT: GPU: Failed to set opp-microvolt for PS %d\n", i); + + if (fdt_setprop_inplace_u64(dt, opp, "opp-hz", perf_states[i].freq)) + bail("FDT: GPU: Failed to set opp-hz for PS %d\n", i); + + if (fdt_setprop_inplace_u32(dt, opp, "opp-microwatt", max_pwr[i])) + bail("FDT: GPU: Failed to set opp-microwatt for PS %d\n", i); + + i++; + } + + if (i != perf_state_count) + bail("FDT: GPU: Expected %d operating points, but found %d\n", perf_state_count, i); + + if (dt_set_region(dt, sgx, "gfx-handoff", "/reserved-memory/uat-handoff")) + return -1; + if (dt_set_region(dt, sgx, "gfx-shared-region", "/reserved-memory/uat-pagetables")) + return -1; + if (dt_set_region(dt, sgx, "gpu-region", "/reserved-memory/uat-ttbs")) + return -1; + + return 0; +} diff --git a/tools/src/libfdt/fdt.c b/tools/src/libfdt/fdt.c new file mode 100644 index 0000000..ebd163a --- /dev/null +++ b/tools/src/libfdt/fdt.c @@ -0,0 +1,327 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + */ +#include "libfdt_env.h" + +#include "fdt.h" +#include "libfdt.h" + +#include "libfdt_internal.h" + +/* + * Minimal sanity check for a read-only tree. fdt_ro_probe_() checks + * that the given buffer contains what appears to be a flattened + * device tree with sane information in its header. 
+ */ +int32_t fdt_ro_probe_(const void *fdt) +{ + uint32_t totalsize = fdt_totalsize(fdt); + + if (can_assume(VALID_DTB)) + return totalsize; + + if (fdt_magic(fdt) == FDT_MAGIC) { + /* Complete tree */ + if (!can_assume(LATEST)) { + if (fdt_version(fdt) < FDT_FIRST_SUPPORTED_VERSION) + return -FDT_ERR_BADVERSION; + if (fdt_last_comp_version(fdt) > + FDT_LAST_SUPPORTED_VERSION) + return -FDT_ERR_BADVERSION; + } + } else if (fdt_magic(fdt) == FDT_SW_MAGIC) { + /* Unfinished sequential-write blob */ + if (!can_assume(VALID_INPUT) && fdt_size_dt_struct(fdt) == 0) + return -FDT_ERR_BADSTATE; + } else { + return -FDT_ERR_BADMAGIC; + } + + if (totalsize < INT32_MAX) + return totalsize; + else + return -FDT_ERR_TRUNCATED; +} + +static int check_off_(uint32_t hdrsize, uint32_t totalsize, uint32_t off) +{ + return (off >= hdrsize) && (off <= totalsize); +} + +static int check_block_(uint32_t hdrsize, uint32_t totalsize, + uint32_t base, uint32_t size) +{ + if (!check_off_(hdrsize, totalsize, base)) + return 0; /* block start out of bounds */ + if ((base + size) < base) + return 0; /* overflow */ + if (!check_off_(hdrsize, totalsize, base + size)) + return 0; /* block end out of bounds */ + return 1; +} + +size_t fdt_header_size_(uint32_t version) +{ + if (version <= 1) + return FDT_V1_SIZE; + else if (version <= 2) + return FDT_V2_SIZE; + else if (version <= 3) + return FDT_V3_SIZE; + else if (version <= 16) + return FDT_V16_SIZE; + else + return FDT_V17_SIZE; +} + +size_t fdt_header_size(const void *fdt) +{ + return can_assume(LATEST) ? 
FDT_V17_SIZE : + fdt_header_size_(fdt_version(fdt)); +} + +int fdt_check_header(const void *fdt) +{ + size_t hdrsize; + + if (fdt_magic(fdt) != FDT_MAGIC) + return -FDT_ERR_BADMAGIC; + if (!can_assume(LATEST)) { + if ((fdt_version(fdt) < FDT_FIRST_SUPPORTED_VERSION) + || (fdt_last_comp_version(fdt) > + FDT_LAST_SUPPORTED_VERSION)) + return -FDT_ERR_BADVERSION; + if (fdt_version(fdt) < fdt_last_comp_version(fdt)) + return -FDT_ERR_BADVERSION; + } + hdrsize = fdt_header_size(fdt); + if (!can_assume(VALID_DTB)) { + + if ((fdt_totalsize(fdt) < hdrsize) + || (fdt_totalsize(fdt) > INT_MAX)) + return -FDT_ERR_TRUNCATED; + + /* Bounds check memrsv block */ + if (!check_off_(hdrsize, fdt_totalsize(fdt), + fdt_off_mem_rsvmap(fdt))) + return -FDT_ERR_TRUNCATED; + } + + if (!can_assume(VALID_DTB)) { + /* Bounds check structure block */ + if (!can_assume(LATEST) && fdt_version(fdt) < 17) { + if (!check_off_(hdrsize, fdt_totalsize(fdt), + fdt_off_dt_struct(fdt))) + return -FDT_ERR_TRUNCATED; + } else { + if (!check_block_(hdrsize, fdt_totalsize(fdt), + fdt_off_dt_struct(fdt), + fdt_size_dt_struct(fdt))) + return -FDT_ERR_TRUNCATED; + } + + /* Bounds check strings block */ + if (!check_block_(hdrsize, fdt_totalsize(fdt), + fdt_off_dt_strings(fdt), + fdt_size_dt_strings(fdt))) + return -FDT_ERR_TRUNCATED; + } + + return 0; +} + +const void *fdt_offset_ptr(const void *fdt, int offset, unsigned int len) +{ + unsigned int uoffset = offset; + unsigned int absoffset = offset + fdt_off_dt_struct(fdt); + + if (offset < 0) + return NULL; + + if (!can_assume(VALID_INPUT)) + if ((absoffset < uoffset) + || ((absoffset + len) < absoffset) + || (absoffset + len) > fdt_totalsize(fdt)) + return NULL; + + if (can_assume(LATEST) || fdt_version(fdt) >= 0x11) + if (((uoffset + len) < uoffset) + || ((offset + len) > fdt_size_dt_struct(fdt))) + return NULL; + + return fdt_offset_ptr_(fdt, offset); +} + +uint32_t fdt_next_tag(const void *fdt, int startoffset, int *nextoffset) +{ + const fdt32_t *tagp, 
*lenp; + uint32_t tag; + int offset = startoffset; + const char *p; + + *nextoffset = -FDT_ERR_TRUNCATED; + tagp = fdt_offset_ptr(fdt, offset, FDT_TAGSIZE); + if (!can_assume(VALID_DTB) && !tagp) + return FDT_END; /* premature end */ + tag = fdt32_to_cpu(*tagp); + offset += FDT_TAGSIZE; + + *nextoffset = -FDT_ERR_BADSTRUCTURE; + switch (tag) { + case FDT_BEGIN_NODE: + /* skip name */ + do { + p = fdt_offset_ptr(fdt, offset++, 1); + } while (p && (*p != '\0')); + if (!can_assume(VALID_DTB) && !p) + return FDT_END; /* premature end */ + break; + + case FDT_PROP: + lenp = fdt_offset_ptr(fdt, offset, sizeof(*lenp)); + if (!can_assume(VALID_DTB) && !lenp) + return FDT_END; /* premature end */ + /* skip-name offset, length and value */ + offset += sizeof(struct fdt_property) - FDT_TAGSIZE + + fdt32_to_cpu(*lenp); + if (!can_assume(LATEST) && + fdt_version(fdt) < 0x10 && fdt32_to_cpu(*lenp) >= 8 && + ((offset - fdt32_to_cpu(*lenp)) % 8) != 0) + offset += 4; + break; + + case FDT_END: + case FDT_END_NODE: + case FDT_NOP: + break; + + default: + return FDT_END; + } + + if (!fdt_offset_ptr(fdt, startoffset, offset - startoffset)) + return FDT_END; /* premature end */ + + *nextoffset = FDT_TAGALIGN(offset); + return tag; +} + +int fdt_check_node_offset_(const void *fdt, int offset) +{ + if (!can_assume(VALID_INPUT) + && ((offset < 0) || (offset % FDT_TAGSIZE))) + return -FDT_ERR_BADOFFSET; + + if (fdt_next_tag(fdt, offset, &offset) != FDT_BEGIN_NODE) + return -FDT_ERR_BADOFFSET; + + return offset; +} + +int fdt_check_prop_offset_(const void *fdt, int offset) +{ + if (!can_assume(VALID_INPUT) + && ((offset < 0) || (offset % FDT_TAGSIZE))) + return -FDT_ERR_BADOFFSET; + + if (fdt_next_tag(fdt, offset, &offset) != FDT_PROP) + return -FDT_ERR_BADOFFSET; + + return offset; +} + +int fdt_next_node(const void *fdt, int offset, int *depth) +{ + int nextoffset = 0; + uint32_t tag; + + if (offset >= 0) + if ((nextoffset = fdt_check_node_offset_(fdt, offset)) < 0) + return nextoffset; + 
+ do { + offset = nextoffset; + tag = fdt_next_tag(fdt, offset, &nextoffset); + + switch (tag) { + case FDT_PROP: + case FDT_NOP: + break; + + case FDT_BEGIN_NODE: + if (depth) + (*depth)++; + break; + + case FDT_END_NODE: + if (depth && ((--(*depth)) < 0)) + return nextoffset; + break; + + case FDT_END: + if ((nextoffset >= 0) + || ((nextoffset == -FDT_ERR_TRUNCATED) && !depth)) + return -FDT_ERR_NOTFOUND; + else + return nextoffset; + } + } while (tag != FDT_BEGIN_NODE); + + return offset; +} + +int fdt_first_subnode(const void *fdt, int offset) +{ + int depth = 0; + + offset = fdt_next_node(fdt, offset, &depth); + if (offset < 0 || depth != 1) + return -FDT_ERR_NOTFOUND; + + return offset; +} + +int fdt_next_subnode(const void *fdt, int offset) +{ + int depth = 1; + + /* + * With respect to the parent, the depth of the next subnode will be + * the same as the last. + */ + do { + offset = fdt_next_node(fdt, offset, &depth); + if (offset < 0 || depth < 1) + return -FDT_ERR_NOTFOUND; + } while (depth > 1); + + return offset; +} + +const char *fdt_find_string_(const char *strtab, int tabsize, const char *s) +{ + int len = strlen(s) + 1; + const char *last = strtab + tabsize - len; + const char *p; + + for (p = strtab; p <= last; p++) + if (memcmp(p, s, len) == 0) + return p; + return NULL; +} + +int fdt_move(const void *fdt, void *buf, int bufsize) +{ + if (!can_assume(VALID_INPUT) && bufsize < 0) + return -FDT_ERR_NOSPACE; + + FDT_RO_PROBE(fdt); + + if (fdt_totalsize(fdt) > (unsigned int)bufsize) + return -FDT_ERR_NOSPACE; + + memmove(buf, fdt, fdt_totalsize(fdt)); + return 0; +} diff --git a/tools/src/libfdt/fdt.h b/tools/src/libfdt/fdt.h new file mode 100644 index 0000000..f2e6880 --- /dev/null +++ b/tools/src/libfdt/fdt.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) */ +#ifndef FDT_H +#define FDT_H +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. 
+ * Copyright 2012 Kim Phillips, Freescale Semiconductor. + */ + +#ifndef __ASSEMBLY__ + +struct fdt_header { + fdt32_t magic; /* magic word FDT_MAGIC */ + fdt32_t totalsize; /* total size of DT block */ + fdt32_t off_dt_struct; /* offset to structure */ + fdt32_t off_dt_strings; /* offset to strings */ + fdt32_t off_mem_rsvmap; /* offset to memory reserve map */ + fdt32_t version; /* format version */ + fdt32_t last_comp_version; /* last compatible version */ + + /* version 2 fields below */ + fdt32_t boot_cpuid_phys; /* Which physical CPU id we're + booting on */ + /* version 3 fields below */ + fdt32_t size_dt_strings; /* size of the strings block */ + + /* version 17 fields below */ + fdt32_t size_dt_struct; /* size of the structure block */ +}; + +struct fdt_reserve_entry { + fdt64_t address; + fdt64_t size; +}; + +struct fdt_node_header { + fdt32_t tag; + char name[0]; +}; + +struct fdt_property { + fdt32_t tag; + fdt32_t len; + fdt32_t nameoff; + char data[0]; +}; + +#endif /* !__ASSEMBLY */ + +#define FDT_MAGIC 0xd00dfeed /* 4: version, 4: total size */ +#define FDT_TAGSIZE sizeof(fdt32_t) + +#define FDT_BEGIN_NODE 0x1 /* Start node: full name */ +#define FDT_END_NODE 0x2 /* End node */ +#define FDT_PROP 0x3 /* Property: name off, + size, content */ +#define FDT_NOP 0x4 /* nop */ +#define FDT_END 0x9 + +#define FDT_V1_SIZE (7*sizeof(fdt32_t)) +#define FDT_V2_SIZE (FDT_V1_SIZE + sizeof(fdt32_t)) +#define FDT_V3_SIZE (FDT_V2_SIZE + sizeof(fdt32_t)) +#define FDT_V16_SIZE FDT_V3_SIZE +#define FDT_V17_SIZE (FDT_V16_SIZE + sizeof(fdt32_t)) + +#endif /* FDT_H */ diff --git a/tools/src/libfdt/fdt_addresses.c b/tools/src/libfdt/fdt_addresses.c new file mode 100644 index 0000000..6357859 --- /dev/null +++ b/tools/src/libfdt/fdt_addresses.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2014 David Gibson <david@gibson.dropbear.id.au> + * Copyright (C) 2018 embedded 
brains GmbH + */ +#include "libfdt_env.h" + +#include "fdt.h" +#include "libfdt.h" + +#include "libfdt_internal.h" + +static int fdt_cells(const void *fdt, int nodeoffset, const char *name) +{ + const fdt32_t *c; + uint32_t val; + int len; + + c = fdt_getprop(fdt, nodeoffset, name, &len); + if (!c) + return len; + + if (len != sizeof(*c)) + return -FDT_ERR_BADNCELLS; + + val = fdt32_to_cpu(*c); + if (val > FDT_MAX_NCELLS) + return -FDT_ERR_BADNCELLS; + + return (int)val; +} + +int fdt_address_cells(const void *fdt, int nodeoffset) +{ + int val; + + val = fdt_cells(fdt, nodeoffset, "#address-cells"); + if (val == 0) + return -FDT_ERR_BADNCELLS; + if (val == -FDT_ERR_NOTFOUND) + return 2; + return val; +} + +int fdt_size_cells(const void *fdt, int nodeoffset) +{ + int val; + + val = fdt_cells(fdt, nodeoffset, "#size-cells"); + if (val == -FDT_ERR_NOTFOUND) + return 1; + return val; +} + +/* This function assumes that [address|size]_cells is 1 or 2 */ +int fdt_appendprop_addrrange(void *fdt, int parent, int nodeoffset, + const char *name, uint64_t addr, uint64_t size) +{ + int addr_cells, size_cells, ret; + uint8_t data[sizeof(fdt64_t) * 2], *prop; + + ret = fdt_address_cells(fdt, parent); + if (ret < 0) + return ret; + addr_cells = ret; + + ret = fdt_size_cells(fdt, parent); + if (ret < 0) + return ret; + size_cells = ret; + + /* check validity of address */ + prop = data; + if (addr_cells == 1) { + if ((addr > UINT32_MAX) || ((UINT32_MAX + 1 - addr) < size)) + return -FDT_ERR_BADVALUE; + + fdt32_st(prop, (uint32_t)addr); + } else if (addr_cells == 2) { + fdt64_st(prop, addr); + } else { + return -FDT_ERR_BADNCELLS; + } + + /* check validity of size */ + prop += addr_cells * sizeof(fdt32_t); + if (size_cells == 1) { + if (size > UINT32_MAX) + return -FDT_ERR_BADVALUE; + + fdt32_st(prop, (uint32_t)size); + } else if (size_cells == 2) { + fdt64_st(prop, size); + } else { + return -FDT_ERR_BADNCELLS; + } + + return fdt_appendprop(fdt, nodeoffset, name, data, + 
(addr_cells + size_cells) * sizeof(fdt32_t)); +} diff --git a/tools/src/libfdt/fdt_empty_tree.c b/tools/src/libfdt/fdt_empty_tree.c new file mode 100644 index 0000000..15f0cd7 --- /dev/null +++ b/tools/src/libfdt/fdt_empty_tree.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2012 David Gibson, IBM Corporation. + */ +#include "libfdt_env.h" + +#include "fdt.h" +#include "libfdt.h" + +#include "libfdt_internal.h" + +int fdt_create_empty_tree(void *buf, int bufsize) +{ + int err; + + err = fdt_create(buf, bufsize); + if (err) + return err; + + err = fdt_finish_reservemap(buf); + if (err) + return err; + + err = fdt_begin_node(buf, ""); + if (err) + return err; + + err = fdt_end_node(buf); + if (err) + return err; + + err = fdt_finish(buf); + if (err) + return err; + + return fdt_open_into(buf, buf, bufsize); +} diff --git a/tools/src/libfdt/fdt_overlay.c b/tools/src/libfdt/fdt_overlay.c new file mode 100644 index 0000000..1fc78d4 --- /dev/null +++ b/tools/src/libfdt/fdt_overlay.c @@ -0,0 +1,882 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2016 Free Electrons + * Copyright (C) 2016 NextThing Co. + */ +#include "libfdt_env.h" + +#include "fdt.h" +#include "libfdt.h" + +#include "libfdt_internal.h" + +/** + * overlay_get_target_phandle - retrieves the target phandle of a fragment + * @fdto: pointer to the device tree overlay blob + * @fragment: node offset of the fragment in the overlay + * + * overlay_get_target_phandle() retrieves the target phandle of an + * overlay fragment when that fragment uses a phandle (target + * property) instead of a path (target-path property). 
+ * + * returns: + * the phandle pointed by the target property + * 0, if the phandle was not found + * -1, if the phandle was malformed + */ +static uint32_t overlay_get_target_phandle(const void *fdto, int fragment) +{ + const fdt32_t *val; + int len; + + val = fdt_getprop(fdto, fragment, "target", &len); + if (!val) + return 0; + + if ((len != sizeof(*val)) || (fdt32_to_cpu(*val) == (uint32_t)-1)) + return (uint32_t)-1; + + return fdt32_to_cpu(*val); +} + +/** + * overlay_get_target - retrieves the offset of a fragment's target + * @fdt: Base device tree blob + * @fdto: Device tree overlay blob + * @fragment: node offset of the fragment in the overlay + * @pathp: pointer which receives the path of the target (or NULL) + * + * overlay_get_target() retrieves the target offset in the base + * device tree of a fragment, no matter how the actual targeting is + * done (through a phandle or a path) + * + * returns: + * the targeted node offset in the base device tree + * Negative error code on error + */ +static int overlay_get_target(const void *fdt, const void *fdto, + int fragment, char const **pathp) +{ + uint32_t phandle; + const char *path = NULL; + int path_len = 0, ret; + + /* Try first to do a phandle based lookup */ + phandle = overlay_get_target_phandle(fdto, fragment); + if (phandle == (uint32_t)-1) + return -FDT_ERR_BADPHANDLE; + + /* no phandle, try path */ + if (!phandle) { + /* And then a path based lookup */ + path = fdt_getprop(fdto, fragment, "target-path", &path_len); + if (path) + ret = fdt_path_offset(fdt, path); + else + ret = path_len; + } else + ret = fdt_node_offset_by_phandle(fdt, phandle); + + /* + * If we haven't found either a target or a + * target-path property in a node that contains a + * __overlay__ subnode (we wouldn't be called + * otherwise), consider it a improperly written + * overlay + */ + if (ret < 0 && path_len == -FDT_ERR_NOTFOUND) + ret = -FDT_ERR_BADOVERLAY; + + /* return on error */ + if (ret < 0) + return ret; + + /* 
return pointer to path (if available) */ + if (pathp) + *pathp = path ? path : NULL; + + return ret; +} + +/** + * overlay_phandle_add_offset - Increases a phandle by an offset + * @fdt: Base device tree blob + * @node: Device tree overlay blob + * @name: Name of the property to modify (phandle or linux,phandle) + * @delta: offset to apply + * + * overlay_phandle_add_offset() increments a node phandle by a given + * offset. + * + * returns: + * 0 on success. + * Negative error code on error + */ +static int overlay_phandle_add_offset(void *fdt, int node, + const char *name, uint32_t delta) +{ + const fdt32_t *val; + uint32_t adj_val; + int len; + + val = fdt_getprop(fdt, node, name, &len); + if (!val) + return len; + + if (len != sizeof(*val)) + return -FDT_ERR_BADPHANDLE; + + adj_val = fdt32_to_cpu(*val); + if ((adj_val + delta) < adj_val) + return -FDT_ERR_NOPHANDLES; + + adj_val += delta; + if (adj_val == (uint32_t)-1) + return -FDT_ERR_NOPHANDLES; + + return fdt_setprop_inplace_u32(fdt, node, name, adj_val); +} + +/** + * overlay_adjust_node_phandles - Offsets the phandles of a node + * @fdto: Device tree overlay blob + * @node: Offset of the node we want to adjust + * @delta: Offset to shift the phandles of + * + * overlay_adjust_node_phandles() adds a constant to all the phandles + * of a given node. This is mainly use as part of the overlay + * application process, when we want to update all the overlay + * phandles to not conflict with the overlays of the base device tree. 
+ * + * returns: + * 0 on success + * Negative error code on failure + */ +static int overlay_adjust_node_phandles(void *fdto, int node, + uint32_t delta) +{ + int child; + int ret; + + ret = overlay_phandle_add_offset(fdto, node, "phandle", delta); + if (ret && ret != -FDT_ERR_NOTFOUND) + return ret; + + ret = overlay_phandle_add_offset(fdto, node, "linux,phandle", delta); + if (ret && ret != -FDT_ERR_NOTFOUND) + return ret; + + fdt_for_each_subnode(child, fdto, node) { + ret = overlay_adjust_node_phandles(fdto, child, delta); + if (ret) + return ret; + } + + return 0; +} + +/** + * overlay_adjust_local_phandles - Adjust the phandles of a whole overlay + * @fdto: Device tree overlay blob + * @delta: Offset to shift the phandles of + * + * overlay_adjust_local_phandles() adds a constant to all the + * phandles of an overlay. This is mainly use as part of the overlay + * application process, when we want to update all the overlay + * phandles to not conflict with the overlays of the base device tree. + * + * returns: + * 0 on success + * Negative error code on failure + */ +static int overlay_adjust_local_phandles(void *fdto, uint32_t delta) +{ + /* + * Start adjusting the phandles from the overlay root + */ + return overlay_adjust_node_phandles(fdto, 0, delta); +} + +/** + * overlay_update_local_node_references - Adjust the overlay references + * @fdto: Device tree overlay blob + * @tree_node: Node offset of the node to operate on + * @fixup_node: Node offset of the matching local fixups node + * @delta: Offset to shift the phandles of + * + * overlay_update_local_nodes_references() update the phandles + * pointing to a node within the device tree overlay by adding a + * constant delta. + * + * This is mainly used as part of a device tree application process, + * where you want the device tree overlays phandles to not conflict + * with the ones from the base device tree before merging them. 
+ * + * returns: + * 0 on success + * Negative error code on failure + */ +static int overlay_update_local_node_references(void *fdto, + int tree_node, + int fixup_node, + uint32_t delta) +{ + int fixup_prop; + int fixup_child; + int ret; + + fdt_for_each_property_offset(fixup_prop, fdto, fixup_node) { + const fdt32_t *fixup_val; + const char *tree_val; + const char *name; + int fixup_len; + int tree_len; + int i; + + fixup_val = fdt_getprop_by_offset(fdto, fixup_prop, + &name, &fixup_len); + if (!fixup_val) + return fixup_len; + + if (fixup_len % sizeof(uint32_t)) + return -FDT_ERR_BADOVERLAY; + fixup_len /= sizeof(uint32_t); + + tree_val = fdt_getprop(fdto, tree_node, name, &tree_len); + if (!tree_val) { + if (tree_len == -FDT_ERR_NOTFOUND) + return -FDT_ERR_BADOVERLAY; + + return tree_len; + } + + for (i = 0; i < fixup_len; i++) { + fdt32_t adj_val; + uint32_t poffset; + + poffset = fdt32_to_cpu(fixup_val[i]); + + /* + * phandles to fixup can be unaligned. + * + * Use a memcpy for the architectures that do + * not support unaligned accesses. 
+ */ + memcpy(&adj_val, tree_val + poffset, sizeof(adj_val)); + + adj_val = cpu_to_fdt32(fdt32_to_cpu(adj_val) + delta); + + ret = fdt_setprop_inplace_namelen_partial(fdto, + tree_node, + name, + strlen(name), + poffset, + &adj_val, + sizeof(adj_val)); + if (ret == -FDT_ERR_NOSPACE) + return -FDT_ERR_BADOVERLAY; + + if (ret) + return ret; + } + } + + fdt_for_each_subnode(fixup_child, fdto, fixup_node) { + const char *fixup_child_name = fdt_get_name(fdto, fixup_child, + NULL); + int tree_child; + + tree_child = fdt_subnode_offset(fdto, tree_node, + fixup_child_name); + if (tree_child == -FDT_ERR_NOTFOUND) + return -FDT_ERR_BADOVERLAY; + if (tree_child < 0) + return tree_child; + + ret = overlay_update_local_node_references(fdto, + tree_child, + fixup_child, + delta); + if (ret) + return ret; + } + + return 0; +} + +/** + * overlay_update_local_references - Adjust the overlay references + * @fdto: Device tree overlay blob + * @delta: Offset to shift the phandles of + * + * overlay_update_local_references() update all the phandles pointing + * to a node within the device tree overlay by adding a constant + * delta to not conflict with the base overlay. + * + * This is mainly used as part of a device tree application process, + * where you want the device tree overlays phandles to not conflict + * with the ones from the base device tree before merging them. 
+ * + * returns: + * 0 on success + * Negative error code on failure + */ +static int overlay_update_local_references(void *fdto, uint32_t delta) +{ + int fixups; + + fixups = fdt_path_offset(fdto, "/__local_fixups__"); + if (fixups < 0) { + /* There's no local phandles to adjust, bail out */ + if (fixups == -FDT_ERR_NOTFOUND) + return 0; + + return fixups; + } + + /* + * Update our local references from the root of the tree + */ + return overlay_update_local_node_references(fdto, 0, fixups, + delta); +} + +/** + * overlay_fixup_one_phandle - Set an overlay phandle to the base one + * @fdt: Base Device Tree blob + * @fdto: Device tree overlay blob + * @symbols_off: Node offset of the symbols node in the base device tree + * @path: Path to a node holding a phandle in the overlay + * @path_len: number of path characters to consider + * @name: Name of the property holding the phandle reference in the overlay + * @name_len: number of name characters to consider + * @poffset: Offset within the overlay property where the phandle is stored + * @label: Label of the node referenced by the phandle + * + * overlay_fixup_one_phandle() resolves an overlay phandle pointing to + * a node in the base device tree. + * + * This is part of the device tree overlay application process, when + * you want all the phandles in the overlay to point to the actual + * base dt nodes. 
+ * + * returns: + * 0 on success + * Negative error code on failure + */ +static int overlay_fixup_one_phandle(void *fdt, void *fdto, + int symbols_off, + const char *path, uint32_t path_len, + const char *name, uint32_t name_len, + int poffset, const char *label) +{ + const char *symbol_path; + uint32_t phandle; + fdt32_t phandle_prop; + int symbol_off, fixup_off; + int prop_len; + + if (symbols_off < 0) + return symbols_off; + + symbol_path = fdt_getprop(fdt, symbols_off, label, + &prop_len); + if (!symbol_path) + return prop_len; + + symbol_off = fdt_path_offset(fdt, symbol_path); + if (symbol_off < 0) + return symbol_off; + + phandle = fdt_get_phandle(fdt, symbol_off); + if (!phandle) + return -FDT_ERR_NOTFOUND; + + fixup_off = fdt_path_offset_namelen(fdto, path, path_len); + if (fixup_off == -FDT_ERR_NOTFOUND) + return -FDT_ERR_BADOVERLAY; + if (fixup_off < 0) + return fixup_off; + + phandle_prop = cpu_to_fdt32(phandle); + return fdt_setprop_inplace_namelen_partial(fdto, fixup_off, + name, name_len, poffset, + &phandle_prop, + sizeof(phandle_prop)); +}; + +/** + * overlay_fixup_phandle - Set an overlay phandle to the base one + * @fdt: Base Device Tree blob + * @fdto: Device tree overlay blob + * @symbols_off: Node offset of the symbols node in the base device tree + * @property: Property offset in the overlay holding the list of fixups + * + * overlay_fixup_phandle() resolves all the overlay phandles pointed + * to in a __fixups__ property, and updates them to match the phandles + * in use in the base device tree. + * + * This is part of the device tree overlay application process, when + * you want all the phandles in the overlay to point to the actual + * base dt nodes. 
+ * + * returns: + * 0 on success + * Negative error code on failure + */ +static int overlay_fixup_phandle(void *fdt, void *fdto, int symbols_off, + int property) +{ + const char *value; + const char *label; + int len; + + value = fdt_getprop_by_offset(fdto, property, + &label, &len); + if (!value) { + if (len == -FDT_ERR_NOTFOUND) + return -FDT_ERR_INTERNAL; + + return len; + } + + do { + const char *path, *name, *fixup_end; + const char *fixup_str = value; + uint32_t path_len, name_len; + uint32_t fixup_len; + char *sep, *endptr; + int poffset, ret; + + fixup_end = memchr(value, '\0', len); + if (!fixup_end) + return -FDT_ERR_BADOVERLAY; + fixup_len = fixup_end - fixup_str; + + len -= fixup_len + 1; + value += fixup_len + 1; + + path = fixup_str; + sep = memchr(fixup_str, ':', fixup_len); + if (!sep || *sep != ':') + return -FDT_ERR_BADOVERLAY; + + path_len = sep - path; + if (path_len == (fixup_len - 1)) + return -FDT_ERR_BADOVERLAY; + + fixup_len -= path_len + 1; + name = sep + 1; + sep = memchr(name, ':', fixup_len); + if (!sep || *sep != ':') + return -FDT_ERR_BADOVERLAY; + + name_len = sep - name; + if (!name_len) + return -FDT_ERR_BADOVERLAY; + + poffset = strtoul(sep + 1, &endptr, 10); + if ((*endptr != '\0') || (endptr <= (sep + 1))) + return -FDT_ERR_BADOVERLAY; + + ret = overlay_fixup_one_phandle(fdt, fdto, symbols_off, + path, path_len, name, name_len, + poffset, label); + if (ret) + return ret; + } while (len > 0); + + return 0; +} + +/** + * overlay_fixup_phandles - Resolve the overlay phandles to the base + * device tree + * @fdt: Base Device Tree blob + * @fdto: Device tree overlay blob + * + * overlay_fixup_phandles() resolves all the overlay phandles pointing + * to nodes in the base device tree. + * + * This is one of the steps of the device tree overlay application + * process, when you want all the phandles in the overlay to point to + * the actual base dt nodes. 
+ * + * returns: + * 0 on success + * Negative error code on failure + */ +static int overlay_fixup_phandles(void *fdt, void *fdto) +{ + int fixups_off, symbols_off; + int property; + + /* We can have overlays without any fixups */ + fixups_off = fdt_path_offset(fdto, "/__fixups__"); + if (fixups_off == -FDT_ERR_NOTFOUND) + return 0; /* nothing to do */ + if (fixups_off < 0) + return fixups_off; + + /* And base DTs without symbols */ + symbols_off = fdt_path_offset(fdt, "/__symbols__"); + if ((symbols_off < 0 && (symbols_off != -FDT_ERR_NOTFOUND))) + return symbols_off; + + fdt_for_each_property_offset(property, fdto, fixups_off) { + int ret; + + ret = overlay_fixup_phandle(fdt, fdto, symbols_off, property); + if (ret) + return ret; + } + + return 0; +} + +/** + * overlay_apply_node - Merges a node into the base device tree + * @fdt: Base Device Tree blob + * @target: Node offset in the base device tree to apply the fragment to + * @fdto: Device tree overlay blob + * @node: Node offset in the overlay holding the changes to merge + * + * overlay_apply_node() merges a node into a target base device tree + * node pointed. + * + * This is part of the final step in the device tree overlay + * application process, when all the phandles have been adjusted and + * resolved and you just have to merge overlay into the base device + * tree. 
+ * + * returns: + * 0 on success + * Negative error code on failure + */ +static int overlay_apply_node(void *fdt, int target, + void *fdto, int node) +{ + int property; + int subnode; + + fdt_for_each_property_offset(property, fdto, node) { + const char *name; + const void *prop; + int prop_len; + int ret; + + prop = fdt_getprop_by_offset(fdto, property, &name, + &prop_len); + if (prop_len == -FDT_ERR_NOTFOUND) + return -FDT_ERR_INTERNAL; + if (prop_len < 0) + return prop_len; + + ret = fdt_setprop(fdt, target, name, prop, prop_len); + if (ret) + return ret; + } + + fdt_for_each_subnode(subnode, fdto, node) { + const char *name = fdt_get_name(fdto, subnode, NULL); + int nnode; + int ret; + + nnode = fdt_add_subnode(fdt, target, name); + if (nnode == -FDT_ERR_EXISTS) { + nnode = fdt_subnode_offset(fdt, target, name); + if (nnode == -FDT_ERR_NOTFOUND) + return -FDT_ERR_INTERNAL; + } + + if (nnode < 0) + return nnode; + + ret = overlay_apply_node(fdt, nnode, fdto, subnode); + if (ret) + return ret; + } + + return 0; +} + +/** + * overlay_merge - Merge an overlay into its base device tree + * @fdt: Base Device Tree blob + * @fdto: Device tree overlay blob + * + * overlay_merge() merges an overlay into its base device tree. + * + * This is the next to last step in the device tree overlay application + * process, when all the phandles have been adjusted and resolved and + * you just have to merge overlay into the base device tree. + * + * returns: + * 0 on success + * Negative error code on failure + */ +static int overlay_merge(void *fdt, void *fdto) +{ + int fragment; + + fdt_for_each_subnode(fragment, fdto, 0) { + int overlay; + int target; + int ret; + + /* + * Each fragments will have an __overlay__ node. 
If + * they don't, it's not supposed to be merged + */ + overlay = fdt_subnode_offset(fdto, fragment, "__overlay__"); + if (overlay == -FDT_ERR_NOTFOUND) + continue; + + if (overlay < 0) + return overlay; + + target = overlay_get_target(fdt, fdto, fragment, NULL); + if (target < 0) + return target; + + ret = overlay_apply_node(fdt, target, fdto, overlay); + if (ret) + return ret; + } + + return 0; +} + +static int get_path_len(const void *fdt, int nodeoffset) +{ + int len = 0, namelen; + const char *name; + + FDT_RO_PROBE(fdt); + + for (;;) { + name = fdt_get_name(fdt, nodeoffset, &namelen); + if (!name) + return namelen; + + /* root? we're done */ + if (namelen == 0) + break; + + nodeoffset = fdt_parent_offset(fdt, nodeoffset); + if (nodeoffset < 0) + return nodeoffset; + len += namelen + 1; + } + + /* in case of root pretend it's "/" */ + if (len == 0) + len++; + return len; +} + +/** + * overlay_symbol_update - Update the symbols of base tree after a merge + * @fdt: Base Device Tree blob + * @fdto: Device tree overlay blob + * + * overlay_symbol_update() updates the symbols of the base tree with the + * symbols of the applied overlay + * + * This is the last step in the device tree overlay application + * process, allowing the reference of overlay symbols by subsequent + * overlay operations. 
+ * + * returns: + * 0 on success + * Negative error code on failure + */ +static int overlay_symbol_update(void *fdt, void *fdto) +{ + int root_sym, ov_sym, prop, path_len, fragment, target; + int len, frag_name_len, ret, rel_path_len; + const char *s, *e; + const char *path; + const char *name; + const char *frag_name; + const char *rel_path; + const char *target_path; + char *buf; + void *p; + + ov_sym = fdt_subnode_offset(fdto, 0, "__symbols__"); + + /* if no overlay symbols exist no problem */ + if (ov_sym < 0) + return 0; + + root_sym = fdt_subnode_offset(fdt, 0, "__symbols__"); + + /* it no root symbols exist we should create them */ + if (root_sym == -FDT_ERR_NOTFOUND) + root_sym = fdt_add_subnode(fdt, 0, "__symbols__"); + + /* any error is fatal now */ + if (root_sym < 0) + return root_sym; + + /* iterate over each overlay symbol */ + fdt_for_each_property_offset(prop, fdto, ov_sym) { + path = fdt_getprop_by_offset(fdto, prop, &name, &path_len); + if (!path) + return path_len; + + /* verify it's a string property (terminated by a single \0) */ + if (path_len < 1 || memchr(path, '\0', path_len) != &path[path_len - 1]) + return -FDT_ERR_BADVALUE; + + /* keep end marker to avoid strlen() */ + e = path + path_len; + + if (*path != '/') + return -FDT_ERR_BADVALUE; + + /* get fragment name first */ + s = strchr(path + 1, '/'); + if (!s) { + /* Symbol refers to something that won't end + * up in the target tree */ + continue; + } + + frag_name = path + 1; + frag_name_len = s - path - 1; + + /* verify format; safe since "s" lies in \0 terminated prop */ + len = sizeof("/__overlay__/") - 1; + if ((e - s) > len && (memcmp(s, "/__overlay__/", len) == 0)) { + /* /<fragment-name>/__overlay__/<relative-subnode-path> */ + rel_path = s + len; + rel_path_len = e - rel_path - 1; + } else if ((e - s) == len + && (memcmp(s, "/__overlay__", len - 1) == 0)) { + /* /<fragment-name>/__overlay__ */ + rel_path = ""; + rel_path_len = 0; + } else { + /* Symbol refers to something 
that won't end + * up in the target tree */ + continue; + } + + /* find the fragment index in which the symbol lies */ + ret = fdt_subnode_offset_namelen(fdto, 0, frag_name, + frag_name_len); + /* not found? */ + if (ret < 0) + return -FDT_ERR_BADOVERLAY; + fragment = ret; + + /* an __overlay__ subnode must exist */ + ret = fdt_subnode_offset(fdto, fragment, "__overlay__"); + if (ret < 0) + return -FDT_ERR_BADOVERLAY; + + /* get the target of the fragment */ + ret = overlay_get_target(fdt, fdto, fragment, &target_path); + if (ret < 0) + return ret; + target = ret; + + /* if we have a target path use */ + if (!target_path) { + ret = get_path_len(fdt, target); + if (ret < 0) + return ret; + len = ret; + } else { + len = strlen(target_path); + } + + ret = fdt_setprop_placeholder(fdt, root_sym, name, + len + (len > 1) + rel_path_len + 1, &p); + if (ret < 0) + return ret; + + if (!target_path) { + /* again in case setprop_placeholder changed it */ + ret = overlay_get_target(fdt, fdto, fragment, &target_path); + if (ret < 0) + return ret; + target = ret; + } + + buf = p; + if (len > 1) { /* target is not root */ + if (!target_path) { + ret = fdt_get_path(fdt, target, buf, len + 1); + if (ret < 0) + return ret; + } else + memcpy(buf, target_path, len + 1); + + } else + len--; + + buf[len] = '/'; + memcpy(buf + len + 1, rel_path, rel_path_len); + buf[len + 1 + rel_path_len] = '\0'; + } + + return 0; +} + +int fdt_overlay_apply(void *fdt, void *fdto) +{ + uint32_t delta; + int ret; + + FDT_RO_PROBE(fdt); + FDT_RO_PROBE(fdto); + + ret = fdt_find_max_phandle(fdt, &delta); + if (ret) + goto err; + + ret = overlay_adjust_local_phandles(fdto, delta); + if (ret) + goto err; + + ret = overlay_update_local_references(fdto, delta); + if (ret) + goto err; + + ret = overlay_fixup_phandles(fdt, fdto); + if (ret) + goto err; + + ret = overlay_merge(fdt, fdto); + if (ret) + goto err; + + ret = overlay_symbol_update(fdt, fdto); + if (ret) + goto err; + + /* + * The overlay has been 
damaged, erase its magic. + */ + fdt_set_magic(fdto, ~0); + + return 0; + +err: + /* + * The overlay might have been damaged, erase its magic. + */ + fdt_set_magic(fdto, ~0); + + /* + * The base device tree might have been damaged, erase its + * magic. + */ + fdt_set_magic(fdt, ~0); + + return ret; +} diff --git a/tools/src/libfdt/fdt_ro.c b/tools/src/libfdt/fdt_ro.c new file mode 100644 index 0000000..e7f8b62 --- /dev/null +++ b/tools/src/libfdt/fdt_ro.c @@ -0,0 +1,859 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + */ +#include "libfdt_env.h" + +#include "fdt.h" +#include "libfdt.h" + +#include "libfdt_internal.h" + +static int fdt_nodename_eq_(const void *fdt, int offset, + const char *s, int len) +{ + int olen; + const char *p = fdt_get_name(fdt, offset, &olen); + + if (!p || olen < len) + /* short match */ + return 0; + + if (memcmp(p, s, len) != 0) + return 0; + + if (p[len] == '\0') + return 1; + else if (!memchr(s, '@', len) && (p[len] == '@')) + return 1; + else + return 0; +} + +const char *fdt_get_string(const void *fdt, int stroffset, int *lenp) +{ + int32_t totalsize; + uint32_t absoffset; + size_t len; + int err; + const char *s, *n; + + if (can_assume(VALID_INPUT)) { + s = (const char *)fdt + fdt_off_dt_strings(fdt) + stroffset; + + if (lenp) + *lenp = strlen(s); + return s; + } + totalsize = fdt_ro_probe_(fdt); + err = totalsize; + if (totalsize < 0) + goto fail; + + err = -FDT_ERR_BADOFFSET; + absoffset = stroffset + fdt_off_dt_strings(fdt); + if (absoffset >= (unsigned)totalsize) + goto fail; + len = totalsize - absoffset; + + if (fdt_magic(fdt) == FDT_MAGIC) { + if (stroffset < 0) + goto fail; + if (can_assume(LATEST) || fdt_version(fdt) >= 17) { + if ((unsigned)stroffset >= fdt_size_dt_strings(fdt)) + goto fail; + if ((fdt_size_dt_strings(fdt) - stroffset) < len) + len = fdt_size_dt_strings(fdt) - stroffset; + } + } else if 
(fdt_magic(fdt) == FDT_SW_MAGIC) { + unsigned int sw_stroffset = -stroffset; + + if ((stroffset >= 0) || + (sw_stroffset > fdt_size_dt_strings(fdt))) + goto fail; + if (sw_stroffset < len) + len = sw_stroffset; + } else { + err = -FDT_ERR_INTERNAL; + goto fail; + } + + s = (const char *)fdt + absoffset; + n = memchr(s, '\0', len); + if (!n) { + /* missing terminating NULL */ + err = -FDT_ERR_TRUNCATED; + goto fail; + } + + if (lenp) + *lenp = n - s; + return s; + +fail: + if (lenp) + *lenp = err; + return NULL; +} + +const char *fdt_string(const void *fdt, int stroffset) +{ + return fdt_get_string(fdt, stroffset, NULL); +} + +static int fdt_string_eq_(const void *fdt, int stroffset, + const char *s, int len) +{ + int slen; + const char *p = fdt_get_string(fdt, stroffset, &slen); + + return p && (slen == len) && (memcmp(p, s, len) == 0); +} + +int fdt_find_max_phandle(const void *fdt, uint32_t *phandle) +{ + uint32_t max = 0; + int offset = -1; + + while (true) { + uint32_t value; + + offset = fdt_next_node(fdt, offset, NULL); + if (offset < 0) { + if (offset == -FDT_ERR_NOTFOUND) + break; + + return offset; + } + + value = fdt_get_phandle(fdt, offset); + + if (value > max) + max = value; + } + + if (phandle) + *phandle = max; + + return 0; +} + +int fdt_generate_phandle(const void *fdt, uint32_t *phandle) +{ + uint32_t max; + int err; + + err = fdt_find_max_phandle(fdt, &max); + if (err < 0) + return err; + + if (max == FDT_MAX_PHANDLE) + return -FDT_ERR_NOPHANDLES; + + if (phandle) + *phandle = max + 1; + + return 0; +} + +static const struct fdt_reserve_entry *fdt_mem_rsv(const void *fdt, int n) +{ + unsigned int offset = n * sizeof(struct fdt_reserve_entry); + unsigned int absoffset = fdt_off_mem_rsvmap(fdt) + offset; + + if (!can_assume(VALID_INPUT)) { + if (absoffset < fdt_off_mem_rsvmap(fdt)) + return NULL; + if (absoffset > fdt_totalsize(fdt) - + sizeof(struct fdt_reserve_entry)) + return NULL; + } + return fdt_mem_rsv_(fdt, n); +} + +int 
fdt_get_mem_rsv(const void *fdt, int n, uint64_t *address, uint64_t *size) +{ + const struct fdt_reserve_entry *re; + + FDT_RO_PROBE(fdt); + re = fdt_mem_rsv(fdt, n); + if (!can_assume(VALID_INPUT) && !re) + return -FDT_ERR_BADOFFSET; + + *address = fdt64_ld(&re->address); + *size = fdt64_ld(&re->size); + return 0; +} + +int fdt_num_mem_rsv(const void *fdt) +{ + int i; + const struct fdt_reserve_entry *re; + + for (i = 0; (re = fdt_mem_rsv(fdt, i)) != NULL; i++) { + if (fdt64_ld(&re->size) == 0) + return i; + } + return -FDT_ERR_TRUNCATED; +} + +static int nextprop_(const void *fdt, int offset) +{ + uint32_t tag; + int nextoffset; + + do { + tag = fdt_next_tag(fdt, offset, &nextoffset); + + switch (tag) { + case FDT_END: + if (nextoffset >= 0) + return -FDT_ERR_BADSTRUCTURE; + else + return nextoffset; + + case FDT_PROP: + return offset; + } + offset = nextoffset; + } while (tag == FDT_NOP); + + return -FDT_ERR_NOTFOUND; +} + +int fdt_subnode_offset_namelen(const void *fdt, int offset, + const char *name, int namelen) +{ + int depth; + + FDT_RO_PROBE(fdt); + + for (depth = 0; + (offset >= 0) && (depth >= 0); + offset = fdt_next_node(fdt, offset, &depth)) + if ((depth == 1) + && fdt_nodename_eq_(fdt, offset, name, namelen)) + return offset; + + if (depth < 0) + return -FDT_ERR_NOTFOUND; + return offset; /* error */ +} + +int fdt_subnode_offset(const void *fdt, int parentoffset, + const char *name) +{ + return fdt_subnode_offset_namelen(fdt, parentoffset, name, strlen(name)); +} + +int fdt_path_offset_namelen(const void *fdt, const char *path, int namelen) +{ + const char *end = path + namelen; + const char *p = path; + int offset = 0; + + FDT_RO_PROBE(fdt); + + /* see if we have an alias */ + if (*path != '/') { + const char *q = memchr(path, '/', end - p); + + if (!q) + q = end; + + p = fdt_get_alias_namelen(fdt, p, q - p); + if (!p) + return -FDT_ERR_BADPATH; + offset = fdt_path_offset(fdt, p); + + p = q; + } + + while (p < end) { + const char *q; + + while (*p == 
'/') { + p++; + if (p == end) + return offset; + } + q = memchr(p, '/', end - p); + if (! q) + q = end; + + offset = fdt_subnode_offset_namelen(fdt, offset, p, q-p); + if (offset < 0) + return offset; + + p = q; + } + + return offset; +} + +int fdt_path_offset(const void *fdt, const char *path) +{ + return fdt_path_offset_namelen(fdt, path, strlen(path)); +} + +const char *fdt_get_name(const void *fdt, int nodeoffset, int *len) +{ + const struct fdt_node_header *nh = fdt_offset_ptr_(fdt, nodeoffset); + const char *nameptr; + int err; + + if (((err = fdt_ro_probe_(fdt)) < 0) + || ((err = fdt_check_node_offset_(fdt, nodeoffset)) < 0)) + goto fail; + + nameptr = nh->name; + + if (!can_assume(LATEST) && fdt_version(fdt) < 0x10) { + /* + * For old FDT versions, match the naming conventions of V16: + * give only the leaf name (after all /). The actual tree + * contents are loosely checked. + */ + const char *leaf; + leaf = strrchr(nameptr, '/'); + if (leaf == NULL) { + err = -FDT_ERR_BADSTRUCTURE; + goto fail; + } + nameptr = leaf+1; + } + + if (len) + *len = strlen(nameptr); + + return nameptr; + + fail: + if (len) + *len = err; + return NULL; +} + +int fdt_first_property_offset(const void *fdt, int nodeoffset) +{ + int offset; + + if ((offset = fdt_check_node_offset_(fdt, nodeoffset)) < 0) + return offset; + + return nextprop_(fdt, offset); +} + +int fdt_next_property_offset(const void *fdt, int offset) +{ + if ((offset = fdt_check_prop_offset_(fdt, offset)) < 0) + return offset; + + return nextprop_(fdt, offset); +} + +static const struct fdt_property *fdt_get_property_by_offset_(const void *fdt, + int offset, + int *lenp) +{ + int err; + const struct fdt_property *prop; + + if (!can_assume(VALID_INPUT) && + (err = fdt_check_prop_offset_(fdt, offset)) < 0) { + if (lenp) + *lenp = err; + return NULL; + } + + prop = fdt_offset_ptr_(fdt, offset); + + if (lenp) + *lenp = fdt32_ld(&prop->len); + + return prop; +} + +const struct fdt_property 
*fdt_get_property_by_offset(const void *fdt, + int offset, + int *lenp) +{ + /* Prior to version 16, properties may need realignment + * and this API does not work. fdt_getprop_*() will, however. */ + + if (!can_assume(LATEST) && fdt_version(fdt) < 0x10) { + if (lenp) + *lenp = -FDT_ERR_BADVERSION; + return NULL; + } + + return fdt_get_property_by_offset_(fdt, offset, lenp); +} + +static const struct fdt_property *fdt_get_property_namelen_(const void *fdt, + int offset, + const char *name, + int namelen, + int *lenp, + int *poffset) +{ + for (offset = fdt_first_property_offset(fdt, offset); + (offset >= 0); + (offset = fdt_next_property_offset(fdt, offset))) { + const struct fdt_property *prop; + + prop = fdt_get_property_by_offset_(fdt, offset, lenp); + if (!can_assume(LIBFDT_FLAWLESS) && !prop) { + offset = -FDT_ERR_INTERNAL; + break; + } + if (fdt_string_eq_(fdt, fdt32_ld(&prop->nameoff), + name, namelen)) { + if (poffset) + *poffset = offset; + return prop; + } + } + + if (lenp) + *lenp = offset; + return NULL; +} + + +const struct fdt_property *fdt_get_property_namelen(const void *fdt, + int offset, + const char *name, + int namelen, int *lenp) +{ + /* Prior to version 16, properties may need realignment + * and this API does not work. fdt_getprop_*() will, however. 
*/ + if (!can_assume(LATEST) && fdt_version(fdt) < 0x10) { + if (lenp) + *lenp = -FDT_ERR_BADVERSION; + return NULL; + } + + return fdt_get_property_namelen_(fdt, offset, name, namelen, lenp, + NULL); +} + + +const struct fdt_property *fdt_get_property(const void *fdt, + int nodeoffset, + const char *name, int *lenp) +{ + return fdt_get_property_namelen(fdt, nodeoffset, name, + strlen(name), lenp); +} + +const void *fdt_getprop_namelen(const void *fdt, int nodeoffset, + const char *name, int namelen, int *lenp) +{ + int poffset; + const struct fdt_property *prop; + + prop = fdt_get_property_namelen_(fdt, nodeoffset, name, namelen, lenp, + &poffset); + if (!prop) + return NULL; + + /* Handle realignment */ + if (!can_assume(LATEST) && fdt_version(fdt) < 0x10 && + (poffset + sizeof(*prop)) % 8 && fdt32_ld(&prop->len) >= 8) + return prop->data + 4; + return prop->data; +} + +const void *fdt_getprop_by_offset(const void *fdt, int offset, + const char **namep, int *lenp) +{ + const struct fdt_property *prop; + + prop = fdt_get_property_by_offset_(fdt, offset, lenp); + if (!prop) + return NULL; + if (namep) { + const char *name; + int namelen; + + if (!can_assume(VALID_INPUT)) { + name = fdt_get_string(fdt, fdt32_ld(&prop->nameoff), + &namelen); + if (!name) { + if (lenp) + *lenp = namelen; + return NULL; + } + *namep = name; + } else { + *namep = fdt_string(fdt, fdt32_ld(&prop->nameoff)); + } + } + + /* Handle realignment */ + if (!can_assume(LATEST) && fdt_version(fdt) < 0x10 && + (offset + sizeof(*prop)) % 8 && fdt32_ld(&prop->len) >= 8) + return prop->data + 4; + return prop->data; +} + +const void *fdt_getprop(const void *fdt, int nodeoffset, + const char *name, int *lenp) +{ + return fdt_getprop_namelen(fdt, nodeoffset, name, strlen(name), lenp); +} + +uint32_t fdt_get_phandle(const void *fdt, int nodeoffset) +{ + const fdt32_t *php; + int len; + + /* FIXME: This is a bit sub-optimal, since we potentially scan + * over all the properties twice. 
*/ + php = fdt_getprop(fdt, nodeoffset, "phandle", &len); + if (!php || (len != sizeof(*php))) { + php = fdt_getprop(fdt, nodeoffset, "linux,phandle", &len); + if (!php || (len != sizeof(*php))) + return 0; + } + + return fdt32_ld(php); +} + +const char *fdt_get_alias_namelen(const void *fdt, + const char *name, int namelen) +{ + int aliasoffset; + + aliasoffset = fdt_path_offset(fdt, "/aliases"); + if (aliasoffset < 0) + return NULL; + + return fdt_getprop_namelen(fdt, aliasoffset, name, namelen, NULL); +} + +const char *fdt_get_alias(const void *fdt, const char *name) +{ + return fdt_get_alias_namelen(fdt, name, strlen(name)); +} + +int fdt_get_path(const void *fdt, int nodeoffset, char *buf, int buflen) +{ + int pdepth = 0, p = 0; + int offset, depth, namelen; + const char *name; + + FDT_RO_PROBE(fdt); + + if (buflen < 2) + return -FDT_ERR_NOSPACE; + + for (offset = 0, depth = 0; + (offset >= 0) && (offset <= nodeoffset); + offset = fdt_next_node(fdt, offset, &depth)) { + while (pdepth > depth) { + do { + p--; + } while (buf[p-1] != '/'); + pdepth--; + } + + if (pdepth >= depth) { + name = fdt_get_name(fdt, offset, &namelen); + if (!name) + return namelen; + if ((p + namelen + 1) <= buflen) { + memcpy(buf + p, name, namelen); + p += namelen; + buf[p++] = '/'; + pdepth++; + } + } + + if (offset == nodeoffset) { + if (pdepth < (depth + 1)) + return -FDT_ERR_NOSPACE; + + if (p > 1) /* special case so that root path is "/", not "" */ + p--; + buf[p] = '\0'; + return 0; + } + } + + if ((offset == -FDT_ERR_NOTFOUND) || (offset >= 0)) + return -FDT_ERR_BADOFFSET; + else if (offset == -FDT_ERR_BADOFFSET) + return -FDT_ERR_BADSTRUCTURE; + + return offset; /* error from fdt_next_node() */ +} + +int fdt_supernode_atdepth_offset(const void *fdt, int nodeoffset, + int supernodedepth, int *nodedepth) +{ + int offset, depth; + int supernodeoffset = -FDT_ERR_INTERNAL; + + FDT_RO_PROBE(fdt); + + if (supernodedepth < 0) + return -FDT_ERR_NOTFOUND; + + for (offset = 0, depth = 0; 
+ (offset >= 0) && (offset <= nodeoffset); + offset = fdt_next_node(fdt, offset, &depth)) { + if (depth == supernodedepth) + supernodeoffset = offset; + + if (offset == nodeoffset) { + if (nodedepth) + *nodedepth = depth; + + if (supernodedepth > depth) + return -FDT_ERR_NOTFOUND; + else + return supernodeoffset; + } + } + + if (!can_assume(VALID_INPUT)) { + if ((offset == -FDT_ERR_NOTFOUND) || (offset >= 0)) + return -FDT_ERR_BADOFFSET; + else if (offset == -FDT_ERR_BADOFFSET) + return -FDT_ERR_BADSTRUCTURE; + } + + return offset; /* error from fdt_next_node() */ +} + +int fdt_node_depth(const void *fdt, int nodeoffset) +{ + int nodedepth; + int err; + + err = fdt_supernode_atdepth_offset(fdt, nodeoffset, 0, &nodedepth); + if (err) + return (can_assume(LIBFDT_FLAWLESS) || err < 0) ? err : + -FDT_ERR_INTERNAL; + return nodedepth; +} + +int fdt_parent_offset(const void *fdt, int nodeoffset) +{ + int nodedepth = fdt_node_depth(fdt, nodeoffset); + + if (nodedepth < 0) + return nodedepth; + return fdt_supernode_atdepth_offset(fdt, nodeoffset, + nodedepth - 1, NULL); +} + +int fdt_node_offset_by_prop_value(const void *fdt, int startoffset, + const char *propname, + const void *propval, int proplen) +{ + int offset; + const void *val; + int len; + + FDT_RO_PROBE(fdt); + + /* FIXME: The algorithm here is pretty horrible: we scan each + * property of a node in fdt_getprop(), then if that didn't + * find what we want, we scan over them again making our way + * to the next node. Still it's the easiest to implement + * approach; performance can come later. 
*/ + for (offset = fdt_next_node(fdt, startoffset, NULL); + offset >= 0; + offset = fdt_next_node(fdt, offset, NULL)) { + val = fdt_getprop(fdt, offset, propname, &len); + if (val && (len == proplen) + && (memcmp(val, propval, len) == 0)) + return offset; + } + + return offset; /* error from fdt_next_node() */ +} + +int fdt_node_offset_by_phandle(const void *fdt, uint32_t phandle) +{ + int offset; + + if ((phandle == 0) || (phandle == ~0U)) + return -FDT_ERR_BADPHANDLE; + + FDT_RO_PROBE(fdt); + + /* FIXME: The algorithm here is pretty horrible: we + * potentially scan each property of a node in + * fdt_get_phandle(), then if that didn't find what + * we want, we scan over them again making our way to the next + * node. Still it's the easiest to implement approach; + * performance can come later. */ + for (offset = fdt_next_node(fdt, -1, NULL); + offset >= 0; + offset = fdt_next_node(fdt, offset, NULL)) { + if (fdt_get_phandle(fdt, offset) == phandle) + return offset; + } + + return offset; /* error from fdt_next_node() */ +} + +int fdt_stringlist_contains(const char *strlist, int listlen, const char *str) +{ + int len = strlen(str); + const char *p; + + while (listlen >= len) { + if (memcmp(str, strlist, len+1) == 0) + return 1; + p = memchr(strlist, '\0', listlen); + if (!p) + return 0; /* malformed strlist.. */ + listlen -= (p-strlist) + 1; + strlist = p + 1; + } + return 0; +} + +int fdt_stringlist_count(const void *fdt, int nodeoffset, const char *property) +{ + const char *list, *end; + int length, count = 0; + + list = fdt_getprop(fdt, nodeoffset, property, &length); + if (!list) + return length; + + end = list + length; + + while (list < end) { + length = strnlen(list, end - list) + 1; + + /* Abort if the last string isn't properly NUL-terminated. 
*/ + if (list + length > end) + return -FDT_ERR_BADVALUE; + + list += length; + count++; + } + + return count; +} + +int fdt_stringlist_search(const void *fdt, int nodeoffset, const char *property, + const char *string) +{ + int length, len, idx = 0; + const char *list, *end; + + list = fdt_getprop(fdt, nodeoffset, property, &length); + if (!list) + return length; + + len = strlen(string) + 1; + end = list + length; + + while (list < end) { + length = strnlen(list, end - list) + 1; + + /* Abort if the last string isn't properly NUL-terminated. */ + if (list + length > end) + return -FDT_ERR_BADVALUE; + + if (length == len && memcmp(list, string, length) == 0) + return idx; + + list += length; + idx++; + } + + return -FDT_ERR_NOTFOUND; +} + +const char *fdt_stringlist_get(const void *fdt, int nodeoffset, + const char *property, int idx, + int *lenp) +{ + const char *list, *end; + int length; + + list = fdt_getprop(fdt, nodeoffset, property, &length); + if (!list) { + if (lenp) + *lenp = length; + + return NULL; + } + + end = list + length; + + while (list < end) { + length = strnlen(list, end - list) + 1; + + /* Abort if the last string isn't properly NUL-terminated. 
*/ + if (list + length > end) { + if (lenp) + *lenp = -FDT_ERR_BADVALUE; + + return NULL; + } + + if (idx == 0) { + if (lenp) + *lenp = length - 1; + + return list; + } + + list += length; + idx--; + } + + if (lenp) + *lenp = -FDT_ERR_NOTFOUND; + + return NULL; +} + +int fdt_node_check_compatible(const void *fdt, int nodeoffset, + const char *compatible) +{ + const void *prop; + int len; + + prop = fdt_getprop(fdt, nodeoffset, "compatible", &len); + if (!prop) + return len; + + return !fdt_stringlist_contains(prop, len, compatible); +} + +int fdt_node_offset_by_compatible(const void *fdt, int startoffset, + const char *compatible) +{ + int offset, err; + + FDT_RO_PROBE(fdt); + + /* FIXME: The algorithm here is pretty horrible: we scan each + * property of a node in fdt_node_check_compatible(), then if + * that didn't find what we want, we scan over them again + * making our way to the next node. Still it's the easiest to + * implement approach; performance can come later. */ + for (offset = fdt_next_node(fdt, startoffset, NULL); + offset >= 0; + offset = fdt_next_node(fdt, offset, NULL)) { + err = fdt_node_check_compatible(fdt, offset, compatible); + if ((err < 0) && (err != -FDT_ERR_NOTFOUND)) + return err; + else if (err == 0) + return offset; + } + + return offset; /* error from fdt_next_node() */ +} diff --git a/tools/src/libfdt/fdt_rw.c b/tools/src/libfdt/fdt_rw.c new file mode 100644 index 0000000..dd5c93e --- /dev/null +++ b/tools/src/libfdt/fdt_rw.c @@ -0,0 +1,492 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. 
+ */ +#include "libfdt_env.h" + +#include "fdt.h" +#include "libfdt.h" + +#include "libfdt_internal.h" + +static int fdt_blocks_misordered_(const void *fdt, + int mem_rsv_size, int struct_size) +{ + return (fdt_off_mem_rsvmap(fdt) < FDT_ALIGN(sizeof(struct fdt_header), 8)) + || (fdt_off_dt_struct(fdt) < + (fdt_off_mem_rsvmap(fdt) + mem_rsv_size)) + || (fdt_off_dt_strings(fdt) < + (fdt_off_dt_struct(fdt) + struct_size)) + || (fdt_totalsize(fdt) < + (fdt_off_dt_strings(fdt) + fdt_size_dt_strings(fdt))); +} + +static int fdt_rw_probe_(void *fdt) +{ + if (can_assume(VALID_DTB)) + return 0; + FDT_RO_PROBE(fdt); + + if (!can_assume(LATEST) && fdt_version(fdt) < 17) + return -FDT_ERR_BADVERSION; + if (fdt_blocks_misordered_(fdt, sizeof(struct fdt_reserve_entry), + fdt_size_dt_struct(fdt))) + return -FDT_ERR_BADLAYOUT; + if (!can_assume(LATEST) && fdt_version(fdt) > 17) + fdt_set_version(fdt, 17); + + return 0; +} + +#define FDT_RW_PROBE(fdt) \ + { \ + int err_; \ + if ((err_ = fdt_rw_probe_(fdt)) != 0) \ + return err_; \ + } + +static inline unsigned int fdt_data_size_(void *fdt) +{ + return fdt_off_dt_strings(fdt) + fdt_size_dt_strings(fdt); +} + +static int fdt_splice_(void *fdt, void *splicepoint, int oldlen, int newlen) +{ + char *p = splicepoint; + unsigned int dsize = fdt_data_size_(fdt); + size_t soff = p - (char *)fdt; + + if ((oldlen < 0) || (soff + oldlen < soff) || (soff + oldlen > dsize)) + return -FDT_ERR_BADOFFSET; + if ((p < (char *)fdt) || (dsize + newlen < (unsigned)oldlen)) + return -FDT_ERR_BADOFFSET; + if (dsize - oldlen + newlen > fdt_totalsize(fdt)) + return -FDT_ERR_NOSPACE; + memmove(p + newlen, p + oldlen, ((char *)fdt + dsize) - (p + oldlen)); + return 0; +} + +static int fdt_splice_mem_rsv_(void *fdt, struct fdt_reserve_entry *p, + int oldn, int newn) +{ + int delta = (newn - oldn) * sizeof(*p); + int err; + err = fdt_splice_(fdt, p, oldn * sizeof(*p), newn * sizeof(*p)); + if (err) + return err; + fdt_set_off_dt_struct(fdt, 
fdt_off_dt_struct(fdt) + delta); + fdt_set_off_dt_strings(fdt, fdt_off_dt_strings(fdt) + delta); + return 0; +} + +static int fdt_splice_struct_(void *fdt, void *p, + int oldlen, int newlen) +{ + int delta = newlen - oldlen; + int err; + + if ((err = fdt_splice_(fdt, p, oldlen, newlen))) + return err; + + fdt_set_size_dt_struct(fdt, fdt_size_dt_struct(fdt) + delta); + fdt_set_off_dt_strings(fdt, fdt_off_dt_strings(fdt) + delta); + return 0; +} + +/* Must only be used to roll back in case of error */ +static void fdt_del_last_string_(void *fdt, const char *s) +{ + int newlen = strlen(s) + 1; + + fdt_set_size_dt_strings(fdt, fdt_size_dt_strings(fdt) - newlen); +} + +static int fdt_splice_string_(void *fdt, int newlen) +{ + void *p = (char *)fdt + + fdt_off_dt_strings(fdt) + fdt_size_dt_strings(fdt); + int err; + + if ((err = fdt_splice_(fdt, p, 0, newlen))) + return err; + + fdt_set_size_dt_strings(fdt, fdt_size_dt_strings(fdt) + newlen); + return 0; +} + +/** + * fdt_find_add_string_() - Find or allocate a string + * + * @fdt: pointer to the device tree to check/adjust + * @s: string to find/add + * @allocated: Set to 0 if the string was found, 1 if not found and so + * allocated. 
Ignored if can_assume(NO_ROLLBACK) + * @return offset of string in the string table (whether found or added) + */ +static int fdt_find_add_string_(void *fdt, const char *s, int *allocated) +{ + char *strtab = (char *)fdt + fdt_off_dt_strings(fdt); + const char *p; + char *new; + int len = strlen(s) + 1; + int err; + + if (!can_assume(NO_ROLLBACK)) + *allocated = 0; + + p = fdt_find_string_(strtab, fdt_size_dt_strings(fdt), s); + if (p) + /* found it */ + return (p - strtab); + + new = strtab + fdt_size_dt_strings(fdt); + err = fdt_splice_string_(fdt, len); + if (err) + return err; + + if (!can_assume(NO_ROLLBACK)) + *allocated = 1; + + memcpy(new, s, len); + return (new - strtab); +} + +int fdt_add_mem_rsv(void *fdt, uint64_t address, uint64_t size) +{ + struct fdt_reserve_entry *re; + int err; + + FDT_RW_PROBE(fdt); + + re = fdt_mem_rsv_w_(fdt, fdt_num_mem_rsv(fdt)); + err = fdt_splice_mem_rsv_(fdt, re, 0, 1); + if (err) + return err; + + re->address = cpu_to_fdt64(address); + re->size = cpu_to_fdt64(size); + return 0; +} + +int fdt_del_mem_rsv(void *fdt, int n) +{ + struct fdt_reserve_entry *re = fdt_mem_rsv_w_(fdt, n); + + FDT_RW_PROBE(fdt); + + if (n >= fdt_num_mem_rsv(fdt)) + return -FDT_ERR_NOTFOUND; + + return fdt_splice_mem_rsv_(fdt, re, 1, 0); +} + +static int fdt_resize_property_(void *fdt, int nodeoffset, const char *name, + int len, struct fdt_property **prop) +{ + int oldlen; + int err; + + *prop = fdt_get_property_w(fdt, nodeoffset, name, &oldlen); + if (!*prop) + return oldlen; + + if ((err = fdt_splice_struct_(fdt, (*prop)->data, FDT_TAGALIGN(oldlen), + FDT_TAGALIGN(len)))) + return err; + + (*prop)->len = cpu_to_fdt32(len); + return 0; +} + +static int fdt_add_property_(void *fdt, int nodeoffset, const char *name, + int len, struct fdt_property **prop) +{ + int proplen; + int nextoffset; + int namestroff; + int err; + int allocated; + + if ((nextoffset = fdt_check_node_offset_(fdt, nodeoffset)) < 0) + return nextoffset; + + namestroff = 
fdt_find_add_string_(fdt, name, &allocated); + if (namestroff < 0) + return namestroff; + + *prop = fdt_offset_ptr_w_(fdt, nextoffset); + proplen = sizeof(**prop) + FDT_TAGALIGN(len); + + err = fdt_splice_struct_(fdt, *prop, 0, proplen); + if (err) { + /* Delete the string if we failed to add it */ + if (!can_assume(NO_ROLLBACK) && allocated) + fdt_del_last_string_(fdt, name); + return err; + } + + (*prop)->tag = cpu_to_fdt32(FDT_PROP); + (*prop)->nameoff = cpu_to_fdt32(namestroff); + (*prop)->len = cpu_to_fdt32(len); + return 0; +} + +int fdt_set_name(void *fdt, int nodeoffset, const char *name) +{ + char *namep; + int oldlen, newlen; + int err; + + FDT_RW_PROBE(fdt); + + namep = (char *)(uintptr_t)fdt_get_name(fdt, nodeoffset, &oldlen); + if (!namep) + return oldlen; + + newlen = strlen(name); + + err = fdt_splice_struct_(fdt, namep, FDT_TAGALIGN(oldlen+1), + FDT_TAGALIGN(newlen+1)); + if (err) + return err; + + memcpy(namep, name, newlen+1); + return 0; +} + +int fdt_setprop_placeholder(void *fdt, int nodeoffset, const char *name, + int len, void **prop_data) +{ + struct fdt_property *prop; + int err; + + FDT_RW_PROBE(fdt); + + err = fdt_resize_property_(fdt, nodeoffset, name, len, &prop); + if (err == -FDT_ERR_NOTFOUND) + err = fdt_add_property_(fdt, nodeoffset, name, len, &prop); + if (err) + return err; + + *prop_data = prop->data; + return 0; +} + +int fdt_setprop(void *fdt, int nodeoffset, const char *name, + const void *val, int len) +{ + void *prop_data; + int err; + + err = fdt_setprop_placeholder(fdt, nodeoffset, name, len, &prop_data); + if (err) + return err; + + if (len) + memcpy(prop_data, val, len); + return 0; +} + +int fdt_appendprop(void *fdt, int nodeoffset, const char *name, + const void *val, int len) +{ + struct fdt_property *prop; + int err, oldlen, newlen; + + FDT_RW_PROBE(fdt); + + prop = fdt_get_property_w(fdt, nodeoffset, name, &oldlen); + if (prop) { + newlen = len + oldlen; + err = fdt_splice_struct_(fdt, prop->data, + 
FDT_TAGALIGN(oldlen), + FDT_TAGALIGN(newlen)); + if (err) + return err; + prop->len = cpu_to_fdt32(newlen); + memcpy(prop->data + oldlen, val, len); + } else { + err = fdt_add_property_(fdt, nodeoffset, name, len, &prop); + if (err) + return err; + memcpy(prop->data, val, len); + } + return 0; +} + +int fdt_delprop(void *fdt, int nodeoffset, const char *name) +{ + struct fdt_property *prop; + int len, proplen; + + FDT_RW_PROBE(fdt); + + prop = fdt_get_property_w(fdt, nodeoffset, name, &len); + if (!prop) + return len; + + proplen = sizeof(*prop) + FDT_TAGALIGN(len); + return fdt_splice_struct_(fdt, prop, proplen, 0); +} + +int fdt_add_subnode_namelen(void *fdt, int parentoffset, + const char *name, int namelen) +{ + struct fdt_node_header *nh; + int offset, nextoffset; + int nodelen; + int err; + uint32_t tag; + fdt32_t *endtag; + + FDT_RW_PROBE(fdt); + + offset = fdt_subnode_offset_namelen(fdt, parentoffset, name, namelen); + if (offset >= 0) + return -FDT_ERR_EXISTS; + else if (offset != -FDT_ERR_NOTFOUND) + return offset; + + /* Try to place the new node after the parent's properties */ + fdt_next_tag(fdt, parentoffset, &nextoffset); /* skip the BEGIN_NODE */ + do { + offset = nextoffset; + tag = fdt_next_tag(fdt, offset, &nextoffset); + } while ((tag == FDT_PROP) || (tag == FDT_NOP)); + + nh = fdt_offset_ptr_w_(fdt, offset); + nodelen = sizeof(*nh) + FDT_TAGALIGN(namelen+1) + FDT_TAGSIZE; + + err = fdt_splice_struct_(fdt, nh, 0, nodelen); + if (err) + return err; + + nh->tag = cpu_to_fdt32(FDT_BEGIN_NODE); + memset(nh->name, 0, FDT_TAGALIGN(namelen+1)); + memcpy(nh->name, name, namelen); + endtag = (fdt32_t *)((char *)nh + nodelen - FDT_TAGSIZE); + *endtag = cpu_to_fdt32(FDT_END_NODE); + + return offset; +} + +int fdt_add_subnode(void *fdt, int parentoffset, const char *name) +{ + return fdt_add_subnode_namelen(fdt, parentoffset, name, strlen(name)); +} + +int fdt_del_node(void *fdt, int nodeoffset) +{ + int endoffset; + + FDT_RW_PROBE(fdt); + + endoffset = 
fdt_node_end_offset_(fdt, nodeoffset); + if (endoffset < 0) + return endoffset; + + return fdt_splice_struct_(fdt, fdt_offset_ptr_w_(fdt, nodeoffset), + endoffset - nodeoffset, 0); +} + +static void fdt_packblocks_(const char *old, char *new, + int mem_rsv_size, int struct_size) +{ + int mem_rsv_off, struct_off, strings_off; + + mem_rsv_off = FDT_ALIGN(sizeof(struct fdt_header), 8); + struct_off = mem_rsv_off + mem_rsv_size; + strings_off = struct_off + struct_size; + + memmove(new + mem_rsv_off, old + fdt_off_mem_rsvmap(old), mem_rsv_size); + fdt_set_off_mem_rsvmap(new, mem_rsv_off); + + memmove(new + struct_off, old + fdt_off_dt_struct(old), struct_size); + fdt_set_off_dt_struct(new, struct_off); + fdt_set_size_dt_struct(new, struct_size); + + memmove(new + strings_off, old + fdt_off_dt_strings(old), + fdt_size_dt_strings(old)); + fdt_set_off_dt_strings(new, strings_off); + fdt_set_size_dt_strings(new, fdt_size_dt_strings(old)); +} + +int fdt_open_into(const void *fdt, void *buf, int bufsize) +{ + int err; + int mem_rsv_size, struct_size; + int newsize; + const char *fdtstart = fdt; + const char *fdtend = fdtstart + fdt_totalsize(fdt); + char *tmp; + + FDT_RO_PROBE(fdt); + + mem_rsv_size = (fdt_num_mem_rsv(fdt)+1) + * sizeof(struct fdt_reserve_entry); + + if (can_assume(LATEST) || fdt_version(fdt) >= 17) { + struct_size = fdt_size_dt_struct(fdt); + } else { + struct_size = 0; + while (fdt_next_tag(fdt, struct_size, &struct_size) != FDT_END) + ; + if (struct_size < 0) + return struct_size; + } + + if (can_assume(LIBFDT_ORDER) || + !fdt_blocks_misordered_(fdt, mem_rsv_size, struct_size)) { + /* no further work necessary */ + err = fdt_move(fdt, buf, bufsize); + if (err) + return err; + fdt_set_version(buf, 17); + fdt_set_size_dt_struct(buf, struct_size); + fdt_set_totalsize(buf, bufsize); + return 0; + } + + /* Need to reorder */ + newsize = FDT_ALIGN(sizeof(struct fdt_header), 8) + mem_rsv_size + + struct_size + fdt_size_dt_strings(fdt); + + if (bufsize < newsize) 
+ return -FDT_ERR_NOSPACE; + + /* First attempt to build converted tree at beginning of buffer */ + tmp = buf; + /* But if that overlaps with the old tree... */ + if (((tmp + newsize) > fdtstart) && (tmp < fdtend)) { + /* Try right after the old tree instead */ + tmp = (char *)(uintptr_t)fdtend; + if ((tmp + newsize) > ((char *)buf + bufsize)) + return -FDT_ERR_NOSPACE; + } + + fdt_packblocks_(fdt, tmp, mem_rsv_size, struct_size); + memmove(buf, tmp, newsize); + + fdt_set_magic(buf, FDT_MAGIC); + fdt_set_totalsize(buf, bufsize); + fdt_set_version(buf, 17); + fdt_set_last_comp_version(buf, 16); + fdt_set_boot_cpuid_phys(buf, fdt_boot_cpuid_phys(fdt)); + + return 0; +} + +int fdt_pack(void *fdt) +{ + int mem_rsv_size; + + FDT_RW_PROBE(fdt); + + mem_rsv_size = (fdt_num_mem_rsv(fdt)+1) + * sizeof(struct fdt_reserve_entry); + fdt_packblocks_(fdt, fdt, mem_rsv_size, fdt_size_dt_struct(fdt)); + fdt_set_totalsize(fdt, fdt_data_size_(fdt)); + + return 0; +} diff --git a/tools/src/libfdt/fdt_strerror.c b/tools/src/libfdt/fdt_strerror.c new file mode 100644 index 0000000..218b323 --- /dev/null +++ b/tools/src/libfdt/fdt_strerror.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include "libfdt_env.h" + +#include "fdt.h" +#include "libfdt.h" + +#include "libfdt_internal.h" + +struct fdt_errtabent { + const char *str; +}; + +#define FDT_ERRTABENT(val) \ + [(val)] = { .str = #val, } + +static struct fdt_errtabent fdt_errtable[] = { + FDT_ERRTABENT(FDT_ERR_NOTFOUND), + FDT_ERRTABENT(FDT_ERR_EXISTS), + FDT_ERRTABENT(FDT_ERR_NOSPACE), + + FDT_ERRTABENT(FDT_ERR_BADOFFSET), + FDT_ERRTABENT(FDT_ERR_BADPATH), + FDT_ERRTABENT(FDT_ERR_BADPHANDLE), + FDT_ERRTABENT(FDT_ERR_BADSTATE), + + FDT_ERRTABENT(FDT_ERR_TRUNCATED), + FDT_ERRTABENT(FDT_ERR_BADMAGIC), + FDT_ERRTABENT(FDT_ERR_BADVERSION), + FDT_ERRTABENT(FDT_ERR_BADSTRUCTURE), + FDT_ERRTABENT(FDT_ERR_BADLAYOUT), + FDT_ERRTABENT(FDT_ERR_INTERNAL), + FDT_ERRTABENT(FDT_ERR_BADNCELLS), + FDT_ERRTABENT(FDT_ERR_BADVALUE), + FDT_ERRTABENT(FDT_ERR_BADOVERLAY), + FDT_ERRTABENT(FDT_ERR_NOPHANDLES), + FDT_ERRTABENT(FDT_ERR_BADFLAGS), +}; +#define FDT_ERRTABSIZE ((int)(sizeof(fdt_errtable) / sizeof(fdt_errtable[0]))) + +const char *fdt_strerror(int errval) +{ + if (errval > 0) + return "<valid offset/length>"; + else if (errval == 0) + return "<no error>"; + else if (-errval < FDT_ERRTABSIZE) { + const char *s = fdt_errtable[-errval].str; + + if (s) + return s; + } + + return "<unknown error>"; +} diff --git a/tools/src/libfdt/fdt_sw.c b/tools/src/libfdt/fdt_sw.c new file mode 100644 index 0000000..c0d9cd0 --- /dev/null +++ b/tools/src/libfdt/fdt_sw.c @@ -0,0 +1,384 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. 
+ */ +#include "libfdt_env.h" + +#include "fdt.h" +#include "libfdt.h" + +#include "libfdt_internal.h" + +static int fdt_sw_probe_(void *fdt) +{ + if (!can_assume(VALID_INPUT)) { + if (fdt_magic(fdt) == FDT_MAGIC) + return -FDT_ERR_BADSTATE; + else if (fdt_magic(fdt) != FDT_SW_MAGIC) + return -FDT_ERR_BADMAGIC; + } + + return 0; +} + +#define FDT_SW_PROBE(fdt) \ + { \ + int err; \ + if ((err = fdt_sw_probe_(fdt)) != 0) \ + return err; \ + } + +/* 'memrsv' state: Initial state after fdt_create() + * + * Allowed functions: + * fdt_add_reservemap_entry() + * fdt_finish_reservemap() [moves to 'struct' state] + */ +static int fdt_sw_probe_memrsv_(void *fdt) +{ + int err = fdt_sw_probe_(fdt); + if (err) + return err; + + if (!can_assume(VALID_INPUT) && fdt_off_dt_strings(fdt) != 0) + return -FDT_ERR_BADSTATE; + return 0; +} + +#define FDT_SW_PROBE_MEMRSV(fdt) \ + { \ + int err; \ + if ((err = fdt_sw_probe_memrsv_(fdt)) != 0) \ + return err; \ + } + +/* 'struct' state: Enter this state after fdt_finish_reservemap() + * + * Allowed functions: + * fdt_begin_node() + * fdt_end_node() + * fdt_property*() + * fdt_finish() [moves to 'complete' state] + */ +static int fdt_sw_probe_struct_(void *fdt) +{ + int err = fdt_sw_probe_(fdt); + if (err) + return err; + + if (!can_assume(VALID_INPUT) && + fdt_off_dt_strings(fdt) != fdt_totalsize(fdt)) + return -FDT_ERR_BADSTATE; + return 0; +} + +#define FDT_SW_PROBE_STRUCT(fdt) \ + { \ + int err; \ + if ((err = fdt_sw_probe_struct_(fdt)) != 0) \ + return err; \ + } + +static inline uint32_t sw_flags(void *fdt) +{ + /* assert: (fdt_magic(fdt) == FDT_SW_MAGIC) */ + return fdt_last_comp_version(fdt); +} + +/* 'complete' state: Enter this state after fdt_finish() + * + * Allowed functions: none + */ + +static void *fdt_grab_space_(void *fdt, size_t len) +{ + unsigned int offset = fdt_size_dt_struct(fdt); + unsigned int spaceleft; + + spaceleft = fdt_totalsize(fdt) - fdt_off_dt_struct(fdt) + - fdt_size_dt_strings(fdt); + + if ((offset + len < 
offset) || (offset + len > spaceleft)) + return NULL; + + fdt_set_size_dt_struct(fdt, offset + len); + return fdt_offset_ptr_w_(fdt, offset); +} + +int fdt_create_with_flags(void *buf, int bufsize, uint32_t flags) +{ + const int hdrsize = FDT_ALIGN(sizeof(struct fdt_header), + sizeof(struct fdt_reserve_entry)); + void *fdt = buf; + + if (bufsize < hdrsize) + return -FDT_ERR_NOSPACE; + + if (flags & ~FDT_CREATE_FLAGS_ALL) + return -FDT_ERR_BADFLAGS; + + memset(buf, 0, bufsize); + + /* + * magic and last_comp_version keep intermediate state during the fdt + * creation process, which is replaced with the proper FDT format by + * fdt_finish(). + * + * flags should be accessed with sw_flags(). + */ + fdt_set_magic(fdt, FDT_SW_MAGIC); + fdt_set_version(fdt, FDT_LAST_SUPPORTED_VERSION); + fdt_set_last_comp_version(fdt, flags); + + fdt_set_totalsize(fdt, bufsize); + + fdt_set_off_mem_rsvmap(fdt, hdrsize); + fdt_set_off_dt_struct(fdt, fdt_off_mem_rsvmap(fdt)); + fdt_set_off_dt_strings(fdt, 0); + + return 0; +} + +int fdt_create(void *buf, int bufsize) +{ + return fdt_create_with_flags(buf, bufsize, 0); +} + +int fdt_resize(void *fdt, void *buf, int bufsize) +{ + size_t headsize, tailsize; + char *oldtail, *newtail; + + FDT_SW_PROBE(fdt); + + if (bufsize < 0) + return -FDT_ERR_NOSPACE; + + headsize = fdt_off_dt_struct(fdt) + fdt_size_dt_struct(fdt); + tailsize = fdt_size_dt_strings(fdt); + + if (!can_assume(VALID_DTB) && + headsize + tailsize > fdt_totalsize(fdt)) + return -FDT_ERR_INTERNAL; + + if ((headsize + tailsize) > (unsigned)bufsize) + return -FDT_ERR_NOSPACE; + + oldtail = (char *)fdt + fdt_totalsize(fdt) - tailsize; + newtail = (char *)buf + bufsize - tailsize; + + /* Two cases to avoid clobbering data if the old and new + * buffers partially overlap */ + if (buf <= fdt) { + memmove(buf, fdt, headsize); + memmove(newtail, oldtail, tailsize); + } else { + memmove(newtail, oldtail, tailsize); + memmove(buf, fdt, headsize); + } + + fdt_set_totalsize(buf, bufsize); + 
if (fdt_off_dt_strings(buf)) + fdt_set_off_dt_strings(buf, bufsize); + + return 0; +} + +int fdt_add_reservemap_entry(void *fdt, uint64_t addr, uint64_t size) +{ + struct fdt_reserve_entry *re; + int offset; + + FDT_SW_PROBE_MEMRSV(fdt); + + offset = fdt_off_dt_struct(fdt); + if ((offset + sizeof(*re)) > fdt_totalsize(fdt)) + return -FDT_ERR_NOSPACE; + + re = (struct fdt_reserve_entry *)((char *)fdt + offset); + re->address = cpu_to_fdt64(addr); + re->size = cpu_to_fdt64(size); + + fdt_set_off_dt_struct(fdt, offset + sizeof(*re)); + + return 0; +} + +int fdt_finish_reservemap(void *fdt) +{ + int err = fdt_add_reservemap_entry(fdt, 0, 0); + + if (err) + return err; + + fdt_set_off_dt_strings(fdt, fdt_totalsize(fdt)); + return 0; +} + +int fdt_begin_node(void *fdt, const char *name) +{ + struct fdt_node_header *nh; + int namelen; + + FDT_SW_PROBE_STRUCT(fdt); + + namelen = strlen(name) + 1; + nh = fdt_grab_space_(fdt, sizeof(*nh) + FDT_TAGALIGN(namelen)); + if (! nh) + return -FDT_ERR_NOSPACE; + + nh->tag = cpu_to_fdt32(FDT_BEGIN_NODE); + memcpy(nh->name, name, namelen); + return 0; +} + +int fdt_end_node(void *fdt) +{ + fdt32_t *en; + + FDT_SW_PROBE_STRUCT(fdt); + + en = fdt_grab_space_(fdt, FDT_TAGSIZE); + if (! 
en) + return -FDT_ERR_NOSPACE; + + *en = cpu_to_fdt32(FDT_END_NODE); + return 0; +} + +static int fdt_add_string_(void *fdt, const char *s) +{ + char *strtab = (char *)fdt + fdt_totalsize(fdt); + unsigned int strtabsize = fdt_size_dt_strings(fdt); + unsigned int len = strlen(s) + 1; + unsigned int struct_top, offset; + + offset = strtabsize + len; + struct_top = fdt_off_dt_struct(fdt) + fdt_size_dt_struct(fdt); + if (fdt_totalsize(fdt) - offset < struct_top) + return 0; /* no more room :( */ + + memcpy(strtab - offset, s, len); + fdt_set_size_dt_strings(fdt, strtabsize + len); + return -offset; +} + +/* Must only be used to roll back in case of error */ +static void fdt_del_last_string_(void *fdt, const char *s) +{ + int strtabsize = fdt_size_dt_strings(fdt); + int len = strlen(s) + 1; + + fdt_set_size_dt_strings(fdt, strtabsize - len); +} + +static int fdt_find_add_string_(void *fdt, const char *s, int *allocated) +{ + char *strtab = (char *)fdt + fdt_totalsize(fdt); + int strtabsize = fdt_size_dt_strings(fdt); + const char *p; + + *allocated = 0; + + p = fdt_find_string_(strtab - strtabsize, strtabsize, s); + if (p) + return p - strtab; + + *allocated = 1; + + return fdt_add_string_(fdt, s); +} + +int fdt_property_placeholder(void *fdt, const char *name, int len, void **valp) +{ + struct fdt_property *prop; + int nameoff; + int allocated; + + FDT_SW_PROBE_STRUCT(fdt); + + /* String de-duplication can be slow, _NO_NAME_DEDUP skips it */ + if (sw_flags(fdt) & FDT_CREATE_FLAG_NO_NAME_DEDUP) { + allocated = 1; + nameoff = fdt_add_string_(fdt, name); + } else { + nameoff = fdt_find_add_string_(fdt, name, &allocated); + } + if (nameoff == 0) + return -FDT_ERR_NOSPACE; + + prop = fdt_grab_space_(fdt, sizeof(*prop) + FDT_TAGALIGN(len)); + if (! 
prop) { + if (allocated) + fdt_del_last_string_(fdt, name); + return -FDT_ERR_NOSPACE; + } + + prop->tag = cpu_to_fdt32(FDT_PROP); + prop->nameoff = cpu_to_fdt32(nameoff); + prop->len = cpu_to_fdt32(len); + *valp = prop->data; + return 0; +} + +int fdt_property(void *fdt, const char *name, const void *val, int len) +{ + void *ptr; + int ret; + + ret = fdt_property_placeholder(fdt, name, len, &ptr); + if (ret) + return ret; + memcpy(ptr, val, len); + return 0; +} + +int fdt_finish(void *fdt) +{ + char *p = (char *)fdt; + fdt32_t *end; + int oldstroffset, newstroffset; + uint32_t tag; + int offset, nextoffset; + + FDT_SW_PROBE_STRUCT(fdt); + + /* Add terminator */ + end = fdt_grab_space_(fdt, sizeof(*end)); + if (! end) + return -FDT_ERR_NOSPACE; + *end = cpu_to_fdt32(FDT_END); + + /* Relocate the string table */ + oldstroffset = fdt_totalsize(fdt) - fdt_size_dt_strings(fdt); + newstroffset = fdt_off_dt_struct(fdt) + fdt_size_dt_struct(fdt); + memmove(p + newstroffset, p + oldstroffset, fdt_size_dt_strings(fdt)); + fdt_set_off_dt_strings(fdt, newstroffset); + + /* Walk the structure, correcting string offsets */ + offset = 0; + while ((tag = fdt_next_tag(fdt, offset, &nextoffset)) != FDT_END) { + if (tag == FDT_PROP) { + struct fdt_property *prop = + fdt_offset_ptr_w_(fdt, offset); + int nameoff; + + nameoff = fdt32_to_cpu(prop->nameoff); + nameoff += fdt_size_dt_strings(fdt); + prop->nameoff = cpu_to_fdt32(nameoff); + } + offset = nextoffset; + } + if (nextoffset < 0) + return nextoffset; + + /* Finally, adjust the header */ + fdt_set_totalsize(fdt, newstroffset + fdt_size_dt_strings(fdt)); + + /* And fix up fields that were keeping intermediate state. 
*/ + fdt_set_last_comp_version(fdt, FDT_FIRST_SUPPORTED_VERSION); + fdt_set_magic(fdt, FDT_MAGIC); + + return 0; +} diff --git a/tools/src/libfdt/fdt_wip.c b/tools/src/libfdt/fdt_wip.c new file mode 100644 index 0000000..44aed08 --- /dev/null +++ b/tools/src/libfdt/fdt_wip.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + */ +#include "libfdt_env.h" + +#include "fdt.h" +#include "libfdt.h" + +#include "libfdt_internal.h" + +int fdt_setprop_inplace_namelen_partial(void *fdt, int nodeoffset, + const char *name, int namelen, + uint32_t idx, const void *val, + int len) +{ + void *propval; + int proplen; + + propval = fdt_getprop_namelen_w(fdt, nodeoffset, name, namelen, + &proplen); + if (!propval) + return proplen; + + if ((unsigned)proplen < (len + idx)) + return -FDT_ERR_NOSPACE; + + memcpy((char *)propval + idx, val, len); + return 0; +} + +int fdt_setprop_inplace(void *fdt, int nodeoffset, const char *name, + const void *val, int len) +{ + const void *propval; + int proplen; + + propval = fdt_getprop(fdt, nodeoffset, name, &proplen); + if (!propval) + return proplen; + + if (proplen != len) + return -FDT_ERR_NOSPACE; + + return fdt_setprop_inplace_namelen_partial(fdt, nodeoffset, name, + strlen(name), 0, + val, len); +} + +static void fdt_nop_region_(void *start, int len) +{ + fdt32_t *p; + + for (p = start; (char *)p < ((char *)start + len); p++) + *p = cpu_to_fdt32(FDT_NOP); +} + +int fdt_nop_property(void *fdt, int nodeoffset, const char *name) +{ + struct fdt_property *prop; + int len; + + prop = fdt_get_property_w(fdt, nodeoffset, name, &len); + if (!prop) + return len; + + fdt_nop_region_(prop, len + sizeof(*prop)); + + return 0; +} + +int fdt_node_end_offset_(void *fdt, int offset) +{ + int depth = 0; + + while ((offset >= 0) && (depth >= 0)) + offset = fdt_next_node(fdt, offset, &depth); + + return offset; +} + +int 
fdt_nop_node(void *fdt, int nodeoffset) +{ + int endoffset; + + endoffset = fdt_node_end_offset_(fdt, nodeoffset); + if (endoffset < 0) + return endoffset; + + fdt_nop_region_(fdt_offset_ptr_w(fdt, nodeoffset, 0), + endoffset - nodeoffset); + return 0; +} diff --git a/tools/src/libfdt/libfdt.h b/tools/src/libfdt/libfdt.h new file mode 100644 index 0000000..fe49b5d --- /dev/null +++ b/tools/src/libfdt/libfdt.h @@ -0,0 +1,2080 @@ +/* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) */ +#ifndef LIBFDT_H +#define LIBFDT_H +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + */ + +#include "libfdt_env.h" +#include "fdt.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define FDT_FIRST_SUPPORTED_VERSION 0x02 +#define FDT_LAST_SUPPORTED_VERSION 0x11 + +/* Error codes: informative error codes */ +#define FDT_ERR_NOTFOUND 1 + /* FDT_ERR_NOTFOUND: The requested node or property does not exist */ +#define FDT_ERR_EXISTS 2 + /* FDT_ERR_EXISTS: Attempted to create a node or property which + * already exists */ +#define FDT_ERR_NOSPACE 3 + /* FDT_ERR_NOSPACE: Operation needed to expand the device + * tree, but its buffer did not have sufficient space to + * contain the expanded tree. Use fdt_open_into() to move the + * device tree to a buffer with more space. */ + +/* Error codes: codes for bad parameters */ +#define FDT_ERR_BADOFFSET 4 + /* FDT_ERR_BADOFFSET: Function was passed a structure block + * offset which is out-of-bounds, or which points to an + * unsuitable part of the structure for the operation. */ +#define FDT_ERR_BADPATH 5 + /* FDT_ERR_BADPATH: Function was passed a badly formatted path + * (e.g. missing a leading / for a function which requires an + * absolute path) */ +#define FDT_ERR_BADPHANDLE 6 + /* FDT_ERR_BADPHANDLE: Function was passed an invalid phandle. + * This can be caused either by an invalid phandle property + * length, or the phandle value was either 0 or -1, which are + * not permitted. 
*/
+#define FDT_ERR_BADSTATE 7
+ /* FDT_ERR_BADSTATE: Function was passed an incomplete device
+ * tree created by the sequential-write functions, which is
+ * not sufficiently complete for the requested operation. */
+
+/* Error codes: codes for bad device tree blobs */
+#define FDT_ERR_TRUNCATED 8
+ /* FDT_ERR_TRUNCATED: FDT or a sub-block is improperly
+ * terminated (overflows, goes outside allowed bounds, or
+ * isn't properly terminated). */
+#define FDT_ERR_BADMAGIC 9
+ /* FDT_ERR_BADMAGIC: Given "device tree" appears not to be a
+ * device tree at all - it is missing the flattened device
+ * tree magic number. */
+#define FDT_ERR_BADVERSION 10
+ /* FDT_ERR_BADVERSION: Given device tree has a version which
+ * can't be handled by the requested operation. For
+ * read-write functions, this may mean that fdt_open_into() is
+ * required to convert the tree to the expected version. */
+#define FDT_ERR_BADSTRUCTURE 11
+ /* FDT_ERR_BADSTRUCTURE: Given device tree has a corrupt
+ * structure block or other serious error (e.g. misnested
+ * nodes, or subnodes preceding properties). */
+#define FDT_ERR_BADLAYOUT 12
+ /* FDT_ERR_BADLAYOUT: For read-write functions, the given
+ * device tree has its sub-blocks in an order that the
+ * function can't handle (memory reserve map, then structure,
+ * then strings). Use fdt_open_into() to reorganize the tree
+ * into a form suitable for the read-write operations. */
+
+/* "Can't happen" error indicating a bug in libfdt */
+#define FDT_ERR_INTERNAL 13
+ /* FDT_ERR_INTERNAL: libfdt has failed an internal assertion.
+ * Should never be returned, if it is, it indicates a bug in
+ * libfdt itself. */
+
+/* Errors in device tree content */
+#define FDT_ERR_BADNCELLS 14
+ /* FDT_ERR_BADNCELLS: Device tree has a #address-cells, #size-cells
+ * or similar property with a bad format or value */
+
+#define FDT_ERR_BADVALUE 15
+ /* FDT_ERR_BADVALUE: Device tree has a property with an unexpected
+ * value. 
For example: a property expected to contain a string list + * is not NUL-terminated within the length of its value. */ + +#define FDT_ERR_BADOVERLAY 16 + /* FDT_ERR_BADOVERLAY: The device tree overlay, while + * correctly structured, cannot be applied due to some + * unexpected or missing value, property or node. */ + +#define FDT_ERR_NOPHANDLES 17 + /* FDT_ERR_NOPHANDLES: The device tree doesn't have any + * phandle available anymore without causing an overflow */ + +#define FDT_ERR_BADFLAGS 18 + /* FDT_ERR_BADFLAGS: The function was passed a flags field that + * contains invalid flags or an invalid combination of flags. */ + +#define FDT_ERR_MAX 18 + +/* constants */ +#define FDT_MAX_PHANDLE 0xfffffffe + /* Valid values for phandles range from 1 to 2^32-2. */ + +/**********************************************************************/ +/* Low-level functions (you probably don't need these) */ +/**********************************************************************/ + +#ifndef SWIG /* This function is not useful in Python */ +const void *fdt_offset_ptr(const void *fdt, int offset, unsigned int checklen); +#endif +static inline void *fdt_offset_ptr_w(void *fdt, int offset, int checklen) +{ + return (void *)(uintptr_t)fdt_offset_ptr(fdt, offset, checklen); +} + +uint32_t fdt_next_tag(const void *fdt, int offset, int *nextoffset); + +/* + * Alignment helpers: + * These helpers access words from a device tree blob. 
They're + * built to work even with unaligned pointers on platforms (ike + * ARM) that don't like unaligned loads and stores + */ + +static inline uint32_t fdt32_ld(const fdt32_t *p) +{ + const uint8_t *bp = (const uint8_t *)p; + + return ((uint32_t)bp[0] << 24) + | ((uint32_t)bp[1] << 16) + | ((uint32_t)bp[2] << 8) + | bp[3]; +} + +static inline void fdt32_st(void *property, uint32_t value) +{ + uint8_t *bp = (uint8_t *)property; + + bp[0] = value >> 24; + bp[1] = (value >> 16) & 0xff; + bp[2] = (value >> 8) & 0xff; + bp[3] = value & 0xff; +} + +static inline uint64_t fdt64_ld(const fdt64_t *p) +{ + const uint8_t *bp = (const uint8_t *)p; + + return ((uint64_t)bp[0] << 56) + | ((uint64_t)bp[1] << 48) + | ((uint64_t)bp[2] << 40) + | ((uint64_t)bp[3] << 32) + | ((uint64_t)bp[4] << 24) + | ((uint64_t)bp[5] << 16) + | ((uint64_t)bp[6] << 8) + | bp[7]; +} + +static inline void fdt64_st(void *property, uint64_t value) +{ + uint8_t *bp = (uint8_t *)property; + + bp[0] = value >> 56; + bp[1] = (value >> 48) & 0xff; + bp[2] = (value >> 40) & 0xff; + bp[3] = (value >> 32) & 0xff; + bp[4] = (value >> 24) & 0xff; + bp[5] = (value >> 16) & 0xff; + bp[6] = (value >> 8) & 0xff; + bp[7] = value & 0xff; +} + +/**********************************************************************/ +/* Traversal functions */ +/**********************************************************************/ + +int fdt_next_node(const void *fdt, int offset, int *depth); + +/** + * fdt_first_subnode() - get offset of first direct subnode + * + * @fdt: FDT blob + * @offset: Offset of node to check + * @return offset of first subnode, or -FDT_ERR_NOTFOUND if there is none + */ +int fdt_first_subnode(const void *fdt, int offset); + +/** + * fdt_next_subnode() - get offset of next direct subnode + * + * After first calling fdt_first_subnode(), call this function repeatedly to + * get direct subnodes of a parent node. 
+ * + * @fdt: FDT blob + * @offset: Offset of previous subnode + * @return offset of next subnode, or -FDT_ERR_NOTFOUND if there are no more + * subnodes + */ +int fdt_next_subnode(const void *fdt, int offset); + +/** + * fdt_for_each_subnode - iterate over all subnodes of a parent + * + * @node: child node (int, lvalue) + * @fdt: FDT blob (const void *) + * @parent: parent node (int) + * + * This is actually a wrapper around a for loop and would be used like so: + * + * fdt_for_each_subnode(node, fdt, parent) { + * Use node + * ... + * } + * + * if ((node < 0) && (node != -FDT_ERR_NOTFOUND)) { + * Error handling + * } + * + * Note that this is implemented as a macro and @node is used as + * iterator in the loop. The parent variable be constant or even a + * literal. + * + */ +#define fdt_for_each_subnode(node, fdt, parent) \ + for (node = fdt_first_subnode(fdt, parent); \ + node >= 0; \ + node = fdt_next_subnode(fdt, node)) + +/**********************************************************************/ +/* General functions */ +/**********************************************************************/ +#define fdt_get_header(fdt, field) \ + (fdt32_ld(&((const struct fdt_header *)(fdt))->field)) +#define fdt_magic(fdt) (fdt_get_header(fdt, magic)) +#define fdt_totalsize(fdt) (fdt_get_header(fdt, totalsize)) +#define fdt_off_dt_struct(fdt) (fdt_get_header(fdt, off_dt_struct)) +#define fdt_off_dt_strings(fdt) (fdt_get_header(fdt, off_dt_strings)) +#define fdt_off_mem_rsvmap(fdt) (fdt_get_header(fdt, off_mem_rsvmap)) +#define fdt_version(fdt) (fdt_get_header(fdt, version)) +#define fdt_last_comp_version(fdt) (fdt_get_header(fdt, last_comp_version)) +#define fdt_boot_cpuid_phys(fdt) (fdt_get_header(fdt, boot_cpuid_phys)) +#define fdt_size_dt_strings(fdt) (fdt_get_header(fdt, size_dt_strings)) +#define fdt_size_dt_struct(fdt) (fdt_get_header(fdt, size_dt_struct)) + +#define fdt_set_hdr_(name) \ + static inline void fdt_set_##name(void *fdt, uint32_t val) \ + { \ + struct 
fdt_header *fdth = (struct fdt_header *)fdt; \ + fdth->name = cpu_to_fdt32(val); \ + } +fdt_set_hdr_(magic); +fdt_set_hdr_(totalsize); +fdt_set_hdr_(off_dt_struct); +fdt_set_hdr_(off_dt_strings); +fdt_set_hdr_(off_mem_rsvmap); +fdt_set_hdr_(version); +fdt_set_hdr_(last_comp_version); +fdt_set_hdr_(boot_cpuid_phys); +fdt_set_hdr_(size_dt_strings); +fdt_set_hdr_(size_dt_struct); +#undef fdt_set_hdr_ + +/** + * fdt_header_size - return the size of the tree's header + * @fdt: pointer to a flattened device tree + */ +size_t fdt_header_size(const void *fdt); + +/** + * fdt_header_size_ - internal function which takes a version number + */ +size_t fdt_header_size_(uint32_t version); + +/** + * fdt_check_header - sanity check a device tree header + + * @fdt: pointer to data which might be a flattened device tree + * + * fdt_check_header() checks that the given buffer contains what + * appears to be a flattened device tree, and that the header contains + * valid information (to the extent that can be determined from the + * header alone). + * + * returns: + * 0, if the buffer appears to contain a valid device tree + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_TRUNCATED, standard meanings, as above + */ +int fdt_check_header(const void *fdt); + +/** + * fdt_move - move a device tree around in memory + * @fdt: pointer to the device tree to move + * @buf: pointer to memory where the device is to be moved + * @bufsize: size of the memory space at buf + * + * fdt_move() relocates, if possible, the device tree blob located at + * fdt to the buffer at buf of size bufsize. The buffer may overlap + * with the existing device tree blob at fdt. Therefore, + * fdt_move(fdt, fdt, fdt_totalsize(fdt)) + * should always succeed. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, bufsize is insufficient to contain the device tree + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, standard meanings + */ +int fdt_move(const void *fdt, void *buf, int bufsize); + +/**********************************************************************/ +/* Read-only functions */ +/**********************************************************************/ + +int fdt_check_full(const void *fdt, size_t bufsize); + +/** + * fdt_get_string - retrieve a string from the strings block of a device tree + * @fdt: pointer to the device tree blob + * @stroffset: offset of the string within the strings block (native endian) + * @lenp: optional pointer to return the string's length + * + * fdt_get_string() retrieves a pointer to a single string from the + * strings block of the device tree blob at fdt, and optionally also + * returns the string's length in *lenp. + * + * returns: + * a pointer to the string, on success + * NULL, if stroffset is out of bounds, or doesn't point to a valid string + */ +const char *fdt_get_string(const void *fdt, int stroffset, int *lenp); + +/** + * fdt_string - retrieve a string from the strings block of a device tree + * @fdt: pointer to the device tree blob + * @stroffset: offset of the string within the strings block (native endian) + * + * fdt_string() retrieves a pointer to a single string from the + * strings block of the device tree blob at fdt. + * + * returns: + * a pointer to the string, on success + * NULL, if stroffset is out of bounds, or doesn't point to a valid string + */ +const char *fdt_string(const void *fdt, int stroffset); + +/** + * fdt_find_max_phandle - find and return the highest phandle in a tree + * @fdt: pointer to the device tree blob + * @phandle: return location for the highest phandle value found in the tree + * + * fdt_find_max_phandle() finds the highest phandle value in the given device + * tree. 
The value returned in @phandle is only valid if the function returns + * success. + * + * returns: + * 0 on success or a negative error code on failure + */ +int fdt_find_max_phandle(const void *fdt, uint32_t *phandle); + +/** + * fdt_get_max_phandle - retrieves the highest phandle in a tree + * @fdt: pointer to the device tree blob + * + * fdt_get_max_phandle retrieves the highest phandle in the given + * device tree. This will ignore badly formatted phandles, or phandles + * with a value of 0 or -1. + * + * This function is deprecated in favour of fdt_find_max_phandle(). + * + * returns: + * the highest phandle on success + * 0, if no phandle was found in the device tree + * -1, if an error occurred + */ +static inline uint32_t fdt_get_max_phandle(const void *fdt) +{ + uint32_t phandle; + int err; + + err = fdt_find_max_phandle(fdt, &phandle); + if (err < 0) + return (uint32_t)-1; + + return phandle; +} + +/** + * fdt_generate_phandle - return a new, unused phandle for a device tree blob + * @fdt: pointer to the device tree blob + * @phandle: return location for the new phandle + * + * Walks the device tree blob and looks for the highest phandle value. On + * success, the new, unused phandle value (one higher than the previously + * highest phandle value in the device tree blob) will be returned in the + * @phandle parameter. + * + * Returns: + * 0 on success or a negative error-code on failure + */ +int fdt_generate_phandle(const void *fdt, uint32_t *phandle); + +/** + * fdt_num_mem_rsv - retrieve the number of memory reserve map entries + * @fdt: pointer to the device tree blob + * + * Returns the number of entries in the device tree blob's memory + * reservation map. This does not include the terminating 0,0 entry + * or any other (0,0) entries reserved for expansion. 
+ * + * returns: + * the number of entries + */ +int fdt_num_mem_rsv(const void *fdt); + +/** + * fdt_get_mem_rsv - retrieve one memory reserve map entry + * @fdt: pointer to the device tree blob + * @address, @size: pointers to 64-bit variables + * + * On success, *address and *size will contain the address and size of + * the n-th reserve map entry from the device tree blob, in + * native-endian format. + * + * returns: + * 0, on success + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, standard meanings + */ +int fdt_get_mem_rsv(const void *fdt, int n, uint64_t *address, uint64_t *size); + +/** + * fdt_subnode_offset_namelen - find a subnode based on substring + * @fdt: pointer to the device tree blob + * @parentoffset: structure block offset of a node + * @name: name of the subnode to locate + * @namelen: number of characters of name to consider + * + * Identical to fdt_subnode_offset(), but only examine the first + * namelen characters of name for matching the subnode name. This is + * useful for finding subnodes based on a portion of a larger string, + * such as a full path. + */ +#ifndef SWIG /* Not available in Python */ +int fdt_subnode_offset_namelen(const void *fdt, int parentoffset, + const char *name, int namelen); +#endif +/** + * fdt_subnode_offset - find a subnode of a given node + * @fdt: pointer to the device tree blob + * @parentoffset: structure block offset of a node + * @name: name of the subnode to locate + * + * fdt_subnode_offset() finds a subnode of the node at structure block + * offset parentoffset with the given name. name may include a unit + * address, in which case fdt_subnode_offset() will find the subnode + * with that unit address, or the unit address may be omitted, in + * which case fdt_subnode_offset() will find an arbitrary subnode + * whose name excluding unit address matches the given name. 
+ * + * returns: + * structure block offset of the requested subnode (>=0), on success + * -FDT_ERR_NOTFOUND, if the requested subnode does not exist + * -FDT_ERR_BADOFFSET, if parentoffset did not point to an FDT_BEGIN_NODE + * tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings. + */ +int fdt_subnode_offset(const void *fdt, int parentoffset, const char *name); + +/** + * fdt_path_offset_namelen - find a tree node by its full path + * @fdt: pointer to the device tree blob + * @path: full path of the node to locate + * @namelen: number of characters of path to consider + * + * Identical to fdt_path_offset(), but only consider the first namelen + * characters of path as the path name. + */ +#ifndef SWIG /* Not available in Python */ +int fdt_path_offset_namelen(const void *fdt, const char *path, int namelen); +#endif + +/** + * fdt_path_offset - find a tree node by its full path + * @fdt: pointer to the device tree blob + * @path: full path of the node to locate + * + * fdt_path_offset() finds a node of a given path in the device tree. + * Each path component may omit the unit address portion, but the + * results of this are undefined if any such path component is + * ambiguous (that is if there are multiple nodes at the relevant + * level matching the given component, differentiated only by unit + * address). + * + * returns: + * structure block offset of the node with the requested path (>=0), on + * success + * -FDT_ERR_BADPATH, given path does not begin with '/' or is invalid + * -FDT_ERR_NOTFOUND, if the requested node does not exist + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings. 
+ */ +int fdt_path_offset(const void *fdt, const char *path); + +/** + * fdt_get_name - retrieve the name of a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: structure block offset of the starting node + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * fdt_get_name() retrieves the name (including unit address) of the + * device tree node at structure block offset nodeoffset. If lenp is + * non-NULL, the length of this name is also returned, in the integer + * pointed to by lenp. + * + * returns: + * pointer to the node's name, on success + * If lenp is non-NULL, *lenp contains the length of that name + * (>=0) + * NULL, on error + * if lenp is non-NULL *lenp contains an error code (<0): + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE + * tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, standard meanings + */ +const char *fdt_get_name(const void *fdt, int nodeoffset, int *lenp); + +/** + * fdt_first_property_offset - find the offset of a node's first property + * @fdt: pointer to the device tree blob + * @nodeoffset: structure block offset of a node + * + * fdt_first_property_offset() finds the first property of the node at + * the given structure block offset. + * + * returns: + * structure block offset of the property (>=0), on success + * -FDT_ERR_NOTFOUND, if the requested node has no properties + * -FDT_ERR_BADOFFSET, if nodeoffset did not point to an FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings. + */ +int fdt_first_property_offset(const void *fdt, int nodeoffset); + +/** + * fdt_next_property_offset - step through a node's properties + * @fdt: pointer to the device tree blob + * @offset: structure block offset of a property + * + * fdt_next_property_offset() finds the property immediately after the + * one at the given structure block offset. 
This will be a property + * of the same node as the given property. + * + * returns: + * structure block offset of the next property (>=0), on success + * -FDT_ERR_NOTFOUND, if the given property is the last in its node + * -FDT_ERR_BADOFFSET, if nodeoffset did not point to an FDT_PROP tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings. + */ +int fdt_next_property_offset(const void *fdt, int offset); + +/** + * fdt_for_each_property_offset - iterate over all properties of a node + * + * @property_offset: property offset (int, lvalue) + * @fdt: FDT blob (const void *) + * @node: node offset (int) + * + * This is actually a wrapper around a for loop and would be used like so: + * + * fdt_for_each_property_offset(property, fdt, node) { + * Use property + * ... + * } + * + * if ((property < 0) && (property != -FDT_ERR_NOTFOUND)) { + * Error handling + * } + * + * Note that this is implemented as a macro and property is used as + * iterator in the loop. The node variable can be constant or even a + * literal. + */ +#define fdt_for_each_property_offset(property, fdt, node) \ + for (property = fdt_first_property_offset(fdt, node); \ + property >= 0; \ + property = fdt_next_property_offset(fdt, property)) + +/** + * fdt_get_property_by_offset - retrieve the property at a given offset + * @fdt: pointer to the device tree blob + * @offset: offset of the property to retrieve + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * fdt_get_property_by_offset() retrieves a pointer to the + * fdt_property structure within the device tree blob at the given + * offset. If lenp is non-NULL, the length of the property value is + * also returned, in the integer pointed to by lenp. + * + * Note that this code only works on device tree versions >= 16. fdt_getprop() + * works on all versions. 
+ * + * returns: + * pointer to the structure representing the property + * if lenp is non-NULL, *lenp contains the length of the property + * value (>=0) + * NULL, on error + * if lenp is non-NULL, *lenp contains an error code (<0): + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_PROP tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +const struct fdt_property *fdt_get_property_by_offset(const void *fdt, + int offset, + int *lenp); + +/** + * fdt_get_property_namelen - find a property based on substring + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to find + * @name: name of the property to find + * @namelen: number of characters of name to consider + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * Identical to fdt_get_property(), but only examine the first namelen + * characters of name for matching the property name. + */ +#ifndef SWIG /* Not available in Python */ +const struct fdt_property *fdt_get_property_namelen(const void *fdt, + int nodeoffset, + const char *name, + int namelen, int *lenp); +#endif + +/** + * fdt_get_property - find a given property in a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to find + * @name: name of the property to find + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * fdt_get_property() retrieves a pointer to the fdt_property + * structure within the device tree blob corresponding to the property + * named 'name' of the node at offset nodeoffset. If lenp is + * non-NULL, the length of the property value is also returned, in the + * integer pointed to by lenp. 
+ * + * returns: + * pointer to the structure representing the property + * if lenp is non-NULL, *lenp contains the length of the property + * value (>=0) + * NULL, on error + * if lenp is non-NULL, *lenp contains an error code (<0): + * -FDT_ERR_NOTFOUND, node does not have named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE + * tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +const struct fdt_property *fdt_get_property(const void *fdt, int nodeoffset, + const char *name, int *lenp); +static inline struct fdt_property *fdt_get_property_w(void *fdt, int nodeoffset, + const char *name, + int *lenp) +{ + return (struct fdt_property *)(uintptr_t) + fdt_get_property(fdt, nodeoffset, name, lenp); +} + +/** + * fdt_getprop_by_offset - retrieve the value of a property at a given offset + * @fdt: pointer to the device tree blob + * @offset: offset of the property to read + * @namep: pointer to a string variable (will be overwritten) or NULL + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * fdt_getprop_by_offset() retrieves a pointer to the value of the + * property at structure block offset 'offset' (this will be a pointer + * to within the device blob itself, not a copy of the value). If + * lenp is non-NULL, the length of the property value is also + * returned, in the integer pointed to by lenp. If namep is non-NULL, + * the property's namne will also be returned in the char * pointed to + * by namep (this will be a pointer to within the device tree's string + * block, not a new copy of the name). + * + * returns: + * pointer to the property's value + * if lenp is non-NULL, *lenp contains the length of the property + * value (>=0) + * if namep is non-NULL *namep contiains a pointer to the property + * name. 
+ * NULL, on error + * if lenp is non-NULL, *lenp contains an error code (<0): + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_PROP tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +#ifndef SWIG /* This function is not useful in Python */ +const void *fdt_getprop_by_offset(const void *fdt, int offset, + const char **namep, int *lenp); +#endif + +/** + * fdt_getprop_namelen - get property value based on substring + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to find + * @name: name of the property to find + * @namelen: number of characters of name to consider + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * Identical to fdt_getprop(), but only examine the first namelen + * characters of name for matching the property name. + */ +#ifndef SWIG /* Not available in Python */ +const void *fdt_getprop_namelen(const void *fdt, int nodeoffset, + const char *name, int namelen, int *lenp); +static inline void *fdt_getprop_namelen_w(void *fdt, int nodeoffset, + const char *name, int namelen, + int *lenp) +{ + return (void *)(uintptr_t)fdt_getprop_namelen(fdt, nodeoffset, name, + namelen, lenp); +} +#endif + +/** + * fdt_getprop - retrieve the value of a given property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to find + * @name: name of the property to find + * @lenp: pointer to an integer variable (will be overwritten) or NULL + * + * fdt_getprop() retrieves a pointer to the value of the property + * named 'name' of the node at offset nodeoffset (this will be a + * pointer to within the device blob itself, not a copy of the value). + * If lenp is non-NULL, the length of the property value is also + * returned, in the integer pointed to by lenp. 
+ * + * returns: + * pointer to the property's value + * if lenp is non-NULL, *lenp contains the length of the property + * value (>=0) + * NULL, on error + * if lenp is non-NULL, *lenp contains an error code (<0): + * -FDT_ERR_NOTFOUND, node does not have named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE + * tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +const void *fdt_getprop(const void *fdt, int nodeoffset, + const char *name, int *lenp); +static inline void *fdt_getprop_w(void *fdt, int nodeoffset, + const char *name, int *lenp) +{ + return (void *)(uintptr_t)fdt_getprop(fdt, nodeoffset, name, lenp); +} + +/** + * fdt_get_phandle - retrieve the phandle of a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: structure block offset of the node + * + * fdt_get_phandle() retrieves the phandle of the device tree node at + * structure block offset nodeoffset. + * + * returns: + * the phandle of the node at nodeoffset, on success (!= 0, != -1) + * 0, if the node has no phandle, or another error occurs + */ +uint32_t fdt_get_phandle(const void *fdt, int nodeoffset); + +/** + * fdt_get_alias_namelen - get alias based on substring + * @fdt: pointer to the device tree blob + * @name: name of the alias th look up + * @namelen: number of characters of name to consider + * + * Identical to fdt_get_alias(), but only examine the first namelen + * characters of name for matching the alias name. + */ +#ifndef SWIG /* Not available in Python */ +const char *fdt_get_alias_namelen(const void *fdt, + const char *name, int namelen); +#endif + +/** + * fdt_get_alias - retrieve the path referenced by a given alias + * @fdt: pointer to the device tree blob + * @name: name of the alias th look up + * + * fdt_get_alias() retrieves the value of a given alias. That is, the + * value of the property named 'name' in the node /aliases. 
+ * + * returns: + * a pointer to the expansion of the alias named 'name', if it exists + * NULL, if the given alias or the /aliases node does not exist + */ +const char *fdt_get_alias(const void *fdt, const char *name); + +/** + * fdt_get_path - determine the full path of a node + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose path to find + * @buf: character buffer to contain the returned path (will be overwritten) + * @buflen: size of the character buffer at buf + * + * fdt_get_path() computes the full path of the node at offset + * nodeoffset, and records that path in the buffer at buf. + * + * NOTE: This function is expensive, as it must scan the device tree + * structure from the start to nodeoffset. + * + * returns: + * 0, on success + * buf contains the absolute path of the node at + * nodeoffset, as a NUL-terminated string. + * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_NOSPACE, the path of the given node is longer than (bufsize-1) + * characters and will not fit in the given buffer. + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_get_path(const void *fdt, int nodeoffset, char *buf, int buflen); + +/** + * fdt_supernode_atdepth_offset - find a specific ancestor of a node + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose parent to find + * @supernodedepth: depth of the ancestor to find + * @nodedepth: pointer to an integer variable (will be overwritten) or NULL + * + * fdt_supernode_atdepth_offset() finds an ancestor of the given node + * at a specific depth from the root (where the root itself has depth + * 0, its immediate subnodes depth 1 and so forth). So + * fdt_supernode_atdepth_offset(fdt, nodeoffset, 0, NULL); + * will always return 0, the offset of the root node. 
If the node at + * nodeoffset has depth D, then: + * fdt_supernode_atdepth_offset(fdt, nodeoffset, D, NULL); + * will return nodeoffset itself. + * + * NOTE: This function is expensive, as it must scan the device tree + * structure from the start to nodeoffset. + * + * returns: + * structure block offset of the node at node offset's ancestor + * of depth supernodedepth (>=0), on success + * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_NOTFOUND, supernodedepth was greater than the depth of + * nodeoffset + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_supernode_atdepth_offset(const void *fdt, int nodeoffset, + int supernodedepth, int *nodedepth); + +/** + * fdt_node_depth - find the depth of a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose parent to find + * + * fdt_node_depth() finds the depth of a given node. The root node + * has depth 0, its immediate subnodes depth 1 and so forth. + * + * NOTE: This function is expensive, as it must scan the device tree + * structure from the start to nodeoffset. + * + * returns: + * depth of the node at nodeoffset (>=0), on success + * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_node_depth(const void *fdt, int nodeoffset); + +/** + * fdt_parent_offset - find the parent of a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose parent to find + * + * fdt_parent_offset() locates the parent node of a given node (that + * is, it finds the offset of the node which contains the node at + * nodeoffset as a subnode). + * + * NOTE: This function is expensive, as it must scan the device tree + * structure from the start to nodeoffset, *twice*. 
+ * + * returns: + * structure block offset of the parent of the node at nodeoffset + * (>=0), on success + * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_parent_offset(const void *fdt, int nodeoffset); + +/** + * fdt_node_offset_by_prop_value - find nodes with a given property value + * @fdt: pointer to the device tree blob + * @startoffset: only find nodes after this offset + * @propname: property name to check + * @propval: property value to search for + * @proplen: length of the value in propval + * + * fdt_node_offset_by_prop_value() returns the offset of the first + * node after startoffset, which has a property named propname whose + * value is of length proplen and has value equal to propval; or if + * startoffset is -1, the very first such node in the tree. + * + * To iterate through all nodes matching the criterion, the following + * idiom can be used: + * offset = fdt_node_offset_by_prop_value(fdt, -1, propname, + * propval, proplen); + * while (offset != -FDT_ERR_NOTFOUND) { + * // other code here + * offset = fdt_node_offset_by_prop_value(fdt, offset, propname, + * propval, proplen); + * } + * + * Note the -1 in the first call to the function, if 0 is used here + * instead, the function will never locate the root node, even if it + * matches the criterion. 
+ * + * returns: + * structure block offset of the located node (>= 0, >startoffset), + * on success + * -FDT_ERR_NOTFOUND, no node matching the criterion exists in the + * tree after startoffset + * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_node_offset_by_prop_value(const void *fdt, int startoffset, + const char *propname, + const void *propval, int proplen); + +/** + * fdt_node_offset_by_phandle - find the node with a given phandle + * @fdt: pointer to the device tree blob + * @phandle: phandle value + * + * fdt_node_offset_by_phandle() returns the offset of the node + * which has the given phandle value. If there is more than one node + * in the tree with the given phandle (an invalid tree), results are + * undefined. + * + * returns: + * structure block offset of the located node (>= 0), on success + * -FDT_ERR_NOTFOUND, no node with that phandle exists + * -FDT_ERR_BADPHANDLE, given phandle value was invalid (0 or -1) + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_node_offset_by_phandle(const void *fdt, uint32_t phandle); + +/** + * fdt_node_check_compatible: check a node's compatible property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of a tree node + * @compatible: string to match against + * + * + * fdt_node_check_compatible() returns 0 if the given node contains a + * 'compatible' property with the given string as one of its elements, + * it returns non-zero otherwise, or on error. 
+ * + * returns: + * 0, if the node has a 'compatible' property listing the given string + * 1, if the node has a 'compatible' property, but it does not list + * the given string + * -FDT_ERR_NOTFOUND, if the given node has no 'compatible' property + * -FDT_ERR_BADOFFSET, if nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_node_check_compatible(const void *fdt, int nodeoffset, + const char *compatible); + +/** + * fdt_node_offset_by_compatible - find nodes with a given 'compatible' value + * @fdt: pointer to the device tree blob + * @startoffset: only find nodes after this offset + * @compatible: 'compatible' string to match against + * + * fdt_node_offset_by_compatible() returns the offset of the first + * node after startoffset, which has a 'compatible' property which + * lists the given compatible string; or if startoffset is -1, the + * very first such node in the tree. + * + * To iterate through all nodes matching the criterion, the following + * idiom can be used: + * offset = fdt_node_offset_by_compatible(fdt, -1, compatible); + * while (offset != -FDT_ERR_NOTFOUND) { + * // other code here + * offset = fdt_node_offset_by_compatible(fdt, offset, compatible); + * } + * + * Note the -1 in the first call to the function, if 0 is used here + * instead, the function will never locate the root node, even if it + * matches the criterion. 
+ * + * returns: + * structure block offset of the located node (>= 0, >startoffset), + * on success + * -FDT_ERR_NOTFOUND, no node matching the criterion exists in the + * tree after startoffset + * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, standard meanings + */ +int fdt_node_offset_by_compatible(const void *fdt, int startoffset, + const char *compatible); + +/** + * fdt_stringlist_contains - check a string list property for a string + * @strlist: Property containing a list of strings to check + * @listlen: Length of property + * @str: String to search for + * + * This is a utility function provided for convenience. The list contains + * one or more strings, each terminated by \0, as is found in a device tree + * "compatible" property. + * + * @return: 1 if the string is found in the list, 0 not found, or invalid list + */ +int fdt_stringlist_contains(const char *strlist, int listlen, const char *str); + +/** + * fdt_stringlist_count - count the number of strings in a string list + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of a tree node + * @property: name of the property containing the string list + * @return: + * the number of strings in the given property + * -FDT_ERR_BADVALUE if the property value is not NUL-terminated + * -FDT_ERR_NOTFOUND if the property does not exist + */ +int fdt_stringlist_count(const void *fdt, int nodeoffset, const char *property); + +/** + * fdt_stringlist_search - find a string in a string list and return its index + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of a tree node + * @property: name of the property containing the string list + * @string: string to look up in the string list + * + * Note that it is possible for this function to succeed on property values + * that are not NUL-terminated. 
That's because the function will stop after
+ * finding the first occurrence of @string. This can for example happen with
+ * small-valued cell properties, such as #address-cells, when searching for
+ * the empty string.
+ *
+ * @return:
+ *	the index of the string in the list of strings
+ *	-FDT_ERR_BADVALUE if the property value is not NUL-terminated
+ *	-FDT_ERR_NOTFOUND if the property does not exist or does not contain
+ *		the given string
+ */
+int fdt_stringlist_search(const void *fdt, int nodeoffset, const char *property,
+			  const char *string);
+
+/**
+ * fdt_stringlist_get() - obtain the string at a given index in a string list
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of a tree node
+ * @property: name of the property containing the string list
+ * @index: index of the string to return
+ * @lenp: return location for the string length or an error code on failure
+ *
+ * Note that this will successfully extract strings from properties with
+ * non-NUL-terminated values. For example on small-valued cell properties
+ * this function will return the empty string.
+ *
+ * If non-NULL, the length of the string (on success) or a negative error-code
+ * (on failure) will be stored in the integer pointed to by lenp.
+ *
+ * @return:
+ *	A pointer to the string at the given index in the string list or NULL on
+ *	failure. On success the length of the string will be stored in the memory
+ *	location pointed to by the lenp parameter, if non-NULL.
On failure one of + * the following negative error codes will be returned in the lenp parameter + * (if non-NULL): + * -FDT_ERR_BADVALUE if the property value is not NUL-terminated + * -FDT_ERR_NOTFOUND if the property does not exist + */ +const char *fdt_stringlist_get(const void *fdt, int nodeoffset, + const char *property, int index, + int *lenp); + +/**********************************************************************/ +/* Read-only functions (addressing related) */ +/**********************************************************************/ + +/** + * FDT_MAX_NCELLS - maximum value for #address-cells and #size-cells + * + * This is the maximum value for #address-cells, #size-cells and + * similar properties that will be processed by libfdt. IEE1275 + * requires that OF implementations handle values up to 4. + * Implementations may support larger values, but in practice higher + * values aren't used. + */ +#define FDT_MAX_NCELLS 4 + +/** + * fdt_address_cells - retrieve address size for a bus represented in the tree + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node to find the address size for + * + * When the node has a valid #address-cells property, returns its value. + * + * returns: + * 0 <= n < FDT_MAX_NCELLS, on success + * 2, if the node has no #address-cells property + * -FDT_ERR_BADNCELLS, if the node has a badly formatted or invalid + * #address-cells property + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_address_cells(const void *fdt, int nodeoffset); + +/** + * fdt_size_cells - retrieve address range size for a bus represented in the + * tree + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node to find the address range size for + * + * When the node has a valid #size-cells property, returns its value. 
+ * + * returns: + * 0 <= n < FDT_MAX_NCELLS, on success + * 1, if the node has no #size-cells property + * -FDT_ERR_BADNCELLS, if the node has a badly formatted or invalid + * #size-cells property + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_size_cells(const void *fdt, int nodeoffset); + + +/**********************************************************************/ +/* Write-in-place functions */ +/**********************************************************************/ + +/** + * fdt_setprop_inplace_namelen_partial - change a property's value, + * but not its size + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @namelen: number of characters of name to consider + * @idx: index of the property to change in the array + * @val: pointer to data to replace the property value with + * @len: length of the property value + * + * Identical to fdt_setprop_inplace(), but modifies the given property + * starting from the given index, and using only the first characters + * of the name. It is useful when you want to manipulate only one value of + * an array and you have a string that doesn't end with \0. + */ +#ifndef SWIG /* Not available in Python */ +int fdt_setprop_inplace_namelen_partial(void *fdt, int nodeoffset, + const char *name, int namelen, + uint32_t idx, const void *val, + int len); +#endif + +/** + * fdt_setprop_inplace - change a property's value, but not its size + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: pointer to data to replace the property value with + * @len: length of the property value + * + * fdt_setprop_inplace() replaces the value of a given property with + * the data in val, of length len. 
This function cannot change the + * size of a property, and so will only work if len is equal to the + * current length of the property. + * + * This function will alter only the bytes in the blob which contain + * the given property value, and will not alter or move any other part + * of the tree. + * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, if len is not equal to the property's current length + * -FDT_ERR_NOTFOUND, node does not have the named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +#ifndef SWIG /* Not available in Python */ +int fdt_setprop_inplace(void *fdt, int nodeoffset, const char *name, + const void *val, int len); +#endif + +/** + * fdt_setprop_inplace_u32 - change the value of a 32-bit integer property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: 32-bit integer value to replace the property with + * + * fdt_setprop_inplace_u32() replaces the value of a given property + * with the 32-bit integer value in val, converting val to big-endian + * if necessary. This function cannot change the size of a property, + * and so will only work if the property already exists and has length + * 4. + * + * This function will alter only the bytes in the blob which contain + * the given property value, and will not alter or move any other part + * of the tree. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, if the property's length is not equal to 4 + * -FDT_ERR_NOTFOUND, node does not have the named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +static inline int fdt_setprop_inplace_u32(void *fdt, int nodeoffset, + const char *name, uint32_t val) +{ + fdt32_t tmp = cpu_to_fdt32(val); + return fdt_setprop_inplace(fdt, nodeoffset, name, &tmp, sizeof(tmp)); +} + +/** + * fdt_setprop_inplace_u64 - change the value of a 64-bit integer property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: 64-bit integer value to replace the property with + * + * fdt_setprop_inplace_u64() replaces the value of a given property + * with the 64-bit integer value in val, converting val to big-endian + * if necessary. This function cannot change the size of a property, + * and so will only work if the property already exists and has length + * 8. + * + * This function will alter only the bytes in the blob which contain + * the given property value, and will not alter or move any other part + * of the tree. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, if the property's length is not equal to 8 + * -FDT_ERR_NOTFOUND, node does not have the named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +static inline int fdt_setprop_inplace_u64(void *fdt, int nodeoffset, + const char *name, uint64_t val) +{ + fdt64_t tmp = cpu_to_fdt64(val); + return fdt_setprop_inplace(fdt, nodeoffset, name, &tmp, sizeof(tmp)); +} + +/** + * fdt_setprop_inplace_cell - change the value of a single-cell property + * + * This is an alternative name for fdt_setprop_inplace_u32() + */ +static inline int fdt_setprop_inplace_cell(void *fdt, int nodeoffset, + const char *name, uint32_t val) +{ + return fdt_setprop_inplace_u32(fdt, nodeoffset, name, val); +} + +/** + * fdt_nop_property - replace a property with nop tags + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to nop + * @name: name of the property to nop + * + * fdt_nop_property() will replace a given property's representation + * in the blob with FDT_NOP tags, effectively removing it from the + * tree. + * + * This function will alter only the bytes in the blob which contain + * the property, and will not alter or move any other part of the + * tree. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOTFOUND, node does not have the named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_nop_property(void *fdt, int nodeoffset, const char *name); + +/** + * fdt_nop_node - replace a node (subtree) with nop tags + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node to nop + * + * fdt_nop_node() will replace a given node's representation in the + * blob, including all its subnodes, if any, with FDT_NOP tags, + * effectively removing it from the tree. + * + * This function will alter only the bytes in the blob which contain + * the node and its properties and subnodes, and will not alter or + * move any other part of the tree. + * + * returns: + * 0, on success + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_nop_node(void *fdt, int nodeoffset); + +/**********************************************************************/ +/* Sequential write functions */ +/**********************************************************************/ + +/* fdt_create_with_flags flags */ +#define FDT_CREATE_FLAG_NO_NAME_DEDUP 0x1 + /* FDT_CREATE_FLAG_NO_NAME_DEDUP: Do not try to de-duplicate property + * names in the fdt. This can result in faster creation times, but + * a larger fdt. */ + +#define FDT_CREATE_FLAGS_ALL (FDT_CREATE_FLAG_NO_NAME_DEDUP) + +/** + * fdt_create_with_flags - begin creation of a new fdt + * @fdt: pointer to memory allocated where fdt will be created + * @bufsize: size of the memory space at fdt + * @flags: a valid combination of FDT_CREATE_FLAG_ flags, or 0. 
+ * + * fdt_create_with_flags() begins the process of creating a new fdt with + * the sequential write interface. + * + * fdt creation process must end with fdt_finished() to produce a valid fdt. + * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, bufsize is insufficient for a minimal fdt + * -FDT_ERR_BADFLAGS, flags is not valid + */ +int fdt_create_with_flags(void *buf, int bufsize, uint32_t flags); + +/** + * fdt_create - begin creation of a new fdt + * @fdt: pointer to memory allocated where fdt will be created + * @bufsize: size of the memory space at fdt + * + * fdt_create() is equivalent to fdt_create_with_flags() with flags=0. + * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, bufsize is insufficient for a minimal fdt + */ +int fdt_create(void *buf, int bufsize); + +int fdt_resize(void *fdt, void *buf, int bufsize); +int fdt_add_reservemap_entry(void *fdt, uint64_t addr, uint64_t size); +int fdt_finish_reservemap(void *fdt); +int fdt_begin_node(void *fdt, const char *name); +int fdt_property(void *fdt, const char *name, const void *val, int len); +static inline int fdt_property_u32(void *fdt, const char *name, uint32_t val) +{ + fdt32_t tmp = cpu_to_fdt32(val); + return fdt_property(fdt, name, &tmp, sizeof(tmp)); +} +static inline int fdt_property_u64(void *fdt, const char *name, uint64_t val) +{ + fdt64_t tmp = cpu_to_fdt64(val); + return fdt_property(fdt, name, &tmp, sizeof(tmp)); +} + +#ifndef SWIG /* Not available in Python */ +static inline int fdt_property_cell(void *fdt, const char *name, uint32_t val) +{ + return fdt_property_u32(fdt, name, val); +} +#endif + +/** + * fdt_property_placeholder - add a new property and return a ptr to its value + * + * @fdt: pointer to the device tree blob + * @name: name of property to add + * @len: length of property value in bytes + * @valp: returns a pointer to where where the value should be placed + * + * returns: + * 0, on success + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_NOSPACE, standard meanings + */ 
+int fdt_property_placeholder(void *fdt, const char *name, int len, void **valp); + +#define fdt_property_string(fdt, name, str) \ + fdt_property(fdt, name, str, strlen(str)+1) +int fdt_end_node(void *fdt); +int fdt_finish(void *fdt); + +/**********************************************************************/ +/* Read-write functions */ +/**********************************************************************/ + +int fdt_create_empty_tree(void *buf, int bufsize); +int fdt_open_into(const void *fdt, void *buf, int bufsize); +int fdt_pack(void *fdt); + +/** + * fdt_add_mem_rsv - add one memory reserve map entry + * @fdt: pointer to the device tree blob + * @address, @size: 64-bit values (native endian) + * + * Adds a reserve map entry to the given blob reserving a region at + * address address of length size. + * + * This function will insert data into the reserve map and will + * therefore change the indexes of some entries in the table. + * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new reservation entry + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_add_mem_rsv(void *fdt, uint64_t address, uint64_t size); + +/** + * fdt_del_mem_rsv - remove a memory reserve map entry + * @fdt: pointer to the device tree blob + * @n: entry to remove + * + * fdt_del_mem_rsv() removes the n-th memory reserve map entry from + * the blob. + * + * This function will delete data from the reservation table and will + * therefore change the indexes of some entries in the table. + * + * returns: + * 0, on success + * -FDT_ERR_NOTFOUND, there is no entry of the given index (i.e. 
there + * are less than n+1 reserve map entries) + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_del_mem_rsv(void *fdt, int n); + +/** + * fdt_set_name - change the name of a given node + * @fdt: pointer to the device tree blob + * @nodeoffset: structure block offset of a node + * @name: name to give the node + * + * fdt_set_name() replaces the name (including unit address, if any) + * of the given node with the given string. NOTE: this function can't + * efficiently check if the new name is unique amongst the given + * node's siblings; results are undefined if this function is invoked + * with a name equal to one of the given node's siblings. + * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes. + * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob + * to contain the new name + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, standard meanings + */ +int fdt_set_name(void *fdt, int nodeoffset, const char *name); + +/** + * fdt_setprop - create or change a property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: pointer to data to set the property value to + * @len: length of the property value + * + * fdt_setprop() sets the value of the named property in the given + * node to the given value and length, creating the property if it + * does not already exist. + * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes. 
+ *
+ * returns:
+ *	0, on success
+ *	-FDT_ERR_NOSPACE, there is insufficient free space in the blob to
+ *		contain the new property value
+ *	-FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag
+ *	-FDT_ERR_BADLAYOUT,
+ *	-FDT_ERR_BADMAGIC,
+ *	-FDT_ERR_BADVERSION,
+ *	-FDT_ERR_BADSTATE,
+ *	-FDT_ERR_BADSTRUCTURE,
+ *	-FDT_ERR_BADLAYOUT,
+ *	-FDT_ERR_TRUNCATED, standard meanings
+ */
+int fdt_setprop(void *fdt, int nodeoffset, const char *name,
+		const void *val, int len);
+
+/**
+ * fdt_setprop_placeholder - allocate space for a property
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node whose property to change
+ * @name: name of the property to change
+ * @len: length of the property value
+ * @prop_data: return pointer to property data
+ *
+ * fdt_setprop_placeholder() allocates the named property in the given node.
+ * If the property exists it is resized. In either case a pointer to the
+ * property data is returned.
+ *
+ * This function may insert or delete data from the blob, and will
+ * therefore change the offsets of some existing nodes.
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_setprop_placeholder(void *fdt, int nodeoffset, const char *name, + int len, void **prop_data); + +/** + * fdt_setprop_u32 - set a property to a 32-bit integer + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: 32-bit integer value for the property (native endian) + * + * fdt_setprop_u32() sets the value of the named property in the given + * node to the given 32-bit integer value (converting to big-endian if + * necessary), or creates a new property with that value if it does + * not already exist. + * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +static inline int fdt_setprop_u32(void *fdt, int nodeoffset, const char *name, + uint32_t val) +{ + fdt32_t tmp = cpu_to_fdt32(val); + return fdt_setprop(fdt, nodeoffset, name, &tmp, sizeof(tmp)); +} + +/** + * fdt_setprop_u64 - set a property to a 64-bit integer + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: 64-bit integer value for the property (native endian) + * + * fdt_setprop_u64() sets the value of the named property in the given + * node to the given 64-bit integer value (converting to big-endian if + * necessary), or creates a new property with that value if it does + * not already exist. + * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +static inline int fdt_setprop_u64(void *fdt, int nodeoffset, const char *name, + uint64_t val) +{ + fdt64_t tmp = cpu_to_fdt64(val); + return fdt_setprop(fdt, nodeoffset, name, &tmp, sizeof(tmp)); +} + +/** + * fdt_setprop_cell - set a property to a single cell value + * + * This is an alternative name for fdt_setprop_u32() + */ +static inline int fdt_setprop_cell(void *fdt, int nodeoffset, const char *name, + uint32_t val) +{ + return fdt_setprop_u32(fdt, nodeoffset, name, val); +} + +/** + * fdt_setprop_string - set a property to a string value + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @str: string value for the property + * + * fdt_setprop_string() sets the value of the named property in the + * given node to the given string value (using the length of the + * string to determine the new length of the property), or creates a + * new property with that value if it does not already exist. + * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +#define fdt_setprop_string(fdt, nodeoffset, name, str) \ + fdt_setprop((fdt), (nodeoffset), (name), (str), strlen(str)+1) + + +/** + * fdt_setprop_empty - set a property to an empty value + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * + * fdt_setprop_empty() sets the value of the named property in the + * given node to an empty (zero length) value, or creates a new empty + * property if it does not already exist. + * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +#define fdt_setprop_empty(fdt, nodeoffset, name) \ + fdt_setprop((fdt), (nodeoffset), (name), NULL, 0) + +/** + * fdt_appendprop - append to or create a property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to append to + * @val: pointer to data to append to the property value + * @len: length of the data to append to the property value + * + * fdt_appendprop() appends the value to the named property in the + * given node, creating the property if it does not already exist. + * + * This function may insert data into the blob, and will therefore + * change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_appendprop(void *fdt, int nodeoffset, const char *name, + const void *val, int len); + +/** + * fdt_appendprop_u32 - append a 32-bit integer value to a property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: 32-bit integer value to append to the property (native endian) + * + * fdt_appendprop_u32() appends the given 32-bit integer value + * (converting to big-endian if necessary) to the value of the named + * property in the given node, or creates a new property with that + * value if it does not already exist. + * + * This function may insert data into the blob, and will therefore + * change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +static inline int fdt_appendprop_u32(void *fdt, int nodeoffset, + const char *name, uint32_t val) +{ + fdt32_t tmp = cpu_to_fdt32(val); + return fdt_appendprop(fdt, nodeoffset, name, &tmp, sizeof(tmp)); +} + +/** + * fdt_appendprop_u64 - append a 64-bit integer value to a property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @val: 64-bit integer value to append to the property (native endian) + * + * fdt_appendprop_u64() appends the given 64-bit integer value + * (converting to big-endian if necessary) to the value of the named + * property in the given node, or creates a new property with that + * value if it does not already exist. + * + * This function may insert data into the blob, and will therefore + * change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +static inline int fdt_appendprop_u64(void *fdt, int nodeoffset, + const char *name, uint64_t val) +{ + fdt64_t tmp = cpu_to_fdt64(val); + return fdt_appendprop(fdt, nodeoffset, name, &tmp, sizeof(tmp)); +} + +/** + * fdt_appendprop_cell - append a single cell value to a property + * + * This is an alternative name for fdt_appendprop_u32() + */ +static inline int fdt_appendprop_cell(void *fdt, int nodeoffset, + const char *name, uint32_t val) +{ + return fdt_appendprop_u32(fdt, nodeoffset, name, val); +} + +/** + * fdt_appendprop_string - append a string to a property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * @str: string value to append to the property + * + * fdt_appendprop_string() appends the given string to the value of + * the named property in the given node, or creates a new property + * with that value if it does not already exist. + * + * This function may insert data into the blob, and will therefore + * change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +#define fdt_appendprop_string(fdt, nodeoffset, name, str) \ + fdt_appendprop((fdt), (nodeoffset), (name), (str), strlen(str)+1) + +/** + * fdt_appendprop_addrrange - append a address range property + * @fdt: pointer to the device tree blob + * @parent: offset of the parent node + * @nodeoffset: offset of the node to add a property at + * @name: name of property + * @addr: start address of a given range + * @size: size of a given range + * + * fdt_appendprop_addrrange() appends an address range value (start + * address and size) to the value of the named property in the given + * node, or creates a new property with that value if it does not + * already exist. + * If "name" is not specified, a default "reg" is used. + * Cell sizes are determined by parent's #address-cells and #size-cells. + * + * This function may insert data into the blob, and will therefore + * change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADNCELLS, if the node has a badly formatted or invalid + * #address-cells property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADVALUE, addr or size doesn't fit to respective cells size + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain a new property + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_appendprop_addrrange(void *fdt, int parent, int nodeoffset, + const char *name, uint64_t addr, uint64_t size); + +/** + * fdt_delprop - delete a property + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to nop + * @name: name of the property to nop + * + * fdt_del_property() will delete the given property. + * + * This function will delete data from the blob, and will therefore + * change the offsets of some existing nodes. + * + * returns: + * 0, on success + * -FDT_ERR_NOTFOUND, node does not have the named property + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_delprop(void *fdt, int nodeoffset, const char *name); + +/** + * fdt_add_subnode_namelen - creates a new node based on substring + * @fdt: pointer to the device tree blob + * @parentoffset: structure block offset of a node + * @name: name of the subnode to locate + * @namelen: number of characters of name to consider + * + * Identical to fdt_add_subnode(), but use only the first namelen + * characters of name as the name of the new node. This is useful for + * creating subnodes based on a portion of a larger string, such as a + * full path. 
+ */ +#ifndef SWIG /* Not available in Python */ +int fdt_add_subnode_namelen(void *fdt, int parentoffset, + const char *name, int namelen); +#endif + +/** + * fdt_add_subnode - creates a new node + * @fdt: pointer to the device tree blob + * @parentoffset: structure block offset of a node + * @name: name of the subnode to locate + * + * fdt_add_subnode() creates a new node as a subnode of the node at + * structure block offset parentoffset, with the given name (which + * should include the unit address, if any). + * + * This function will insert data into the blob, and will therefore + * change the offsets of some existing nodes. + + * returns: + * structure block offset of the created nodeequested subnode (>=0), on + * success + * -FDT_ERR_NOTFOUND, if the requested subnode does not exist + * -FDT_ERR_BADOFFSET, if parentoffset did not point to an FDT_BEGIN_NODE + * tag + * -FDT_ERR_EXISTS, if the node at parentoffset already has a subnode of + * the given name + * -FDT_ERR_NOSPACE, if there is insufficient free space in the + * blob to contain the new node + * -FDT_ERR_NOSPACE + * -FDT_ERR_BADLAYOUT + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings. + */ +int fdt_add_subnode(void *fdt, int parentoffset, const char *name); + +/** + * fdt_del_node - delete a node (subtree) + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node to nop + * + * fdt_del_node() will remove the given node, including all its + * subnodes if any, from the blob. + * + * This function will delete data from the blob, and will therefore + * change the offsets of some existing nodes. 
+ * + * returns: + * 0, on success + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_del_node(void *fdt, int nodeoffset); + +/** + * fdt_overlay_apply - Applies a DT overlay on a base DT + * @fdt: pointer to the base device tree blob + * @fdto: pointer to the device tree overlay blob + * + * fdt_overlay_apply() will apply the given device tree overlay on the + * given base device tree. + * + * Expect the base device tree to be modified, even if the function + * returns an error. + * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there's not enough space in the base device tree + * -FDT_ERR_NOTFOUND, the overlay points to some inexistant nodes or + * properties in the base DT + * -FDT_ERR_BADPHANDLE, + * -FDT_ERR_BADOVERLAY, + * -FDT_ERR_NOPHANDLES, + * -FDT_ERR_INTERNAL, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADOFFSET, + * -FDT_ERR_BADPATH, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_TRUNCATED, standard meanings + */ +int fdt_overlay_apply(void *fdt, void *fdto); + +/**********************************************************************/ +/* Debugging / informational functions */ +/**********************************************************************/ + +const char *fdt_strerror(int errval); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBFDT_H */ diff --git a/tools/src/libfdt/libfdt_env.h b/tools/src/libfdt/libfdt_env.h new file mode 100644 index 0000000..6d028a4 --- /dev/null +++ b/tools/src/libfdt/libfdt_env.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) */ +#ifndef LIBFDT_ENV_H +#define LIBFDT_ENV_H +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * Copyright 2012 Kim Phillips, Freescale Semiconductor. 
+ */ + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> +#include <limits.h> + +#ifdef __CHECKER__ +#define FDT_FORCE __attribute__((force)) +#define FDT_BITWISE __attribute__((bitwise)) +#else +#define FDT_FORCE +#define FDT_BITWISE +#endif + +typedef uint16_t FDT_BITWISE fdt16_t; +typedef uint32_t FDT_BITWISE fdt32_t; +typedef uint64_t FDT_BITWISE fdt64_t; + +#define EXTRACT_BYTE(x, n) ((unsigned long long)((uint8_t *)&x)[n]) +#define CPU_TO_FDT16(x) ((EXTRACT_BYTE(x, 0) << 8) | EXTRACT_BYTE(x, 1)) +#define CPU_TO_FDT32(x) ((EXTRACT_BYTE(x, 0) << 24) | (EXTRACT_BYTE(x, 1) << 16) | \ + (EXTRACT_BYTE(x, 2) << 8) | EXTRACT_BYTE(x, 3)) +#define CPU_TO_FDT64(x) ((EXTRACT_BYTE(x, 0) << 56) | (EXTRACT_BYTE(x, 1) << 48) | \ + (EXTRACT_BYTE(x, 2) << 40) | (EXTRACT_BYTE(x, 3) << 32) | \ + (EXTRACT_BYTE(x, 4) << 24) | (EXTRACT_BYTE(x, 5) << 16) | \ + (EXTRACT_BYTE(x, 6) << 8) | EXTRACT_BYTE(x, 7)) + +static inline uint16_t fdt16_to_cpu(fdt16_t x) +{ + return (FDT_FORCE uint16_t)CPU_TO_FDT16(x); +} +static inline fdt16_t cpu_to_fdt16(uint16_t x) +{ + return (FDT_FORCE fdt16_t)CPU_TO_FDT16(x); +} + +static inline uint32_t fdt32_to_cpu(fdt32_t x) +{ + return (FDT_FORCE uint32_t)CPU_TO_FDT32(x); +} +static inline fdt32_t cpu_to_fdt32(uint32_t x) +{ + return (FDT_FORCE fdt32_t)CPU_TO_FDT32(x); +} + +static inline uint64_t fdt64_to_cpu(fdt64_t x) +{ + return (FDT_FORCE uint64_t)CPU_TO_FDT64(x); +} +static inline fdt64_t cpu_to_fdt64(uint64_t x) +{ + return (FDT_FORCE fdt64_t)CPU_TO_FDT64(x); +} +#undef CPU_TO_FDT64 +#undef CPU_TO_FDT32 +#undef CPU_TO_FDT16 +#undef EXTRACT_BYTE + +#ifdef __APPLE__ +#include <AvailabilityMacros.h> + +/* strnlen() is not available on Mac OS < 10.7 */ +# if !defined(MAC_OS_X_VERSION_10_7) || (MAC_OS_X_VERSION_MAX_ALLOWED < \ + MAC_OS_X_VERSION_10_7) + +#define strnlen fdt_strnlen + +/* + * fdt_strnlen: returns the length of a string or max_count - which ever is + * smallest. 
+ * Input 1 string: the string whose size is to be determined + * Input 2 max_count: the maximum value returned by this function + * Output: length of the string or max_count (the smallest of the two) + */ +static inline size_t fdt_strnlen(const char *string, size_t max_count) +{ + const char *p = memchr(string, 0, max_count); + return p ? p - string : max_count; +} + +#endif /* !defined(MAC_OS_X_VERSION_10_7) || (MAC_OS_X_VERSION_MAX_ALLOWED < + MAC_OS_X_VERSION_10_7) */ + +#endif /* __APPLE__ */ + +#endif /* LIBFDT_ENV_H */ diff --git a/tools/src/libfdt/libfdt_internal.h b/tools/src/libfdt/libfdt_internal.h new file mode 100644 index 0000000..1a393d0 --- /dev/null +++ b/tools/src/libfdt/libfdt_internal.h @@ -0,0 +1,173 @@ +/* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) */ +#ifndef LIBFDT_INTERNAL_H +#define LIBFDT_INTERNAL_H +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + */ +#include "fdt.h" + +#define FDT_ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1)) +#define FDT_TAGALIGN(x) (FDT_ALIGN((x), FDT_TAGSIZE)) + +int32_t fdt_ro_probe_(const void *fdt); +#define FDT_RO_PROBE(fdt) \ + { \ + int32_t totalsize_; \ + if ((totalsize_ = fdt_ro_probe_(fdt)) < 0) \ + return totalsize_; \ + } + +int fdt_check_node_offset_(const void *fdt, int offset); +int fdt_check_prop_offset_(const void *fdt, int offset); +const char *fdt_find_string_(const char *strtab, int tabsize, const char *s); +int fdt_node_end_offset_(void *fdt, int nodeoffset); + +static inline const void *fdt_offset_ptr_(const void *fdt, int offset) +{ + return (const char *)fdt + fdt_off_dt_struct(fdt) + offset; +} + +static inline void *fdt_offset_ptr_w_(void *fdt, int offset) +{ + return (void *)(uintptr_t)fdt_offset_ptr_(fdt, offset); +} + +static inline const struct fdt_reserve_entry *fdt_mem_rsv_(const void *fdt, int n) +{ + const struct fdt_reserve_entry *rsv_table = + (const struct fdt_reserve_entry *) + ((const char *)fdt + 
fdt_off_mem_rsvmap(fdt)); + + return rsv_table + n; +} +static inline struct fdt_reserve_entry *fdt_mem_rsv_w_(void *fdt, int n) +{ + return (void *)(uintptr_t)fdt_mem_rsv_(fdt, n); +} + +#define FDT_SW_MAGIC (~FDT_MAGIC) + +/**********************************************************************/ +/* Checking controls */ +/**********************************************************************/ + +#ifndef FDT_ASSUME_MASK +#define FDT_ASSUME_MASK 0 +#endif + +/* + * Defines assumptions which can be enabled. Each of these can be enabled + * individually. For maximum safety, don't enable any assumptions! + * + * For minimal code size and no safety, use ASSUME_PERFECT at your own risk. + * You should have another method of validating the device tree, such as a + * signature or hash check before using libfdt. + * + * For situations where security is not a concern it may be safe to enable + * ASSUME_SANE. + */ +enum { + /* + * This does essentially no checks. Only the latest device-tree + * version is correctly handled. Inconsistencies or errors in the device + * tree may cause undefined behaviour or crashes. Invalid parameters + * passed to libfdt may do the same. + * + * If an error occurs when modifying the tree it may leave the tree in + * an intermediate (but valid) state. As an example, adding a property + * where there is insufficient space may result in the property name + * being added to the string table even though the property itself is + * not added to the struct section. + * + * Only use this if you have a fully validated device tree with + * the latest supported version and wish to minimise code size. + */ + ASSUME_PERFECT = 0xff, + + /* + * This assumes that the device tree is sane. i.e. header metadata + * and basic hierarchy are correct. + * + * With this assumption enabled, normal device trees produced by libfdt + * and the compiler should be handled safely. Malicious device trees and + * complete garbage may cause libfdt to behave badly or crash. 
Truncated + * device trees (e.g. those only partially loaded) can also cause + * problems. + * + * Note: Only checks that relate exclusively to the device tree itself + * (not the parameters passed to libfdt) are disabled by this + * assumption. This includes checking headers, tags and the like. + */ + ASSUME_VALID_DTB = 1 << 0, + + /* + * This builds on ASSUME_VALID_DTB and further assumes that libfdt + * functions are called with valid parameters, i.e. not trigger + * FDT_ERR_BADOFFSET or offsets that are out of bounds. It disables any + * extensive checking of parameters and the device tree, making various + * assumptions about correctness. + * + * It doesn't make sense to enable this assumption unless + * ASSUME_VALID_DTB is also enabled. + */ + ASSUME_VALID_INPUT = 1 << 1, + + /* + * This disables checks for device-tree version and removes all code + * which handles older versions. + * + * Only enable this if you know you have a device tree with the latest + * version. + */ + ASSUME_LATEST = 1 << 2, + + /* + * This assumes that it is OK for a failed addition to the device tree, + * due to lack of space or some other problem, to skip any rollback + * steps (such as dropping the property name from the string table). + * This is safe to enable in most circumstances, even though it may + * leave the tree in a sub-optimal state. + */ + ASSUME_NO_ROLLBACK = 1 << 3, + + /* + * This assumes that the device tree components appear in a 'convenient' + * order, i.e. the memory reservation block first, then the structure + * block and finally the string block. + * + * This order is not specified by the device-tree specification, + * but is expected by libfdt. The device-tree compiler always created + * device trees with this order. + * + * This assumption disables a check in fdt_open_into() and removes the + * ability to fix the problem there. This is safe if you know that the + * device tree is correctly ordered. See fdt_blocks_misordered_(). 
+ */ + ASSUME_LIBFDT_ORDER = 1 << 4, + + /* + * This assumes that libfdt itself does not have any internal bugs. It + * drops certain checks that should never be needed unless libfdt has an + * undiscovered bug. + * + * This can generally be considered safe to enable. + */ + ASSUME_LIBFDT_FLAWLESS = 1 << 5, +}; + +/** + * can_assume_() - check if a particular assumption is enabled + * + * @mask: Mask to check (ASSUME_...) + * @return true if that assumption is enabled, else false + */ +static inline bool can_assume_(int mask) +{ + return FDT_ASSUME_MASK & mask; +} + +/** helper macros for checking assumptions */ +#define can_assume(_assume) can_assume_(ASSUME_ ## _assume) + +#endif /* LIBFDT_INTERNAL_H */ diff --git a/tools/src/main.c b/tools/src/main.c new file mode 100644 index 0000000..8d69e03 --- /dev/null +++ b/tools/src/main.c @@ -0,0 +1,205 @@ +/* SPDX-License-Identifier: MIT */ + +#include "../build/build_cfg.h" +#include "../build/build_tag.h" + +#include "../config.h" + +#include "adt.h" +#include "aic.h" +#include "clk.h" +#include "cpufreq.h" +#include "display.h" +#include "exception.h" +#include "fb.h" +#include "firmware.h" +#include "gxf.h" +#include "heapblock.h" +#include "mcc.h" +#include "memory.h" +#include "nvme.h" +#include "payload.h" +#include "pcie.h" +#include "pmgr.h" +#include "sep.h" +#include "smp.h" +#include "string.h" +#include "tunables.h" +#include "uart.h" +#include "uartproxy.h" +#include "usb.h" +#include "utils.h" +#include "wdt.h" +#include "xnuboot.h" + +struct vector_args next_stage; + +const char version_tag[] = "##m1n1_ver##" BUILD_TAG; +const char *const m1n1_version = version_tag + 12; + +u32 board_id = ~0, chip_id = ~0; + +void get_device_info(void) +{ + printf("Device info:\n"); + printf(" Model: %s\n", (const char *)adt_getprop(adt, 0, "model", NULL)); + printf(" Target: %s\n", (const char *)adt_getprop(adt, 0, "target-type", NULL)); + + int chosen = adt_path_offset(adt, "/chosen"); + if (chosen > 0) { + if 
(ADT_GETPROP(adt, chosen, "board-id", &board_id) < 0) + printf("Failed to find board-id\n"); + if (ADT_GETPROP(adt, chosen, "chip-id", &chip_id) < 0) + printf("Failed to find chip-id\n"); + + printf(" Board-ID: 0x%x\n", board_id); + printf(" Chip-ID: 0x%x\n", chip_id); + } else { + printf("No chosen node!\n"); + } + + printf("\n"); +} + +void run_actions(void) +{ + bool usb_up = false; + +#ifndef BRINGUP +#ifdef EARLY_PROXY_TIMEOUT + int node = adt_path_offset(adt, "/chosen/asmb"); + u64 lp_sip0 = 0; + + if (node >= 0) { + ADT_GETPROP(adt, node, "lp-sip0", &lp_sip0); + printf("Boot policy: sip0 = %ld\n", lp_sip0); + } + + if (!cur_boot_args.video.display && lp_sip0 == 127) { + printf("Bringing up USB for early debug...\n"); + + usb_init(); + usb_iodev_init(); + + usb_up = true; + + printf("Waiting for proxy connection... "); + for (int i = 0; i < EARLY_PROXY_TIMEOUT * 100; i++) { + for (int j = 0; j < USB_IODEV_COUNT; j++) { + iodev_id_t iodev = IODEV_USB0 + j; + + if (!(iodev_get_usage(iodev) & USAGE_UARTPROXY)) + continue; + + usb_iodev_vuart_setup(iodev); + iodev_handle_events(iodev); + if (iodev_can_write(iodev) || iodev_can_write(IODEV_USB_VUART)) { + printf(" Connected!\n"); + uartproxy_run(NULL); + return; + } + } + + mdelay(10); + if (i % 100 == 99) + printf("."); + } + printf(" Timed out\n"); + } +#endif +#endif + + printf("Checking for payloads...\n"); + + if (payload_run() == 0) { + printf("Valid payload found\n"); + return; + } + + fb_set_active(true); + + printf("No valid payload found\n"); + +#ifndef BRINGUP + if (!usb_up) { + usb_init(); + usb_iodev_init(); + } +#endif + + printf("Running proxy...\n"); + + uartproxy_run(NULL); +} + +void m1n1_main(void) +{ + printf("\n\nm1n1 %s\n", m1n1_version); + printf("Copyright The Asahi Linux Contributors\n"); + printf("Licensed under the MIT license\n\n"); + + printf("Running in EL%lu\n\n", mrs(CurrentEL) >> 2); + + get_device_info(); + firmware_init(); + + heapblock_init(); + +#ifndef BRINGUP + gxf_init(); + 
mcc_init(); + mmu_init(); + aic_init(); +#endif + wdt_disable(); +#ifndef BRINGUP + pmgr_init(); + tunables_apply_static(); + +#ifdef USE_FB + display_init(); + // Kick DCP to sleep, so dodgy monitors which cause reconnect cycles don't cause us to lose the + // framebuffer. + display_shutdown(DCP_SLEEP_IF_EXTERNAL); + fb_init(false); + fb_display_logo(); +#ifdef FB_SILENT_MODE + fb_set_active(!cur_boot_args.video.display); +#else + fb_set_active(true); +#endif +#endif + + clk_init(); + cpufreq_init(); + sep_init(); +#endif + + printf("Initialization complete.\n"); + + run_actions(); + + if (!next_stage.entry) { + panic("Nothing to do!\n"); + } + + printf("Preparing to run next stage at %p...\n", next_stage.entry); + + nvme_shutdown(); + exception_shutdown(); +#ifndef BRINGUP + usb_iodev_shutdown(); + display_shutdown(DCP_SLEEP_IF_EXTERNAL); +#ifdef USE_FB + fb_shutdown(next_stage.restore_logo); +#endif + mmu_shutdown(); +#endif + + printf("Vectoring to next stage...\n"); + + next_stage.entry(next_stage.args[0], next_stage.args[1], next_stage.args[2], next_stage.args[3], + next_stage.args[4]); + + panic("Next stage returned!\n"); +} diff --git a/tools/src/math/exp2f_data.c b/tools/src/math/exp2f_data.c new file mode 100644 index 0000000..38c9333 --- /dev/null +++ b/tools/src/math/exp2f_data.c @@ -0,0 +1,42 @@ +/* + * Shared data between expf, exp2f and powf. + * + * Copyright (c) 2017-2018, Arm Limited. 
+ * SPDX-License-Identifier: MIT + */ + +#include "exp2f_data.h" + +#define N (1 << EXP2F_TABLE_BITS) + +const struct exp2f_data __exp2f_data = { + /* tab[i] = uint(2^(i/N)) - (i << 52-BITS) + used for computing 2^(k/N) for an int |k| < 150 N as + double(tab[k%N] + (k << 52-BITS)) */ + .tab = + { + 0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51, + 0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1, + 0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d, + 0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585, + 0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13, + 0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d, + 0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069, + 0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540, + }, + .shift_scaled = 0x1.8p+52 / N, + .poly = + { + 0x1.c6af84b912394p-5, + 0x1.ebfce50fac4f3p-3, + 0x1.62e42ff0c52d6p-1, + }, + .shift = 0x1.8p+52, + .invln2_scaled = 0x1.71547652b82fep+0 * N, + .poly_scaled = + { + 0x1.c6af84b912394p-5 / N / N / N, + 0x1.ebfce50fac4f3p-3 / N / N, + 0x1.62e42ff0c52d6p-1 / N, + }, +}; diff --git a/tools/src/math/exp2f_data.h b/tools/src/math/exp2f_data.h new file mode 100644 index 0000000..a88c6ce --- /dev/null +++ b/tools/src/math/exp2f_data.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2017-2018, Arm Limited. + * SPDX-License-Identifier: MIT + */ +#ifndef _EXP2F_DATA_H +#define _EXP2F_DATA_H + +#include <stdint.h> + +/* Shared between expf, exp2f and powf. 
*/ +#define EXP2F_TABLE_BITS 5 +#define EXP2F_POLY_ORDER 3 +extern const struct exp2f_data { + uint64_t tab[1 << EXP2F_TABLE_BITS]; + double shift_scaled; + double poly[EXP2F_POLY_ORDER]; + double shift; + double invln2_scaled; + double poly_scaled[EXP2F_POLY_ORDER]; +} __exp2f_data; + +#endif diff --git a/tools/src/math/expf.c b/tools/src/math/expf.c new file mode 100644 index 0000000..c9f1b3f --- /dev/null +++ b/tools/src/math/expf.c @@ -0,0 +1,83 @@ +/* + * Single-precision e^x function. + * + * Copyright (c) 2017-2018, Arm Limited. + * SPDX-License-Identifier: MIT + */ + +#include <math.h> +#include <stdint.h> + +#include "exp2f_data.h" +#include "libm.h" + +#define double_t double + +/* +EXP2F_TABLE_BITS = 5 +EXP2F_POLY_ORDER = 3 + +ULP error: 0.502 (nearest rounding.) +Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.) +Wrong count: 170635 (all nearest rounding wrong results with fma.) +Non-nearest ULP error: 1 (rounded ULP error) +*/ + +#define N (1 << EXP2F_TABLE_BITS) +#define InvLn2N __exp2f_data.invln2_scaled +#define T __exp2f_data.tab +#define C __exp2f_data.poly_scaled + +static inline uint32_t top12(float x) +{ + return asuint(x) >> 20; +} + +float expf(float x) +{ + uint32_t abstop; + uint64_t ki, t; + double_t kd, xd, z, r, r2, y, s; + + xd = (double_t)x; + abstop = top12(x) & 0x7ff; + if (predict_false(abstop >= top12(88.0f))) { + /* |x| >= 88 or x is nan. */ + if (asuint(x) == asuint(-INFINITY)) + return 0.0f; + if (abstop >= top12(INFINITY)) + return x + x; + if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */ + return __math_oflowf(0); + if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */ + return __math_uflowf(0); + } + + /* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k. */ + z = InvLn2N * xd; + + /* Round and convert z to int, the result is in [-150*N, 128*N] and + ideally ties-to-even rule is used, otherwise the magnitude of r + can be bigger which gives larger approximation error. 
*/ +#if TOINT_INTRINSICS + kd = roundtoint(z); + ki = converttoint(z); +#else +#define SHIFT __exp2f_data.shift + kd = eval_as_double(z + SHIFT); + ki = asuint64(kd); + kd -= SHIFT; +#endif + r = z - kd; + + /* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */ + t = T[ki % N]; + t += ki << (52 - EXP2F_TABLE_BITS); + s = asdouble(t); + z = C[0] * r + C[1]; + r2 = r * r; + y = C[2] * r + 1; + y = z * r2 + y; + y = y * s; + return eval_as_float(y); +} diff --git a/tools/src/math/libm.h b/tools/src/math/libm.h new file mode 100644 index 0000000..1616c74 --- /dev/null +++ b/tools/src/math/libm.h @@ -0,0 +1,271 @@ +#ifndef _LIBM_H +#define _LIBM_H + +#include <endian.h> +#include <float.h> +#include <math.h> +#include <stdint.h> + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN +union ldshape { + long double f; + struct { + uint64_t m; + uint16_t se; + } i; +}; +#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN +/* This is the m68k variant of 80-bit long double, and this definition only works + * on archs where the alignment requirement of uint64_t is <= 4. */ +union ldshape { + long double f; + struct { + uint16_t se; + uint16_t pad; + uint64_t m; + } i; +}; +#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN +union ldshape { + long double f; + struct { + uint64_t lo; + uint32_t mid; + uint16_t top; + uint16_t se; + } i; + struct { + uint64_t lo; + uint64_t hi; + } i2; +}; +#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN +union ldshape { + long double f; + struct { + uint16_t se; + uint16_t top; + uint32_t mid; + uint64_t lo; + } i; + struct { + uint64_t hi; + uint64_t lo; + } i2; +}; +#else +#error Unsupported long double representation +#endif + +/* Support non-nearest rounding mode. */ +#define WANT_ROUNDING 1 +/* Support signaling NaNs. 
*/ +#define WANT_SNAN 0 + +#if WANT_SNAN +#error SNaN is unsupported +#else +#define issignalingf_inline(x) 0 +#define issignaling_inline(x) 0 +#endif + +#ifndef TOINT_INTRINSICS +#define TOINT_INTRINSICS 0 +#endif + +#if TOINT_INTRINSICS +/* Round x to nearest int in all rounding modes, ties have to be rounded + consistently with converttoint so the results match. If the result + would be outside of [-2^31, 2^31-1] then the semantics is unspecified. */ +static double_t roundtoint(double_t); + +/* Convert x to nearest int in all rounding modes, ties have to be rounded + consistently with roundtoint. If the result is not representible in an + int32_t then the semantics is unspecified. */ +static int32_t converttoint(double_t); +#endif + +/* Helps static branch prediction so hot path can be better optimized. */ +#ifdef __GNUC__ +#define predict_true(x) __builtin_expect(!!(x), 1) +#define predict_false(x) __builtin_expect(x, 0) +#else +#define predict_true(x) (x) +#define predict_false(x) (x) +#endif + +/* Evaluate an expression as the specified type. With standard excess + precision handling a type cast or assignment is enough (with + -ffloat-store an assignment is required, in old compilers argument + passing and return statement may not drop excess precision). */ + +static inline float eval_as_float(float x) +{ + float y = x; + return y; +} + +static inline double eval_as_double(double x) +{ + double y = x; + return y; +} + +/* fp_barrier returns its input, but limits code transformations + as if it had a side-effect (e.g. observable io) and returned + an arbitrary value. 
*/ + +#ifndef fp_barrierf +#define fp_barrierf fp_barrierf +static inline float fp_barrierf(float x) +{ + volatile float y = x; + return y; +} +#endif + +#ifndef fp_barrier +#define fp_barrier fp_barrier +static inline double fp_barrier(double x) +{ + volatile double y = x; + return y; +} +#endif + +#ifndef fp_barrierl +#define fp_barrierl fp_barrierl +static inline long double fp_barrierl(long double x) +{ + volatile long double y = x; + return y; +} +#endif + +/* fp_force_eval ensures that the input value is computed when that's + otherwise unused. To prevent the constant folding of the input + expression, an additional fp_barrier may be needed or a compilation + mode that does so (e.g. -frounding-math in gcc). Then it can be + used to evaluate an expression for its fenv side-effects only. */ + +#ifndef fp_force_evalf +#define fp_force_evalf fp_force_evalf +static inline void fp_force_evalf(float x) +{ + volatile float y; + y = x; + (void)y; +} +#endif + +#ifndef fp_force_eval +#define fp_force_eval fp_force_eval +static inline void fp_force_eval(double x) +{ + volatile double y; + y = x; + (void)y; +} +#endif + +#ifndef fp_force_evall +#define fp_force_evall fp_force_evall +static inline void fp_force_evall(long double x) +{ + volatile long double y; + y = x; + (void)y; +} +#endif + +#define FORCE_EVAL(x) \ + do { \ + if (sizeof(x) == sizeof(float)) { \ + fp_force_evalf(x); \ + } else if (sizeof(x) == sizeof(double)) { \ + fp_force_eval(x); \ + } else { \ + fp_force_evall(x); \ + } \ + } while (0) + +#define asuint(f) \ + ((union { \ + float _f; \ + uint32_t _i; \ + }){f}) \ + ._i +#define asfloat(i) \ + ((union { \ + uint32_t _i; \ + float _f; \ + }){i}) \ + ._f +#define asuint64(f) \ + ((union { \ + double _f; \ + uint64_t _i; \ + }){f}) \ + ._i +#define asdouble(i) \ + ((union { \ + uint64_t _i; \ + double _f; \ + }){i}) \ + ._f + +#define EXTRACT_WORDS(hi, lo, d) \ + do { \ + uint64_t __u = asuint64(d); \ + (hi) = __u >> 32; \ + (lo) = (uint32_t)__u; \ + } 
while (0) + +#define GET_HIGH_WORD(hi, d) \ + do { \ + (hi) = asuint64(d) >> 32; \ + } while (0) + +#define GET_LOW_WORD(lo, d) \ + do { \ + (lo) = (uint32_t)asuint64(d); \ + } while (0) + +#define INSERT_WORDS(d, hi, lo) \ + do { \ + (d) = asdouble(((uint64_t)(hi) << 32) | (uint32_t)(lo)); \ + } while (0) + +#define SET_HIGH_WORD(d, hi) INSERT_WORDS(d, hi, (uint32_t)asuint64(d)) + +#define SET_LOW_WORD(d, lo) INSERT_WORDS(d, asuint64(d) >> 32, lo) + +#define GET_FLOAT_WORD(w, d) \ + do { \ + (w) = asuint(d); \ + } while (0) + +#define SET_FLOAT_WORD(d, w) \ + do { \ + (d) = asfloat(w); \ + } while (0) + +/* error handling functions */ + +static inline float __math_xflowf(uint32_t sign, float y) +{ + return eval_as_float(fp_barrierf(sign ? -y : y) * y); +} + +static inline float __math_oflowf(uint32_t sign) +{ + return __math_xflowf(sign, 0x1p97f); +} + +float __math_uflowf(uint32_t sign) +{ + return __math_xflowf(sign, 0x1p-95f); +} + +#endif diff --git a/tools/src/mcc.c b/tools/src/mcc.c new file mode 100644 index 0000000..d687d73 --- /dev/null +++ b/tools/src/mcc.c @@ -0,0 +1,271 @@ +/* SPDX-License-Identifier: MIT */ + +#include "mcc.h" +#include "adt.h" +#include "hv.h" +#include "memory.h" +#include "string.h" +#include "utils.h" + +static bool mcc_initialized = false; + +#define MAX_MCC_INSTANCES 16 + +#define T8103_PLANES 4 +#define T8103_PLANE_STRIDE 0x40000 +#define T8103_DCS_STRIDE 0x40000 + +#define T6000_PLANES 4 +#define T6000_PLANE_OFFSET 0 +#define T6000_PLANE_STRIDE 0x40000 +#define T6000_GLOBAL_OFFSET 0x100000 +#define T6000_DCS_OFFSET 0x200000 +#define T6000_DCS_STRIDE 0x100000 +#define T6000_DCS_COUNT 4 + +#define PLANE_TZ_START(i) (0x6a0 + i * 0x10) +#define PLANE_TZ_END(i) (0x6a4 + i * 0x10) +#define PLANE_TZ_ENABLE(i) (0x6a8 + i * 0x10) +#define PLANE_TZ_REGS 4 + +#define PLANE_CACHE_ENABLE 0x1c00 +#define PLANE_CACHE_STATUS 0x1c04 + +#define T8103_CACHE_STATUS_DATA_COUNT GENMASK(14, 10) +#define T8103_CACHE_STATUS_TAG_COUNT GENMASK(9, 5) + 
+#define T6000_CACHE_STATUS_DATA_COUNT GENMASK(13, 9) +#define T6000_CACHE_STATUS_TAG_COUNT GENMASK(8, 4) + +#define T6000_CACHE_WAYS 12 +#define T6000_CACHE_STATUS_MASK (T6000_CACHE_STATUS_DATA_COUNT | T6000_CACHE_STATUS_TAG_COUNT) +#define T6000_CACHE_STATUS_VAL \ + (FIELD_PREP(T6000_CACHE_STATUS_DATA_COUNT, T6000_CACHE_WAYS) | \ + FIELD_PREP(T6000_CACHE_STATUS_TAG_COUNT, T6000_CACHE_WAYS)) + +#define T8103_CACHE_WAYS 16 +#define T8103_CACHE_STATUS_MASK (T8103_CACHE_STATUS_DATA_COUNT | T8103_CACHE_STATUS_TAG_COUNT) +#define T8103_CACHE_STATUS_VAL \ + (FIELD_PREP(T8103_CACHE_STATUS_DATA_COUNT, T8103_CACHE_WAYS) | \ + FIELD_PREP(T8103_CACHE_STATUS_TAG_COUNT, T8103_CACHE_WAYS)) + +#define CACHE_ENABLE_TIMEOUT 10000 + +#define T8103_DCC_DRAMCFG0 0xdc4 +#define T8103_DCC_DRAMCFG1 0xdbc +#define T8103_DCC_DRAMCFG0_DEFAULT 0x813057f +#define T8103_DCC_DRAMCFG1_DEFAULT 0x1800180 +#define T8103_DCC_DRAMCFG0_FAST 0x133 +#define T8103_DCC_DRAMCFG1_FAST 0x55555340 + +#define T6000_DCC_DRAMCFG 0x13cc +#define T6000_DCC_DRAMCFG_DEFAULT 0x55551555 +#define T6000_DCC_DRAMCFG_FAST 0xffff0000 + +size_t mcc_carveout_count; +struct mcc_carveout mcc_carveouts[PLANE_TZ_REGS + 1]; + +struct mcc_regs { + u64 plane_base; + u64 plane_stride; + int plane_count; + + u64 global_base; + + u64 dcs_base; + u64 dcs_stride; + int dcs_count; + + int cache_ways; + u32 cache_status_mask; + u32 cache_status_val; +}; + +static int mcc_count; +static struct mcc_regs mcc_regs[MAX_MCC_INSTANCES]; + +static u32 plane_read32(int mcc, int plane, u64 offset) +{ + return read32(mcc_regs[mcc].plane_base + plane * mcc_regs[mcc].plane_stride + offset); +} + +static void plane_write32(int mcc, int plane, u64 offset, u32 value) +{ + write32(mcc_regs[mcc].plane_base + plane * mcc_regs[mcc].plane_stride + offset, value); +} + +static int plane_poll32(int mcc, int plane, u64 offset, u32 mask, u32 target, u32 timeout) +{ + return poll32(mcc_regs[mcc].plane_base + plane * mcc_regs[mcc].plane_stride + offset, mask, + 
target, timeout); +} + +static void mcc_enable_cache(void) +{ + if (!mcc_initialized) + return; + + for (int mcc = 0; mcc < mcc_count; mcc++) { + for (int plane = 0; plane < mcc_regs[mcc].plane_count; plane++) { + plane_write32(mcc, plane, PLANE_CACHE_ENABLE, mcc_regs[mcc].cache_ways); + if (plane_poll32(mcc, plane, PLANE_CACHE_STATUS, mcc_regs[mcc].cache_status_mask, + mcc_regs[mcc].cache_status_val, CACHE_ENABLE_TIMEOUT)) + printf("MCC: timeout while enabling cache for MCC %d plane %d: 0x%x\n", mcc, plane, + plane_read32(mcc, plane, PLANE_CACHE_STATUS)); + } + } +} + +int mcc_unmap_carveouts(void) +{ + if (!mcc_initialized) + return -1; + + mcc_carveout_count = 0; + memset(mcc_carveouts, 0, sizeof mcc_carveouts); + // All MCCs and planes should have identical configs + for (int i = 0; i < PLANE_TZ_REGS; i++) { + uint64_t start = plane_read32(0, 0, PLANE_TZ_START(i)); + uint64_t end = plane_read32(0, 0, PLANE_TZ_END(i)); + bool enabled = plane_read32(0, 0, PLANE_TZ_ENABLE(i)); + + if (enabled) { + if (!start || start == end) { + printf("MMU: TZ%d region has bad bounds 0x%lx..0x%lx (iBoot bug?)\n", i, start, + end); + continue; + } + + start = start << 12; + end = (end + 1) << 12; + start |= ram_base; + end |= ram_base; + printf("MMU: Unmapping TZ%d region at 0x%lx..0x%lx\n", i, start, end); + mmu_rm_mapping(start, end - start); + mmu_rm_mapping(start | REGION_RWX_EL0, end - start); + mmu_rm_mapping(start | REGION_RW_EL0, end - start); + mmu_rm_mapping(start | REGION_RX_EL1, end - start); + mcc_carveouts[mcc_carveout_count].base = start; + mcc_carveouts[mcc_carveout_count].size = end - start; + mcc_carveout_count++; + } + } + + return 0; +} + +int mcc_init_t8103(int node, int *path) +{ + printf("MCC: Initializing T8103 MCC...\n"); + + mcc_count = 1; + mcc_regs[0].plane_stride = T8103_PLANE_STRIDE; + mcc_regs[0].plane_count = T8103_PLANES; + mcc_regs[0].dcs_stride = T8103_DCS_STRIDE; + + if (adt_get_reg(adt, path, "reg", 0, &mcc_regs[0].global_base, NULL)) { + 
printf("MCC: Failed to get reg property 0!\n"); + return -1; + } + + if (adt_get_reg(adt, path, "reg", 1, &mcc_regs[0].plane_base, NULL)) { + printf("MCC: Failed to get reg property 1!\n"); + return -1; + } + + if (adt_get_reg(adt, path, "reg", 2, &mcc_regs[0].dcs_base, NULL)) { + printf("MCC: Failed to get reg property 2!\n"); + return -1; + } + + u32 val; + if (ADT_GETPROP(adt, node, "dcs_num_channels", &val) < 0) { + printf("MCC: Failed to get dcs_num_channels property!\n"); + return -1; + } + + mcc_regs[0].dcs_count = val; + mcc_regs[0].cache_ways = T8103_CACHE_WAYS; + mcc_regs[0].cache_status_mask = T8103_CACHE_STATUS_MASK; + mcc_regs[0].cache_status_val = T8103_CACHE_STATUS_VAL; + + mcc_enable_cache(); + + printf("MCC: Initialized T8103 MCC (%d channels)\n", val); + + mcc_initialized = true; + + return 0; +} + +int mcc_init_t6000(int node, int *path) +{ + u32 reg_len; + + if (!adt_getprop(adt, node, "reg", ®_len)) { + printf("MCC: Failed to get reg property!\n"); + return -1; + } + + mcc_count = reg_len / 16; + + printf("MCC: Initializing T6000 MCCs (%d instances)...\n", mcc_count); + + if (mcc_count > MAX_MCC_INSTANCES) { + printf("MCC: Too many instances, increase MAX_MCC_INSTANCES!\n"); + mcc_count = MAX_MCC_INSTANCES; + } + + for (int i = 0; i < mcc_count; i++) { + u64 base; + if (adt_get_reg(adt, path, "reg", 0, &base, NULL)) { + printf("MCC: Failed to get reg index %d!\n", i); + return -1; + } + + mcc_regs[i].plane_base = base + T6000_PLANE_OFFSET; + mcc_regs[i].plane_stride = T6000_PLANE_STRIDE; + mcc_regs[i].plane_count = T6000_PLANES; + + mcc_regs[i].global_base = base + T6000_GLOBAL_OFFSET; + + mcc_regs[i].dcs_base = base + T6000_DCS_OFFSET; + mcc_regs[i].dcs_stride = T6000_DCS_STRIDE; + mcc_regs[i].dcs_count = T6000_DCS_COUNT; + + mcc_regs[i].cache_ways = T6000_CACHE_WAYS; + mcc_regs[i].cache_status_mask = T6000_CACHE_STATUS_MASK; + mcc_regs[i].cache_status_val = T6000_CACHE_STATUS_VAL; + } + + mcc_enable_cache(); + + printf("MCC: Initialized T6000 
MCCs (%d instances, %d planes, %d channels)\n", mcc_count, + mcc_regs[0].plane_count, mcc_regs[0].dcs_count); + + mcc_initialized = true; + + return 0; +} + +int mcc_init(void) +{ + int path[8]; + int node = adt_path_offset_trace(adt, "/arm-io/mcc", path); + + if (node < 0) { + printf("MCC: MCC node not found!\n"); + return -1; + } + + if (adt_is_compatible(adt, node, "mcc,t8103")) { + return mcc_init_t8103(node, path); + } else if (adt_is_compatible(adt, node, "mcc,t8112")) { + return mcc_init_t8103(node, path); + } else if (adt_is_compatible(adt, node, "mcc,t6000")) { + return mcc_init_t6000(node, path); + } else { + printf("MCC: Unsupported version\n"); + return -1; + } +} diff --git a/tools/src/mcc.h b/tools/src/mcc.h new file mode 100644 index 0000000..b059d47 --- /dev/null +++ b/tools/src/mcc.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef MCC_H +#define MCC_H + +#include "types.h" + +struct mcc_carveout { + u64 base; + u64 size; +}; + +extern size_t mcc_carveout_count; +extern struct mcc_carveout mcc_carveouts[]; + +int mcc_init(void); +int mcc_unmap_carveouts(void); + +#endif diff --git a/tools/src/memory.c b/tools/src/memory.c new file mode 100644 index 0000000..aec6782 --- /dev/null +++ b/tools/src/memory.c @@ -0,0 +1,566 @@ +/* SPDX-License-Identifier: MIT */ + +#include "memory.h" +#include "adt.h" +#include "assert.h" +#include "cpu_regs.h" +#include "fb.h" +#include "gxf.h" +#include "malloc.h" +#include "mcc.h" +#include "smp.h" +#include "string.h" +#include "utils.h" +#include "xnuboot.h" + +#define PAGE_SIZE 0x4000 +#define CACHE_LINE_SIZE 64 + +#define CACHE_RANGE_OP(func, op) \ + void func(void *addr, size_t length) \ + { \ + u64 p = (u64)addr; \ + u64 end = p + length; \ + while (p < end) { \ + cacheop(op, p); \ + p += CACHE_LINE_SIZE; \ + } \ + } + +CACHE_RANGE_OP(ic_ivau_range, "ic ivau") +CACHE_RANGE_OP(dc_ivac_range, "dc ivac") +CACHE_RANGE_OP(dc_zva_range, "dc zva") +CACHE_RANGE_OP(dc_cvac_range, "dc cvac") 
+CACHE_RANGE_OP(dc_cvau_range, "dc cvau") +CACHE_RANGE_OP(dc_civac_range, "dc civac") + +extern u8 _stack_top[]; + +uint64_t ram_base = 0; + +static inline u64 read_sctlr(void) +{ + sysop("isb"); + return mrs(SCTLR_EL1); +} + +static inline void write_sctlr(u64 val) +{ + msr(SCTLR_EL1, val); + sysop("isb"); +} + +#define VADDR_L3_INDEX_BITS 11 +#define VADDR_L2_INDEX_BITS 11 +// We treat two concatenated L1 page tables as one +#define VADDR_L1_INDEX_BITS 12 + +#define VADDR_L3_OFFSET_BITS 14 +#define VADDR_L2_OFFSET_BITS 25 +#define VADDR_L1_OFFSET_BITS 36 + +#define VADDR_L1_ALIGN_MASK GENMASK(VADDR_L1_OFFSET_BITS - 1, VADDR_L2_OFFSET_BITS) +#define VADDR_L2_ALIGN_MASK GENMASK(VADDR_L2_OFFSET_BITS - 1, VADDR_L3_OFFSET_BITS) +#define PTE_TARGET_MASK GENMASK(49, VADDR_L3_OFFSET_BITS) + +#define ENTRIES_PER_L1_TABLE BIT(VADDR_L1_INDEX_BITS) +#define ENTRIES_PER_L2_TABLE BIT(VADDR_L2_INDEX_BITS) +#define ENTRIES_PER_L3_TABLE BIT(VADDR_L3_INDEX_BITS) + +#define IS_PTE(pte) ((pte) && pte & PTE_VALID) + +#define L1_IS_TABLE(pte) (IS_PTE(pte) && FIELD_GET(PTE_TYPE, pte) == PTE_TABLE) +#define L1_IS_BLOCK(pte) (IS_PTE(pte) && FIELD_GET(PTE_TYPE, pte) == PTE_BLOCK) +#define L2_IS_TABLE(pte) (IS_PTE(pte) && FIELD_GET(PTE_TYPE, pte) == PTE_TABLE) +#define L2_IS_BLOCK(pte) (IS_PTE(pte) && FIELD_GET(PTE_TYPE, pte) == PTE_BLOCK) +#define L3_IS_BLOCK(pte) (IS_PTE(pte) && FIELD_GET(PTE_TYPE, pte) == PTE_PAGE) + +/* + * We use 16KB pages which results in the following virtual address space: + * + * [L0 index] [L1 index] [L2 index] [L3 index] [page offset] + * 1 bit 11 bits 11 bits 11 bits 14 bits + * + * To simplify things we treat the L1 page table as a concatenated table, + * which results in the following layout: + * + * [L1 index] [L2 index] [L3 index] [page offset] + * 12 bits 11 bits 11 bits 14 bits + * + * We initalize one double-size L1 table which covers the entire virtual memory space, + * point to the two halves in the single L0 table and then create L2/L3 tables on 
demand. + */ + +/* + * SPRR mappings interpret these bits as a 4-bit index as follows + * [AP1][AP0][PXN][UXN] + */ +#define SPRR_INDEX(perm) \ + (((PTE_AP_RO & (perm)) ? 0b1000 : 0) | ((PTE_AP_EL0 & (perm)) ? 0b0100 : 0) | \ + ((PTE_UXN & (perm)) ? 0b0010 : 0) | ((PTE_PXN & (perm)) ? 0b0001 : 0)) + +enum SPRR_val_t { + EL0_GL0, + ELrx_GL0, + ELr_GL0, + ELrw_GL0, + EL0_GLrx, + ELrx_GLrx, + ELr_GLrx, + EL0_GLrx_ALT, + EL0_GLr, + ELx_GLr, + ELr_GLr, + ELrw_GLr, + EL0_GLrw, + ELrx_GLrw, + ELr_GLrw, + ELrw_GLrw, +}; + +/* + * With SPRR enabled, RWX mappings get downgraded to RW. + */ + +#define SPRR_PERM(ap, val) (((u64)val) << (4 * SPRR_INDEX(ap))) + +#define SPRR_DEFAULT_PERM_EL1 \ + SPRR_PERM(PERM_RO_EL0, ELrw_GLrw) | SPRR_PERM(PERM_RW_EL0, ELrw_GLrw) | \ + SPRR_PERM(PERM_RX_EL0, ELrx_GLrx) | SPRR_PERM(PERM_RWX_EL0, ELrw_GLrw) | \ + SPRR_PERM(PERM_RO, ELr_GLr) | SPRR_PERM(PERM_RW, ELrw_GLrw) | \ + SPRR_PERM(PERM_RX, ELrx_GLrx) | SPRR_PERM(PERM_RWX, ELrw_GLrw) + +#define SPRR_DEFAULT_PERM_EL0 \ + SPRR_PERM(PERM_RO_EL0, ELr_GLr) | SPRR_PERM(PERM_RW_EL0, ELrw_GLrw) | \ + SPRR_PERM(PERM_RX_EL0, ELrx_GLrx) | SPRR_PERM(PERM_RWX_EL0, ELrx_GLrx) | \ + SPRR_PERM(PERM_RO, ELr_GLr) | SPRR_PERM(PERM_RW, ELrw_GLrw) | \ + SPRR_PERM(PERM_RX, ELrx_GLrx) | SPRR_PERM(PERM_RWX, ELrw_GLrw) + +/* + * aarch64 allows to configure attribute sets for up to eight different memory + * types. we need normal memory and two types of device memory (nGnRnE and + * nGnRE) in m1n1. + * The indexes here are selected arbitrarily: A page table entry + * contains a field to select one of these which will then be used + * to select the corresponding memory access flags from MAIR. 
+ */ + +#define MAIR_SHIFT_NORMAL (MAIR_IDX_NORMAL * 8) +#define MAIR_SHIFT_NORMAL_NC (MAIR_IDX_NORMAL_NC * 8) +#define MAIR_SHIFT_DEVICE_nGnRnE (MAIR_IDX_DEVICE_nGnRnE * 8) +#define MAIR_SHIFT_DEVICE_nGnRE (MAIR_IDX_DEVICE_nGnRE * 8) +#define MAIR_SHIFT_DEVICE_nGRE (MAIR_IDX_DEVICE_nGRE * 8) +#define MAIR_SHIFT_DEVICE_GRE (MAIR_IDX_DEVICE_GRE * 8) + +/* + * https://developer.arm.com/documentation/ddi0500/e/system-control/aarch64-register-descriptions/memory-attribute-indirection-register--el1 + * + * MAIR_ATTR_NORMAL_DEFAULT sets Normal Memory, Outer Write-back non-transient, + * Inner Write-back non-transient, R=1, W=1 + * MAIR_ATTR_DEVICE_nGnRnE sets Device-nGnRnE memory + * MAIR_ATTR_DEVICE_nGnRE sets Device-nGnRE memory + */ +#define MAIR_ATTR_NORMAL_DEFAULT 0xffUL +#define MAIR_ATTR_NORMAL_NC 0x44UL +#define MAIR_ATTR_DEVICE_nGnRnE 0x00UL +#define MAIR_ATTR_DEVICE_nGnRE 0x04UL +#define MAIR_ATTR_DEVICE_nGRE 0x08UL +#define MAIR_ATTR_DEVICE_GRE 0x0cUL + +static u64 *mmu_pt_L0; +static u64 *mmu_pt_L1; + +static u64 *mmu_pt_get_l2(u64 from) +{ + u64 l1idx = from >> VADDR_L1_OFFSET_BITS; + assert(l1idx < ENTRIES_PER_L1_TABLE); + u64 l1d = mmu_pt_L1[l1idx]; + + if (L1_IS_TABLE(l1d)) + return (u64 *)(l1d & PTE_TARGET_MASK); + + u64 *l2 = (u64 *)memalign(PAGE_SIZE, ENTRIES_PER_L2_TABLE * sizeof(u64)); + assert(!IS_PTE(l1d)); + memset64(l2, 0, ENTRIES_PER_L2_TABLE * sizeof(u64)); + + l1d = ((u64)l2) | FIELD_PREP(PTE_TYPE, PTE_TABLE) | PTE_VALID; + mmu_pt_L1[l1idx] = l1d; + return l2; +} + +static void mmu_pt_map_l2(u64 from, u64 to, u64 size) +{ + assert((from & MASK(VADDR_L2_OFFSET_BITS)) == 0); + assert((to & PTE_TARGET_MASK & MASK(VADDR_L2_OFFSET_BITS)) == 0); + assert((size & MASK(VADDR_L2_OFFSET_BITS)) == 0); + + to |= FIELD_PREP(PTE_TYPE, PTE_BLOCK); + + for (; size; size -= BIT(VADDR_L2_OFFSET_BITS)) { + u64 idx = (from >> VADDR_L2_OFFSET_BITS) & MASK(VADDR_L2_INDEX_BITS); + u64 *l2 = mmu_pt_get_l2(from); + + if (L2_IS_TABLE(l2[idx])) + free((void *)(l2[idx] & 
PTE_TARGET_MASK)); + + l2[idx] = to; + from += BIT(VADDR_L2_OFFSET_BITS); + to += BIT(VADDR_L2_OFFSET_BITS); + } +} + +static u64 *mmu_pt_get_l3(u64 from) +{ + u64 *l2 = mmu_pt_get_l2(from); + u64 l2idx = (from >> VADDR_L2_OFFSET_BITS) & MASK(VADDR_L2_INDEX_BITS); + assert(l2idx < ENTRIES_PER_L2_TABLE); + u64 l2d = l2[l2idx]; + + if (L2_IS_TABLE(l2d)) + return (u64 *)(l2d & PTE_TARGET_MASK); + + u64 *l3 = (u64 *)memalign(PAGE_SIZE, ENTRIES_PER_L3_TABLE * sizeof(u64)); + if (IS_PTE(l2d)) { + u64 l3d = l2d; + l3d &= ~PTE_TYPE; + l3d |= FIELD_PREP(PTE_TYPE, PTE_PAGE); + for (u64 idx = 0; idx < ENTRIES_PER_L3_TABLE; idx++, l3d += BIT(VADDR_L3_OFFSET_BITS)) + l3[idx] = l3d; + } else { + memset64(l3, 0, ENTRIES_PER_L3_TABLE * sizeof(u64)); + } + + l2d = ((u64)l3) | FIELD_PREP(PTE_TYPE, PTE_TABLE) | PTE_VALID; + l2[l2idx] = l2d; + return l3; +} + +static void mmu_pt_map_l3(u64 from, u64 to, u64 size) +{ + assert((from & MASK(VADDR_L3_OFFSET_BITS)) == 0); + assert((to & PTE_TARGET_MASK & MASK(VADDR_L3_OFFSET_BITS)) == 0); + assert((size & MASK(VADDR_L3_OFFSET_BITS)) == 0); + + to |= FIELD_PREP(PTE_TYPE, PTE_PAGE); + + for (; size; size -= BIT(VADDR_L3_OFFSET_BITS)) { + u64 idx = (from >> VADDR_L3_OFFSET_BITS) & MASK(VADDR_L3_INDEX_BITS); + u64 *l3 = mmu_pt_get_l3(from); + + l3[idx] = to; + from += BIT(VADDR_L3_OFFSET_BITS); + to += BIT(VADDR_L3_OFFSET_BITS); + } +} + +int mmu_map(u64 from, u64 to, u64 size) +{ + u64 chunk; + if (from & MASK(VADDR_L3_OFFSET_BITS) || size & MASK(VADDR_L3_OFFSET_BITS)) + return -1; + + // L3 mappings to boundary + u64 boundary = ALIGN_UP(from, MASK(VADDR_L2_OFFSET_BITS)); + // CPU CTRR doesn't like L2 mappings crossing CTRR boundaries! 
+ // Map everything below the m1n1 base as L3 + if (boundary >= ram_base && boundary < (u64)_base) + boundary = ALIGN_UP((u64)_base, MASK(VADDR_L2_OFFSET_BITS)); + + chunk = min(size, boundary - from); + if (chunk) { + mmu_pt_map_l3(from, to, chunk); + from += chunk; + to += chunk; + size -= chunk; + } + + // L2 mappings + chunk = ALIGN_DOWN(size, MASK(VADDR_L2_OFFSET_BITS)); + if (chunk && (to & VADDR_L2_ALIGN_MASK) == 0) { + mmu_pt_map_l2(from, to, chunk); + from += chunk; + to += chunk; + size -= chunk; + } + + // L3 mappings to end + if (size) { + mmu_pt_map_l3(from, to, size); + } + + return 0; +} + +static u64 mmu_make_table_pte(u64 *addr) +{ + u64 pte = FIELD_PREP(PTE_TYPE, PTE_TABLE) | PTE_VALID; + pte |= (uintptr_t)addr; + pte |= PTE_ACCESS; + return pte; +} + +static void mmu_init_pagetables(void) +{ + mmu_pt_L0 = memalign(PAGE_SIZE, sizeof(u64) * 2); + mmu_pt_L1 = memalign(PAGE_SIZE, sizeof(u64) * ENTRIES_PER_L1_TABLE); + + memset64(mmu_pt_L0, 0, sizeof(u64) * 2); + memset64(mmu_pt_L1, 0, sizeof(u64) * ENTRIES_PER_L1_TABLE); + + mmu_pt_L0[0] = mmu_make_table_pte(&mmu_pt_L1[0]); + mmu_pt_L0[1] = mmu_make_table_pte(&mmu_pt_L1[ENTRIES_PER_L1_TABLE >> 1]); +} + +void mmu_add_mapping(u64 from, u64 to, size_t size, u8 attribute_index, u64 perms) +{ + if (mmu_map(from, + to | PTE_MAIR_IDX(attribute_index) | PTE_ACCESS | PTE_VALID | PTE_SH_OS | perms, + size) < 0) + panic("Failed to add MMU mapping 0x%lx -> 0x%lx (0x%lx)\n", from, to, size); + + sysop("dsb ishst"); + sysop("tlbi vmalle1is"); + sysop("dsb ish"); + sysop("isb"); +} + +void mmu_rm_mapping(u64 from, size_t size) +{ + if (mmu_map(from, 0, size) < 0) + panic("Failed to rm MMU mapping at 0x%lx (0x%lx)\n", from, size); +} + +static void mmu_map_mmio(void) +{ + int node = adt_path_offset(adt, "/arm-io"); + if (node < 0) { + printf("MMU: ARM-IO node not found!\n"); + return; + } + u32 ranges_len; + const u32 *ranges = adt_getprop(adt, node, "ranges", &ranges_len); + if (!ranges) { + printf("MMU: Failed to 
get ranges property!\n");
        return;
    }
    // Assume all cell counts are 2 (64bit)
    // Entries are 24 bytes: u64 parent addr, u64 bus addr, u64 size —
    // NOTE(review): only cells [2..5] are read here; confirm field order.
    int range_cnt = ranges_len / 24;
    while (range_cnt--) {
        u64 bus = ranges[2] | ((u64)ranges[3] << 32);
        u64 size = ranges[4] | ((u64)ranges[5] << 32);

        mmu_add_mapping(bus, bus, size, MAIR_IDX_DEVICE_nGnRnE, PERM_RW_EL0);

        ranges += 6;
    }
}

/*
 * Apply /defaults/pmap-io-ranges overrides from the ADT: selected ranges are
 * remapped as Device-nGnRE or Normal-NC instead of the default nGnRnE set up
 * by mmu_map_mmio().
 */
static void mmu_remap_ranges(void)
{

    int node = adt_path_offset(adt, "/defaults");
    if (node < 0) {
        printf("MMU: defaults node not found!\n");
        return;
    }
    u32 ranges_len;
    const u32 *ranges = adt_getprop(adt, node, "pmap-io-ranges", &ranges_len);
    if (!ranges) {
        printf("MMU: Failed to get pmap-io-ranges property!\n");
        return;
    }
    // 24-byte entries: u64 addr, u64 size, u32 flags, plus one u32 that is
    // not interpreted here
    int range_cnt = ranges_len / 24;
    while (range_cnt--) {
        u64 addr = ranges[0] | ((u64)ranges[1] << 32);
        u64 size = ranges[2] | ((u64)ranges[3] << 32);
        u32 flags = ranges[4];

        // TODO: is this the right logic?
        // Flag nibble 8 -> posted device memory; the literal 0x60004016 is a
        // magic value observed for non-cacheable normal memory —
        // NOTE(review): semantics of these flags are assumed, not documented.
        if ((flags >> 28) == 8) {
            printf("MMU: Adding Device-nGnRE mapping at 0x%lx (0x%lx)\n", addr, size);
            mmu_add_mapping(addr, addr, size, MAIR_IDX_DEVICE_nGnRE, PERM_RW_EL0);
        } else if (flags == 0x60004016) {
            printf("MMU: Adding Normal-NC mapping at 0x%lx (0x%lx)\n", addr, size);
            mmu_add_mapping(addr, addr, size, MAIR_IDX_NORMAL_NC, PERM_RW_EL0);
        }

        ranges += 6;
    }
}

/*
 * Map the framebuffer as Normal-NC. The range is cleaned+invalidated first
 * so no dirty cache lines alias the now-uncached mapping.
 */
void mmu_map_framebuffer(u64 addr, size_t size)
{
    printf("MMU: Adding Normal-NC mapping at 0x%lx (0x%zx) for framebuffer\n", addr, size);
    dc_civac_range((void *)addr, size);
    mmu_add_mapping(addr, addr, size, MAIR_IDX_NORMAL_NC, PERM_RW_EL0);
}

/*
 * Build the boot-time address space: MMIO identity map, RAM identity map
 * (minus TZ carveouts), RX remap of the m1n1 image, a stack guard page,
 * three aliased RAM views with different permissions, four aliased device
 * views of low MMIO space, and finally the pmap-io-ranges overrides.
 */
static void mmu_add_default_mappings(void)
{
    // RAM base rounded down to a 4GB boundary so the OR-based aliases work
    ram_base = ALIGN_DOWN(cur_boot_args.phys_base, BIT(32));
    uint64_t ram_size = cur_boot_args.mem_size + cur_boot_args.phys_base - ram_base;
    ram_size = ALIGN_DOWN(ram_size, 0x4000);

    printf("MMU: RAM base: 0x%lx\n", ram_base);
    printf("MMU: Top of normal RAM: 0x%lx\n", ram_base + ram_size);

    mmu_map_mmio();

    /*
     * Create identity mapping for RAM from 0x08_0000_0000
     * With SPRR enabled, this becomes RW.
     * This range includes all real RAM, including carveouts
     */
    mmu_add_mapping(ram_base, ram_base, cur_boot_args.mem_size_actual, MAIR_IDX_NORMAL, PERM_RWX);

    /* Unmap carveout regions */
    mcc_unmap_carveouts();

    /*
     * Remap m1n1 executable code as RX.
     */
    mmu_add_mapping((u64)_base, (u64)_base, (u64)_rodata_end - (u64)_base, MAIR_IDX_NORMAL,
                    PERM_RX_EL0);

    /*
     * Make guard page at the end of the main stack
     */
    mmu_rm_mapping((u64)_stack_top, PAGE_SIZE);

    /*
     * Create mapping for RAM from 0x88_0000_0000,
     * read/writable/exec by EL0 (but not executable by EL1)
     * With SPRR enabled, this becomes RX_EL0.
     */
    mmu_add_mapping(ram_base | REGION_RWX_EL0, ram_base, ram_size, MAIR_IDX_NORMAL, PERM_RWX_EL0);
    /*
     * Create mapping for RAM from 0x98_0000_0000,
     * read/writable by EL0 (but not executable by EL1)
     * With SPRR enabled, this becomes RW_EL0.
     */
    mmu_add_mapping(ram_base | REGION_RW_EL0, ram_base, ram_size, MAIR_IDX_NORMAL, PERM_RW_EL0);
    /*
     * Create mapping for RAM from 0xa8_0000_0000,
     * read/executable by EL1
     * This allows executing from dynamic regions in EL1
     */
    mmu_add_mapping(ram_base | REGION_RX_EL1, ram_base, ram_size, MAIR_IDX_NORMAL, PERM_RX_EL0);

    /*
     * Create four seperate full mappings of MMIO space, with different access types
     */
    mmu_add_mapping(0xc000000000, 0x0000000000, 0x0800000000, MAIR_IDX_DEVICE_GRE, PERM_RW_EL0);
    mmu_add_mapping(0xd000000000, 0x0000000000, 0x0800000000, MAIR_IDX_DEVICE_nGRE, PERM_RW_EL0);
    mmu_add_mapping(0xe000000000, 0x0000000000, 0x0800000000, MAIR_IDX_DEVICE_nGnRnE, PERM_RW_EL0);
    mmu_add_mapping(0xf000000000, 0x0000000000, 0x0800000000, MAIR_IDX_DEVICE_nGnRE, PERM_RW_EL0);

    /*
     * Handle pmap-ranges
     */
    mmu_remap_ranges();
}

/*
 * Program MAIR/TCR/TTBR for the 16K-granule layout described above and
 * broadcast a TLB invalidate. Does not yet enable the MMU (SCTLR is written
 * by the callers).
 */
static void mmu_configure(void)
{
    msr(MAIR_EL1, (MAIR_ATTR_NORMAL_DEFAULT << MAIR_SHIFT_NORMAL) |
                      (MAIR_ATTR_DEVICE_nGnRnE << MAIR_SHIFT_DEVICE_nGnRnE) |
                      (MAIR_ATTR_DEVICE_nGnRE << 
MAIR_SHIFT_DEVICE_nGnRE) | + (MAIR_ATTR_NORMAL_NC << MAIR_SHIFT_NORMAL_NC)); + msr(TCR_EL1, FIELD_PREP(TCR_IPS, TCR_IPS_4TB) | FIELD_PREP(TCR_TG1, TCR_TG1_16K) | + FIELD_PREP(TCR_SH1, TCR_SH1_IS) | FIELD_PREP(TCR_ORGN1, TCR_ORGN1_WBWA) | + FIELD_PREP(TCR_IRGN1, TCR_IRGN1_WBWA) | FIELD_PREP(TCR_T1SZ, TCR_T1SZ_48BIT) | + FIELD_PREP(TCR_TG0, TCR_TG0_16K) | FIELD_PREP(TCR_SH0, TCR_SH0_IS) | + FIELD_PREP(TCR_ORGN0, TCR_ORGN0_WBWA) | FIELD_PREP(TCR_IRGN0, TCR_IRGN0_WBWA) | + FIELD_PREP(TCR_T0SZ, TCR_T0SZ_48BIT)); + + msr(TTBR0_EL1, (uintptr_t)mmu_pt_L0); + msr(TTBR1_EL1, (uintptr_t)mmu_pt_L0); + + // Armv8-A Address Translation, 100940_0101_en, page 28 + sysop("dsb ishst"); + sysop("tlbi vmalle1is"); + sysop("dsb ish"); + sysop("isb"); +} + +static void mmu_init_sprr(void) +{ + msr_sync(SYS_IMP_APL_SPRR_CONFIG_EL1, 1); + msr_sync(SYS_IMP_APL_SPRR_PERM_EL0, SPRR_DEFAULT_PERM_EL0); + msr_sync(SYS_IMP_APL_SPRR_PERM_EL1, SPRR_DEFAULT_PERM_EL1); + msr_sync(SYS_IMP_APL_SPRR_CONFIG_EL1, 0); +} + +void mmu_init(void) +{ + printf("MMU: Initializing...\n"); + + if (read_sctlr() & SCTLR_M) { + printf("MMU: already intialized.\n"); + return; + } + + mmu_init_pagetables(); + mmu_add_default_mappings(); + mmu_configure(); + mmu_init_sprr(); + + // Enable EL0 memory access by EL1 + msr(PAN, 0); + + // RES1 bits + u64 sctlr = SCTLR_LSMAOE | SCTLR_nTLSMD | SCTLR_TSCXT | SCTLR_ITD; + // Configure translation + sctlr |= SCTLR_I | SCTLR_C | SCTLR_M | SCTLR_SPAN; + + printf("MMU: SCTLR_EL1: %lx -> %lx\n", mrs(SCTLR_EL1), sctlr); + write_sctlr(sctlr); + printf("MMU: running with MMU and caches enabled!\n"); +} + +static void mmu_secondary_setup(void) +{ + mmu_configure(); + mmu_init_sprr(); + + // Enable EL0 memory access by EL1 + msr(PAN, 0); + + // RES1 bits + u64 sctlr = SCTLR_LSMAOE | SCTLR_nTLSMD | SCTLR_TSCXT | SCTLR_ITD; + // Configure translation + sctlr |= SCTLR_I | SCTLR_C | SCTLR_M | SCTLR_SPAN; + write_sctlr(sctlr); +} + +void mmu_init_secondary(int cpu) +{ + smp_call4(cpu, 
mmu_secondary_setup, 0, 0, 0, 0); + smp_wait(cpu); +} + +void mmu_shutdown(void) +{ + fb_console_reserve_lines(3); + printf("MMU: shutting down...\n"); + write_sctlr(read_sctlr() & ~(SCTLR_I | SCTLR_C | SCTLR_M)); + printf("MMU: shutdown successful, clearing caches\n"); + dcsw_op_all(DCSW_OP_DCCISW); +} + +u64 mmu_disable(void) +{ + u64 sctlr_old = read_sctlr(); + if (!(sctlr_old & SCTLR_M)) + return sctlr_old; + + write_sctlr(sctlr_old & ~(SCTLR_I | SCTLR_C | SCTLR_M)); + dcsw_op_all(DCSW_OP_DCCISW); + + return sctlr_old; +} + +void mmu_restore(u64 state) +{ + write_sctlr(state); +} diff --git a/tools/src/memory.h b/tools/src/memory.h new file mode 100644 index 0000000..247a5d3 --- /dev/null +++ b/tools/src/memory.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef MEMORY_H +#define MEMORY_H + +#include "cpu_regs.h" +#include "types.h" + +#define REGION_RWX_EL0 0x80000000000 +#define REGION_RW_EL0 0xa0000000000 +#define REGION_RX_EL1 0xc0000000000 + +/* + * https://armv8-ref.codingbelief.com/en/chapter_d4/d43_2_armv8_translation_table_level_3_descriptor_formats.html + * PTE_TYPE:PTE_BLOCK indicates that the page table entry (PTE) points to a physical memory block + * PTE_TYPE:PTE_TABLE indicates that the PTE points to another PTE + * PTE_TYPE:PTE_PAGE indicates that the PTE points to a single page + * PTE_FLAG_ACCESS is required to allow access to the memory region + * PTE_MAIR_IDX sets the MAIR index to be used for this PTE + */ +#define PTE_VALID BIT(0) +#define PTE_TYPE BIT(1) +#define PTE_BLOCK 0 +#define PTE_TABLE 1 +#define PTE_PAGE 1 +#define PTE_ACCESS BIT(10) +#define PTE_MAIR_IDX(i) ((i & 7) << 2) +#define PTE_PXN BIT(53) +#define PTE_UXN BIT(54) +#define PTE_AP_RO BIT(7) +#define PTE_AP_EL0 BIT(6) +#define PTE_SH_NS (0b00 << 8) +#define PTE_SH_OS (0b10 << 8) +#define PTE_SH_IS (0b11 << 8) + +#define PERM_RO_EL0 PTE_AP_EL0 | PTE_AP_RO | PTE_PXN | PTE_UXN +#define PERM_RW_EL0 PTE_AP_EL0 | PTE_PXN | PTE_UXN +#define PERM_RX_EL0 PTE_AP_EL0 | 
PTE_AP_RO +#define PERM_RWX_EL0 PTE_AP_EL0 + +#define PERM_RO PTE_AP_RO | PTE_PXN | PTE_UXN +#define PERM_RW PTE_PXN | PTE_UXN +#define PERM_RX PTE_AP_RO | PTE_UXN +#define PERM_RWX 0 + +#define MAIR_IDX_NORMAL 0 +#define MAIR_IDX_NORMAL_NC 1 +#define MAIR_IDX_DEVICE_nGnRnE 2 +#define MAIR_IDX_DEVICE_nGnRE 3 +#define MAIR_IDX_DEVICE_nGRE 4 +#define MAIR_IDX_DEVICE_GRE 5 + +#ifndef __ASSEMBLER__ + +#include "utils.h" + +extern uint64_t ram_base; + +void ic_ivau_range(void *addr, size_t length); +void dc_ivac_range(void *addr, size_t length); +void dc_zva_range(void *addr, size_t length); +void dc_cvac_range(void *addr, size_t length); +void dc_cvau_range(void *addr, size_t length); +void dc_civac_range(void *addr, size_t length); + +#define DCSW_OP_DCISW 0x0 +#define DCSW_OP_DCCISW 0x1 +#define DCSW_OP_DCCSW 0x2 +void dcsw_op_all(u64 op_type); + +void mmu_init(void); +void mmu_init_secondary(int cpu); +void mmu_shutdown(void); +void mmu_add_mapping(u64 from, u64 to, size_t size, u8 attribute_index, u64 perms); +void mmu_rm_mapping(u64 from, size_t size); +void mmu_map_framebuffer(u64 addr, size_t size); + +u64 mmu_disable(void); +void mmu_restore(u64 state); + +static inline bool mmu_active(void) +{ + return mrs(SCTLR_EL1) & SCTLR_M; +} + +#endif + +#endif diff --git a/tools/src/memory_asm.S b/tools/src/memory_asm.S new file mode 100644 index 0000000..2c2c778 --- /dev/null +++ b/tools/src/memory_asm.S @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2013-2020, ARM Limited and Contributors. All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#define LOC_SHIFT 24 +#define CLIDR_FIELD_WIDTH 3 +#define LEVEL_SHIFT 1 + +.macro func, name +.globl \name +.type \name, @function +\name: +.endm + + .globl dcsw_op_all + +/* + * This macro can be used for implementing various data cache operations `op` + */ +.macro do_dcache_maintenance_by_mva op + /* Exit early if size is zero */ + cbz x1, exit_loop_\op + dcache_line_size x2, x3 + add x1, x0, x1 + sub x3, x2, #1 + bic x0, x0, x3 +loop_\op: + dc \op, x0 + add x0, x0, x2 + cmp x0, x1 + b.lo loop_\op + dsb sy +exit_loop_\op: + ret +.endm + + /* --------------------------------------------------------------- + * Data cache operations by set/way to the level specified + * + * The main function, do_dcsw_op requires: + * x0: The operation type (0-2), as defined in arch.h + * x3: The last cache level to operate on + * x9: clidr_el1 + * x10: The cache level to begin operation from + * and will carry out the operation on each data cache from level 0 + * to the level in x3 in sequence + * + * The dcsw_op macro sets up the x3 and x9 parameters based on + * clidr_el1 cache information before invoking the main function + * --------------------------------------------------------------- + */ + + .macro dcsw_op shift, fw, ls + mrs x9, clidr_el1 + ubfx x3, x9, \shift, \fw + lsl x3, x3, \ls + mov x10, xzr + b do_dcsw_op + .endm + +func do_dcsw_op + cbz x3, exit + adr x14, dcsw_loop_table // compute inner loop address + add x14, x14, x0, lsl #5 // inner loop is 8x32-bit instructions + mov x0, x9 + mov w8, #1 +loop1: + add x2, x10, x10, lsr #1 // work out 3x current cache level + lsr x1, x0, x2 // extract cache type bits from clidr + and x1, x1, #7 // mask the bits for current cache only + cmp x1, #2 // see what cache we have at this level + b.lo level_done // nothing to do if no cache or icache + + msr csselr_el1, x10 // select current cache level in csselr + isb // isb to sych the new cssr&csidr + mrs x1, ccsidr_el1 // read 
the new ccsidr + and x2, x1, #7 // extract the length of the cache lines + add x2, x2, #4 // add 4 (line length offset) + ubfx x4, x1, #3, #10 // maximum way number + clz w5, w4 // bit position of way size increment + lsl w9, w4, w5 // w9 = aligned max way number + lsl w16, w8, w5 // w16 = way number loop decrement + orr w9, w10, w9 // w9 = combine way and cache number + ubfx w6, w1, #13, #15 // w6 = max set number + lsl w17, w8, w2 // w17 = set number loop decrement + dsb sy // barrier before we start this level + br x14 // jump to DC operation specific loop + + .macro dcsw_loop _op +loop2_\_op: + lsl w7, w6, w2 // w7 = aligned max set number + +loop3_\_op: + orr w11, w9, w7 // combine cache, way and set number + dc \_op, x11 + subs w7, w7, w17 // decrement set number + b.hs loop3_\_op + + subs x9, x9, x16 // decrement way number + b.hs loop2_\_op + + b level_done + .endm + +level_done: + add x10, x10, #2 // increment cache number + cmp x3, x10 + b.hi loop1 + msr csselr_el1, xzr // select cache level 0 in csselr + dsb sy // barrier to complete final cache operation + isb +exit: + ret + +dcsw_loop_table: + dcsw_loop isw + dcsw_loop cisw + dcsw_loop csw + + +func dcsw_op_all + dcsw_op #LOC_SHIFT, #CLIDR_FIELD_WIDTH, #LEVEL_SHIFT + + /* --------------------------------------------------------------- + * Helper macro for data cache operations by set/way for the + * level specified + * --------------------------------------------------------------- + */ + .macro dcsw_op_level level + mrs x9, clidr_el1 + mov x3, \level + sub x10, x3, #2 + b do_dcsw_op + .endm + + /* --------------------------------------------------------------- + * Data cache operations by set/way for level 1 cache + * + * The main function, do_dcsw_op requires: + * x0: The operation type (0-2), as defined in arch.h + * --------------------------------------------------------------- + */ +func dcsw_op_level1 + dcsw_op_level #(1 << LEVEL_SHIFT) + + /* 
--------------------------------------------------------------- + * Data cache operations by set/way for level 2 cache + * + * The main function, do_dcsw_op requires: + * x0: The operation type (0-2), as defined in arch.h + * --------------------------------------------------------------- + */ +func dcsw_op_level2 + dcsw_op_level #(2 << LEVEL_SHIFT) + + /* --------------------------------------------------------------- + * Data cache operations by set/way for level 3 cache + * + * The main function, do_dcsw_op requires: + * x0: The operation type (0-2), as defined in arch.h + * --------------------------------------------------------------- + */ +func dcsw_op_level3 + dcsw_op_level #(3 << LEVEL_SHIFT) diff --git a/tools/src/minilzlib/dictbuf.c b/tools/src/minilzlib/dictbuf.c new file mode 100644 index 0000000..02875dc --- /dev/null +++ b/tools/src/minilzlib/dictbuf.c @@ -0,0 +1,155 @@ +/*++ + +Copyright (c) Alex Ionescu. All rights reserved. + +Module Name: + + dictbuf.c + +Abstract: + + This module implements the management of the LZMA "history buffer" which is + often called the "dictionary". Routines for writing into the history buffer + as well as for reading back from it are implemented, as well as mechanisms + for repeating previous symbols forward into the dictionary. This forms the + basis for LZMA match distance-length pairs that are found and decompressed. + Note that for simplicity's sake, the dictionary is stored directly in the + output buffer, such that no "flushing" or copying is needed back and forth. + +Author: + + Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version + +Environment: + + Windows & Linux, user mode and kernel mode. 
+ +--*/ + +#include "minlzlib.h" + +// +// State used for the history buffer (dictionary) +// +typedef struct _DICTIONARY_STATE +{ + // + // Buffer, start position, current position, and offset limit in the buffer + // + uint8_t* Buffer; + uint32_t BufferSize; + uint32_t Start; + uint32_t Offset; + uint32_t Limit; +} DICTIONARY_STATE, *PDICTIONARY_STATE; +DICTIONARY_STATE Dictionary; + +void +DtInitialize ( + uint8_t* HistoryBuffer, + uint32_t Size + ) +{ + // + // Initialize the buffer and reset the position + // + Dictionary.Buffer = HistoryBuffer; + Dictionary.Offset = 0; + Dictionary.BufferSize = Size; +} + +bool +DtSetLimit ( + uint32_t Limit + ) +{ + // + // Make sure that the passed in dictionary limit fits within the size, and + // then set this as the new limit. Save the starting point (current offset) + // + if ((Dictionary.Offset + Limit) > Dictionary.BufferSize) + { + return false; + } + Dictionary.Limit = Dictionary.Offset + Limit; + Dictionary.Start = Dictionary.Offset; + return true; +} + +bool +DtIsComplete ( + uint32_t* BytesProcessed + ) +{ + // + // Return bytes processed and if the dictionary has been fully written to + // + *BytesProcessed = Dictionary.Offset - Dictionary.Start; + return (Dictionary.Offset == Dictionary.Limit); +} + +bool +DtCanWrite ( + uint32_t* Position + ) +{ + // + // Return our position and make sure it's not beyond the uncompressed size + // + *Position = Dictionary.Offset; + return (Dictionary.Offset < Dictionary.Limit); +} + +uint8_t +DtGetSymbol ( + uint32_t Distance + ) +{ + // + // If the dictionary is still empty, just return 0, otherwise, return the + // symbol that is Distance bytes backward. 
+ // + if (Distance > Dictionary.Offset) + { + return 0; + } + return Dictionary.Buffer[Dictionary.Offset - Distance]; +} + +void +DtPutSymbol ( + uint8_t Symbol + ) +{ + // + // Write the symbol and advance our position + // + Dictionary.Buffer[Dictionary.Offset++] = Symbol; +} + +bool +DtRepeatSymbol ( + uint32_t Length, + uint32_t Distance + ) +{ + // + // Make sure we never get asked to write past the end of the dictionary. We + // should also not allow the distance to go beyond the current offset since + // DtGetSymbol will return 0 thinking the dictionary is empty. + // + if (((Length + Dictionary.Offset) > Dictionary.Limit) || + (Distance > Dictionary.Offset)) + { + return false; + } + + // + // Now rewrite the stream of past symbols forward into the dictionary. + // + do + { + DtPutSymbol(DtGetSymbol(Distance)); + } while (--Length > 0); + return true; +} diff --git a/tools/src/minilzlib/inputbuf.c b/tools/src/minilzlib/inputbuf.c new file mode 100644 index 0000000..67d652c --- /dev/null +++ b/tools/src/minilzlib/inputbuf.c @@ -0,0 +1,144 @@ +/*++ + +Copyright (c) Alex Ionescu. All rights reserved. + +Module Name: + + inputbuf.c + +Abstract: + + This module implements helper functions for managing the input buffer that + contains arithmetic-coded LZ77 match distance-length pairs and raw literals + Both seeking (such that an external reader can refer to multiple bytes) and + reading (capturing) an individual byte are supported. Support for aligning + input data to 4 bytes (which is a requirement for XZ-encoded files) is also + implemented. + +Author: + + Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version + +Environment: + + Windows & Linux, user mode and kernel mode. 
+ +--*/ + +#include "minlzlib.h" + +// +// Input Buffer State +// +typedef struct _BUFFER_STATE +{ + // + // Start of the buffer, current offset, current packet end, and total input size + // + uint8_t* Buffer; + uint32_t Offset; + uint32_t SoftLimit; + uint32_t Size; +} BUFFER_STATE, * PBUFFER_STATE; +BUFFER_STATE In; + +bool +BfAlign ( + void + ) +{ + uint8_t padByte; + // + // Keep reading until we reach 32-bit alignment. All bytes must be zero. + // + while (In.Offset & 3) + { + if (!BfRead(&padByte) || (padByte != 0)) + { + return false; + } + } + return true; +} + +bool +BfSetSoftLimit ( + uint32_t Remaining + ) +{ + if ((In.Size - In.Offset) < Remaining) + { + return false; + } + In.SoftLimit = In.Offset + Remaining; + return true; +} + +void +BfResetSoftLimit ( + void + ) +{ + In.SoftLimit = In.Size; +} + +bool +BfSeek ( + uint32_t Length, + uint8_t** Bytes + ) +{ + // + // Make sure the input buffer has enough space to seek the desired size, if + // it does, return the current position and then seek past the desired size + // + if ((In.Offset + Length) > In.SoftLimit) + { + *Bytes = 0; + return false; + } + *Bytes = &In.Buffer[In.Offset]; + In.Offset += Length; + return true; +} + +uint32_t +BfTell ( + void + ) +{ + return In.Offset; +} + +bool +BfRead ( + uint8_t* Byte + ) +{ + uint8_t* pByte; + // + // Seek past the byte and read it + // + if (!BfSeek(sizeof(*Byte), &pByte)) + { + *Byte = 0; + return false; + } + *Byte = *pByte; + return true; +} + +void +BfInitialize ( + uint8_t* InputBuffer, + uint32_t InputSize + ) +{ + // + // Save all the data in the context buffer state + // + In.Buffer = InputBuffer; + In.Size = InputSize; + In.SoftLimit = InputSize; + In.Offset = 0; +} diff --git a/tools/src/minilzlib/lzma2dec.c b/tools/src/minilzlib/lzma2dec.c new file mode 100644 index 0000000..7a15513 --- /dev/null +++ b/tools/src/minilzlib/lzma2dec.c @@ -0,0 +1,228 @@ +/*++ + +Copyright (c) Alex Ionescu. All rights reserved. 
/*++

Module Name:

    lzma2dec.c

Abstract:

    This module implements the LZMA2 decoding logic responsible for parsing the
    LZMA2 Control Byte, the Information Bytes (Compressed & Uncompressed Stream
    Size), and the Property Byte during the initial Dictionary Reset. Note that
    this module only implements support for a single such reset (i.e.: archives
    in "solid" mode).

Author:

    Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version

Environment:

    Windows & Linux, user mode and kernel mode.

--*/

#include "minlzlib.h"
#include "lzma2dec.h"

//
// Decodes one compressed LZMA2 chunk and verifies that exactly
// CompressedSize input bytes and RawSize output bytes were consumed,
// accumulating the decoded byte count into *BytesProcessed.
//
bool
Lz2DecodeChunk (
    uint32_t* BytesProcessed,
    uint32_t RawSize,
    uint16_t CompressedSize
    )
{
    uint32_t bytesProcessed;

    //
    // Go and decode this chunk, sequence by sequence
    //
    if (!LzDecode())
    {
        return false;
    }

    //
    // In a correctly formatted stream, the last arithmetic-coded sequence must
    // be zero once we finished with the last chunk. Make sure the stream ended
    // exactly where we expected it to.
    //
    if (!RcIsComplete(&bytesProcessed) || (bytesProcessed != CompressedSize))
    {
        return false;
    }

    //
    // The entire output stream must have been written to, and the dictionary
    // must be full now.
    //
    if (!DtIsComplete(&bytesProcessed) || (bytesProcessed != RawSize))
    {
        return false;
    }
    *BytesProcessed += bytesProcessed;
    return true;
}

//
// Walks the sequence of LZMA2 chunks (compressed and uncompressed) until the
// terminating zero control byte, decoding each chunk into the dictionary (or
// only totalling sizes when GetSizeOnly is set). Returns true only when the
// end-of-stream control byte is reached cleanly.
//
bool
Lz2DecodeStream (
    uint32_t* BytesProcessed,
    bool GetSizeOnly
    )
{
    uint8_t* inBytes;
    LZMA2_CONTROL_BYTE controlByte;
    uint8_t propertyByte;
    uint32_t rawSize;
    uint16_t compressedSize;

    //
    // Read the first control byte
    //
    *BytesProcessed = 0;
    while (BfRead(&controlByte.Value))
    {
        //
        // When the LZMA2 control byte is 0, the entire stream is decoded. This
        // is the only success path out of this function.
        //
        if (controlByte.Value == 0)
        {
            return true;
        }

        //
        // Read the appropriate number of info bytes based on the stream type.
        //
        if (!BfSeek((controlByte.u.Common.IsLzma == 1 ) ? 4 : 2, &inBytes))
        {
            break;
        }

        //
        // For LZMA streams calculate both the uncompressed and compressed size
        // from the info bytes. Uncompressed streams only have the former.
        //
        // NOTE(review): compressedSize is a uint16_t, but LZMA2 encodes sizes
        // up to 0x10000 (a stored 0xFFFF means 64KiB), which would wrap to 0
        // here — TODO confirm supported encoders never emit full 64KiB chunks.
        //
        if (controlByte.u.Common.IsLzma == 1)
        {
            rawSize = controlByte.u.Lzma.RawSize << 16;
            compressedSize = inBytes[2] << 8;
            compressedSize += inBytes[3] + 1;
        }
        else
        {
            rawSize = 0;
            compressedSize = 0;
        }

        //
        // Make sure that the output buffer that was supplied is big enough to
        // fit the uncompressed chunk, unless we're just calculating the size.
        //
        rawSize += inBytes[0] << 8;
        rawSize += inBytes[1] + 1;
        if (!GetSizeOnly && !DtSetLimit(rawSize))
        {
            break;
        }

        //
        // Check if the full LZMA state needs to be reset, which must happen at
        // the start of stream. Also check for a property reset, which occurs
        // when an LZMA stream follows an uncompressed stream. Separately,
        // check for a state reset without a property byte (happens rarely,
        // but does happen in a few compressed streams).
        //
        if ((controlByte.u.Lzma.ResetState == Lzma2FullReset) ||
            (controlByte.u.Lzma.ResetState == Lzma2PropertyReset))
        {
            //
            // Read the LZMA properties and then initialize the decoder.
            //
            if (!BfRead(&propertyByte) || !LzInitialize(propertyByte))
            {
                break;
            }
        }
        else if (controlByte.u.Lzma.ResetState == Lzma2SimpleReset)
        {
            LzResetState();
        }
        //
        // else controlByte.u.Lzma.ResetState == Lzma2NoReset, since a two-bit
        // field only has four possible values
        //

        //
        // Don't do any decompression if the caller only wants to know the size
        //
        // NOTE(review): BfSeek's result is ignored here; a failed seek leaves
        // the position short so the next BfRead parses a mid-chunk byte as a
        // control byte — presumably harmless (eventual failure), but verify.
        //
        if (GetSizeOnly)
        {
            *BytesProcessed += rawSize;
            BfSeek((controlByte.u.Common.IsLzma == 1) ?
                   compressedSize : rawSize,
                   &inBytes);
            continue;
        }
        else if (controlByte.u.Common.IsLzma == 0)
        {
            //
            // Seek to the requested size in the input buffer
            //
            if (!BfSeek(rawSize, &inBytes))
            {
                return false;
            }

            //
            // Copy the data into the dictionary as-is
            //
            for (uint32_t i = 0; i < rawSize; i++)
            {
                DtPutSymbol(inBytes[i]);
            }

            //
            // Update bytes and keep going to the next chunk
            //
            *BytesProcessed += rawSize;
            continue;
        }

        //
        // Record how many bytes are left in this sequence as our SoftLimit for
        // the other operations. This allows us to omit most range checking
        // logic in rangedec.c. This soft limit lasts until reset below.
        //
        if (!BfSetSoftLimit(compressedSize))
        {
            break;
        }

        //
        // Read the initial range and code bytes to initialize the arithmetic
        // coding decoder, and let it know how much input data exists. We've
        // already validated that this much space exists in the input buffer.
        //
        if (!RcInitialize(&compressedSize))
        {
            break;
        }

        //
        // Start decoding the LZMA sequences in this chunk
        //
        if (!Lz2DecodeChunk(BytesProcessed, rawSize, compressedSize))
        {
            break;
        }

        //
        // Having decoded that chunk, reset our soft limit (to the full
        // input stream) so we can read the next chunk.
        //
        BfResetSoftLimit();
    }
    return false;
}

/*
 * ---- collapsed diff metadata: new file tools/src/minilzlib/lzma2dec.h ----
 * diff --git a/tools/src/minilzlib/lzma2dec.h b/tools/src/minilzlib/lzma2dec.h
 * new file mode 100644 index 0000000..0b31440
 * --- /dev/null +++ b/tools/src/minilzlib/lzma2dec.h @@ -0,0 +1,91 @@
 */
/*++

Copyright (c) Alex Ionescu. All rights reserved.

Module Name:

    lzma2dec.h

Abstract:

    This header file contains C-style data structures and enumerations that map
    back to the LZMA2 standard. This includes the encoding of the LZMA2 Control
    Byte and the possible LZMA2 Reset States.

Author:

    Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version

Environment:

    Windows & Linux, user mode and kernel mode.
/*++ lzma2dec.h --*/

#pragma once

//
// The most complex LZMA sequence possible is a "match" sequence where the
// length is > 127 bytes, and the distance is > 127 bytes. This type of
// sequence starts with {1,1} for "match", followed by {1,1,nnnnnnnn} for
// "8-bit encoded length", followed by {1,1,1,1,1,1} to select the distance
// slot (63). That's 18 bits so far, which all come from arithmetic-coded
// bit trees with various probabilities. The next 26 bits are going to be
// fixed-probability, meaning that the bit tree is mathematically hardcoded
// at 50%. Finally, there are the last 4 "align" distance bits which also
// come from an arithmetic-coded bit tree, bringing the total such bits to
// 22.
//
// Each time we have to "normalize" the arithmetic coder, it consumes an
// additional byte. Normalization is done whenever we consume more than 8
// of the high bits of the coder's range (i.e.: below 2^24), so exactly
// every 8 direct bits (which always halve the range due to their 50%).
// The other bits can have arbitrary probabilities, but in the worst case
// we need to normalize the range every n bits. As such, this is a total of
// 20 worst-case normalization per LZMA sequence. Finally, we do one last
// normalization at the end of LzDecode, to make sure that the decoder is
// always in a normalized state. This means that a compressed chunk should
// be at least 21 bytes if we want to guarantee that LzDecode can never
// read past the current input stream, and avoid range checking.
//
#define LZMA_MAX_SEQUENCE_SIZE 21

//
// This describes the different ways an LZMA2 control byte can request a reset
//
typedef enum _LZMA2_COMPRESSED_RESET_STATE
{
    Lzma2NoReset = 0,
    Lzma2SimpleReset = 1,
    Lzma2PropertyReset = 2,
    Lzma2FullReset = 3
} LZMA2_COMPRESSED_RESET_STATE;

//
// This describes how an LZMA2 control byte can be parsed: the same byte is
// viewed either as a raw value, as an LZMA chunk header (top 5 size bits +
// reset state), or just via its IsLzma discriminator bit.
//
typedef union _LZMA2_CONTROL_BYTE
{
    union
    {
        struct
        {
            uint8_t ResetState : 2;
            uint8_t Reserved : 5;
            uint8_t IsLzma : 1;
        } Raw;
        struct
        {
            uint8_t RawSize : 5;
            uint8_t ResetState : 2;
            uint8_t IsLzma : 1;
        } Lzma;
        struct
        {
            uint8_t : 7;
            uint8_t IsLzma : 1;
        } Common;
    } u;
    uint8_t Value;
} LZMA2_CONTROL_BYTE;
static_assert(sizeof(LZMA2_CONTROL_BYTE) == 1, "Invalid control byte size");

/*
 * ---- collapsed diff metadata: new file tools/src/minilzlib/lzmadec.c ----
 * diff --git a/tools/src/minilzlib/lzmadec.c b/tools/src/minilzlib/lzmadec.c
 * new file mode 100644 index 0000000..1a3c420
 * --- /dev/null +++ b/tools/src/minilzlib/lzmadec.c @@ -0,0 +1,627 @@
 */
/*++

Copyright (c) Alex Ionescu. All rights reserved.

Module Name:

    lzmadec.c

Abstract:

    This module implements the LZMA Decoding Logic responsible for decoding the
    three possible types of LZMA "packets": matches, repetitions (short & long)
    and literals. The probability model for each type of packet is also stored
    in this file, along with the management of the previously seen packet types
    (which is tracked as the "sequence").

Author:

    Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version

Environment:

    Windows & Linux, user mode and kernel mode.
/*++ lzmadec.c --*/

#include "minlzlib.h"
#include "lzmadec.h"

//
// Probability Bit Model for Lengths in Rep and in Match sequences
//
typedef struct _LENGTH_DECODER_STATE
{
    //
    // Bit Model for choosing the type of length encoding
    //
    uint16_t Choice;
    uint16_t Choice2;
    //
    // Bit Model for each of the length encodings
    //
    uint16_t Low[LZMA_POSITION_COUNT][LZMA_MAX_LOW_LENGTH];
    uint16_t Mid[LZMA_POSITION_COUNT][LZMA_MAX_MID_LENGTH];
    uint16_t High[LZMA_MAX_HIGH_LENGTH];
} LENGTH_DECODER_STATE, * PLENGTH_DECODER_STATE;

//
// State used for LZMA decoding
//
typedef struct _DECODER_STATE
{
    //
    // Current type of sequence last decoded
    //
    LZMA_SEQUENCE_STATE Sequence;
    //
    // History of last 4 decoded distances
    //
    uint32_t Rep0;
    uint32_t Rep1;
    uint32_t Rep2;
    uint32_t Rep3;
    //
    // Pending length to repeat from dictionary
    //
    uint32_t Len;
    //
    // Probability Bit Models for all sequence types. The union lets
    // LzResetState iterate every probability as one flat array.
    //
    union
    {
        struct
        {
            //
            // Literal model
            //
            uint16_t Literal[LZMA_LITERAL_CODERS][LZMA_LC_MODEL_SIZE];
            //
            // Last-used-distance based models
            //
            uint16_t Rep[LzmaMaxState];
            uint16_t Rep0[LzmaMaxState];
            uint16_t Rep0Long[LzmaMaxState][LZMA_POSITION_COUNT];
            uint16_t Rep1[LzmaMaxState];
            uint16_t Rep2[LzmaMaxState];
            LENGTH_DECODER_STATE RepLen;
            //
            // Explicit distance match based models
            //
            uint16_t Match[LzmaMaxState][LZMA_POSITION_COUNT];
            uint16_t DistSlot[LZMA_FIRST_CONTEXT_DISTANCE_SLOT][LZMA_DISTANCE_SLOTS];
            uint16_t Dist[(1 << 7) - LZMA_FIRST_FIXED_DISTANCE_SLOT];
            uint16_t Align[LZMA_DISTANCE_ALIGN_SLOTS];
            LENGTH_DECODER_STATE MatchLen;
        } BitModel;
        uint16_t RawProbabilities[LZMA_BIT_MODEL_SLOTS];
    } u;
} DECODER_STATE, *PDECODER_STATE;
DECODER_STATE Decoder;

//
// LZMA decoding uses 3 "properties" which determine how the probability
// bit model will be laid out.
// These store the number of bits that are used
// to pick the correct Literal Coder ("lc"), the number of Position bits to
// select the Literal coder ("lp"), and the number of Position Bits used to
// select various lengths ("pb"). In LZMA2, these properties are encoded in
// a single byte with the formula: ((pb * 45) + lp * 9) + lc).
//
// We only support the default {lc = 3, lp = 0, pb = 2} properties, which
// are what the main encoders out there use. This means that a total of 2
// bits will be used for arithmetic-coded bit trees that are dependent on
// the current position, and that a total of 3 bits will be used when we
// pick the arithmetic-coded bit tree used for literal coding. The 0 means
// this selection will _not_ be dependent on the position in the buffer.
//
const uint8_t k_LzSupportedProperties =
    (LZMA_PB * 45) + (LZMA_LP * 9) + (LZMA_LC);

//
// Transitions the sequence state machine after decoding a literal packet.
//
void
LzSetLiteral (
    PLZMA_SEQUENCE_STATE State
    )
{
    if (*State <= LzmaLitShortrepLitLitState)
    {
        //
        // States 0-3 represent packets with at least 2 back-to-back literals,
        // so another literal now takes us to state 0 (3 back-to-back literals)
        //
        *State = LzmaLitLitLitState;
    }
    else if (*State <= LzmaLitShortrepState)
    {
        //
        // States 4-6 represent packets with a literal at the end, so seeing
        // another literal now takes us to 2 back-to-back literals, which are
        // state packets 1-3.
        //
        // States 7-9 represent packets with a literal at the start, followed
        // by a match/rep/shortrep. Seeing another literal now drops this first
        // literal and takes us to having a literal at the end, which are state
        // packets 4-6 that we just described in the paragraph above.
        //
        *State = (LZMA_SEQUENCE_STATE)(*State - 3);
    }
    else
    {
        //
        // Finally, state 10 and 11 represent cases without a single literal in
        // the last 2 sequence packets, so seeing a literal now takes us to a
        // "literal at the end" state, either following a match or a rep.
        //
        *State = (LZMA_SEQUENCE_STATE)(*State - 6);
    }
}

//
// Returns true if the last decoded packet was a literal (states 0-6).
//
bool
LzIsLiteral (
    LZMA_SEQUENCE_STATE State
    )
{
    //
    // States 0-6 describe literal packet sequences
    //
    return State < LzmaMaxLitState;
}

void
LzSetMatch (
    PLZMA_SEQUENCE_STATE State
    )
{
    //
    // Move to the appropriate "match" state based on current literal state
    //
    *State = LzIsLiteral(*State) ? LzmaLitMatchState : LzmaNonlitMatchState;
}

void
LzSetLongRep (
    PLZMA_SEQUENCE_STATE State
    )
{
    //
    // Move to the appropriate "long rep" state based on current literal state
    //
    *State = LzIsLiteral(*State) ? LzmaLitRepState : LzmaNonlitRepState;
}

void
LzSetShortRep (
    PLZMA_SEQUENCE_STATE State
    )
{
    //
    // Move to the appropriate "short rep" state based on current literal state
    //
    *State = LzIsLiteral(*State) ? LzmaLitShortrepState : LzmaNonlitRepState;
}

//
// Selects the arithmetic-coded bit tree used to decode the next literal,
// keyed off the high "lc" bits of the previously decoded symbol.
//
uint16_t*
LzGetLiteralSlot (
    void
    )
{
    uint8_t symbol;

    //
    // To pick the correct literal coder arithmetic-coded bit tree, LZMA uses
    // the "lc" parameter to choose the number of high bits from the previous
    // symbol (in the normal case, 3). It then combines that with the "lp"
    // parameter to choose the number of low bits from the current position in
    // the dictionary. However, since "lp" is normally 0, we can omit this.
    //
    symbol = DtGetSymbol(1);
    return Decoder.u.BitModel.Literal[symbol >> (8 - LZMA_LC)];
}

//
// Selects the distance-slot bit tree appropriate for the pending match
// length (dedicated trees for lengths 2-4, one shared tree for 5+).
//
uint16_t*
LzGetDistSlot (
    void
    )
{
    uint8_t slotIndex;

    //
    // There are 4 different arithmetic-coded bit trees which are used to pick
    // the correct "distance slot" when doing match distance decoding. Each of
    // them is used based on the length of the symbol that is being repeated.
    // For lengths of 2, 3, 4 bytes, a dedicated set of distance slots is used.
    // For lengths of 5 bytes or above, a shared set of distance slots is used.
    //
    if (Decoder.Len < (LZMA_FIRST_CONTEXT_DISTANCE_SLOT + LZMA_MIN_LENGTH))
    {
        slotIndex = (uint8_t)(Decoder.Len - LZMA_MIN_LENGTH);
    }
    else
    {
        slotIndex = LZMA_FIRST_CONTEXT_DISTANCE_SLOT - 1;
    }
    return Decoder.u.BitModel.DistSlot[slotIndex];
}

void
LzDecodeLiteral (
    void
    )
{
    uint16_t* probArray;
    uint8_t symbol, matchByte;

    //
    // First, choose the correct arithmetic-coded bit tree (which is based on
    // the last symbol we just decoded), then see if we last decoded a literal.
    //
    // If so, simply get the symbol from the bit tree as normal. However, if
    // we didn't last see a literal, we need to read the "match byte" that is
    // "n" bytes away from the last decoded match. We previously stored this in
    // rep0.
    //
    // Based on this match byte, we'll then use 2 other potential bit trees,
    // see LzDecodeMatched for more information.
    //
    probArray = LzGetLiteralSlot();
    if (LzIsLiteral(Decoder.Sequence))
    {
        symbol = RcGetBitTree(probArray, (1 << 8));
    }
    else
    {
        matchByte = DtGetSymbol(Decoder.Rep0 + 1);
        symbol = RcDecodeMatchedBitTree(probArray, matchByte);
    }

    //
    // Write the symbol and indicate that the last sequence was a literal
    //
    DtPutSymbol(symbol);
    LzSetLiteral(&Decoder.Sequence);
}

//
// Decodes a match/rep length into Decoder.Len using the given length model.
//
void
LzDecodeLen (
    PLENGTH_DECODER_STATE LenState,
    uint8_t PosBit
    )
{
    uint16_t* probArray;
    uint16_t limit;

    //
    // Lengths of 2 and higher are encoded in 3 possible types of arithmetic-
    // coded bit trees, depending on the size of the length.
    //
    // Lengths 2-9 are encoded in trees called "Low" using 3 bits of data.
    // Lengths 10-17 are encoded in trees called "Mid" using 3 bits of data.
    // Lengths 18-273 are encoded in a tree called "High" using 8 bits of data.
    //
    // The appropriate "Low" or "Mid" tree is selected based on the bottom 2
    // position bits (0-3) (in the LZMA standard, this is based on the "pb"),
    // while the "High" tree is shared for all positions.
    //
    // Two arithmetic-coded bit trees, called "Choice" and "Choice2" tell us
    // the type of Length, so we can choose the right tree. {0, n} tells us
    // to use the Low trees, while {1, 0} tells us to use the Mid trees. Lastly
    // {1, 1} tells us to use the High tree.
    //
    Decoder.Len = LZMA_MIN_LENGTH;
    if (RcIsBitSet(&LenState->Choice))
    {
        if (RcIsBitSet(&LenState->Choice2))
        {
            probArray = LenState->High;
            limit = LZMA_MAX_HIGH_LENGTH;
            Decoder.Len += LZMA_MAX_LOW_LENGTH + LZMA_MAX_MID_LENGTH;
        }
        else
        {
            probArray = LenState->Mid[PosBit];
            limit = LZMA_MAX_MID_LENGTH;
            Decoder.Len += LZMA_MAX_LOW_LENGTH;
        }
    }
    else
    {
        probArray = LenState->Low[PosBit];
        limit = LZMA_MAX_LOW_LENGTH;
    }
    Decoder.Len += RcGetBitTree(probArray, limit);
}

//
// Decodes a full "match" packet: a length followed by an explicitly encoded
// distance, which becomes the new Rep0 (shifting the distance history down).
//
void
LzDecodeMatch (
    uint8_t PosBit
    )
{
    uint16_t* probArray;
    uint8_t distSlot, distBits;

    //
    // Decode the length component of the "match" sequence. Then, since we're
    // about to decode a new distance, update our history by one level.
    //
    LzDecodeLen(&Decoder.u.BitModel.MatchLen, PosBit);
    Decoder.Rep3 = Decoder.Rep2;
    Decoder.Rep2 = Decoder.Rep1;
    Decoder.Rep1 = Decoder.Rep0;

    //
    // Read the first 6 bits, which make up the "distance slot"
    //
    probArray = LzGetDistSlot();
    distSlot = RcGetBitTree(probArray, LZMA_DISTANCE_SLOTS);
    if (distSlot < LZMA_FIRST_CONTEXT_DISTANCE_SLOT)
    {
        //
        // Slots 0-3 directly encode the distance as a literal number
        //
        Decoder.Rep0 = distSlot;
    }
    else
    {
        //
        // For slots 4-13, figure out how many "context encoded bits" are used
        // to encode this distance. The math works out such that slots 4-5 use
        // 1 bit, 6-7 use 2 bits, 8-9 use 3 bits, and so on and so forth until
        // slots 12-13 which use 5 bits.
        //
        // This gives us anywhere from 1-5 bits, plus the two upper bits which
        // can either be 0b10 or 0b11 (based on the bottom bit of the distance
        // slot). Thus, with the context encoded bits, we can represent lengths
        // anywhere from 0b10[0] to 0b11[11111] (i.e.: 4-127).
        //
        // For slots 14-63, we use "fixed 50% probability bits" which are also
        // called "direct bits". The formula below also tells us how many such
        // direct bits to use in this scenario. In other words, distBits can
        // either be the number of "context encoded bits" for slots 4-13, or it
        // can be the number of "direct bits" for slots 14-63. This gives
        // us a range of 2 to 26 bits, which are then used as middle bits.
        // Finally, the last 4 bits are called the "align" bits. The smallest
        // possible number we can encode is now going to be 0b10[00][0000] and
        // the highest is 0b11[1111111111111111111111111][1111], in other words
        // 128 to (2^31)-1.
        //
        distBits = (distSlot >> 1) - 1;
        Decoder.Rep0 = (0b10 | (distSlot & 1)) << distBits;

        //
        // Slots 4-13 have their own arithmetic-coded reverse bit trees. Slots
        // 14-63 encode the middle "direct bits" with fixed 50% probability and
        // the bottom 4 "align bits" with a shared arithmetic-coded reverse bit
        // tree.
        //
        if (distSlot < LZMA_FIRST_FIXED_DISTANCE_SLOT)
        {
            probArray = &Decoder.u.BitModel.Dist[Decoder.Rep0 - distSlot];
        }
        else
        {
            Decoder.Rep0 |= RcGetFixed(distBits - LZMA_DISTANCE_ALIGN_BITS) <<
                            LZMA_DISTANCE_ALIGN_BITS;
            distBits = LZMA_DISTANCE_ALIGN_BITS;
            probArray = Decoder.u.BitModel.Align;
        }
        Decoder.Rep0 |= RcGetReverseBitTree(probArray, distBits);
    }

    //
    // Indicate that the last sequence was a "match"
    //
    LzSetMatch(&Decoder.Sequence);
}

void
LzDecodeRepLen (
    uint8_t PosBit,
    bool IsLongRep
    )
{
    //
    // Decode the length byte and indicate the last sequence was a "rep".
    // If this is a short rep, then the length is always hard-coded to 1.
    //
    if (IsLongRep)
    {
        LzDecodeLen(&Decoder.u.BitModel.RepLen, PosBit);
        LzSetLongRep(&Decoder.Sequence);
    }
    else
    {
        Decoder.Len = 1;
        LzSetShortRep(&Decoder.Sequence);
    }
}

void
LzDecodeRep0(
    uint8_t PosBit
    )
{
    uint8_t bit;

    //
    // This could be a "short rep" with a length of 1, or a "long rep0" with
    // a length that we have to decode. The next bit tells us this, using the
    // arithmetic-coded bit trees stored in "Rep0Long", with 1 tree for each
    // position bit (0-3).
    //
    bit = RcIsBitSet(&Decoder.u.BitModel.Rep0Long[Decoder.Sequence][PosBit]);
    LzDecodeRepLen(PosBit, bit);
}

void
LzDecodeLongRep (
    uint8_t PosBit
    )
{
    uint32_t newRep;

    //
    // Read the next 2 bits to figure out which of the recently used distances
    // we should use for this match. The following three states are possible:
    //
    // {0,n} - "Long rep1", where the length is stored in an arithmetic-coded
    // bit tree, and the distance is the 2nd most recently used distance (Rep1)
    //
    // {1,0} - "Long rep2", where the length is stored in an arithmetic-coded
    // bit tree, and the distance is the 3rd most recently used distance (Rep2)
    //
    // {1,1} - "Long rep3", where the length is stored in an arithmetic-coded
    // bit tree, and the distance is the 4th most recently used distance (Rep3)
    //
    // Once we have the right one, we must slide down each previously recently
    // used distance, so that the distance we're now using (Rep1, Rep2 or Rep3)
    // becomes "Rep0" again.
    //
    if (RcIsBitSet(&Decoder.u.BitModel.Rep1[Decoder.Sequence]))
    {
        if (RcIsBitSet(&Decoder.u.BitModel.Rep2[Decoder.Sequence]))
        {
            newRep = Decoder.Rep3;
            Decoder.Rep3 = Decoder.Rep2;
        }
        else
        {
            newRep = Decoder.Rep2;
        }
        Decoder.Rep2 = Decoder.Rep1;
    }
    else
    {
        newRep = Decoder.Rep1;
    }
    Decoder.Rep1 = Decoder.Rep0;
    Decoder.Rep0 = newRep;
    LzDecodeRepLen(PosBit, true);
}

void
LzDecodeRep (
    uint8_t PosBit
    )
{
    //
    // We know this is an LZ77 distance-length pair where the distance is based
    // on a history of up to 4 previously used distance (Rep0-3). To know which
    // distance to use, the following 5 bit positions are possible (keeping in
    // mind that we've already decoded the first 2 bits {1,1} in LzDecode which
    // got us here in the first place):
    //
    // {0,0} - "Short rep", where the length is always 1 and distance is always
    // the most recently used distance (Rep0).
    //
    // {0,1} - "Long rep0", where the length is stored in an arithmetic-coded
    // bit tree, and the distance is the most recently used distance (Rep0).
    //
    // Because both of these possibilities just use Rep0, LzDecodeRep0 handles
    // these two cases. Otherwise, we use LzDecodeLongRep to read up to two
    // additional bits to figure out which recently used distance (1, 2, or 3)
    // to use.
    //
    if (RcIsBitSet(&Decoder.u.BitModel.Rep0[Decoder.Sequence]))
    {
        LzDecodeLongRep(PosBit);
    }
    else
    {
        LzDecodeRep0(PosBit);
    }
}

//
// Main packet loop: decodes literal/match/rep packets until either the
// output dictionary is full or the compressed input is exhausted. Returns
// true only if no partially-decoded match is left pending.
//
bool
LzDecode (
    void
    )
{
    uint32_t position;
    uint8_t posBit;

    //
    // Get the current position in dictionary, making sure we have input bytes.
    // Once we run out of bytes, normalize the last arithmetic coded byte and
    // ensure there's no pending lengths that we haven't yet repeated.
    //
    while (DtCanWrite(&position) && RcCanRead())
    {
        //
        // An LZMA packet begins here, which can have 3 possible initial bit
        // sequences that correspond to the type of encoding that was chosen
        // to represent the next stream of symbols.
        //
        // {0, n} represents a "literal", which LzDecodeLiteral decodes.
        // Literals are a single byte encoded with arithmetic-coded bit trees
        //
        // {1, 0} represents a "match", which LzDecodeMatch decodes.
        // Matches are typical LZ77 sequences with explicit length and distance
        //
        // {1, 1} represents a "rep", which LzDecodeRep decodes.
        // Reps are LZ77 sequences where the distance is encoded as a reference
        // to a previously used distance (up to 4 -- called "Rep0-3").
        //
        // Once we've decoded either the "match" or the "rep', we now have the
        // distance in "Rep0" (the most recently used distance) and the length
        // in "Len", so we will use DtRepeatSymbol to go back in the dictionary
        // buffer "Rep0" bytes and repeat that character "Len" times.
        //
        posBit = position & (LZMA_POSITION_COUNT - 1);
        if (RcIsBitSet(&Decoder.u.BitModel.Match[Decoder.Sequence][posBit]))
        {
            if (RcIsBitSet(&Decoder.u.BitModel.Rep[Decoder.Sequence]))
            {
                LzDecodeRep(posBit);
            }
            else
            {
                LzDecodeMatch(posBit);
            }

            if (!DtRepeatSymbol(Decoder.Len, Decoder.Rep0 + 1))
            {
                return false;
            }
            Decoder.Len = 0;
        }
        else
        {
            LzDecodeLiteral();
        }
    }
    RcNormalize();
    return (Decoder.Len == 0);
}

void
LzResetState (
    void
    )
{
    //
    // Initialize decoder to default state in case we're called more than once.
    // The LZMA "Bit Model" is an adaptive arithmetic-coded probability-based
    // bit tree which encodes either a "0" or a "1".
    //
    Decoder.Sequence = LzmaLitLitLitState;
    Decoder.Rep0 = Decoder.Rep1 = Decoder.Rep2 = Decoder.Rep3 = 0;
    static_assert((LZMA_BIT_MODEL_SLOTS * 2) == sizeof(Decoder.u.BitModel),
                  "Invalid size");
    for (int i = 0; i < LZMA_BIT_MODEL_SLOTS; i++)
    {
        RcSetDefaultProbability(&Decoder.u.RawProbabilities[i]);
    }
}

//
// Validates the encoded LZMA property byte (only {lc=3, lp=0, pb=2} is
// supported) and resets all decoder state.
//
bool
LzInitialize (
    uint8_t Properties
    )
{
    if (Properties != k_LzSupportedProperties)
    {
        return false;
    }
    LzResetState();
    return true;
}

/*
 * ---- collapsed diff metadata: new file tools/src/minilzlib/lzmadec.h ----
 * diff --git a/tools/src/minilzlib/lzmadec.h b/tools/src/minilzlib/lzmadec.h
 * new file mode 100644 index 0000000..652165d
 * --- /dev/null +++ b/tools/src/minilzlib/lzmadec.h @@ -0,0 +1,114 @@
 */
/*++

Copyright (c) Alex Ionescu. All rights reserved.

Module Name:

    lzmadec.h

Abstract:

    This header file contains C-style definitions, constants, and enumerations
    that map back to the LZMA Standard, specifically the probability model that
    is used for encoding probabilities.

Author:

    Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version

Environment:

    Windows & Linux, user mode and kernel mode.

--*/

#pragma once

//
// Literals can be 0-255 and are encoded in 3 different types of slots based on
// the previous literal decoded and the "match byte" used.
//
#define LZMA_LITERALS 256
#define LZMA_LC_TYPES 3
#define LZMA_LC_MODEL_SIZE (LZMA_LC_TYPES * LZMA_LITERALS)

//
// These are the hardcoded LZMA properties we support for position and coders
//
#define LZMA_LC 3
#define LZMA_PB 2
#define LZMA_LP 0
#define LZMA_LITERAL_CODERS (1 << LZMA_LC)
#define LZMA_POSITION_COUNT (1 << LZMA_PB)

//
// Lengths are described in three different ways using "low", "mid", and "high"
// bit trees. The first two trees encode 3 bits, the last encodes 8. We never
// encode a length less than 2 bytes, since that's wasteful.
//
#define LZMA_MAX_LOW_LENGTH (1 << 3)
#define LZMA_MAX_MID_LENGTH (1 << 3)
#define LZMA_MAX_HIGH_LENGTH (1 << 8)
#define LZMA_MIN_LENGTH 2

//
// Distances can be encoded in different ways, based on the distance slot.
// Lengths of 2, 3, 4 bytes are directly encoded with their own slot. Lengths
// over 5 share a slot, which is then further subdivided into 3 different ways
// of encoding them, which are described in the source.
//
#define LZMA_DISTANCE_SLOTS 64
#define LZMA_FIRST_CONTEXT_DISTANCE_SLOT 4
#define LZMA_FIRST_FIXED_DISTANCE_SLOT 14
#define LZMA_DISTANCE_ALIGN_BITS 4
#define LZMA_DISTANCE_ALIGN_SLOTS (1 << LZMA_DISTANCE_ALIGN_BITS)

//
// Total number of probabilities that we need to store
//
#define LZMA_BIT_MODEL_SLOTS (1174 + \
                              (LZMA_LITERAL_CODERS * \
                               LZMA_LC_MODEL_SIZE))

//
// The LZMA probability bit model is typically based on the last LZMA sequences
// that were decoded. There are 11 such possibilities that are tracked.
//
typedef enum _LZMA_SEQUENCE_STATE
{
    //
    // State where we last saw three literals
    //
    LzmaLitLitLitState,
    //
    // States where we last saw two literals preceded by a non-literal
    //
    LzmaMatchLitLitState,
    LzmaRepLitLitState,
    LzmaLitShortrepLitLitState,
    //
    // States where we last saw one literal preceded by a non-literal
    //
    LzmaMatchLitState,
    LzmaRepLitState,
    LzmaLitShortrepLitState,
    //
    // Separator between states where we last saw at least one literal
    //
    LzmaMaxLitState,
    //
    // States where we last saw a non-literal preceded by a literal
    // (LzmaLitMatchState deliberately aliases LzmaMaxLitState's value 7)
    //
    LzmaLitMatchState = 7,
    LzmaLitRepState,
    LzmaLitShortrepState,
    //
    // States where we last saw two non-literals
    //
    LzmaNonlitMatchState,
    LzmaNonlitRepState,
    //
    // Separator for number of total states
    //
    LzmaMaxState
} LZMA_SEQUENCE_STATE, * PLZMA_SEQUENCE_STATE;

/* ---- collapsed diff metadata: new file tools/src/minilzlib/minlzlib.h ----
 * diff --git a/tools/src/minilzlib/minlzlib.h b/tools/src/minilzlib/minlzlib.h
 * new file mode 100644 index 0000000..c5276ae ---
/dev/null +++ b/tools/src/minilzlib/minlzlib.h @@ -0,0 +1,88 @@ +/*++ + +Copyright (c) Alex Ionescu. All rights reserved. + +Module Name: + + minlzlib.h + +Abstract: + + This header file is the main include for the minlz library. It contains the + internal function definitions for the history \& input buffers, the LZMA and + LZMA2 decoders, and the arithmetic (de)coder. + +Author: + + Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version + +Environment: + + Windows & Linux, user mode and kernel mode. + +--*/ + +#pragma once + +// +// C Standard Headers +// +#include <stddef.h> +#include <stdint.h> +#include <stdbool.h> +#include <assert.h> + +// +// Input Buffer Management +// +bool BfRead(uint8_t* Byte); +bool BfSeek(uint32_t Length, uint8_t** Bytes); +uint32_t BfTell(void); +bool BfAlign(void); +void BfInitialize(uint8_t* InputBuffer, uint32_t InputSize); +bool BfSetSoftLimit(uint32_t Remaining); +void BfResetSoftLimit(void); + +// +// Dictionary (History Buffer) Management +// +bool DtRepeatSymbol(uint32_t Length, uint32_t Distance); +void DtInitialize(uint8_t* HistoryBuffer, uint32_t Position); +bool DtSetLimit(uint32_t Limit); +void DtPutSymbol(uint8_t Symbol); +uint8_t DtGetSymbol(uint32_t Distance); +bool DtCanWrite(uint32_t* Position); +bool DtIsComplete(uint32_t* BytesProcessed); + +// +// Range Decoder +// +uint8_t RcGetBitTree(uint16_t* BitModel, uint16_t Limit); +uint8_t RcGetReverseBitTree(uint16_t* BitModel, uint8_t HighestBit); +uint8_t RcDecodeMatchedBitTree(uint16_t* BitModel, uint8_t MatchByte); +uint32_t RcGetFixed(uint8_t HighestBit); +bool RcInitialize(uint16_t* ChunkSize); +uint8_t RcIsBitSet(uint16_t* Probability); +void RcNormalize(void); +bool RcCanRead(void); +bool RcIsComplete(uint32_t* Offset); +void RcSetDefaultProbability(uint16_t* Probability); + +// +// LZMA Decoder +// +bool LzDecode(void); +bool LzInitialize(uint8_t Properties); +void LzResetState(void); + +// +// LZMA2 Decoder +// +bool Lz2DecodeStream(uint32_t* BytesProcessed, 
bool GetSizeOnly); +#ifdef MINLZ_INTEGRITY_CHECKS +// +// Checksum Management +// +uint32_t OsComputeCrc32(uint32_t Initial, const uint8_t* Data, uint32_t Length); +#define Crc32(Buffer, Length) OsComputeCrc32(0, (const uint8_t*)Buffer, Length) +#endif diff --git a/tools/src/minilzlib/minlzma.h b/tools/src/minilzlib/minlzma.h new file mode 100644 index 0000000..f7ca4bd --- /dev/null +++ b/tools/src/minilzlib/minlzma.h @@ -0,0 +1,33 @@ +#pragma once + +#include <stdbool.h> + +/*! + * @brief Decompresses an XZ stream from InputBuffer into OutputBuffer. + * + * @detail The XZ stream must contain a single block with an LZMA2 filter + * and no BJC2 filters, using default LZMA properties, and using + * either CRC32 or None as the checksum type. + * + * @param[in] InputBuffer - A fully formed buffer containing the XZ stream. + * @param[in,out] InputSize - The size of the input buffer. On output, the size + * consumed from the input buffer. + * @param[in] OutputBuffer - A fully allocated buffer to receive the output. + * Callers can pass in NULL if they do not intend to decompress, + * in combination with setting OutputSize to 0, in order to query + * the final expected size of the decompressed buffer. + * @param[in,out] OutputSize - On input, the size of the buffer. On output, the + * size of the decompressed result. + * + * @return true - The input buffer was fully decompressed in OutputBuffer, + * or no decompression was requested, the size of the decompressed + * buffer was returned in OutputSIze. + * false - A failure occurred during the decompression process. + */ +bool +XzDecode ( + uint8_t* InputBuffer, + uint32_t* InputSize, + uint8_t* OutputBuffer, + uint32_t* OutputSize + ); diff --git a/tools/src/minilzlib/rangedec.c b/tools/src/minilzlib/rangedec.c new file mode 100644 index 0000000..6a9f84f --- /dev/null +++ b/tools/src/minilzlib/rangedec.c @@ -0,0 +1,395 @@ +/*++ + +Copyright (c) Alex Ionescu. All rights reserved. 
+ +Module Name: + + rangedec.c + +Abstract: + + This module implements the Range Decoder, which is how LZMA describes the + arithmetic coder that it uses to represent the binary representation of the + LZ77 match length-distance pairs after the initial compression pass. At the + implementation level, this coder works with an alphabet of only 2 symbols: + the bit "0", and the bit "1", so there are only ever two probability ranges + that need to be checked each pass. In LZMA, a probability of 100% encodes a + "0", while 0% encodes a "1". Initially, all probabilities are assumed to be + 50%. Probabilities are stored using 11-bits (2048 \=\= 100%), and thus use 16 + bits of storage. Finally, the range decoder is adaptive, meaning that each + time a bit is decoded, the probabilities are updated: each 0 increases the + probability of another 0, and each 1 decrases it. The algorithm adapts the + probabilities using an exponential moving average with a shift ratio of 5. + +Author: + + Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version + +Environment: + + Windows & Linux, user mode and kernel mode. + +--*/ + +#include "minlzlib.h" + +// +// The range decoder uses 11 probability bits, where 2048 is 100% chance of a 0 +// +#define LZMA_RC_PROBABILITY_BITS 11 +#define LZMA_RC_MAX_PROBABILITY (1 << LZMA_RC_PROBABILITY_BITS) +const uint16_t k_LzmaRcHalfProbability = LZMA_RC_MAX_PROBABILITY / 2; + +// +// The range decoder uses an exponential moving average of the last probability +// hit (match or miss) with an adaptation rate of 5 bits (which falls in the +// middle of its 11 bits used to encode a probability. +// +#define LZMA_RC_ADAPTATION_RATE_SHIFT 5 + +// +// The range decoder has enough precision for the range only as long as the top +// 8 bits are still set. Once it falls below, it needs a renormalization step. 
+// +#define LZMA_RC_MIN_RANGE (1 << 24) + +// +// The range decoder must be initialized with 5 bytes, the first of which is +// ignored +// +#define LZMA_RC_INIT_BYTES 5 + +// +// State used for the binary adaptive arithmetic coder (LZMA Range Decoder) +// +typedef struct _RANGE_DECODER_STATE +{ + // + // Start and end location of the current stream's range encoder buffer + // + uint8_t* Start; + uint8_t* Limit; + // + // Current probability range and 32-bit arithmetic encoded sequence code + // + uint32_t Range; + uint32_t Code; +} RANGE_DECODER_STATE, *PRANGE_DECODER_STATE; +RANGE_DECODER_STATE RcState; + +bool +RcInitialize ( + uint16_t* ChunkSize + ) +{ + uint8_t i, rcByte; + uint8_t* chunkEnd; + + // + // Make sure that the input buffer has enough space for the requirements of + // the range encoder. We (temporarily) seek forward to validate this. + // + if (!BfSeek(*ChunkSize, &chunkEnd)) + { + return false; + } + BfSeek(-*ChunkSize, &chunkEnd); + + // + // The initial probability range is set to its highest value, after which + // the next 5 bytes are used to initialize the initial code. Note that the + // first byte outputted by the encoder is always going to be zero, so it is + // ignored here. + // + RcState.Range = (uint32_t)-1; + RcState.Code = 0; + for (i = 0; i < LZMA_RC_INIT_BYTES; i++) + { + BfRead(&rcByte); + RcState.Code = (RcState.Code << 8) | rcByte; + } + + // + // Store our current location in the buffer now, and how far we can go on + // reading. Then decrease the total chunk size by the count of init bytes, + // so that the caller can check, once done (RcIsComplete), if the code has + // become 0 exactly when the compressed chunk size has been fully consumed + // by the decoder. 
+ // + BfSeek(0, &RcState.Start); + RcState.Limit = RcState.Start + *ChunkSize; + *ChunkSize -= LZMA_RC_INIT_BYTES; + return true; +} + +bool +RcCanRead ( + void + ) +{ + uint8_t* pos; + // + // We can keep reading symbols as long as we haven't reached the end of the + // input buffer yet. + // + BfSeek(0, &pos); + return pos <= RcState.Limit; +} + +bool +RcIsComplete ( + uint32_t* BytesProcessed + ) +{ + uint8_t* pos; + // + // When the last symbol has been decoded, the last code should be zero as + // there is nothing left to describe. Return the offset in the buffer where + // this occurred (which should be equal to the compressed size). + // + BfSeek(0, &pos); + *BytesProcessed = (uint32_t)(pos - RcState.Start); + return (RcState.Code == 0); +} + +void +RcNormalize ( + void + ) +{ + uint8_t rcByte; + // + // Whenever we drop below 24 bits, there is no longer enough precision in + // the probability range not to avoid a "stuck" state where we cannot tell + // apart the two branches (above/below the probability range) because the + // two options appear identical with the number of precision bits that we + // have. In this case, shift the state by a byte (8 bits) and read another. + // + if (RcState.Range < LZMA_RC_MIN_RANGE) + { + RcState.Range <<= 8; + RcState.Code <<= 8; + BfRead(&rcByte); + RcState.Code |= rcByte; + } +} + +void +RcAdapt ( + bool Miss, + uint16_t* Probability + ) +{ + // + // In the canonical range encoders out there (including this one used by + // LZMA, we want the probability to adapt (change) as we read more or less + // bits that match our expectation. In order to quickly adapt to change, + // use an exponential moving average. The standard way of doing this is to + // use an integer based adaptation with a shift that's somewhere between + // {1, bits-1}. Since LZMA uses 11 bits for its model, 5 is a nice number + // that lands exactly between 1 and 10. 
+ // + if (Miss) + { + *Probability -= *Probability >> LZMA_RC_ADAPTATION_RATE_SHIFT; + } + else + { + *Probability += (LZMA_RC_MAX_PROBABILITY - *Probability) >> + LZMA_RC_ADAPTATION_RATE_SHIFT; + } +} + +uint8_t +RcIsBitSet ( + uint16_t* Probability + ) +{ + uint32_t bound; + uint8_t bit; + + // + // Always begin by making sure the range has been normalized for precision + // + RcNormalize(); + + // + // Check if the current arithmetic code is descried by the next calculated + // proportionally-divided probability range. Recall that the probabilities + // encode the chance of the symbol (bit) being a 0 -- not a 1! + // + // Therefore, if the next chunk of the code lies outside of this new range, + // we are still on the path to our 0. Otherwise, if the code is now part of + // the newly defined range (inclusive), then we produce a 1 and limit the + // range to produce a new range and code for the next decoding pass. + // + bound = (RcState.Range >> LZMA_RC_PROBABILITY_BITS) * *Probability; + if (RcState.Code < bound) + { + RcState.Range = bound; + bit = 0; + } + else + { + RcState.Range -= bound; + RcState.Code -= bound; + bit = 1; + } + + // + // Always finish by adapt the probabilities based on the bit value + // + RcAdapt(bit, Probability); + return bit; +} + +uint8_t +RcIsFixedBitSet( + void + ) +{ + uint8_t bit; + + // + // This is a specialized version of RcIsBitSet with two differences: + // + // First, there is no adaptive probability -- it is hardcoded to 50%. + // + // Second, because there are 11 bits per probability, and 50% is 1<<10, + // "(LZMA_RC_PROBABILITY_BITS) * Probability" is essentially 1. As such, + // we can just shift by 1 (in other words, halving the range). 
+ // + RcNormalize(); + RcState.Range >>= 1; + if (RcState.Code < RcState.Range) + { + bit = 0; + } + else + { + RcState.Code -= RcState.Range; + bit = 1; + } + return bit; +} + +uint8_t +RcGetBitTree ( + uint16_t* BitModel, + uint16_t Limit + ) +{ + uint16_t symbol; + + // + // Context probability bit trees always begin at index 1. Iterate over each + // decoded bit and just keep shifting it in place, until we reach the total + // expected number of bits, which should never be over 8 (limit is 0x100). + // + // Once decoded, always subtract the limit back from the symbol since we go + // one bit "past" the limit in the loop, as a side effect of the tree being + // off-by-one. + // + for (symbol = 1; symbol < Limit; ) + { + symbol = (symbol << 1) | RcIsBitSet(&BitModel[symbol]); + } + return (symbol - Limit) & 0xFF; +} + +uint8_t +RcGetReverseBitTree ( + uint16_t* BitModel, + uint8_t HighestBit + ) +{ + uint16_t symbol; + uint8_t i, bit, result; + + // + // This is the same logic as in RcGetBitTree, but with the bits actually + // encoded in reverse order. We keep track of the probability index as the + // "symbol" just like RcGetBitTree, but actually decode the result in the + // opposite order. + // + for (i = 0, symbol = 1, result = 0; i < HighestBit; i++) + { + bit = RcIsBitSet(&BitModel[symbol]); + symbol = (symbol << 1) | bit; + result |= bit << i; + } + return result; +} + +uint8_t +RcDecodeMatchedBitTree ( + uint16_t* BitModel, + uint8_t MatchByte + ) +{ + uint16_t symbol, bytePos, matchBit; + uint8_t bit; + + // + // Parse each bit in the "match byte" (see LzDecodeLiteral), which we call + // a "match bit". + // + // Then, treat this as a special bit tree decoding where two possible trees + // are used: one for when the "match bit" is set, and a separate one for + // when the "match bit" is not set. Since each tree can encode up to 256 + // symbols, each one has 0x100 slots. 
+ // + // Finally, we have the original bit tree which we'll revert back to once + // the match bits are no longer in play, which we parse for the remainder + // of the symbol. + // + for (bytePos = MatchByte, symbol = 1; symbol < 0x100; bytePos <<= 1) + { + matchBit = (bytePos >> 7) & 1; + + bit = RcIsBitSet(&BitModel[symbol + (0x100 * (matchBit + 1))]); + symbol = (symbol << 1) | bit; + + if (matchBit != bit) + { + while (symbol < 0x100) + { + symbol = (symbol << 1) | RcIsBitSet(&BitModel[symbol]); + } + break; + } + } + return symbol & 0xFF; +} + +uint32_t +RcGetFixed ( + uint8_t HighestBit + ) +{ + uint32_t symbol; + + // + // Fixed probability bit trees always begin at index 0. Iterate over each + // decoded bit and just keep shifting it in place, until we reach the total + // expected number of bits (typically never higher than 26 -- the maximum + // number of "direct bits" that the distance of a "match" can encode). + // + symbol = 0; + do + { + symbol = (symbol << 1) | RcIsFixedBitSet(); + } while (--HighestBit > 0); + return symbol; +} + +void +RcSetDefaultProbability ( + uint16_t* Probability + ) +{ + // + // By default, we initialize the probabilities to 0.5 (50% chance). + // + *Probability = k_LzmaRcHalfProbability; +} diff --git a/tools/src/minilzlib/xzstream.c b/tools/src/minilzlib/xzstream.c new file mode 100644 index 0000000..dd5078c --- /dev/null +++ b/tools/src/minilzlib/xzstream.c @@ -0,0 +1,547 @@ +/*++ + +Copyright (c) Alex Ionescu. All rights reserved. + +Module Name: + + xzstream.c + +Abstract: + + This module implements the XZ stream format decoding, including support for + parsing the stream header and block header, and then handing off the block + decoding to the LZMA2 decoder. Finally, if "meta checking" is enabled, then + the index and stream footer are also parsed and validated. Optionally, each + of these component structures can be checked against its CRC32 checksum, if + "integrity checking" has been enabled. 
Note that this library only supports + single-stream, single-block XZ files that have CRC32 (or None) set as their + block checking algorithm. Finally, no BJC filters are supported, and files + with a compressed/uncompressed size metadata indicator are not handled. + +Author: + + Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version + +Environment: + + Windows & Linux, user mode and kernel mode. + +--*/ + +#define MINLZ_META_CHECKS + +#include "minlzlib.h" +#include "xzstream.h" +#include "../utils.h" + +// +// XzDecodeBlockHeader can return "I successfully found a block", +// "I failed/bad block header", or "there was no block header". +// Though minlzlib explicitly only claims to handle files with a +// single block, it needs to also handle files with no blocks at all. +// (Produced by "xz" when compressing an empty input file) +// +typedef enum _XZ_DECODE_BLOCK_HEADER_RESULT { + XzBlockHeaderFail = 0, + XzBlockHeaderSuccess = 1, + XzBlockHeaderNoBlock = 2 +} XZ_DECODE_BLOCK_HEADER_RESULT; + +const uint8_t k_XzLzma2FilterIdentifier = 0x21; + +#ifdef _WIN32 +void __security_check_cookie(_In_ uintptr_t _StackCookie) { (void)(_StackCookie); } +#endif + +#ifdef MINLZ_META_CHECKS +// +// XZ Stream Container State +// +typedef struct _CONTAINER_STATE +{ + // + // Size of the XZ header and the index, used to validate against footer + // + uint32_t HeaderSize; + uint32_t IndexSize; + // + // Size of the compressed block and its checksum + // + uint32_t UncompressedBlockSize; + uint32_t UnpaddedBlockSize; + uint32_t ChecksumSize; +} CONTAINER_STATE, * PCONTAINER_STATE; +CONTAINER_STATE Container; +#endif + +#ifdef MINLZ_META_CHECKS +bool +XzDecodeVli ( + vli_type* Vli + ) +{ + uint8_t vliByte; + uint32_t bitPos; + + // + // Read the initial VLI byte (might be the value itself) + // + if (!BfRead(&vliByte)) + { + return false; + } + *Vli = vliByte & 0x7F; + + // + // Check if this was a complex VLI (and we have space for it) + // + bitPos = 7; + while ((vliByte & 0x80) != 
0) + { + // + // Read the next byte + // + if (!BfRead(&vliByte)) + { + return false; + } + + // + // Make sure we're not decoding an invalid VLI + // + if ((bitPos == (7 * VLI_BYTES_MAX)) || (vliByte == 0)) + { + return false; + } + + // + // Decode it and move to the next 7 bits + // + *Vli |= (vli_type)((vliByte & 0x7F) << bitPos); + bitPos += 7; + } + return true; +} + +bool +XzDecodeIndex ( + void + ) +{ + uint32_t vli; + uint8_t* indexStart; + uint8_t* indexEnd; + uint32_t* pCrc32; + uint8_t indexByte; + + // + // Remember where the index started so we can compute its size + // + BfSeek(0, &indexStart); + + // + // The index always starts out with an empty byte + // + if (!BfRead(&indexByte) || (indexByte != 0)) + { + return false; + } + + // + // Then the count of blocks, which we expect to be 1 + // + if (!XzDecodeVli(&vli) || (vli != 1)) + { + return false; + } + + // + // Then the unpadded block size, which should match + // + if (!XzDecodeVli(&vli) || (Container.UnpaddedBlockSize != vli)) + { + return false; + } + + // + // Then the uncompressed block size, which should match + // + if (!XzDecodeVli(&vli) || (Container.UncompressedBlockSize != vli)) + { + return false; + } + + // + // Then we pad to the next multiple of 4 + // + if (!BfAlign()) + { + return false; + } + + // + // Store the index size with padding to validate the footer later + // + BfSeek(0, &indexEnd); + Container.IndexSize = (uint32_t)(indexEnd - indexStart); + + // + // Read the CRC32, which is not part of the index size + // + if (!BfSeek(sizeof(*pCrc32), (uint8_t**)&pCrc32)) + { + return false; + } +#ifdef MINLZ_INTEGRITY_CHECKS + // + // Make sure the index is not corrupt + // + if (Crc32(indexStart, Container.IndexSize) != *pCrc32) + { + return false; + } +#endif + return true; +} + +bool +XzDecodeStreamFooter ( + void + ) +{ + PXZ_STREAM_FOOTER streamFooter; + + // + // Seek past the footer, making sure we have space in the input stream + // + if (!BfSeek(sizeof(*streamFooter), 
(uint8_t**)&streamFooter)) + { + return false; + } + + // + // Validate the footer magic + // + if (streamFooter->Magic != 'ZY') + { + return false; + } + + // + // Validate no flags other than checksum type are set + // + if ((streamFooter->u.Flags != 0) && + ((streamFooter->u.s.CheckType != XzCheckTypeCrc32) && + (streamFooter->u.s.CheckType != XzCheckTypeCrc64) && + (streamFooter->u.s.CheckType != XzCheckTypeSha2) && + (streamFooter->u.s.CheckType != XzCheckTypeNone))) + { + return false; + } + + // + // Validate if the footer accurately describes the size of the index + // + if (Container.IndexSize != (streamFooter->BackwardSize * 4)) + { + return false; + } +#ifdef MINLZ_INTEGRITY_CHECKS + // + // Compute the footer's CRC32 and make sure it's not corrupted + // + if (Crc32(&streamFooter->BackwardSize, + sizeof(streamFooter->BackwardSize) + + sizeof(streamFooter->u.Flags)) != + streamFooter->Crc32) + { + return false; + } +#endif + return true; +} +#endif + +bool +XzDecodeBlock ( + uint8_t* OutputBuffer, + uint32_t* BlockSize + ) +{ +#ifdef MINLZ_META_CHECKS + uint8_t *inputStart, *inputEnd; +#endif + // + // Decode the LZMA2 stream. If full integrity checking is enabled, also + // save the offset before and after decoding, so we can save the block + // sizes and compare them against the footer and index after decoding. 
+ // +#ifdef MINLZ_META_CHECKS + BfSeek(0, &inputStart); +#endif + if (!Lz2DecodeStream(BlockSize, OutputBuffer == NULL)) + { + return false; + } +#ifdef MINLZ_META_CHECKS + BfSeek(0, &inputEnd); + Container.UnpaddedBlockSize = Container.HeaderSize + + (uint32_t)(inputEnd - inputStart); + Container.UncompressedBlockSize = *BlockSize; +#endif + // + // After the block data, we need to pad to 32-bit alignment + // + if (!BfAlign()) + { + return false; + } +#if defined(MINLZ_INTEGRITY_CHECKS) || defined(MINLZ_META_CHECKS) + // + // Finally, move past the size of the checksum if any, then compare it with + // with the actual CRC32 of the block, if integrity checks are enabled. If + // meta checks are enabled, update the block size so the index checking can + // validate it. + // + if (!BfSeek(Container.ChecksumSize, &inputEnd)) + { + return false; + } +#endif + (void)(OutputBuffer); +#ifdef MINLZ_INTEGRITY_CHECKS + if ((OutputBuffer != NULL) && + (Crc32(OutputBuffer, *BlockSize) != *(uint32_t*)inputEnd)) + { + return false; + } +#endif +#ifdef MINLZ_META_CHECKS + Container.UnpaddedBlockSize += Container.ChecksumSize; +#endif + return true; +} + +bool +XzDecodeStreamHeader ( + void + ) +{ + PXZ_STREAM_HEADER streamHeader; + + // + // Seek past the header, making sure we have space in the input stream + // + if (!BfSeek(sizeof(*streamHeader), (uint8_t**)&streamHeader)) + { + return false; + } +#ifdef MINLZ_META_CHECKS + // + // Validate the header magic + // + if ((*(uint32_t*)&streamHeader->Magic[1] != 'ZXz7') || + (streamHeader->Magic[0] != 0xFD) || + (streamHeader->Magic[5] != 0x00)) + { + return false; + } + + // + // Validate no flags other than checksum type are set + // + if ((streamHeader->u.Flags != 0) && + ((streamHeader->u.s.CheckType != XzCheckTypeCrc32) && + (streamHeader->u.s.CheckType != XzCheckTypeCrc64) && + (streamHeader->u.s.CheckType != XzCheckTypeSha2) && + (streamHeader->u.s.CheckType != XzCheckTypeNone))) + { + return false; + } + + // + // 
Remember that a checksum might come at the end of the block later + // + if (streamHeader->u.s.CheckType == 0) + { + Container.ChecksumSize = 0; + } else { + Container.ChecksumSize = 4 << ((streamHeader->u.s.CheckType - 1) / 3); + } + +#endif +#ifdef MINLZ_INTEGRITY_CHECKS + // + // Compute the header's CRC32 and make sure it's not corrupted + // + if (Crc32(&streamHeader->u.Flags, sizeof(streamHeader->u.Flags)) != + streamHeader->Crc32) + { + return false; + } +#endif + return true; +} + +XZ_DECODE_BLOCK_HEADER_RESULT +XzDecodeBlockHeader ( + void + ) +{ + PXZ_BLOCK_HEADER blockHeader; +#ifdef MINLZ_META_CHECKS + uint32_t size; +#endif + // + // Seek past the header, making sure we have space in the input stream + // + if (!BfSeek(sizeof(*blockHeader), (uint8_t**)&blockHeader)) + { + return XzBlockHeaderFail; + } + if (blockHeader->Size == 0) + { + // + // That's no block! That's an index! + // + BfSeek((uint32_t)(-(uint16_t)sizeof(*blockHeader)), + (uint8_t**)&blockHeader); + return XzBlockHeaderNoBlock; + } +#ifdef MINLZ_META_CHECKS + // + // Validate that the size of the header is what we expect + // + Container.HeaderSize = (blockHeader->Size + 1) * 4; + if (Container.HeaderSize != sizeof(*blockHeader)) + { + return XzBlockHeaderFail; + } + + // + // Validate that no additional flags or filters are enabled + // + if (blockHeader->u.Flags != 0) + { + return XzBlockHeaderFail; + } + + // + // Validate that the only filter is the LZMA2 filter + // + if (blockHeader->LzmaFlags.Id != k_XzLzma2FilterIdentifier) + { + return XzBlockHeaderFail; + } + + // + // With the expected number of property bytes + // + if (blockHeader->LzmaFlags.Size + != sizeof(blockHeader->LzmaFlags.u.Properties)) + { + return XzBlockHeaderFail; + } + + // + // The only property is the dictionary size, make sure it is valid. 
+ // + // We don't actually need to store or compare the size with anything since + // the library expects the caller to always put in a buffer that's large + // enough to contain the full uncompressed file (or calling it in "get size + // only" mode to get this information). + // + // This output buffer can thus be smaller than the size of the dictionary + // which is absolutely OK as long as that's actually the size of the output + // file. If callers pass in a buffer size that's too small, decoding will + // fail at later stages anyway, and that's incorrect use of minlzlib. + // + size = blockHeader->LzmaFlags.u.s.DictionarySize; + if (size > 39) + { + return XzBlockHeaderFail; + } +#ifdef MINLZ_INTEGRITY_CHECKS + // + // Compute the header's CRC32 and make sure it's not corrupted + // + if (Crc32(blockHeader, + Container.HeaderSize - sizeof(blockHeader->Crc32)) != + blockHeader->Crc32) + { + return XzBlockHeaderFail; + } +#endif +#endif + return XzBlockHeaderSuccess; +} + +bool +XzDecode ( + uint8_t* InputBuffer, + uint32_t* InputSize, + uint8_t* OutputBuffer, + uint32_t* OutputSize + ) +{ + + // + // Initialize the input buffer descriptor and history buffer (dictionary) + // + BfInitialize(InputBuffer, *InputSize ? 
*InputSize : UINT32_MAX); + DtInitialize(OutputBuffer, *OutputSize); + + // + // Decode the stream header for check for validity + // + if (!XzDecodeStreamHeader()) + { + printf("header decode failed\n"); + return false; + } + + // + // Decode the block header for check for validity + // + switch (XzDecodeBlockHeader()) + { + case XzBlockHeaderFail: + printf("block header failed\n"); + return false; + case XzBlockHeaderNoBlock: + *OutputSize = 0; + break; + case XzBlockHeaderSuccess: + // + // Decode the actual block + // + if (!XzDecodeBlock(OutputBuffer, OutputSize)) + { + printf("block decode failed\n"); + return false; + } + break; + } + +#ifdef MINLZ_META_CHECKS + // + // Decode the index for validity checks + // + if (!XzDecodeIndex()) + { + return false; + } + + // + // And finally decode the footer as a final set of checks + // + if (!XzDecodeStreamFooter()) + { + return false; + } + + if (!*InputSize) + *InputSize = BfTell(); +#endif + return true; +} diff --git a/tools/src/minilzlib/xzstream.h b/tools/src/minilzlib/xzstream.h new file mode 100644 index 0000000..f227879 --- /dev/null +++ b/tools/src/minilzlib/xzstream.h @@ -0,0 +1,123 @@ +/*++ + +Copyright (c) Alex Ionescu. All rights reserved. + +Module Name: + + xzstream.h + +Abstract: + + This header file contains C-style data structures and enumerations that map + back to the XZ stream and file format standard, including for the decoding + of Variable Length Integers (VLI). This includes definitions for the stream + header, block header, index and stream footer, and associated check types. + +Author: + + Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version + +Environment: + + Windows & Linux, user mode and kernel mode. + +--*/ + +#pragma once + +// +// XZ streams encode certain numbers as "variable length integers", with 7 bits +// for the data, and a high bit to encode that another byte must be consumed. 
+// +typedef uint32_t vli_type; +#define VLI_BYTES_MAX (sizeof(vli_type) * 8 / 7) + +// +// These are the possible supported types for integrity checking in an XZ file +// +typedef enum _XZ_CHECK_TYPES +{ + XzCheckTypeNone = 0, + XzCheckTypeCrc32 = 1, + XzCheckTypeCrc64 = 4, + XzCheckTypeSha2 = 10 +} XZ_CHECK_TYPES; + +// +// This describes the first 12 bytes of any XZ container file / stream +// +typedef struct _XZ_STREAM_HEADER +{ + uint8_t Magic[6]; + union + { + struct + { + uint8_t ReservedFlags; + uint8_t CheckType : 4; + uint8_t ReservedType : 4; + } s; + uint16_t Flags; + } u; + uint32_t Crc32; +} XZ_STREAM_HEADER, * PXZ_STREAM_HEADER; +static_assert(sizeof(XZ_STREAM_HEADER) == 12, "Invalid Stream Header Size"); + +// +// This describes the last 12 bytes of any XZ container file / stream +// +typedef struct _XZ_STREAM_FOOTER +{ + uint32_t Crc32; + uint32_t BackwardSize; + union + { + struct + { + uint8_t ReservedFlags; + uint8_t CheckType : 4; + uint8_t ReservedType : 4; + } s; + uint16_t Flags; + } u; + uint16_t Magic; +} XZ_STREAM_FOOTER, * PXZ_STREAM_FOOTER; +static_assert(sizeof(XZ_STREAM_FOOTER) == 12, "Invalid Stream Footer Size"); + +// +// This describes the beginning of a compressed payload stored in an XZ stream, +// with hardcoded expectations for an LZMA2-compressed payload that has 0 extra +// filters (such as BCJ2). 
+// +typedef struct _XZ_BLOCK_HEADER +{ + uint8_t Size; + union + { + struct + { + uint8_t FilterCount : 2; + uint8_t Reserved : 4; + uint8_t HasCompressedSize : 1; + uint8_t HasUncompressedSize : 1; + } s; + uint8_t Flags; + } u; + struct + { + uint8_t Id; + uint8_t Size; + union + { + struct + { + uint8_t DictionarySize : 6; + uint8_t Reserved : 2; + } s; + uint8_t Properties; + } u; + } LzmaFlags; + uint8_t Padding[3]; + uint32_t Crc32; +} XZ_BLOCK_HEADER, * PXZ_BLOCK_HEADER; +static_assert(sizeof(XZ_BLOCK_HEADER) == 12, "Invalid Block Header Size"); diff --git a/tools/src/nvme.c b/tools/src/nvme.c new file mode 100644 index 0000000..e6741eb --- /dev/null +++ b/tools/src/nvme.c @@ -0,0 +1,505 @@ +/* SPDX-License-Identifier: MIT */ + +#include "adt.h" +#include "assert.h" +#include "malloc.h" +#include "nvme.h" +#include "pmgr.h" +#include "rtkit.h" +#include "sart.h" +#include "string.h" +#include "utils.h" + +#define NVME_TIMEOUT 1000000 +#define NVME_ENABLE_TIMEOUT 5000000 +#define NVME_SHUTDOWN_TIMEOUT 5000000 +#define NVME_QUEUE_SIZE 64 + +#define NVME_CC 0x14 +#define NVME_CC_SHN GENMASK(15, 14) +#define NVME_CC_SHN_NONE 0 +#define NVME_CC_SHN_NORMAL 1 +#define NVME_CC_SHN_ABRUPT 2 +#define NVME_CC_EN BIT(0) + +#define NVME_CSTS 0x1c +#define NVME_CSTS_SHST GENMASK(3, 2) +#define NVME_CSTS_SHST_NORMAL 0 +#define NVME_CSTS_SHST_BUSY 1 +#define NVME_CSTS_SHST_DONE 2 +#define NVME_CSTS_RDY BIT(0) + +#define NVME_AQA 0x24 +#define NVME_ASQ 0x28 +#define NVME_ACQ 0x30 + +#define NVME_DB_ACQ 0x1004 +#define NVME_DB_IOCQ 0x100c + +#define NVME_BOOT_STATUS 0x1300 +#define NVME_BOOT_STATUS_OK 0xde71ce55 + +#define NVME_LINEAR_SQ_CTRL 0x24908 +#define NVME_LINEAR_SQ_CTRL_EN BIT(0) + +#define NVME_UNKNONW_CTRL 0x24008 +#define NVME_UNKNONW_CTRL_PRP_NULL_CHECK BIT(11) + +#define NVME_MAX_PEND_CMDS_CTRL 0x1210 +#define NVME_DB_LINEAR_ASQ 0x2490c +#define NVME_DB_LINEAR_IOSQ 0x24910 + +#define NVMMU_NUM 0x28100 +#define NVMMU_ASQ_BASE 0x28108 +#define NVMMU_IOSQ_BASE 
0x28110 +#define NVMMU_TCB_INVAL 0x28118 +#define NVMMU_TCB_STAT 0x29120 + +#define NVME_ADMIN_CMD_DELETE_SQ 0x00 +#define NVME_ADMIN_CMD_CREATE_SQ 0x01 +#define NVME_ADMIN_CMD_DELETE_CQ 0x04 +#define NVME_ADMIN_CMD_CREATE_CQ 0x05 +#define NVME_QUEUE_CONTIGUOUS BIT(0) + +#define NVME_CMD_FLUSH 0x00 +#define NVME_CMD_WRITE 0x01 +#define NVME_CMD_READ 0x02 + +struct nvme_command { + u8 opcode; + u8 flags; + u8 tag; + u8 rsvd; // normal NVMe has tag as u16 + u32 nsid; + u32 cdw2; + u32 cdw3; + u64 metadata; + u64 prp1; + u64 prp2; + u32 cdw10; + u32 cdw11; + u32 cdw12; + u32 cdw13; + u32 cdw14; + u32 cdw15; +}; + +struct nvme_completion { + u64 result; + u32 rsvd; // normal NVMe has the sq_head and sq_id here + u16 tag; + u16 status; +}; + +struct apple_nvmmu_tcb { + u8 opcode; + u8 dma_flags; + u8 slot_id; + u8 unk0; + u32 len; + u64 unk1[2]; + u64 prp1; + u64 prp2; + u64 unk2[2]; + u8 aes_iv[8]; + u8 _aes_unk[64]; +}; + +struct nvme_queue { + struct apple_nvmmu_tcb *tcbs; + struct nvme_command *cmds; + struct nvme_completion *cqes; + + u8 cq_head; + u8 cq_phase; + + bool adminq; +}; + +static_assert(sizeof(struct nvme_command) == 64, "invalid nvme_command size"); +static_assert(sizeof(struct nvme_completion) == 16, "invalid nvme_completion size"); +static_assert(sizeof(struct apple_nvmmu_tcb) == 128, "invalid apple_nvmmu_tcb size"); + +static bool nvme_initialized = false; +static u8 nvme_die; + +static asc_dev_t *nvme_asc = NULL; +static rtkit_dev_t *nvme_rtkit = NULL; +static sart_dev_t *nvme_sart = NULL; + +static u64 nvme_base; + +static struct nvme_queue adminq, ioq; + +static bool alloc_queue(struct nvme_queue *q) +{ + memset(q, 0, sizeof(*q)); + + q->tcbs = memalign(SZ_16K, NVME_QUEUE_SIZE * sizeof(*q->tcbs)); + if (!q->tcbs) + return false; + + q->cmds = memalign(SZ_16K, NVME_QUEUE_SIZE * sizeof(*q->cmds)); + if (!q->cmds) + goto free_tcbs; + + q->cqes = memalign(SZ_16K, NVME_QUEUE_SIZE * sizeof(*q->cqes)); + if (!q->cqes) + goto free_cmds; + + 
memset(q->tcbs, 0, NVME_QUEUE_SIZE * sizeof(*q->tcbs)); + memset(q->cmds, 0, NVME_QUEUE_SIZE * sizeof(*q->cmds)); + memset(q->cqes, 0, NVME_QUEUE_SIZE * sizeof(*q->cqes)); + q->cq_head = 0; + q->cq_phase = 1; + return true; + +free_cmds: + free(q->cmds); +free_tcbs: + free(q->tcbs); + return false; +} + +static void free_queue(struct nvme_queue *q) +{ + free(q->cmds); + free(q->tcbs); + free(q->cqes); +} + +static void nvme_poll_syslog(void) +{ + struct rtkit_message msg; + rtkit_recv(nvme_rtkit, &msg); +} + +static bool nvme_ctrl_disable(void) +{ + u64 timeout = timeout_calculate(NVME_TIMEOUT); + + clear32(nvme_base + NVME_CC, NVME_CC_EN); + while (read32(nvme_base + NVME_CSTS) & NVME_CSTS_RDY && !timeout_expired(timeout)) + nvme_poll_syslog(); + + return !(read32(nvme_base + NVME_CSTS) & NVME_CSTS_RDY); +} + +static bool nvme_ctrl_enable(void) +{ + u64 timeout = timeout_calculate(NVME_ENABLE_TIMEOUT); + + mask32(nvme_base + NVME_CC, NVME_CC_SHN, NVME_CC_EN); + while (!(read32(nvme_base + NVME_CSTS) & NVME_CSTS_RDY) && !timeout_expired(timeout)) + nvme_poll_syslog(); + + return read32(nvme_base + NVME_CSTS) & NVME_CSTS_RDY; +} + +static bool nvme_ctrl_shutdown(void) +{ + u64 timeout = timeout_calculate(NVME_SHUTDOWN_TIMEOUT); + + mask32(nvme_base + NVME_CC, NVME_CC_SHN, FIELD_PREP(NVME_CC_SHN, NVME_CC_SHN_NORMAL)); + while (FIELD_GET(NVME_CSTS_SHST, read32(nvme_base + NVME_CSTS)) != NVME_CSTS_SHST_DONE && + !timeout_expired(timeout)) + nvme_poll_syslog(); + + return FIELD_GET(NVME_CSTS_SHST, read32(nvme_base + NVME_CSTS)) == NVME_CSTS_SHST_DONE; +} + +static bool nvme_exec_command(struct nvme_queue *q, struct nvme_command *cmd, u64 *result) +{ + bool found = false; + u64 timeout; + u8 tag = 0; + struct nvme_command *queue_cmd = &q->cmds[tag]; + struct apple_nvmmu_tcb *tcb = &q->tcbs[tag]; + + memcpy(queue_cmd, cmd, sizeof(*cmd)); + queue_cmd->tag = tag; + + memset(tcb, 0, sizeof(*tcb)); + tcb->opcode = queue_cmd->opcode; + tcb->dma_flags = 3; // always allow 
// read+write to the PRP pages
    tcb->slot_id = tag;
    tcb->len = queue_cmd->cdw12; // for R/W commands cdw12 carries the block count (see nvme_read)
    tcb->prp1 = queue_cmd->prp1;
    tcb->prp2 = queue_cmd->prp2;

    /* make sure ANS2 can see the command and tcb before triggering it */
    dma_wmb();

    nvme_poll_syslog();
    if (q->adminq)
        write32(nvme_base + NVME_DB_LINEAR_ASQ, tag);
    else
        write32(nvme_base + NVME_DB_LINEAR_IOSQ, tag);
    nvme_poll_syslog();

    timeout = timeout_calculate(NVME_TIMEOUT);
    struct nvme_completion cqe;
    while (!timeout_expired(timeout)) {
        nvme_poll_syslog();

        /* we need a DMA read barrier here since the CQ will be updated using DMA */
        dma_rmb();
        memcpy(&cqe, &q->cqes[q->cq_head], sizeof(cqe));
        // Phase bit mismatch means this CQ slot has not been written this pass yet.
        if ((cqe.status & 1) != q->cq_phase)
            continue;

        if (cqe.tag == tag) {
            found = true;
            if (result)
                *result = cqe.result;
        } else {
            printf("nvme: invalid tag in CQ: expected %d but got %d\n", tag, cqe.tag);
        }

        // Apple's NVMMU requires an explicit TCB invalidation after each completion.
        write32(nvme_base + NVMMU_TCB_INVAL, cqe.tag);
        if (read32(nvme_base + NVMMU_TCB_STAT))
            printf("nvme: NVMMU invalidation for tag %d failed\n", cqe.tag);

        /* increment head and switch phase once the end of the queue has been reached */
        q->cq_head += 1;
        if (q->cq_head == NVME_QUEUE_SIZE) {
            q->cq_head = 0;
            q->cq_phase ^= 1;
        }

        // Ring the CQ head doorbell to hand the consumed entry back to the controller.
        if (q->adminq)
            write32(nvme_base + NVME_DB_ACQ, q->cq_head);
        else
            write32(nvme_base + NVME_DB_IOCQ, q->cq_head);
        break;
    }

    if (!found) {
        printf("nvme: could not find command completion in CQ\n");
        return false;
    }

    // Bit 0 of the status field is the phase tag; the remainder is the NVMe status code.
    cqe.status >>= 1;
    if (cqe.status) {
        printf("nvme: command failed with status %d\n", cqe.status);
        return false;
    }

    return true;
}

/*
 * Bring up the ANS2 coprocessor and the NVMe controller behind it.
 * Returns true if NVMe is usable (or was already initialized), false otherwise.
 */
bool nvme_init(void)
{
    if (nvme_initialized) {
        printf("nvme: already initialized\n");
        return true;
    }

    int adt_path[8];
    int node = adt_path_offset_trace(adt, "/arm-io/ans", adt_path);
    if (node < 0) {
        printf("nvme: Error getting NVMe node /arm-io/ans\n");
        return NULL; // NOTE(review): returns NULL from a bool function; converts to false but should be `false`
    }

    u32 cg;
    if (ADT_GETPROP(adt, node, "clock-gates", &cg) < 0) {
printf("nvme: Error getting NVMe clock-gates\n"); + return NULL; + } + nvme_die = FIELD_GET(PMGR_DIE_ID, cg); + printf("nvme: ANS is on die %d\n", nvme_die); + + if (adt_get_reg(adt, adt_path, "reg", 3, &nvme_base, NULL) < 0) { + printf("nvme: Error getting NVMe base address.\n"); + return NULL; + } + + if (!alloc_queue(&adminq)) { + printf("nvme: Error allocating admin queue\n"); + return NULL; + } + if (!alloc_queue(&ioq)) { + printf("nvme: Error allocating admin queue\n"); + goto out_adminq; + } + + ioq.adminq = false; + adminq.adminq = true; + + nvme_asc = asc_init("/arm-io/ans"); + if (!nvme_asc) + goto out_ioq; + + nvme_sart = sart_init("/arm-io/sart-ans"); + if (!nvme_sart) + goto out_asc; + + nvme_rtkit = rtkit_init("nvme", nvme_asc, NULL, NULL, nvme_sart); + if (!nvme_rtkit) + goto out_sart; + + if (!rtkit_boot(nvme_rtkit)) + goto out_rtkit; + + if (poll32(nvme_base + NVME_BOOT_STATUS, 0xffffffff, NVME_BOOT_STATUS_OK, USEC_PER_SEC) < 0) { + printf("nvme: ANS did not boot correctly.\n"); + goto out_shutdown; + } + + /* setup controller and NVMMU for linear submission queue */ + set32(nvme_base + NVME_LINEAR_SQ_CTRL, NVME_LINEAR_SQ_CTRL_EN); + clear32(nvme_base + NVME_UNKNONW_CTRL, NVME_UNKNONW_CTRL_PRP_NULL_CHECK); + write32(nvme_base + NVME_MAX_PEND_CMDS_CTRL, + ((NVME_QUEUE_SIZE - 1) << 16) | (NVME_QUEUE_SIZE - 1)); + write32(nvme_base + NVMMU_NUM, NVME_QUEUE_SIZE - 1); + write64_lo_hi(nvme_base + NVMMU_ASQ_BASE, (u64)adminq.tcbs); + write64_lo_hi(nvme_base + NVMMU_IOSQ_BASE, (u64)ioq.tcbs); + + /* setup admin queue */ + if (!nvme_ctrl_disable()) { + printf("nvme: timeout while waiting for CSTS.RDY to clear\n"); + goto out_shutdown; + } + write64_lo_hi(nvme_base + NVME_ASQ, (u64)adminq.cmds); + write64_lo_hi(nvme_base + NVME_ACQ, (u64)adminq.cqes); + write32(nvme_base + NVME_AQA, ((NVME_QUEUE_SIZE - 1) << 16) | (NVME_QUEUE_SIZE - 1)); + if (!nvme_ctrl_enable()) { + printf("nvme: timeout while waiting for CSTS.RDY to be set\n"); + goto out_disable_ctrl; + 
}

    /* setup IO queue */
    struct nvme_command cmd;

    // Create the IO completion queue (qid 1) first; the SQ created below references it.
    memset(&cmd, 0, sizeof(cmd));
    cmd.opcode = NVME_ADMIN_CMD_CREATE_CQ;
    cmd.prp1 = (u64)ioq.cqes;
    cmd.cdw10 = 1; // cq id
    cmd.cdw10 |= (NVME_QUEUE_SIZE - 1) << 16; // queue size, zero-based
    cmd.cdw11 = NVME_QUEUE_CONTIGUOUS;
    if (!nvme_exec_command(&adminq, &cmd, NULL)) {
        printf("nvme: create cq command failed\n");
        goto out_disable_ctrl;
    }

    memset(&cmd, 0, sizeof(cmd));
    cmd.opcode = NVME_ADMIN_CMD_CREATE_SQ;
    cmd.prp1 = (u64)ioq.cmds;
    cmd.cdw10 = 1; // sq id
    cmd.cdw10 |= (NVME_QUEUE_SIZE - 1) << 16; // queue size, zero-based
    cmd.cdw11 = NVME_QUEUE_CONTIGUOUS;
    cmd.cdw11 |= 1 << 16; // cq id for this sq
    if (!nvme_exec_command(&adminq, &cmd, NULL)) {
        printf("nvme: create sq command failed\n");
        goto out_delete_cq;
    }

    nvme_initialized = true;
    printf("nvme: initialized at 0x%lx\n", nvme_base);
    return true;

/* Error unwind: each label tears down exactly what was set up before the failing step,
 * in reverse order. */
out_delete_cq:
    memset(&cmd, 0, sizeof(cmd));
    cmd.opcode = NVME_ADMIN_CMD_DELETE_CQ;
    cmd.cdw10 = 1; // cq id
    if (!nvme_exec_command(&adminq, &cmd, NULL))
        printf("nvme: delete cq command failed\n");
out_disable_ctrl:
    nvme_ctrl_shutdown();
    nvme_ctrl_disable();
    nvme_poll_syslog();
out_shutdown:
    rtkit_sleep(nvme_rtkit);
    // Some machines call this ANS, some ANS2...
pmgr_reset(nvme_die, "ANS");
    pmgr_reset(nvme_die, "ANS2");
out_rtkit:
    rtkit_free(nvme_rtkit);
out_sart:
    sart_free(nvme_sart);
out_asc:
    asc_free(nvme_asc);
out_ioq:
    free_queue(&ioq);
out_adminq:
    free_queue(&adminq);
    return false;
}

/*
 * Shut down NVMe and the ANS coprocessor.
 * Deletes the IO queues, performs an orderly NVMe controller shutdown, puts the
 * RTKit firmware to sleep and finally resets the ANS power domain.
 */
void nvme_shutdown(void)
{
    if (!nvme_initialized) {
        printf("nvme: trying to shut down but not initialized\n");
        return;
    }

    struct nvme_command cmd;

    // Delete the IO submission queue before its completion queue.
    memset(&cmd, 0, sizeof(cmd));
    cmd.opcode = NVME_ADMIN_CMD_DELETE_SQ;
    cmd.cdw10 = 1; // sq id
    if (!nvme_exec_command(&adminq, &cmd, NULL))
        printf("nvme: delete sq command failed\n");

    memset(&cmd, 0, sizeof(cmd));
    cmd.opcode = NVME_ADMIN_CMD_DELETE_CQ;
    cmd.cdw10 = 1; // cq id
    if (!nvme_exec_command(&adminq, &cmd, NULL))
        printf("nvme: delete cq command failed\n");

    if (!nvme_ctrl_shutdown())
        printf("nvme: timeout while waiting for controller shutdown\n");
    if (!nvme_ctrl_disable())
        printf("nvme: timeout while waiting for CSTS.RDY to clear\n");

    rtkit_sleep(nvme_rtkit);
    // Some machines call this ANS, some ANS2...
+ pmgr_reset(nvme_die, "ANS"); + pmgr_reset(nvme_die, "ANS2"); + rtkit_free(nvme_rtkit); + sart_free(nvme_sart); + asc_free(nvme_asc); + free_queue(&ioq); + free_queue(&adminq); + nvme_initialized = false; + + printf("nvme: shutdown done\n"); +} + +bool nvme_flush(u32 nsid) +{ + struct nvme_command cmd; + + if (!nvme_initialized) + return false; + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = NVME_CMD_FLUSH; + cmd.nsid = nsid; + + return nvme_exec_command(&ioq, &cmd, NULL); +} + +bool nvme_read(u32 nsid, u64 lba, void *buffer) +{ + struct nvme_command cmd; + u64 buffer_addr = (u64)buffer; + + if (!nvme_initialized) + return false; + + /* no need for 16K alignment here since the NVME page size is 4k */ + if (buffer_addr & (SZ_4K - 1)) + return false; + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = NVME_CMD_READ; + cmd.nsid = nsid; + cmd.prp1 = (u64)buffer_addr; + cmd.cdw10 = lba; + cmd.cdw11 = lba >> 32; + cmd.cdw12 = 1; // 4096 bytes + + return nvme_exec_command(&ioq, &cmd, NULL); +} diff --git a/tools/src/nvme.h b/tools/src/nvme.h new file mode 100644 index 0000000..8989a60 --- /dev/null +++ b/tools/src/nvme.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef NVME_H +#define NVME_H + +#include "types.h" + +bool nvme_init(void); +void nvme_shutdown(void); + +bool nvme_flush(u32 nsid); +bool nvme_read(u32 nsid, u64 lba, void *buffer); + +#endif diff --git a/tools/src/payload.c b/tools/src/payload.c new file mode 100644 index 0000000..69c9129 --- /dev/null +++ b/tools/src/payload.c @@ -0,0 +1,281 @@ +/* SPDX-License-Identifier: MIT */ + +#include "payload.h" +#include "adt.h" +#include "assert.h" +#include "chainload.h" +#include "display.h" +#include "heapblock.h" +#include "kboot.h" +#include "smp.h" +#include "utils.h" + +#include "libfdt/libfdt.h" +#include "minilzlib/minlzma.h" +#include "tinf/tinf.h" + +// Kernels must be 2MB aligned +#define KERNEL_ALIGN (2 << 20) + +static const u8 gz_magic[] = {0x1f, 0x8b}; +static const u8 xz_magic[] = 
{0xfd, '7', 'z', 'X', 'Z', 0x00}; +static const u8 fdt_magic[] = {0xd0, 0x0d, 0xfe, 0xed}; +static const u8 kernel_magic[] = {'A', 'R', 'M', 0x64}; // at 0x38 +static const u8 cpio_magic[] = {'0', '7', '0', '7', '0'}; // '1' or '2' next +static const u8 img4_magic[] = {0x16, 0x04, 'I', 'M', 'G', '4'}; // IA5String 'IMG4' +static const u8 sig_magic[] = {'m', '1', 'n', '1', '_', 's', 'i', 'g'}; +static const u8 empty[] = {0, 0, 0, 0}; + +static char expect_compatible[256]; +static struct kernel_header *kernel = NULL; +static void *fdt = NULL; +static char *chainload_spec = NULL; + +static void *load_one_payload(void *start, size_t size); + +static void finalize_uncompression(void *dest, size_t dest_len) +{ + // Actually reserve the space. malloc is safe after this, but... + assert(dest == heapblock_alloc_aligned(dest_len, KERNEL_ALIGN)); + + void *end = ((u8 *)dest) + dest_len; + void *next = load_one_payload(dest, dest_len); + assert(!next || next >= dest); + + // If the payload needs padding, we need to reserve more, so it better have not used + // malloc either. + if (next > end) { + // Explicitly *un*aligned or it'll fail this assert, since 64b alignment is the default + assert(end == heapblock_alloc_aligned((u8 *)next - (u8 *)end, 1)); + } +} + +static void *decompress_gz(void *p, size_t size) +{ + unsigned int source_len = size, dest_len = 1 << 30; // 1 GiB should be enough hopefully + + // Start at the end of the heap area, no allocation yet. The following code must not use + // malloc or heapblock, until finalize_uncompression is called. + void *dest = heapblock_alloc_aligned(0, KERNEL_ALIGN); + + printf("Uncompressing... 
"); + int ret = tinf_gzip_uncompress(dest, &dest_len, p, &source_len); + + if (ret != TINF_OK) { + printf("Error %d\n", ret); + return NULL; + } + + printf("%d bytes uncompressed to %d bytes\n", source_len, dest_len); + + finalize_uncompression(dest, dest_len); + + return ((u8 *)p) + source_len; +} + +static void *decompress_xz(void *p, size_t size) +{ + uint32_t source_len = size, dest_len = 1 << 30; // 1 GiB should be enough hopefully + + // Start at the end of the heap area, no allocation yet. The following code must not use + // malloc or heapblock, until finalize_uncompression is called. + void *dest = heapblock_alloc_aligned(0, KERNEL_ALIGN); + + printf("Uncompressing... "); + int ret = XzDecode(p, &source_len, dest, &dest_len); + + if (!ret) { + printf("XZ decode failed\n"); + return NULL; + } + + printf("%d bytes uncompressed to %d bytes\n", source_len, dest_len); + + finalize_uncompression(dest, dest_len); + + return ((u8 *)p) + source_len; +} + +static void *load_fdt(void *p, size_t size) +{ + if (fdt_node_check_compatible(p, 0, expect_compatible) == 0) { + printf("Found a devicetree for %s at %p\n", expect_compatible, p); + fdt = p; + } + assert(!size || size == fdt_totalsize(p)); + return ((u8 *)p) + fdt_totalsize(p); +} + +static void *load_cpio(void *p, size_t size) +{ + if (!size) { + // We could handle this, but who uses uncompressed initramfs? + printf("Uncompressed cpio archives not supported\n"); + return NULL; + } + + kboot_set_initrd(p, size); + return ((u8 *)p) + size; +} + +static void *load_kernel(void *p, size_t size) +{ + kernel = p; + + assert(size <= kernel->image_size); + + // If this is an in-line kernel, it's probably not aligned, so we need to make a copy + if (((u64)kernel) & (KERNEL_ALIGN - 1)) { + void *new_addr = heapblock_alloc_aligned(kernel->image_size, KERNEL_ALIGN); + memcpy(new_addr, kernel, size ? 
size : kernel->image_size); + kernel = new_addr; + } + + /* + * Kernel blobs unfortunately do not have an accurate file size header, so + * this will fail for in-line payloads. However, conversely, this is required for + * compressed payloads, in order to allocate padding that the kernel needs, which will be + * beyond the end of the compressed data. So if we know the input size, tell the caller + * about the true image size; otherwise don't. + */ + if (size) { + return ((u8 *)p) + kernel->image_size; + } else { + return NULL; + } +} + +#define MAX_VAR_NAME 64 +#define MAX_VAR_SIZE 1024 + +#define IS_VAR(x) !strncmp((char *)*p, x, strlen(x)) + +#define MAX_CHOSEN_VARS 16 + +static size_t chosen_cnt = 0; +static char *chosen[MAX_CHOSEN_VARS]; + +static bool check_var(u8 **p) +{ + char *val = memchr(*p, '=', strnlen((char *)*p, MAX_VAR_NAME + 1)); + if (!val) + return false; + + val++; + + char *end = memchr(val, '\n', strnlen(val, MAX_VAR_SIZE + 1)); + if (!end) + return false; + + *end = 0; + printf("Found a variable at %p: %s\n", *p, (char *)*p); + + if (IS_VAR("chosen.")) { + if (chosen_cnt >= MAX_CHOSEN_VARS) + printf("Too many chosen vars, ignoring %s\n", *p); + else + chosen[chosen_cnt++] = (char *)*p; + } else if (IS_VAR("chainload=")) { + chainload_spec = val; + } else if (IS_VAR("display=")) { + display_configure(val); + } else { + printf("Unknown variable %s\n", *p); + } + + *p = (u8 *)(end + 1); + return true; +} + +static void *load_one_payload(void *start, size_t size) +{ + u8 *p = start; + + if (!start) + return NULL; + + if (!memcmp(p, gz_magic, sizeof gz_magic)) { + printf("Found a gzip compressed payload at %p\n", p); + return decompress_gz(p, size); + } else if (!memcmp(p, xz_magic, sizeof xz_magic)) { + printf("Found an XZ compressed payload at %p\n", p); + return decompress_xz(p, size); + } else if (!memcmp(p, fdt_magic, sizeof fdt_magic)) { + return load_fdt(p, size); + } else if (!memcmp(p, cpio_magic, sizeof cpio_magic)) { + printf("Found a 
cpio initramfs at %p\n", p); + return load_cpio(p, size); + } else if (!memcmp(p + 0x38, kernel_magic, sizeof kernel_magic)) { + printf("Found a kernel at %p\n", p); + return load_kernel(p, size); + } else if (!memcmp(p, sig_magic, sizeof sig_magic)) { + u32 size; + memcpy(&size, p + 8, 4); + + printf("Found a m1n1 signature at %p, skipping 0x%x bytes\n", p, size); + return p + size; + } else if (check_var(&p)) { + return p; + } else if (!memcmp(p, empty, sizeof empty) || + !memcmp(p + 0x05, img4_magic, sizeof img4_magic)) { // SEPFW after m1n1 + printf("No more payloads at %p\n", p); + return NULL; + } else { + printf("Unknown payload at %p (magic: %02x%02x%02x%02x)\n", p, p[0], p[1], p[2], p[3]); + return NULL; + } +} + +int payload_run(void) +{ + const char *target = adt_getprop(adt, 0, "target-type", NULL); + if (target) { + strcpy(expect_compatible, "apple,"); + char *p = expect_compatible + strlen(expect_compatible); + while (*target && p != expect_compatible + sizeof(expect_compatible) - 1) { + *p++ = tolower(*target++); + } + *p = 0; + printf("Devicetree compatible value: %s\n", expect_compatible); + } else { + printf("Cannot find target type! 
%p %p\n", target, adt); + return -1; + } + + chosen_cnt = 0; + + void *p = _payload_start; + + while (p) + p = load_one_payload(p, 0); + + if (chainload_spec) { + return chainload_load(chainload_spec, chosen, chosen_cnt); + } + + if (kernel && fdt) { + smp_start_secondaries(); + + for (size_t i = 0; i < chosen_cnt; i++) { + char *val = memchr(chosen[i], '=', MAX_VAR_NAME + 1); + + assert(val); + val[0] = 0; // Terminate var name + if (kboot_set_chosen(chosen[i] + 7, val + 1) < 0) + printf("Failed to kboot set %s='%s'\n", chosen[i], val); + } + + if (kboot_prepare_dt(fdt)) { + printf("Failed to prepare FDT!\n"); + return -1; + } + + return kboot_boot(kernel); + } else if (kernel && !fdt) { + printf("ERROR: Kernel found but no devicetree for %s available.\n", expect_compatible); + } else if (!kernel && fdt) { + printf("ERROR: Devicetree found but no kernel.\n"); + } + + return -1; +} diff --git a/tools/src/payload.h b/tools/src/payload.h new file mode 100644 index 0000000..8e6aa72 --- /dev/null +++ b/tools/src/payload.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef __PAYLOAD_H__ +#define __PAYLOAD_H__ + +int payload_run(void); + +#endif diff --git a/tools/src/pcie.c b/tools/src/pcie.c new file mode 100644 index 0000000..39d6a23 --- /dev/null +++ b/tools/src/pcie.c @@ -0,0 +1,388 @@ +/* SPDX-License-Identifier: MIT */ + +#include "adt.h" +#include "pcie.h" +#include "pmgr.h" +#include "tunables.h" +#include "utils.h" + +/* + * The ADT uses 17 register sets: + * + * 0: 90000000 00000006 10000000 00000000 ECAM + * 1: 80000000 00000006 00040000 00000000 RC + * 2: 80080000 00000006 00090000 00000000 PHY + * 3: 800c0000 00000006 00020000 00000000 PHY IP + * 4: 8c000000 00000006 00004000 00000000 AXI + * 5: 3d2bc000 00000000 00001000 00000000 fuses + * 6: 81000000 00000006 00008000 00000000 port 0 config + * 7: 81010000 00000006 00001000 00000000 port 0 LTSSM debug + * 8: 80084000 00000006 00004000 00000000 port 0 PHY + * 9: 800c8000 00000006 00016610 
00000000 port 0 PHY IP + <macOS 12.0 RC and later add a per-port Intr2AXI reg here> + * 10: 82000000 00000006 00008000 00000000 port 1 config + * 11: 82010000 00000006 00001000 00000000 port 1 LTSSM debug + * 12: 80088000 00000006 00004000 00000000 port 1 PHY + * 13: 800d0000 00000006 00006000 00000000 port 1 PHY IP + <...> + * 14: 83000000 00000006 00008000 00000000 port 2 config + * 15: 83010000 00000006 00001000 00000000 port 2 LTSSM debug + * 16: 8008c000 00000006 00004000 00000000 port 2 PHY + * 17: 800d8000 00000006 00006000 00000000 port 2 PHY IP + <...> + */ + +/* PHY registers */ + +#define APCIE_PHY_CTRL 0x000 +#define APCIE_PHY_CTRL_CLK0REQ BIT(0) +#define APCIE_PHY_CTRL_CLK1REQ BIT(1) +#define APCIE_PHY_CTRL_CLK0ACK BIT(2) +#define APCIE_PHY_CTRL_CLK1ACK BIT(3) +#define APCIE_PHY_CTRL_RESET BIT(7) + +#define APCIE_PHYIF_CTRL 0x024 +#define APCIE_PHYIF_CTRL_RUN BIT(0) + +/* Port registers */ + +#define APCIE_PORT_LINKSTS 0x208 +#define APCIE_PORT_LINKSTS_BUSY BIT(2) + +#define APCIE_PORT_APPCLK 0x800 +#define APCIE_PORT_APPCLK_EN BIT(0) + +#define APCIE_PORT_STATUS 0x804 +#define APCIE_PORT_STATUS_RUN BIT(0) + +#define APCIE_PORT_RESET 0x814 +#define APCIE_PORT_RESET_DIS BIT(0) + +/* PCIe capability registers */ +#define PCIE_CAP_BASE 0x70 +#define PCIE_LNKCAP 0x0c +#define PCIE_LNKCAP_SLS GENMASK(3, 0) +#define PCIE_LNKCAP2 0x2c +#define PCIE_LNKCAP2_SLS GENMASK(6, 1) +#define PCIE_LNKCTL2 0x30 +#define PCIE_LNKCTL2_TLS GENMASK(3, 0) + +/* DesignWare PCIe Core registers */ + +#define DWC_DBI_RO_WR 0x8bc +#define DWC_DBI_RO_WR_EN BIT(0) + +#define DWC_DBI_LINK_WIDTH_SPEED_CONTROL 0x80c +#define DWC_DBI_SPEED_CHANGE BIT(17) + +struct fuse_bits { + u16 src_reg; + u16 tgt_reg; + u8 src_bit; + u8 tgt_bit; + u8 width; +}; + +const struct fuse_bits pcie_fuse_bits_t8103[] = { + {0x0084, 0x6238, 4, 0, 6}, {0x0084, 0x6220, 10, 14, 3}, {0x0084, 0x62a4, 13, 17, 2}, + {0x0418, 0x522c, 27, 9, 2}, {0x0418, 0x522c, 13, 12, 3}, {0x0418, 0x5220, 18, 14, 3}, + {0x0418, 
0x52a4, 21, 17, 2}, {0x0418, 0x522c, 23, 16, 5}, {0x0418, 0x5278, 23, 20, 3}, + {0x0418, 0x5018, 31, 2, 1}, {0x041c, 0x1204, 0, 2, 5}, {}, +}; + +const struct fuse_bits pcie_fuse_bits_t6000[] = { + {0x004c, 0x1004, 3, 2, 5}, {0x0048, 0x522c, 26, 16, 5}, {0x0048, 0x522c, 29, 9, 2}, + {0x0048, 0x522c, 26, 12, 3}, {0x0048, 0x522c, 26, 16, 5}, {0x0048, 0x52a4, 24, 17, 2}, + {0x004c, 0x5018, 2, 3, 1}, {0x0048, 0x50a4, 14, 17, 2}, {0x0048, 0x62a4, 14, 17, 2}, + {0x0048, 0x6220, 8, 14, 3}, {0x0048, 0x6238, 2, 0, 6}, {}, +}; + +/* clang-format off */ +const struct fuse_bits pcie_fuse_bits_t8112[] = { + {0x0490, 0x6238, 0, 0, 6}, {0x0490, 0x6220, 6, 14, 3}, {0x0490, 0x62a4, 12, 17, 2}, + {0x0490, 0x5018, 14, 2, 1}, {0x0490, 0x5220, 15, 14, 3}, {0x0490, 0x52a4, 18, 17, 2}, + {0x0490, 0x5278, 20, 20, 3}, {0x0490, 0x522c, 23, 12, 3}, {0x0490, 0x522c, 26, 9, 2}, + {0x0490, 0x522c, 28, 16, 4}, {0x0494, 0x522c, 0, 20, 1}, {0x0494, 0x1204, 5, 2, 5}, + {}, +}; +/* clang-format on */ + +static bool pcie_initialized = false; +static u64 rc_base; +static u64 phy_base; +static u64 phy_ip_base; +static u64 fuse_base; +static u32 port_count; +static u64 port_base[8]; + +#define SHARED_REG_COUNT 6 + +int pcie_init(void) +{ + const char *path = "/arm-io/apcie"; + int adt_path[8]; + int adt_offset; + const struct fuse_bits *fuse_bits; + + if (pcie_initialized) + return 0; + + adt_offset = adt_path_offset_trace(adt, path, adt_path); + if (adt_offset < 0) { + printf("pcie: Error getting node %s\n", path); + return -1; + } + + if (adt_is_compatible(adt, adt_offset, "apcie,t8103")) { + fuse_bits = pcie_fuse_bits_t8103; + printf("pcie: Initializing t8103 PCIe controller\n"); + } else if (adt_is_compatible(adt, adt_offset, "apcie,t6000")) { + fuse_bits = pcie_fuse_bits_t6000; + printf("pcie: Initializing t6000 PCIe controller\n"); + } else if (adt_is_compatible(adt, adt_offset, "apcie,t8112")) { + fuse_bits = pcie_fuse_bits_t8112; + printf("pcie: Initializing t8112 PCIe controller\n"); + } else { 
+ printf("pcie: Unsupported compatible\n"); + return -1; + } + + if (ADT_GETPROP(adt, adt_offset, "#ports", &port_count) < 0) { + printf("pcie: Error getting port count for %s\n", path); + return -1; + } + + u64 config_base; + if (adt_get_reg(adt, adt_path, "reg", 0, &config_base, NULL)) { + printf("pcie: Error getting reg with index %d for %s\n", 0, path); + return -1; + } + + if (adt_get_reg(adt, adt_path, "reg", 1, &rc_base, NULL)) { + printf("pcie: Error getting reg with index %d for %s\n", 1, path); + return -1; + } + + if (adt_get_reg(adt, adt_path, "reg", 2, &phy_base, NULL)) { + printf("pcie: Error getting reg with index %d for %s\n", 2, path); + return -1; + } + + if (adt_get_reg(adt, adt_path, "reg", 3, &phy_ip_base, NULL)) { + printf("pcie: Error getting reg with index %d for %s\n", 3, path); + return -1; + } + + if (adt_get_reg(adt, adt_path, "reg", 5, &fuse_base, NULL)) { + printf("pcie: Error getting reg with index %d for %s\n", 5, path); + return -1; + } + + u32 reg_len; + if (!adt_getprop(adt, adt_offset, "reg", ®_len)) { + printf("pcie: Error getting reg length for %s\n", path); + return -1; + } + + int port_regs = (reg_len / 16) - SHARED_REG_COUNT; + + if (port_regs % port_count) { + printf("pcie: %d port registers do not evenly divide into %d ports\n", port_regs, + port_count); + return -1; + } + + int port_reg_cnt = port_regs / port_count; + printf("pcie: ADT uses %d reg entries per port\n", port_reg_cnt); + + if (pmgr_adt_power_enable(path)) { + printf("pcie: Error enabling power for %s\n", path); + return -1; + } + + if (tunables_apply_local(path, "apcie-axi2af-tunables", 4)) { + printf("pcie: Error applying %s for %s\n", "apcie-axi2af-tunables", path); + return -1; + } + + /* ??? */ + write32(rc_base + 0x4, 0); + + if (tunables_apply_local(path, "apcie-common-tunables", 1)) { + printf("pcie: Error applying %s for %s\n", "apcie-common-tunables", path); + return -1; + } + + /* + * Initialize PHY. 
+ */ + + if (tunables_apply_local(path, "apcie-phy-tunables", 2)) { + printf("pcie: Error applying %s for %s\n", "apcie-phy-tunables", path); + return -1; + } + + set32(phy_base + APCIE_PHY_CTRL, APCIE_PHY_CTRL_CLK0REQ); + if (poll32(phy_base + APCIE_PHY_CTRL, APCIE_PHY_CTRL_CLK0ACK, APCIE_PHY_CTRL_CLK0ACK, 50000)) { + printf("pcie: Timeout enabling PHY CLK0\n"); + return -1; + } + + set32(phy_base + APCIE_PHY_CTRL, APCIE_PHY_CTRL_CLK1REQ); + if (poll32(phy_base + APCIE_PHY_CTRL, APCIE_PHY_CTRL_CLK1ACK, APCIE_PHY_CTRL_CLK1ACK, 50000)) { + printf("pcie: Timeout enabling PHY CLK1\n"); + return -1; + } + + clear32(phy_base + APCIE_PHY_CTRL, APCIE_PHY_CTRL_RESET); + udelay(1); + + /* ??? */ + set32(rc_base + APCIE_PHYIF_CTRL, APCIE_PHYIF_CTRL_RUN); + udelay(1); + + /* Apply "fuses". */ + for (int i = 0; fuse_bits[i].width; i++) { + u32 fuse; + fuse = (read32(fuse_base + fuse_bits[i].src_reg) >> fuse_bits[i].src_bit); + fuse &= (1 << fuse_bits[i].width) - 1; + mask32(phy_ip_base + fuse_bits[i].tgt_reg, + ((1 << fuse_bits[i].width) - 1) << fuse_bits[i].tgt_bit, + fuse << fuse_bits[i].tgt_bit); + } + + if (tunables_apply_local(path, "apcie-phy-ip-pll-tunables", 3)) { + printf("pcie: Error applying %s for %s\n", "apcie-phy-ip-pll-tunables", path); + return -1; + } + if (tunables_apply_local(path, "apcie-phy-ip-auspma-tunables", 3)) { + printf("pcie: Error applying %s for %s\n", "apcie-phy-ip-auspma-tunables", path); + return -1; + } + + for (u32 port = 0; port < port_count; port++) { + char bridge[64]; + int bridge_offset; + + /* + * Initialize RC port. 
+ */ + + snprintf(bridge, sizeof(bridge), "/arm-io/apcie/pci-bridge%d", port); + + if ((bridge_offset = adt_path_offset(adt, bridge)) < 0) + continue; + + printf("pcie: Initializing port %d\n", port); + + if (adt_get_reg(adt, adt_path, "reg", port * port_reg_cnt + SHARED_REG_COUNT, + &port_base[port], NULL)) { + printf("pcie: Error getting reg with index %d for %s\n", + port * port_reg_cnt + SHARED_REG_COUNT, path); + return -1; + } + + if (tunables_apply_local_addr(bridge, "apcie-config-tunables", port_base[port])) { + printf("pcie: Error applying %s for %s\n", "apcie-config-tunables", bridge); + return -1; + } + + set32(port_base[port] + APCIE_PORT_APPCLK, APCIE_PORT_APPCLK_EN); + + /* PERSTN */ + set32(port_base[port] + APCIE_PORT_RESET, APCIE_PORT_RESET_DIS); + + if (poll32(port_base[port] + APCIE_PORT_STATUS, APCIE_PORT_STATUS_RUN, + APCIE_PORT_STATUS_RUN, 250000)) { + printf("pcie: Port failed to come up on %s\n", bridge); + return -1; + } + + if (poll32(port_base[port] + APCIE_PORT_LINKSTS, APCIE_PORT_LINKSTS_BUSY, 0, 250000)) { + printf("pcie: Port failed to become idle on %s\n", bridge); + return -1; + } + + /* Make Designware PCIe Core registers writable. */ + set32(config_base + DWC_DBI_RO_WR, DWC_DBI_RO_WR_EN); + + if (tunables_apply_local_addr(bridge, "pcie-rc-tunables", config_base)) { + printf("pcie: Error applying %s for %s\n", "pcie-rc-tunables", bridge); + return -1; + } + if (tunables_apply_local_addr(bridge, "pcie-rc-gen3-shadow-tunables", config_base)) { + printf("pcie: Error applying %s for %s\n", "pcie-rc-gen3-shadow-tunables", bridge); + return -1; + } + if (tunables_apply_local_addr(bridge, "pcie-rc-gen4-shadow-tunables", config_base)) { + printf("pcie: Error applying %s for %s\n", "pcie-rc-gen4-shadow-tunables", bridge); + return -1; + } + + u32 max_speed; + if (ADT_GETPROP(adt, bridge_offset, "maximum-link-speed", &max_speed) >= 0) { + /* Apple changed how they announce the link speed for the 10gb nic + * at the latest in MacOS 12.3. 
The "lan-10gb" subnode has now a + * "target-link-speed" property and "maximum-link-speed" remains + * at 1. + */ + int lan_10gb = adt_subnode_offset(adt, bridge_offset, "lan-10gb"); + if (lan_10gb > 0) { + int target_speed; + if (ADT_GETPROP(adt, lan_10gb, "target-link-speed", &target_speed) >= 0) { + if (target_speed > 0) + max_speed = target_speed; + } + } + + printf("pcie: Port %d max speed = %d\n", port, max_speed); + + if (max_speed == 0) { + printf("pcie: Invalid max-speed\n"); + return -1; + } + + mask32(config_base + PCIE_CAP_BASE + PCIE_LNKCAP, PCIE_LNKCAP_SLS, + FIELD_PREP(PCIE_LNKCAP_SLS, max_speed)); + + mask32(config_base + PCIE_CAP_BASE + PCIE_LNKCAP2, PCIE_LNKCAP2_SLS, + FIELD_PREP(PCIE_LNKCAP2_SLS, (1 << max_speed) - 1)); + + mask16(config_base + PCIE_CAP_BASE + PCIE_LNKCTL2, PCIE_LNKCTL2_TLS, + FIELD_PREP(PCIE_LNKCTL2_TLS, max_speed)); + } + + set32(config_base + DWC_DBI_LINK_WIDTH_SPEED_CONTROL, DWC_DBI_SPEED_CHANGE); + + /* Make Designware PCIe Core registers readonly. */ + clear32(config_base + DWC_DBI_RO_WR, DWC_DBI_RO_WR_EN); + + /* Move to the next PCIe device on this bus. 
*/ + config_base += (1 << 15); + } + + pcie_initialized = true; + printf("pcie: Initialized.\n"); + + return 0; +} + +int pcie_shutdown(void) +{ + if (!pcie_initialized) + return 0; + + for (u32 port = 0; port < port_count; port++) { + clear32(port_base[port] + APCIE_PORT_RESET, APCIE_PORT_RESET_DIS); + clear32(port_base[port] + APCIE_PORT_APPCLK, APCIE_PORT_APPCLK_EN); + } + + clear32(phy_base + APCIE_PHY_CTRL, APCIE_PHY_CTRL_RESET); + clear32(phy_base + APCIE_PHY_CTRL, APCIE_PHY_CTRL_CLK1REQ); + clear32(phy_base + APCIE_PHY_CTRL, APCIE_PHY_CTRL_CLK0REQ); + + pcie_initialized = false; + printf("pcie: Shutdown.\n"); + + return 0; +} diff --git a/tools/src/pcie.h b/tools/src/pcie.h new file mode 100644 index 0000000..e33d59d --- /dev/null +++ b/tools/src/pcie.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef PCIE_H +#define PCIE_H + +int pcie_init(void); +int pcie_shutdown(void); + +#endif diff --git a/tools/src/pmgr.c b/tools/src/pmgr.c new file mode 100644 index 0000000..0ae3888 --- /dev/null +++ b/tools/src/pmgr.c @@ -0,0 +1,358 @@ +/* SPDX-License-Identifier: MIT */ + +#include "pmgr.h" +#include "adt.h" +#include "string.h" +#include "types.h" +#include "utils.h" + +#define PMGR_RESET BIT(31) +#define PMGR_AUTO_ENABLE BIT(28) +#define PMGR_PS_AUTO GENMASK(27, 24) +#define PMGR_PARENT_OFF BIT(11) +#define PMGR_DEV_DISABLE BIT(10) +#define PMGR_WAS_CLKGATED BIT(9) +#define PMGR_WAS_PWRGATED BIT(8) +#define PMGR_PS_ACTUAL GENMASK(7, 4) +#define PMGR_PS_TARGET GENMASK(3, 0) + +#define PMGR_PS_ACTIVE 0xf +#define PMGR_PS_CLKGATE 0x4 +#define PMGR_PS_PWRGATE 0x0 + +#define PMGR_POLL_TIMEOUT 10000 + +#define PMGR_FLAG_VIRTUAL 0x10 + +struct pmgr_device { + u32 flags; + u16 parent[2]; + u8 unk1[2]; + u8 addr_offset; + u8 psreg_idx; + u8 unk2[14]; + u16 id; + u8 unk3[4]; + const char name[0x10]; +} PACKED; + +static int pmgr_initialized = 0; + +static int pmgr_path[8]; +static int pmgr_offset; +static int pmgr_dies; + +static const u32 *pmgr_ps_regs = 
NULL; +static u32 pmgr_ps_regs_len = 0; + +static const struct pmgr_device *pmgr_devices = NULL; +static u32 pmgr_devices_len = 0; + +static uintptr_t pmgr_get_psreg(u8 idx) +{ + if (idx * 12 >= pmgr_ps_regs_len) { + printf("pmgr: Index %d is out of bounds for ps-regs\n", idx); + return 0; + } + + u32 reg_idx = pmgr_ps_regs[3 * idx]; + u32 reg_offset = pmgr_ps_regs[3 * idx + 1]; + + u64 pmgr_reg; + if (adt_get_reg(adt, pmgr_path, "reg", reg_idx, &pmgr_reg, NULL) < 0) { + printf("pmgr: Error getting /arm-io/pmgr regs\n"); + return 0; + } + + return pmgr_reg + reg_offset; +} + +static int pmgr_set_mode(uintptr_t addr, u8 target_mode) +{ + mask32(addr, PMGR_PS_TARGET, FIELD_PREP(PMGR_PS_TARGET, target_mode)); + if (poll32(addr, PMGR_PS_ACTUAL, FIELD_PREP(PMGR_PS_ACTUAL, target_mode), PMGR_POLL_TIMEOUT) < + 0) { + printf("pmgr: timeout while trying to set mode %x for device at 0x%lx: %x\n", target_mode, + addr, read32(addr)); + return -1; + } + + return 0; +} + +static int pmgr_find_device(u16 id, const struct pmgr_device **device) +{ + for (size_t i = 0; i < pmgr_devices_len; ++i) { + const struct pmgr_device *i_device = &pmgr_devices[i]; + if (i_device->id != id) + continue; + + *device = i_device; + return 0; + } + + return -1; +} + +static uintptr_t pmgr_device_get_addr(u8 die, const struct pmgr_device *device) +{ + uintptr_t addr = pmgr_get_psreg(device->psreg_idx); + if (addr == 0) + return 0; + + addr += PMGR_DIE_OFFSET * die; + + addr += (device->addr_offset << 3); + return addr; +} + +static int pmgr_set_mode_recursive(u8 die, u16 id, u8 target_mode, bool recurse) +{ + if (!pmgr_initialized) { + printf("pmgr: pmgr_set_mode_recursive() called before successful pmgr_init()\n"); + return -1; + } + + if (id == 0) + return -1; + + const struct pmgr_device *device; + + if (pmgr_find_device(id, &device)) + return -1; + + if (!(device->flags & PMGR_FLAG_VIRTUAL)) { + uintptr_t addr = pmgr_device_get_addr(die, device); + if (!addr) + return -1; + if 
(pmgr_set_mode(addr, target_mode)) + return -1; + } + if (!recurse) + return 0; + + for (int i = 0; i < 2; i++) { + if (device->parent[i]) { + u16 parent = FIELD_GET(PMGR_DEVICE_ID, device->parent[i]); + int ret = pmgr_set_mode_recursive(die, parent, target_mode, true); + if (ret < 0) + return ret; + } + } + + return 0; +} + +int pmgr_power_enable(u32 id) +{ + u16 device = FIELD_GET(PMGR_DEVICE_ID, id); + u8 die = FIELD_GET(PMGR_DIE_ID, id); + return pmgr_set_mode_recursive(die, device, PMGR_PS_ACTIVE, true); +} + +int pmgr_power_disable(u32 id) +{ + u16 device = FIELD_GET(PMGR_DEVICE_ID, id); + u8 die = FIELD_GET(PMGR_DIE_ID, id); + return pmgr_set_mode_recursive(die, device, PMGR_PS_PWRGATE, false); +} + +static int pmgr_adt_find_devices(const char *path, const u32 **devices, u32 *n_devices) +{ + int node_offset = adt_path_offset(adt, path); + if (node_offset < 0) { + printf("pmgr: Error getting node %s\n", path); + return -1; + } + + *devices = adt_getprop(adt, node_offset, "clock-gates", n_devices); + if (*devices == NULL || *n_devices == 0) { + printf("pmgr: Error getting %s clock-gates.\n", path); + return -1; + } + + *n_devices /= 4; + + return 0; +} + +static int pmgr_adt_devices_set_mode(const char *path, u8 target_mode, int recurse) +{ + const u32 *devices; + u32 n_devices; + int ret = 0; + + if (pmgr_adt_find_devices(path, &devices, &n_devices) < 0) + return -1; + + for (u32 i = 0; i < n_devices; ++i) { + u16 device = FIELD_GET(PMGR_DEVICE_ID, devices[i]); + u8 die = FIELD_GET(PMGR_DIE_ID, devices[i]); + if (pmgr_set_mode_recursive(die, device, target_mode, recurse)) + ret = -1; + } + + return ret; +} + +int pmgr_adt_power_enable(const char *path) +{ + int ret = pmgr_adt_devices_set_mode(path, PMGR_PS_ACTIVE, true); + return ret; +} + +int pmgr_adt_power_disable(const char *path) +{ + return pmgr_adt_devices_set_mode(path, PMGR_PS_PWRGATE, false); +} + +static int pmgr_reset_device(int die, const struct pmgr_device *dev) +{ + if (die < 0 || die > 16) { + 
printf("pmgr: invalid die id %d for device %s\n", die, dev->name);
+        return -1;
+    }
+
+    uintptr_t addr = pmgr_device_get_addr(die, dev);
+
+    /* Refuse to reset a device that is not currently powered on. */
+    u32 reg = read32(addr);
+    if (FIELD_GET(PMGR_PS_ACTUAL, reg) != PMGR_PS_ACTIVE) {
+        printf("pmgr: will not reset disabled device %d.%s\n", die, dev->name);
+        return -1;
+    }
+
+    printf("pmgr: resetting device %d.%s\n", die, dev->name);
+
+    set32(addr, PMGR_DEV_DISABLE);
+    set32(addr, PMGR_RESET);
+    udelay(10);
+    clear32(addr, PMGR_RESET);
+    clear32(addr, PMGR_DEV_DISABLE);
+
+    return 0;
+}
+
+/* Reset every device listed in the clock-gates property of the given ADT node.
+ * Returns 0 on success, -1 if any device could not be found or reset. */
+int pmgr_adt_reset(const char *path)
+{
+    const u32 *devices;
+    u32 n_devices;
+    int ret = 0;
+
+    if (pmgr_adt_find_devices(path, &devices, &n_devices) < 0)
+        return -1;
+
+    for (u32 i = 0; i < n_devices; ++i) {
+        const struct pmgr_device *device;
+        u16 id = FIELD_GET(PMGR_DEVICE_ID, devices[i]);
+        u8 die = FIELD_GET(PMGR_DIE_ID, devices[i]);
+
+        if (pmgr_find_device(id, &device)) {
+            ret = -1;
+            continue;
+        }
+
+        if (pmgr_reset_device(die, device))
+            ret = -1;
+    }
+
+    return ret;
+}
+
+/* Reset a device looked up by name (names are at most 0x10 bytes in the table). */
+int pmgr_reset(int die, const char *name)
+{
+    const struct pmgr_device *dev = NULL;
+
+    for (unsigned int i = 0; i < pmgr_devices_len; ++i) {
+        if (strncmp(pmgr_devices[i].name, name, 0x10) == 0) {
+            dev = &pmgr_devices[i];
+            break;
+        }
+    }
+
+    if (!dev)
+        return -1;
+
+    return pmgr_reset_device(die, dev);
+}
+
+int pmgr_init(void)
+{
+    int node = adt_path_offset(adt, "/arm-io");
+    if (node < 0) {
+        printf("pmgr: Error getting /arm-io node\n");
+        return -1;
+    }
+    /* Default to a single die if the ADT does not say otherwise. */
+    if (ADT_GETPROP(adt, node, "die-count", &pmgr_dies) < 0)
+        pmgr_dies = 1;
+
+    pmgr_offset = adt_path_offset_trace(adt, "/arm-io/pmgr", pmgr_path);
+    if (pmgr_offset < 0) {
+        printf("pmgr: Error getting /arm-io/pmgr node\n");
+        return -1;
+    }
+
+    pmgr_ps_regs = adt_getprop(adt, pmgr_offset, "ps-regs", &pmgr_ps_regs_len);
+    if (pmgr_ps_regs == NULL || pmgr_ps_regs_len == 0) {
+        /* Fixed log message: was "ps-regs\n." with the newline before the period. */
+        printf("pmgr: Error getting /arm-io/pmgr ps-regs.\n");
+        return -1;
+    }
+
+    pmgr_devices = adt_getprop(adt, pmgr_offset, 
"devices", &pmgr_devices_len); + if (pmgr_devices == NULL || pmgr_devices_len == 0) { + printf("pmgr: Error getting /arm-io/pmgr devices.\n"); + return -1; + } + + pmgr_devices_len /= sizeof(*pmgr_devices); + pmgr_initialized = 1; + + printf("pmgr: Cleaning up device states...\n"); + + for (u8 die = 0; die < pmgr_dies; ++die) { + for (size_t i = 0; i < pmgr_devices_len; ++i) { + const struct pmgr_device *device = &pmgr_devices[i]; + + if ((device->flags & PMGR_FLAG_VIRTUAL)) + continue; + + uintptr_t addr = pmgr_device_get_addr(die, device); + if (!addr) + continue; + + u32 reg = read32(addr); + + if (reg & PMGR_AUTO_ENABLE || FIELD_GET(PMGR_PS_TARGET, reg) == PMGR_PS_ACTIVE) { + for (int j = 0; j < 2; j++) { + if (device->parent[j]) { + const struct pmgr_device *pdevice; + if (pmgr_find_device(device->parent[j], &pdevice)) { + printf("pmgr: Failed to find parent #%d for %s\n", device->parent[j], + device->name); + continue; + } + + if ((pdevice->flags & PMGR_FLAG_VIRTUAL)) + continue; + + addr = pmgr_device_get_addr(die, pdevice); + if (!addr) + continue; + + reg = read32(addr); + + if (!(reg & PMGR_AUTO_ENABLE) && + FIELD_GET(PMGR_PS_TARGET, reg) != PMGR_PS_ACTIVE) { + printf("pmgr: Enabling %d.%s, parent of active device %s\n", die, + pdevice->name, device->name); + pmgr_set_mode(addr, PMGR_PS_ACTIVE); + } + } + } + } + } + } + + printf("pmgr: initialized, %d devices on %u dies found.\n", pmgr_devices_len, pmgr_dies); + + return 0; +} diff --git a/tools/src/pmgr.h b/tools/src/pmgr.h new file mode 100644 index 0000000..5dcc939 --- /dev/null +++ b/tools/src/pmgr.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef PMGR_H +#define PMGR_H + +#include "types.h" + +#define PMGR_DIE_OFFSET 0x2000000000 + +#define PMGR_DEVICE_ID GENMASK(15, 0) +#define PMGR_DIE_ID GENMASK(31, 28) + +int pmgr_init(void); + +int pmgr_power_enable(u32 id); +int pmgr_power_disable(u32 id); + +int pmgr_adt_power_enable(const char *path); +int pmgr_adt_power_disable(const char 
*path); +int pmgr_adt_reset(const char *path); + +int pmgr_reset(int die, const char *name); + +#endif diff --git a/tools/src/proxy.c b/tools/src/proxy.c new file mode 100644 index 0000000..3925d7e --- /dev/null +++ b/tools/src/proxy.c @@ -0,0 +1,575 @@ +/* SPDX-License-Identifier: MIT */ + +#include "proxy.h" +#include "dapf.h" +#include "dart.h" +#include "display.h" +#include "exception.h" +#include "fb.h" +#include "gxf.h" +#include "heapblock.h" +#include "hv.h" +#include "iodev.h" +#include "kboot.h" +#include "malloc.h" +#include "mcc.h" +#include "memory.h" +#include "nvme.h" +#include "pcie.h" +#include "pmgr.h" +#include "smp.h" +#include "string.h" +#include "tunables.h" +#include "types.h" +#include "uart.h" +#include "uartproxy.h" +#include "usb.h" +#include "utils.h" +#include "xnuboot.h" + +#include "minilzlib/minlzma.h" +#include "tinf/tinf.h" + +int proxy_process(ProxyRequest *request, ProxyReply *reply) +{ + enum exc_guard_t guard_save = exc_guard; + + reply->opcode = request->opcode; + reply->status = S_OK; + reply->retval = 0; + switch (request->opcode) { + case P_NOP: + break; + case P_EXIT: + if (request->args[0]) + return request->args[0]; + return 1; + case P_CALL: { + generic_func *f = (generic_func *)request->args[0]; + reply->retval = f(request->args[1], request->args[2], request->args[3], + request->args[4], request->args[5]); + break; + } + case P_GET_BOOTARGS: + reply->retval = boot_args_addr; + break; + case P_GET_BASE: + reply->retval = (u64)_base; + break; + case P_SET_BAUD: { + int cnt = request->args[1]; + printf("Changing baud rate to %lu...\n", request->args[0]); + uart_setbaud(request->args[0]); + while (cnt--) { + uart_putbyte(request->args[2]); + uart_putbyte(request->args[2] >> 8); + uart_putbyte(request->args[2] >> 16); + uart_putbyte(request->args[2] >> 24); + } + break; + } + case P_UDELAY: + udelay(request->args[0]); + break; + case P_SET_EXC_GUARD: + exc_count = 0; + guard_save = request->args[0]; + break; + case 
P_GET_EXC_COUNT: + reply->retval = exc_count; + exc_count = 0; + break; + case P_EL0_CALL: + reply->retval = el0_call((void *)request->args[0], request->args[1], request->args[2], + request->args[3], request->args[4]); + break; + case P_EL1_CALL: + reply->retval = el1_call((void *)request->args[0], request->args[1], request->args[2], + request->args[3], request->args[4]); + break; + case P_VECTOR: + // forcefully restore tps6598x IRQs + usb_hpm_restore_irqs(1); + iodev_console_flush(); + next_stage.entry = (generic_func *)request->args[0]; + memcpy(next_stage.args, &request->args[1], 5 * sizeof(u64)); + next_stage.restore_logo = true; + return 1; + case P_GL1_CALL: + reply->retval = gl1_call((void *)request->args[0], request->args[1], request->args[2], + request->args[3], request->args[4]); + break; + case P_GL2_CALL: + reply->retval = gl2_call((void *)request->args[0], request->args[1], request->args[2], + request->args[3], request->args[4]); + break; + case P_GET_SIMD_STATE: + get_simd_state((void *)request->args[0]); + break; + case P_PUT_SIMD_STATE: + put_simd_state((void *)request->args[0]); + break; + case P_REBOOT: + reboot(); + break; + + case P_WRITE64: + exc_guard = GUARD_SKIP; + write64(request->args[0], request->args[1]); + break; + case P_WRITE32: + exc_guard = GUARD_SKIP; + write32(request->args[0], request->args[1]); + break; + case P_WRITE16: + exc_guard = GUARD_SKIP; + write16(request->args[0], request->args[1]); + break; + case P_WRITE8: + exc_guard = GUARD_SKIP; + write8(request->args[0], request->args[1]); + break; + + case P_READ64: + exc_guard = GUARD_MARK; + reply->retval = read64(request->args[0]); + break; + case P_READ32: + exc_guard = GUARD_MARK; + reply->retval = read32(request->args[0]); + break; + case P_READ16: + exc_guard = GUARD_MARK; + reply->retval = read16(request->args[0]); + break; + case P_READ8: + exc_guard = GUARD_MARK; + reply->retval = read8(request->args[0]); + break; + + case P_SET64: + exc_guard = GUARD_MARK; + 
reply->retval = set64(request->args[0], request->args[1]); + break; + case P_SET32: + exc_guard = GUARD_MARK; + reply->retval = set32(request->args[0], request->args[1]); + break; + case P_SET16: + exc_guard = GUARD_MARK; + reply->retval = set16(request->args[0], request->args[1]); + break; + case P_SET8: + exc_guard = GUARD_MARK; + reply->retval = set8(request->args[0], request->args[1]); + break; + + case P_CLEAR64: + exc_guard = GUARD_MARK; + reply->retval = clear64(request->args[0], request->args[1]); + break; + case P_CLEAR32: + exc_guard = GUARD_MARK; + reply->retval = clear32(request->args[0], request->args[1]); + break; + case P_CLEAR16: + exc_guard = GUARD_MARK; + reply->retval = clear16(request->args[0], request->args[1]); + break; + case P_CLEAR8: + exc_guard = GUARD_MARK; + reply->retval = clear8(request->args[0], request->args[1]); + break; + + case P_MASK64: + exc_guard = GUARD_MARK; + reply->retval = mask64(request->args[0], request->args[1], request->args[2]); + break; + case P_MASK32: + exc_guard = GUARD_MARK; + reply->retval = mask32(request->args[0], request->args[1], request->args[2]); + break; + case P_MASK16: + exc_guard = GUARD_MARK; + reply->retval = mask16(request->args[0], request->args[1], request->args[2]); + break; + case P_MASK8: + exc_guard = GUARD_MARK; + reply->retval = mask8(request->args[0], request->args[1], request->args[2]); + break; + + case P_WRITEREAD64: + exc_guard = GUARD_MARK; + reply->retval = writeread64(request->args[0], request->args[1]); + break; + case P_WRITEREAD32: + exc_guard = GUARD_MARK; + reply->retval = writeread32(request->args[0], request->args[1]); + break; + case P_WRITEREAD16: + exc_guard = GUARD_MARK; + reply->retval = writeread16(request->args[0], request->args[1]); + break; + case P_WRITEREAD8: + exc_guard = GUARD_MARK; + reply->retval = writeread8(request->args[0], request->args[1]); + break; + + case P_MEMCPY64: + exc_guard = GUARD_RETURN; + memcpy64((void *)request->args[0], (void 
*)request->args[1], request->args[2]); + break; + case P_MEMCPY32: + exc_guard = GUARD_RETURN; + memcpy32((void *)request->args[0], (void *)request->args[1], request->args[2]); + break; + case P_MEMCPY16: + exc_guard = GUARD_RETURN; + memcpy16((void *)request->args[0], (void *)request->args[1], request->args[2]); + break; + case P_MEMCPY8: + exc_guard = GUARD_RETURN; + memcpy8((void *)request->args[0], (void *)request->args[1], request->args[2]); + break; + + case P_MEMSET64: + exc_guard = GUARD_RETURN; + memset64((void *)request->args[0], request->args[1], request->args[2]); + break; + case P_MEMSET32: + exc_guard = GUARD_RETURN; + memset32((void *)request->args[0], request->args[1], request->args[2]); + break; + case P_MEMSET16: + exc_guard = GUARD_RETURN; + memset16((void *)request->args[0], request->args[1], request->args[2]); + break; + case P_MEMSET8: + exc_guard = GUARD_RETURN; + memset8((void *)request->args[0], request->args[1], request->args[2]); + break; + + case P_IC_IALLUIS: + ic_ialluis(); + break; + case P_IC_IALLU: + ic_iallu(); + break; + case P_IC_IVAU: + ic_ivau_range((void *)request->args[0], request->args[1]); + break; + case P_DC_IVAC: + dc_ivac_range((void *)request->args[0], request->args[1]); + break; + case P_DC_ISW: + dc_isw((void *)request->args[0]); + break; + case P_DC_CSW: + dc_csw((void *)request->args[0]); + break; + case P_DC_CISW: + dc_cisw((void *)request->args[0]); + break; + case P_DC_ZVA: + dc_zva_range((void *)request->args[0], request->args[1]); + break; + case P_DC_CVAC: + dc_cvac_range((void *)request->args[0], request->args[1]); + break; + case P_DC_CVAU: + dc_cvau_range((void *)request->args[0], request->args[1]); + break; + case P_DC_CIVAC: + dc_civac_range((void *)request->args[0], request->args[1]); + break; + case P_MMU_SHUTDOWN: + mmu_shutdown(); + break; + case P_MMU_INIT: + mmu_init(); + break; + case P_MMU_DISABLE: + reply->retval = mmu_disable(); + break; + case P_MMU_RESTORE: + mmu_restore(request->args[0]); + 
break; + case P_MMU_INIT_SECONDARY: + mmu_init_secondary(request->args[0]); + break; + + case P_XZDEC: { + uint32_t destlen, srclen; + destlen = request->args[3]; + srclen = request->args[1]; + if (XzDecode((void *)request->args[0], &srclen, (void *)request->args[2], &destlen)) + reply->retval = destlen; + else + reply->retval = ~0L; + break; + } + case P_GZDEC: { + unsigned int destlen, srclen; + destlen = request->args[3]; + srclen = request->args[1]; + size_t ret = tinf_gzip_uncompress((void *)request->args[2], &destlen, + (void *)request->args[0], &srclen); + if (ret != TINF_OK) + reply->retval = ret; + else + reply->retval = destlen; + break; + } + + case P_SMP_START_SECONDARIES: + smp_start_secondaries(); + break; + case P_SMP_CALL: + smp_call4(request->args[0], (void *)request->args[1], request->args[2], + request->args[3], request->args[4], request->args[5]); + break; + case P_SMP_CALL_SYNC: + smp_call4(request->args[0], (void *)request->args[1], request->args[2], + request->args[3], request->args[4], request->args[5]); + reply->retval = smp_wait(request->args[0]); + break; + case P_SMP_WAIT: + reply->retval = smp_wait(request->args[0]); + break; + case P_SMP_SET_WFE_MODE: + smp_set_wfe_mode(request->args[0]); + break; + + case P_HEAPBLOCK_ALLOC: + reply->retval = (u64)heapblock_alloc(request->args[0]); + break; + case P_MALLOC: + reply->retval = (u64)malloc(request->args[0]); + break; + case P_MEMALIGN: + reply->retval = (u64)memalign(request->args[0], request->args[1]); + break; + case P_FREE: + free((void *)request->args[0]); + break; + + case P_KBOOT_BOOT: + if (kboot_boot((void *)request->args[0]) == 0) + return 1; + break; + case P_KBOOT_SET_CHOSEN: + reply->retval = kboot_set_chosen((void *)request->args[0], (void *)request->args[1]); + break; + case P_KBOOT_SET_INITRD: + kboot_set_initrd((void *)request->args[0], request->args[1]); + break; + case P_KBOOT_PREPARE_DT: + reply->retval = kboot_prepare_dt((void *)request->args[0]); + break; + + case 
P_PMGR_POWER_ENABLE:
+            reply->retval = pmgr_power_enable(request->args[0]);
+            break;
+        case P_PMGR_POWER_DISABLE:
+            // Fixed copy-paste bug: this case previously called pmgr_power_enable(),
+            // so the proxy P_PMGR_POWER_DISABLE op powered the device ON instead of OFF.
+            reply->retval = pmgr_power_disable(request->args[0]);
+            break;
+        case P_PMGR_ADT_POWER_ENABLE:
+            reply->retval = pmgr_adt_power_enable((const char *)request->args[0]);
+            break;
+        case P_PMGR_ADT_POWER_DISABLE:
+            reply->retval = pmgr_adt_power_disable((const char *)request->args[0]);
+            break;
+        case P_PMGR_RESET:
+            reply->retval = pmgr_reset(request->args[0], (const char *)request->args[1]);
+            break;
+
+        case P_IODEV_SET_USAGE:
+            iodev_set_usage(request->args[0], request->args[1]);
+            break;
+        case P_IODEV_CAN_READ:
+            reply->retval = iodev_can_read(request->args[0]);
+            break;
+        case P_IODEV_CAN_WRITE:
+            reply->retval = iodev_can_write(request->args[0]);
+            break;
+        case P_IODEV_READ:
+            reply->retval =
+                iodev_read(request->args[0], (void *)request->args[1], request->args[2]);
+            break;
+        case P_IODEV_WRITE:
+            reply->retval =
+                iodev_write(request->args[0], (void *)request->args[1], request->args[2]);
+            break;
+        case P_IODEV_WHOAMI:
+            reply->retval = uartproxy_iodev;
+            break;
+
+        case P_USB_IODEV_VUART_SETUP:
+            usb_iodev_vuart_setup(request->args[0]);
+            break;
+
+        case P_TUNABLES_APPLY_GLOBAL:
+            reply->retval = tunables_apply_global((const char *)request->args[0],
+                                                  (const char *)request->args[1]);
+            break;
+        case P_TUNABLES_APPLY_LOCAL:
+            reply->retval = tunables_apply_local((const char *)request->args[0],
+                                                 (const char *)request->args[1], request->args[2]);
+            break;
+        case P_TUNABLES_APPLY_LOCAL_ADDR:
+            reply->retval = tunables_apply_local_addr(
+                (const char *)request->args[0], (const char *)request->args[1], request->args[2]);
+            break;
+
+        case P_DART_INIT:
+            reply->retval = (u64)dart_init(request->args[0], request->args[1], request->args[2],
+                                           request->args[3]);
+            break;
+        case P_DART_SHUTDOWN:
+            dart_shutdown((dart_dev_t *)request->args[0]);
+            break;
+        case P_DART_MAP:
+            reply->retval = dart_map((dart_dev_t *)request->args[0], request->args[1],
+                                     
(void *)request->args[2], request->args[3]); + break; + case P_DART_UNMAP: + dart_unmap((dart_dev_t *)request->args[0], request->args[1], request->args[2]); + break; + + case P_HV_INIT: + hv_init(); + break; + case P_HV_MAP: + hv_map(request->args[0], request->args[1], request->args[2], request->args[3]); + break; + case P_HV_START: + hv_start((void *)request->args[0], &request->args[1]); + break; + case P_HV_TRANSLATE: + reply->retval = hv_translate(request->args[0], request->args[1], request->args[2], + (void *)request->args[3]); + break; + case P_HV_PT_WALK: + reply->retval = hv_pt_walk(request->args[0]); + break; + case P_HV_MAP_VUART: + hv_map_vuart(request->args[0], request->args[1], request->args[2]); + break; + case P_HV_MAP_VIRTIO: + hv_map_virtio(request->args[0], (void *)request->args[1]); + break; + case P_VIRTIO_PUT_BUFFER: + virtio_put_buffer(request->args[0], request->args[1], request->args[2], + request->args[3]); + break; + case P_HV_TRACE_IRQ: + reply->retval = hv_trace_irq(request->args[0], request->args[1], request->args[2], + request->args[3]); + break; + case P_HV_WDT_START: + hv_wdt_start(request->args[0]); + break; + case P_HV_START_SECONDARY: + hv_start_secondary(request->args[0], (void *)request->args[1], &request->args[2]); + break; + case P_HV_SWITCH_CPU: + reply->retval = hv_switch_cpu(request->args[0]); + break; + case P_HV_SET_TIME_STEALING: + hv_set_time_stealing(request->args[0], request->args[1]); + break; + case P_HV_PIN_CPU: + hv_pin_cpu(request->args[0]); + break; + case P_HV_WRITE_HCR: + hv_write_hcr(request->args[0]); + break; + + case P_FB_INIT: + fb_init(request->args[0]); + break; + case P_FB_SHUTDOWN: + fb_shutdown(request->args[0]); + break; + case P_FB_BLIT: + // HACK: Running out of args, stash pix fmt in high bits of stride... 
+ fb_blit(request->args[0], request->args[1], request->args[2], request->args[3], + (void *)request->args[4], (u32)request->args[5], request->args[5] >> 32); + break; + case P_FB_UNBLIT: + fb_unblit(request->args[0], request->args[1], request->args[2], request->args[3], + (void *)request->args[4], request->args[5]); + break; + case P_FB_FILL: + fb_fill(request->args[0], request->args[1], request->args[2], request->args[3], + int2rgb(request->args[4])); + break; + case P_FB_CLEAR: + fb_clear(int2rgb(request->args[0])); + break; + case P_FB_DISPLAY_LOGO: + fb_display_logo(); + break; + case P_FB_RESTORE_LOGO: + fb_restore_logo(); + break; + case P_FB_IMPROVE_LOGO: + fb_improve_logo(); + break; + + case P_PCIE_INIT: + pcie_init(); + break; + case P_PCIE_SHUTDOWN: + pcie_shutdown(); + break; + + case P_NVME_INIT: + reply->retval = nvme_init(); + break; + case P_NVME_SHUTDOWN: + nvme_shutdown(); + break; + case P_NVME_READ: + reply->retval = nvme_read(request->args[0], request->args[1], (void *)request->args[2]); + break; + case P_NVME_FLUSH: + reply->retval = nvme_flush(request->args[0]); + break; + + case P_MCC_GET_CARVEOUTS: + reply->retval = (u64)mcc_carveouts; + break; + + case P_DISPLAY_INIT: + reply->retval = display_init(); + break; + case P_DISPLAY_CONFIGURE: + reply->retval = display_configure((char *)request->args[0]); + break; + case P_DISPLAY_SHUTDOWN: + display_shutdown(request->args[0]); + break; + case P_DISPLAY_START_DCP: + display_start_dcp(); + break; + case P_DISPLAY_IS_EXTERNAL: + reply->retval = display_is_external; + break; + + case P_DAPF_INIT_ALL: + reply->retval = dapf_init_all(); + break; + case P_DAPF_INIT: + reply->retval = dapf_init((const char *)request->args[0]); + break; + + default: + reply->status = S_BADCMD; + break; + } + sysop("dsb sy"); + sysop("isb"); + exc_guard = guard_save; + return 0; +} diff --git a/tools/src/proxy.h b/tools/src/proxy.h new file mode 100644 index 0000000..27a3f8e --- /dev/null +++ b/tools/src/proxy.h @@ -0,0 
+1,183 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef __PROXY_H__ +#define __PROXY_H__ + +#include "types.h" + +typedef enum { + P_NOP = 0x000, // System functions + P_EXIT, + P_CALL, + P_GET_BOOTARGS, + P_GET_BASE, + P_SET_BAUD, + P_UDELAY, + P_SET_EXC_GUARD, + P_GET_EXC_COUNT, + P_EL0_CALL, + P_EL1_CALL, + P_VECTOR, + P_GL1_CALL, + P_GL2_CALL, + P_GET_SIMD_STATE, + P_PUT_SIMD_STATE, + P_REBOOT, + + P_WRITE64 = 0x100, // Generic register functions + P_WRITE32, + P_WRITE16, + P_WRITE8, + P_READ64, + P_READ32, + P_READ16, + P_READ8, + P_SET64, + P_SET32, + P_SET16, + P_SET8, + P_CLEAR64, + P_CLEAR32, + P_CLEAR16, + P_CLEAR8, + P_MASK64, + P_MASK32, + P_MASK16, + P_MASK8, + P_WRITEREAD64, + P_WRITEREAD32, + P_WRITEREAD16, + P_WRITEREAD8, + + P_MEMCPY64 = 0x200, // Memory block transfer functions + P_MEMCPY32, + P_MEMCPY16, + P_MEMCPY8, + P_MEMSET64, + P_MEMSET32, + P_MEMSET16, + P_MEMSET8, + + P_IC_IALLUIS = 0x300, // Cache and memory ops + P_IC_IALLU, + P_IC_IVAU, + P_DC_IVAC, + P_DC_ISW, + P_DC_CSW, + P_DC_CISW, + P_DC_ZVA, + P_DC_CVAC, + P_DC_CVAU, + P_DC_CIVAC, + P_MMU_SHUTDOWN, + P_MMU_INIT, + P_MMU_DISABLE, + P_MMU_RESTORE, + P_MMU_INIT_SECONDARY, + + P_XZDEC = 0x400, // Decompression and data processing ops + P_GZDEC, + + P_SMP_START_SECONDARIES = 0x500, // SMP and system management ops + P_SMP_CALL, + P_SMP_CALL_SYNC, + P_SMP_WAIT, + P_SMP_SET_WFE_MODE, + + P_HEAPBLOCK_ALLOC = 0x600, // Heap and memory management ops + P_MALLOC, + P_MEMALIGN, + P_FREE, + + P_KBOOT_BOOT = 0x700, // Kernel boot ops + P_KBOOT_SET_CHOSEN, + P_KBOOT_SET_INITRD, + P_KBOOT_PREPARE_DT, + + P_PMGR_POWER_ENABLE = 0x800, // power/clock management ops + P_PMGR_POWER_DISABLE, + P_PMGR_ADT_POWER_ENABLE, + P_PMGR_ADT_POWER_DISABLE, + P_PMGR_RESET, + + P_IODEV_SET_USAGE = 0x900, + P_IODEV_CAN_READ, + P_IODEV_CAN_WRITE, + P_IODEV_READ, + P_IODEV_WRITE, + P_IODEV_WHOAMI, + P_USB_IODEV_VUART_SETUP, + + P_TUNABLES_APPLY_GLOBAL = 0xa00, + P_TUNABLES_APPLY_LOCAL, + P_TUNABLES_APPLY_LOCAL_ADDR, 
+ + P_DART_INIT = 0xb00, + P_DART_SHUTDOWN, + P_DART_MAP, + P_DART_UNMAP, + + P_HV_INIT = 0xc00, + P_HV_MAP, + P_HV_START, + P_HV_TRANSLATE, + P_HV_PT_WALK, + P_HV_MAP_VUART, + P_HV_TRACE_IRQ, + P_HV_WDT_START, + P_HV_START_SECONDARY, + P_HV_SWITCH_CPU, + P_HV_SET_TIME_STEALING, + P_HV_PIN_CPU, + P_HV_WRITE_HCR, + P_HV_MAP_VIRTIO, + P_VIRTIO_PUT_BUFFER, + + P_FB_INIT = 0xd00, + P_FB_SHUTDOWN, + P_FB_BLIT, + P_FB_UNBLIT, + P_FB_FILL, + P_FB_CLEAR, + P_FB_DISPLAY_LOGO, + P_FB_RESTORE_LOGO, + P_FB_IMPROVE_LOGO, + + P_PCIE_INIT = 0xe00, + P_PCIE_SHUTDOWN, + + P_NVME_INIT = 0xf00, + P_NVME_SHUTDOWN, + P_NVME_READ, + P_NVME_FLUSH, + + P_MCC_GET_CARVEOUTS = 0x1000, + + P_DISPLAY_INIT = 0x1100, + P_DISPLAY_CONFIGURE, + P_DISPLAY_SHUTDOWN, + P_DISPLAY_START_DCP, + P_DISPLAY_IS_EXTERNAL, + + P_DAPF_INIT_ALL = 0x1200, + P_DAPF_INIT, + +} ProxyOp; + +#define S_OK 0 +#define S_BADCMD -1 + +typedef struct { + u64 opcode; + u64 args[6]; +} ProxyRequest; + +typedef struct { + u64 opcode; + s64 status; + u64 retval; +} ProxyReply; + +int proxy_process(ProxyRequest *request, ProxyReply *reply); + +#endif diff --git a/tools/src/ringbuffer.c b/tools/src/ringbuffer.c new file mode 100644 index 0000000..36b89d9 --- /dev/null +++ b/tools/src/ringbuffer.c @@ -0,0 +1,81 @@ +#include "ringbuffer.h" +#include "malloc.h" +#include "types.h" + +ringbuffer_t *ringbuffer_alloc(size_t len) +{ + ringbuffer_t *bfr = malloc(sizeof(*bfr)); + if (!bfr) + return NULL; + + bfr->buffer = malloc(len); + if (!bfr->buffer) { + free(bfr); + return NULL; + } + + bfr->read = 0; + bfr->write = 0; + bfr->len = len; + + return bfr; +} + +void ringbuffer_free(ringbuffer_t *bfr) +{ + if (bfr) + free(bfr->buffer); + free(bfr); +} + +size_t ringbuffer_read(u8 *target, size_t len, ringbuffer_t *bfr) +{ + size_t read; + + for (read = 0; read < len; ++read) { + if (bfr->read == bfr->write) + break; + + *target = bfr->buffer[bfr->read]; + target++; + + bfr->read++; + bfr->read %= bfr->len; + } + + return read; +} + 
+size_t ringbuffer_write(const u8 *src, size_t len, ringbuffer_t *bfr) +{ + size_t written; + + for (written = 0; written < len; ++written) { + if (((bfr->write + 1) % bfr->len) == bfr->read) + break; + + bfr->buffer[bfr->write] = *src; + src++; + + bfr->write++; + bfr->write %= bfr->len; + } + + return written; +} + +size_t ringbuffer_get_used(ringbuffer_t *bfr) +{ + size_t read = bfr->read; + size_t write = bfr->write; + + if (write < read) + write += bfr->len; + + return write - read; +} + +size_t ringbuffer_get_free(ringbuffer_t *bfr) +{ + return bfr->len - ringbuffer_get_used(bfr); +} diff --git a/tools/src/ringbuffer.h b/tools/src/ringbuffer.h new file mode 100644 index 0000000..553ae76 --- /dev/null +++ b/tools/src/ringbuffer.h @@ -0,0 +1,22 @@ +#ifndef RINGBUFFER_H +#define RINGBUFFER_H + +#include "types.h" + +typedef struct { + u8 *buffer; + size_t len; + size_t read; + size_t write; +} ringbuffer_t; + +ringbuffer_t *ringbuffer_alloc(size_t len); +void ringbuffer_free(ringbuffer_t *bfr); + +size_t ringbuffer_read(u8 *target, size_t len, ringbuffer_t *bfr); +size_t ringbuffer_write(const u8 *src, size_t len, ringbuffer_t *bfr); + +size_t ringbuffer_get_used(ringbuffer_t *bfr); +size_t ringbuffer_get_free(ringbuffer_t *bfr); + +#endif diff --git a/tools/src/rtkit.c b/tools/src/rtkit.c new file mode 100644 index 0000000..db80258 --- /dev/null +++ b/tools/src/rtkit.c @@ -0,0 +1,710 @@ +/* SPDX-License-Identifier: MIT */ + +#include "../config.h" + +#include "rtkit.h" +#include "adt.h" +#include "asc.h" +#include "dart.h" +#include "iova.h" +#include "malloc.h" +#include "sart.h" +#include "string.h" +#include "types.h" +#include "utils.h" + +#define rtkit_printf(...) 
\ + do { \ + debug_printf("rtkit(%s): ", rtk->name); \ + debug_printf(__VA_ARGS__); \ + } while (0) + +#define RTKIT_EP_MGMT 0 +#define RTKIT_EP_CRASHLOG 1 +#define RTKIT_EP_SYSLOG 2 +#define RTKIT_EP_DEBUG 3 +#define RTKIT_EP_IOREPORT 4 +#define RTKIT_EP_OSLOG 8 + +#define MGMT_TYPE GENMASK(59, 52) + +#define MGMT_PWR_STATE GENMASK(15, 0) + +#define MSG_BUFFER_REQUEST 1 +#define MSG_BUFFER_REQUEST_SIZE GENMASK(51, 44) +#define MSG_BUFFER_REQUEST_IOVA GENMASK(41, 0) + +#define MSG_SYSLOG_INIT 8 +#define MSG_SYSLOG_INIT_ENTRYSIZE GENMASK(39, 24) +#define MSG_SYSLOG_INIT_COUNT GENMASK(15, 0) +#define MSG_SYSLOG_LOG 5 +#define MSG_SYSLOG_LOG_INDEX GENMASK(7, 0) + +#define MSG_OSLOG_INIT 0x10 +#define MSG_OSLOG_ACK 0x30 + +#define MGMT_MSG_HELLO 1 +#define MGMT_MSG_HELLO_ACK 2 +#define MGMT_MSG_HELLO_MINVER GENMASK(15, 0) +#define MGMT_MSG_HELLO_MAXVER GENMASK(31, 16) + +#define MGMT_MSG_IOP_PWR_STATE 6 +#define MGMT_MSG_IOP_PWR_STATE_ACK 7 + +#define MGMT_MSG_EPMAP 8 +#define MGMT_MSG_EPMAP_DONE BIT(51) +#define MGMT_MSG_EPMAP_BASE GENMASK(34, 32) +#define MGMT_MSG_EPMAP_BITMAP GENMASK(31, 0) + +#define MGMT_MSG_EPMAP_REPLY 8 +#define MGMT_MSG_EPMAP_REPLY_DONE BIT(51) +#define MGMT_MSG_EPMAP_REPLY_MORE BIT(0) + +#define MGMT_MSG_AP_PWR_STATE 0xb +#define MGMT_MSG_AP_PWR_STATE_ACK 0xb + +#define MGMT_MSG_START_EP 5 +#define MGMT_MSG_START_EP_IDX GENMASK(39, 32) +#define MGMT_MSG_START_EP_FLAG BIT(1) + +#define RTKIT_MIN_VERSION 11 +#define RTKIT_MAX_VERSION 12 + +#define IOVA_MASK GENMASK(35, 0) + +enum rtkit_power_state { + RTKIT_POWER_OFF = 0x00, + RTKIT_POWER_SLEEP = 0x01, + RTKIT_POWER_QUIESCED = 0x10, + RTKIT_POWER_ON = 0x20, + RTKIT_POWER_INIT = 0x220, +}; + +struct rtkit_dev { + char *name; + + asc_dev_t *asc; + dart_dev_t *dart; + iova_domain_t *dart_iovad; + sart_dev_t *sart; + + u64 dva_base; + + enum rtkit_power_state iop_power; + enum rtkit_power_state ap_power; + + struct rtkit_buffer syslog_bfr; + struct rtkit_buffer crashlog_bfr; + struct rtkit_buffer 
ioreport_bfr; + + u32 syslog_cnt, syslog_size; + + bool crashed; +}; + +struct syslog_log { + u32 hdr; + u32 unk; + char context[24]; + char msg[]; +}; + +struct crashlog_hdr { + u32 type; + u32 ver; + u32 total_size; + u32 flags; + u8 _padding[16]; +}; + +struct crashlog_entry { + u32 type; + u32 _padding; + u32 flags; + u32 len; + u8 payload[]; +}; + +rtkit_dev_t *rtkit_init(const char *name, asc_dev_t *asc, dart_dev_t *dart, + iova_domain_t *dart_iovad, sart_dev_t *sart) +{ + if (dart && sart) { + printf("rtkit: Cannot use both SART and DART simultaneously\n"); + return NULL; + } + + if (dart && !dart_iovad) { + printf("rtkit: if DART is used iovad is already required\n"); + return NULL; + } + + rtkit_dev_t *rtk = malloc(sizeof(*rtk)); + if (!rtk) + return NULL; + memset(rtk, 0, sizeof(*rtk)); + + size_t name_len = strlen(name); + rtk->name = malloc(name_len + 1); + if (!rtk->name) + goto out_free_rtk; + strcpy(rtk->name, name); + + rtk->asc = asc; + rtk->dart = dart; + rtk->dart_iovad = dart_iovad; + rtk->sart = sart; + rtk->iop_power = RTKIT_POWER_OFF; + rtk->ap_power = RTKIT_POWER_OFF; + rtk->dva_base = 0; + + int iop_node = asc_get_iop_node(asc); + ADT_GETPROP(adt, iop_node, "asc-dram-mask", &rtk->dva_base); + + return rtk; + +out_free_rtk: + free(rtk); + return NULL; +} + +void rtkit_free(rtkit_dev_t *rtk) +{ + rtkit_free_buffer(rtk, &rtk->syslog_bfr); + rtkit_free_buffer(rtk, &rtk->crashlog_bfr); + rtkit_free_buffer(rtk, &rtk->ioreport_bfr); + free(rtk->name); + free(rtk); +} + +bool rtkit_send(rtkit_dev_t *rtk, const struct rtkit_message *msg) +{ + struct asc_message asc_msg; + + asc_msg.msg0 = msg->msg; + asc_msg.msg1 = msg->ep; + + return asc_send(rtk->asc, &asc_msg); +} + +bool rtkit_map(rtkit_dev_t *rtk, void *phys, size_t sz, u64 *dva) +{ + sz = ALIGN_UP(sz, 16384); + + if (rtk->sart) { + if (!sart_add_allowed_region(rtk->sart, phys, sz)) { + rtkit_printf("sart_add_allowed_region failed (%p, 0x%lx)\n", phys, sz); + return false; + } + *dva = 
(u64)phys;
+        return true;
+    } else if (rtk->dart) {
+        u64 iova = iova_alloc(rtk->dart_iovad, sz);
+        if (!iova) {
+            rtkit_printf("failed to alloc iova (size 0x%lx)\n", sz);
+            return false;
+        }
+
+        if (dart_map(rtk->dart, iova, phys, sz) < 0) {
+            rtkit_printf("failed to DART map %p -> 0x%lx (0x%lx)\n", phys, iova, sz);
+            iova_free(rtk->dart_iovad, iova, sz);
+            return false;
+        }
+
+        *dva = iova | rtk->dva_base;
+        return true;
+    } else {
+        rtkit_printf("TODO: implement no IOMMU buffers\n");
+        return false;
+    }
+}
+
+bool rtkit_unmap(rtkit_dev_t *rtk, u64 dva, size_t sz)
+{
+    if (rtk->sart) {
+        if (!sart_remove_allowed_region(rtk->sart, (void *)dva, sz))
+            rtkit_printf("sart_remove_allowed_region failed (0x%lx, 0x%lx)\n", dva, sz);
+        return true;
+    } else if (rtk->dart) {
+        dva &= ~rtk->dva_base;
+        dart_unmap(rtk->dart, dva & IOVA_MASK, sz);
+        iova_free(rtk->dart_iovad, dva & IOVA_MASK, sz);
+        return true;
+    } else {
+        rtkit_printf("TODO: implement no IOMMU buffers\n");
+        return false;
+    }
+}
+
+bool rtkit_alloc_buffer(rtkit_dev_t *rtk, struct rtkit_buffer *bfr, size_t sz)
+{
+    bfr->bfr = memalign(SZ_16K, sz);
+    if (!bfr->bfr) {
+        rtkit_printf("unable to allocate %zu buffer\n", sz);
+        return false;
+    }
+
+    sz = ALIGN_UP(sz, 16384);
+
+    bfr->sz = sz;
+    if (!rtkit_map(rtk, bfr->bfr, sz, &bfr->dva))
+        goto error;
+
+    return true;
+
+error:
+    free(bfr->bfr);
+    bfr->bfr = NULL;
+    return false;
+}
+
+bool rtkit_free_buffer(rtkit_dev_t *rtk, struct rtkit_buffer *bfr)
+{
+    /* Buffers the IOP pre-allocated (not heap memory we own) need no freeing. */
+    if (!bfr->bfr || !is_heap(bfr->bfr))
+        return true;
+
+    if (!rtkit_unmap(rtk, bfr->dva, bfr->sz))
+        return false;
+
+    free(bfr->bfr);
+
+    // Fixed: the success path previously returned false, reporting failure
+    // even though the buffer was unmapped and freed.
+    return true;
+}
+
+static bool rtkit_handle_buffer_request(rtkit_dev_t *rtk, struct rtkit_message *msg,
+                                        struct rtkit_buffer *bfr)
+{
+    size_t n_4kpages = FIELD_GET(MSG_BUFFER_REQUEST_SIZE, msg->msg);
+    size_t sz = n_4kpages << 12;
+    u64 addr = FIELD_GET(MSG_BUFFER_REQUEST_IOVA, msg->msg);
+
+    if (addr) {
+        bfr->dva = addr & ~rtk->dva_base;
+        bfr->sz = sz;
+        bfr->bfr = 
dart_translate(rtk->dart, bfr->dva & IOVA_MASK); + if (!bfr->bfr) { + rtkit_printf("failed to translate pre-allocated buffer (ep 0x%x, buf 0x%lx)\n", msg->ep, + addr); + return false; + } else { + rtkit_printf("pre-allocated buffer (ep 0x%x, dva 0x%lx, phys %p)\n", msg->ep, addr, + bfr->bfr); + } + return true; + + } else { + if (!rtkit_alloc_buffer(rtk, bfr, sz)) { + rtkit_printf("unable to allocate buffer\n"); + return false; + } + } + + struct asc_message reply; + reply.msg1 = msg->ep; + reply.msg0 = FIELD_PREP(MGMT_TYPE, MSG_BUFFER_REQUEST); + reply.msg0 |= FIELD_PREP(MSG_BUFFER_REQUEST_SIZE, n_4kpages); + if (!addr) + reply.msg0 |= FIELD_PREP(MSG_BUFFER_REQUEST_IOVA, bfr->dva | rtk->dva_base); + + if (!asc_send(rtk->asc, &reply)) { + rtkit_printf("unable to send buffer reply\n"); + rtkit_free_buffer(rtk, bfr); + goto error; + } + + return true; + +error: + return false; +} + +static void rtkit_crashed(rtkit_dev_t *rtk) +{ + struct crashlog_hdr *hdr = rtk->crashlog_bfr.bfr; + rtk->crashed = true; + + rtkit_printf("IOP crashed!\n"); + + if (hdr->type != 'CLHE') { + rtkit_printf("bad crashlog header 0x%x @ %p\n", hdr->type, hdr); + return; + } + + struct crashlog_entry *p = (void *)(hdr + 1); + + rtkit_printf("== CRASH INFO ==\n"); + while (p->type != 'CLHE') { + switch (p->type) { + case 'Cstr': + rtkit_printf(" Message %d: %s\n", p->payload[0], &p->payload[4]); + break; + default: + rtkit_printf(" 0x%x\n", p->type); + break; + } + p = ((void *)p) + p->len; + } +} + +int rtkit_recv(rtkit_dev_t *rtk, struct rtkit_message *msg) +{ + struct asc_message asc_msg; + bool ok = true; + + if (rtk->crashed) + return -1; + + while (asc_recv(rtk->asc, &asc_msg)) { + if (asc_msg.msg1 >= 0x100) { + rtkit_printf("WARNING: received message for invalid endpoint %x >= 0x100\n", + asc_msg.msg1); + continue; + } + + msg->msg = asc_msg.msg0; + msg->ep = (u8)asc_msg.msg1; + + /* if this is an app message we can just forwad it to the caller */ + if (msg->ep >= 0x20) + return 1; + + 
u32 msgtype = FIELD_GET(MGMT_TYPE, msg->msg); + switch (msg->ep) { + case RTKIT_EP_MGMT: + switch (msgtype) { + case MGMT_MSG_IOP_PWR_STATE_ACK: + rtk->iop_power = FIELD_GET(MGMT_PWR_STATE, msg->msg); + break; + case MGMT_MSG_AP_PWR_STATE_ACK: + rtk->ap_power = FIELD_GET(MGMT_PWR_STATE, msg->msg); + break; + default: + rtkit_printf("unknown management message %x\n", msgtype); + } + break; + case RTKIT_EP_SYSLOG: + switch (msgtype) { + case MSG_BUFFER_REQUEST: + ok = ok && rtkit_handle_buffer_request(rtk, msg, &rtk->syslog_bfr); + break; + case MSG_SYSLOG_INIT: + rtk->syslog_cnt = FIELD_GET(MSG_SYSLOG_INIT_COUNT, msg->msg); + rtk->syslog_size = FIELD_GET(MSG_SYSLOG_INIT_ENTRYSIZE, msg->msg); + break; + case MSG_SYSLOG_LOG: +#ifdef RTKIT_SYSLOG + { + u64 index = FIELD_GET(MSG_SYSLOG_LOG_INDEX, msg->msg); + u64 stride = rtk->syslog_size + sizeof(struct syslog_log); + struct syslog_log *log = rtk->syslog_bfr.bfr + stride * index; + rtkit_printf("syslog: [%s]%s", log->context, log->msg); + if (log->msg[strlen(log->msg) - 1] != '\n') + printf("\n"); + } +#endif + if (!asc_send(rtk->asc, &asc_msg)) + rtkit_printf("failed to ack syslog\n"); + break; + default: + rtkit_printf("unknown syslog message %x\n", msgtype); + } + break; + case RTKIT_EP_CRASHLOG: + switch (msgtype) { + case MSG_BUFFER_REQUEST: + if (!rtk->crashlog_bfr.bfr) { + ok = ok && rtkit_handle_buffer_request(rtk, msg, &rtk->crashlog_bfr); + } else { + rtkit_crashed(rtk); + return -1; + } + break; + default: + rtkit_printf("unknown crashlog message %x\n", msgtype); + } + break; + case RTKIT_EP_IOREPORT: + switch (msgtype) { + case MSG_BUFFER_REQUEST: + ok = ok && rtkit_handle_buffer_request(rtk, msg, &rtk->ioreport_bfr); + break; + /* unknown but must be ACKed */ + case 0x8: + case 0xc: + if (!rtkit_send(rtk, msg)) + rtkit_printf("unable to ACK unknown ioreport message\n"); + break; + default: + rtkit_printf("unknown ioreport message %x\n", msgtype); + } + break; + case RTKIT_EP_OSLOG: + switch (msgtype) { + 
case MSG_OSLOG_INIT: + msg->msg = FIELD_PREP(MGMT_TYPE, MSG_OSLOG_ACK); + if (!rtkit_send(rtk, msg)) + rtkit_printf("unable to ACK oslog init message\n"); + break; + default: + rtkit_printf("unknown oslog message %x\n", msgtype); + } + break; + default: + rtkit_printf("message to unknown system endpoint 0x%02x: %lx\n", msg->ep, msg->msg); + } + + if (!ok) { + rtkit_printf("failed to handle system message 0x%02x: %lx\n", msg->ep, msg->msg); + return -1; + } + } + + return 0; +} + +bool rtkit_start_ep(rtkit_dev_t *rtk, u8 ep) +{ + struct asc_message msg; + + msg.msg0 = FIELD_PREP(MGMT_TYPE, MGMT_MSG_START_EP); + msg.msg0 |= MGMT_MSG_START_EP_FLAG; + msg.msg0 |= FIELD_PREP(MGMT_MSG_START_EP_IDX, ep); + msg.msg1 = RTKIT_EP_MGMT; + + if (!asc_send(rtk->asc, &msg)) { + rtkit_printf("unable to start endpoint 0x%02x\n", ep); + return false; + } + + return true; +} + +bool rtkit_boot(rtkit_dev_t *rtk) +{ + struct asc_message msg; + + /* boot the IOP if it isn't already */ + asc_cpu_start(rtk->asc); + /* can be sent unconditionally to wake up a possibly sleeping IOP */ + msg.msg0 = FIELD_PREP(MGMT_TYPE, MGMT_MSG_IOP_PWR_STATE) | + FIELD_PREP(MGMT_PWR_STATE, RTKIT_POWER_INIT); + msg.msg1 = RTKIT_EP_MGMT; + if (!asc_send(rtk->asc, &msg)) { + rtkit_printf("unable to send wakeup message\n"); + return false; + } + + if (!asc_recv_timeout(rtk->asc, &msg, USEC_PER_SEC)) { + rtkit_printf("did not receive HELLO\n"); + return false; + } + + if (msg.msg1 != RTKIT_EP_MGMT) { + rtkit_printf("expected HELLO but got message for EP 0x%x", msg.msg1); + return false; + } + + u32 msgtype; + msgtype = FIELD_GET(MGMT_TYPE, msg.msg0); + if (msgtype != MGMT_MSG_HELLO) { + rtkit_printf("expected HELLO but got message with type 0x%02x", msgtype); + + return false; + } + + u32 min_ver, max_ver, want_ver; + min_ver = FIELD_GET(MGMT_MSG_HELLO_MINVER, msg.msg0); + max_ver = FIELD_GET(MGMT_MSG_HELLO_MAXVER, msg.msg0); + want_ver = min(RTKIT_MAX_VERSION, max_ver); + + if (min_ver > RTKIT_MAX_VERSION || 
max_ver < RTKIT_MIN_VERSION) { + rtkit_printf("supported versions [%d,%d] must overlap versions [%d,%d]\n", + RTKIT_MIN_VERSION, RTKIT_MAX_VERSION, min_ver, max_ver); + return false; + } + + rtkit_printf("booting with version %d\n", want_ver); + + msg.msg0 = FIELD_PREP(MGMT_TYPE, MGMT_MSG_HELLO_ACK); + msg.msg0 |= FIELD_PREP(MGMT_MSG_HELLO_MINVER, want_ver); + msg.msg0 |= FIELD_PREP(MGMT_MSG_HELLO_MAXVER, want_ver); + msg.msg1 = RTKIT_EP_MGMT; + if (!asc_send(rtk->asc, &msg)) { + rtkit_printf("couldn't send HELLO ack\n"); + return false; + } + + bool has_crashlog = false; + bool has_debug = false; + bool has_ioreport = false; + bool has_syslog = false; + bool has_oslog = false; + bool got_epmap = false; + while (!got_epmap) { + if (!asc_recv_timeout(rtk->asc, &msg, USEC_PER_SEC)) { + rtkit_printf("couldn't receive message while waiting for endpoint map\n"); + return false; + } + + if (msg.msg1 != RTKIT_EP_MGMT) { + rtkit_printf("expected management message while waiting for endpoint map but got " + "message for endpoint 0x%x\n", + msg.msg1); + return false; + } + + msgtype = FIELD_GET(MGMT_TYPE, msg.msg0); + if (msgtype != MGMT_MSG_EPMAP) { + rtkit_printf("expected endpoint map message but got 0x%x instead\n", msgtype); + return false; + } + + u32 bitmap = FIELD_GET(MGMT_MSG_EPMAP_BITMAP, msg.msg0); + u32 base = FIELD_GET(MGMT_MSG_EPMAP_BASE, msg.msg0); + for (unsigned int i = 0; i < 32; i++) { + if (bitmap & (1U << i)) { + u8 ep_idx = 32 * base + i; + + if (ep_idx >= 0x20) + continue; + switch (ep_idx) { + case RTKIT_EP_CRASHLOG: + has_crashlog = true; + break; + case RTKIT_EP_DEBUG: + has_debug = true; + break; + case RTKIT_EP_IOREPORT: + has_ioreport = true; + break; + case RTKIT_EP_SYSLOG: + has_syslog = true; + break; + case RTKIT_EP_OSLOG: + has_oslog = true; + case RTKIT_EP_MGMT: + break; + default: + rtkit_printf("unknown system endpoint 0x%02x\n", ep_idx); + } + } + } + + if (msg.msg0 & MGMT_MSG_EPMAP_DONE) + got_epmap = true; + + msg.msg0 = 
FIELD_PREP(MGMT_TYPE, MGMT_MSG_EPMAP_REPLY); + msg.msg0 |= FIELD_PREP(MGMT_MSG_EPMAP_BASE, base); + if (got_epmap) + msg.msg0 |= MGMT_MSG_EPMAP_REPLY_DONE; + else + msg.msg0 |= MGMT_MSG_EPMAP_REPLY_MORE; + + msg.msg1 = RTKIT_EP_MGMT; + + if (!asc_send(rtk->asc, &msg)) { + rtkit_printf("couldn't reply to endpoint map\n"); + return false; + } + } + + /* start all required system endpoints */ + if (has_debug && !rtkit_start_ep(rtk, RTKIT_EP_DEBUG)) + return false; + if (has_crashlog && !rtkit_start_ep(rtk, RTKIT_EP_CRASHLOG)) + return false; + if (has_syslog && !rtkit_start_ep(rtk, RTKIT_EP_SYSLOG)) + return false; + if (has_ioreport && !rtkit_start_ep(rtk, RTKIT_EP_IOREPORT)) + return false; + if (has_oslog && !rtkit_start_ep(rtk, RTKIT_EP_OSLOG)) + return false; + + while (rtk->iop_power != RTKIT_POWER_ON) { + struct rtkit_message rtk_msg; + int ret = rtkit_recv(rtk, &rtk_msg); + if (ret == 1) + rtkit_printf("unexpected message to non-system endpoint 0x%02x during boot: %lx\n", + rtk_msg.ep, rtk_msg.msg); + else if (ret < 0) + return false; + } + + /* this enables syslog */ + msg.msg0 = + FIELD_PREP(MGMT_TYPE, MGMT_MSG_AP_PWR_STATE) | FIELD_PREP(MGMT_PWR_STATE, RTKIT_POWER_ON); + msg.msg1 = RTKIT_EP_MGMT; + if (!asc_send(rtk->asc, &msg)) { + rtkit_printf("unable to send AP power message\n"); + return false; + } + + return true; +} + +static bool rtkit_switch_power_state(rtkit_dev_t *rtk, enum rtkit_power_state target) +{ + struct asc_message msg; + + if (rtk->crashed) + return false; + + /* AP power should always go to QUIESCED, otherwise rebooting doesn't work */ + msg.msg0 = FIELD_PREP(MGMT_TYPE, MGMT_MSG_AP_PWR_STATE) | + FIELD_PREP(MGMT_PWR_STATE, RTKIT_POWER_QUIESCED); + msg.msg1 = RTKIT_EP_MGMT; + if (!asc_send(rtk->asc, &msg)) { + rtkit_printf("unable to send shutdown message\n"); + return false; + } + + while (rtk->ap_power != RTKIT_POWER_QUIESCED) { + struct rtkit_message rtk_msg; + int ret = rtkit_recv(rtk, &rtk_msg); + + if (ret > 0) { + 
rtkit_printf("unexpected message to non-system endpoint 0x%02x during shutdown: %lx\n", + rtk_msg.ep, rtk_msg.msg); + continue; + } else if (ret < 0) { + rtkit_printf("IOP died during shutdown\n"); + return false; + } + } + + msg.msg0 = FIELD_PREP(MGMT_TYPE, MGMT_MSG_IOP_PWR_STATE) | FIELD_PREP(MGMT_PWR_STATE, target); + if (!asc_send(rtk->asc, &msg)) { + rtkit_printf("unable to send shutdown message\n"); + return false; + } + + while (rtk->iop_power != target) { + struct rtkit_message rtk_msg; + int ret = rtkit_recv(rtk, &rtk_msg); + + if (ret > 0) { + rtkit_printf("unexpected message to non-system endpoint 0x%02x during shutdown: %lx\n", + rtk_msg.ep, rtk_msg.msg); + continue; + } else if (ret < 0) { + rtkit_printf("IOP died during shutdown\n"); + return false; + } + } + + return true; +} + +bool rtkit_quiesce(rtkit_dev_t *rtk) +{ + return rtkit_switch_power_state(rtk, RTKIT_POWER_QUIESCED); +} + +bool rtkit_sleep(rtkit_dev_t *rtk) +{ + int ret = rtkit_switch_power_state(rtk, RTKIT_POWER_SLEEP); + if (ret < 0) + return ret; + + asc_cpu_stop(rtk->asc); + return 0; +} diff --git a/tools/src/rtkit.h b/tools/src/rtkit.h new file mode 100644 index 0000000..9d87ee1 --- /dev/null +++ b/tools/src/rtkit.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef RTKIT_H +#define RTKIT_H + +#include "asc.h" +#include "dart.h" +#include "iova.h" +#include "sart.h" +#include "types.h" + +typedef struct rtkit_dev rtkit_dev_t; + +struct rtkit_message { + u8 ep; + u64 msg; +}; + +struct rtkit_buffer { + void *bfr; + u64 dva; + size_t sz; +}; + +rtkit_dev_t *rtkit_init(const char *name, asc_dev_t *asc, dart_dev_t *dart, + iova_domain_t *dart_iovad, sart_dev_t *sart); +bool rtkit_quiesce(rtkit_dev_t *rtk); +bool rtkit_sleep(rtkit_dev_t *rtk); +void rtkit_free(rtkit_dev_t *rtk); + +bool rtkit_start_ep(rtkit_dev_t *rtk, u8 ep); +bool rtkit_boot(rtkit_dev_t *rtk); + +int rtkit_recv(rtkit_dev_t *rtk, struct rtkit_message *msg); +bool rtkit_send(rtkit_dev_t *rtk, const struct 
rtkit_message *msg); + +bool rtkit_map(rtkit_dev_t *rtk, void *phys, size_t sz, u64 *dva); +bool rtkit_unmap(rtkit_dev_t *rtk, u64 dva, size_t sz); + +bool rtkit_alloc_buffer(rtkit_dev_t *rtk, struct rtkit_buffer *bfr, size_t sz); +bool rtkit_free_buffer(rtkit_dev_t *rtk, struct rtkit_buffer *bfr); + +#endif diff --git a/tools/src/sart.c b/tools/src/sart.c new file mode 100644 index 0000000..e0345cd --- /dev/null +++ b/tools/src/sart.c @@ -0,0 +1,219 @@ +/* SPDX-License-Identifier: MIT */ + +#include "adt.h" +#include "malloc.h" +#include "sart.h" +#include "string.h" +#include "utils.h" + +struct sart_dev { + uintptr_t base; + u32 protected_entries; + + void (*get_entry)(sart_dev_t *sart, int index, u8 *flags, void **paddr, size_t *size); + bool (*set_entry)(sart_dev_t *sart, int index, u8 flags, void *paddr, size_t size); +}; + +#define APPLE_SART_MAX_ENTRIES 16 + +/* This is probably a bitfield but the exact meaning of each bit is unknown. */ +#define APPLE_SART_FLAGS_ALLOW 0xff + +/* SARTv2 registers */ +#define APPLE_SART2_CONFIG(idx) (0x00 + 4 * (idx)) +#define APPLE_SART2_CONFIG_FLAGS GENMASK(31, 24) +#define APPLE_SART2_CONFIG_SIZE GENMASK(23, 0) +#define APPLE_SART2_CONFIG_SIZE_SHIFT 12 +#define APPLE_SART2_CONFIG_SIZE_MAX GENMASK(23, 0) + +#define APPLE_SART2_PADDR(idx) (0x40 + 4 * (idx)) +#define APPLE_SART2_PADDR_SHIFT 12 + +/* SARTv3 registers */ +#define APPLE_SART3_CONFIG(idx) (0x00 + 4 * (idx)) + +#define APPLE_SART3_PADDR(idx) (0x40 + 4 * (idx)) +#define APPLE_SART3_PADDR_SHIFT 12 + +#define APPLE_SART3_SIZE(idx) (0x80 + 4 * (idx)) +#define APPLE_SART3_SIZE_SHIFT 12 +#define APPLE_SART3_SIZE_MAX GENMASK(29, 0) + +static void sart2_get_entry(sart_dev_t *sart, int index, u8 *flags, void **paddr, size_t *size) +{ + u32 cfg = read32(sart->base + APPLE_SART2_CONFIG(index)); + *flags = FIELD_GET(APPLE_SART2_CONFIG_FLAGS, cfg); + *size = (size_t)FIELD_GET(APPLE_SART2_CONFIG_SIZE, cfg) << APPLE_SART2_CONFIG_SIZE_SHIFT; + *paddr = + (void 
*)((u64)read32(sart->base + APPLE_SART2_PADDR(index)) << APPLE_SART2_PADDR_SHIFT); +} + +static bool sart2_set_entry(sart_dev_t *sart, int index, u8 flags, void *paddr_, size_t size) +{ + u32 cfg; + u64 paddr = (u64)paddr_; + + if (size & ((1 << APPLE_SART2_CONFIG_SIZE_SHIFT) - 1)) + return false; + if (paddr & ((1 << APPLE_SART2_PADDR_SHIFT) - 1)) + return false; + + size >>= APPLE_SART2_CONFIG_SIZE_SHIFT; + paddr >>= APPLE_SART2_PADDR_SHIFT; + + if (size > APPLE_SART2_CONFIG_SIZE_MAX) + return false; + + cfg = FIELD_PREP(APPLE_SART2_CONFIG_FLAGS, flags); + cfg |= FIELD_PREP(APPLE_SART2_CONFIG_SIZE, size); + + write32(sart->base + APPLE_SART2_PADDR(index), paddr); + write32(sart->base + APPLE_SART2_CONFIG(index), cfg); + + return true; +} + +static void sart3_get_entry(sart_dev_t *sart, int index, u8 *flags, void **paddr, size_t *size) +{ + *flags = read32(sart->base + APPLE_SART3_CONFIG(index)); + *size = (size_t)read32(sart->base + APPLE_SART3_SIZE(index)) << APPLE_SART3_SIZE_SHIFT; + *paddr = + (void *)((u64)read32(sart->base + APPLE_SART3_PADDR(index)) << APPLE_SART3_PADDR_SHIFT); +} + +static bool sart3_set_entry(sart_dev_t *sart, int index, u8 flags, void *paddr_, size_t size) +{ + u64 paddr = (u64)paddr_; + if (size & ((1 << APPLE_SART3_SIZE_SHIFT) - 1)) + return false; + if (paddr & ((1 << APPLE_SART3_PADDR_SHIFT) - 1)) + return false; + + paddr >>= APPLE_SART3_PADDR_SHIFT; + size >>= APPLE_SART3_SIZE_SHIFT; + + if (size > APPLE_SART3_SIZE_MAX) + return false; + + write32(sart->base + APPLE_SART3_PADDR(index), paddr); + write32(sart->base + APPLE_SART3_SIZE(index), size); + write32(sart->base + APPLE_SART3_CONFIG(index), flags); + + return true; +} + +sart_dev_t *sart_init(const char *adt_path) +{ + int sart_path[8]; + int node = adt_path_offset_trace(adt, adt_path, sart_path); + if (node < 0) { + printf("sart: Error getting SART node %s\n", adt_path); + return NULL; + } + + u64 base; + if (adt_get_reg(adt, sart_path, "reg", 0, &base, NULL) < 0) { + 
printf("sart: Error getting SART %s base address.\n", adt_path); + return NULL; + } + + const u32 *sart_version = adt_getprop(adt, node, "sart-version", NULL); + if (!sart_version) { + printf("sart: SART %s has no sart-version property\n", adt_path); + return NULL; + } + + sart_dev_t *sart = malloc(sizeof(*sart)); + if (!sart) + return NULL; + + memset(sart, 0, sizeof(*sart)); + sart->base = base; + + switch (*sart_version) { + case 2: + sart->get_entry = sart2_get_entry; + sart->set_entry = sart2_set_entry; + break; + case 3: + sart->get_entry = sart3_get_entry; + sart->set_entry = sart3_set_entry; + break; + default: + printf("sart: SART %s has unknown version %d\n", adt_path, *sart_version); + free(sart); + return NULL; + } + + printf("sart: SARTv%d %s at 0x%lx\n", *sart_version, adt_path, base); + + sart->protected_entries = 0; + for (unsigned int i = 0; i < APPLE_SART_MAX_ENTRIES; ++i) { + void *paddr; + u8 flags; + size_t sz; + + sart->get_entry(sart, i, &flags, &paddr, &sz); + if (flags) + sart->protected_entries |= 1 << i; + } + + return sart; +} + +void sart_free(sart_dev_t *sart) +{ + for (unsigned int i = 0; i < APPLE_SART_MAX_ENTRIES; ++i) { + if (sart->protected_entries & (1 << i)) + continue; + sart->set_entry(sart, i, 0, NULL, 0); + } + + free(sart); +} + +bool sart_add_allowed_region(sart_dev_t *sart, void *paddr, size_t sz) +{ + for (unsigned int i = 0; i < APPLE_SART_MAX_ENTRIES; ++i) { + void *e_paddr; + u8 e_flags; + size_t e_sz; + + if (sart->protected_entries & (1 << i)) + continue; + + sart->get_entry(sart, i, &e_flags, &e_paddr, &e_sz); + if (e_flags) + continue; + + return sart->set_entry(sart, i, APPLE_SART_FLAGS_ALLOW, paddr, sz); + } + + printf("sart: no more free entries\n"); + return false; +} + +bool sart_remove_allowed_region(sart_dev_t *sart, void *paddr, size_t sz) +{ + for (unsigned int i = 0; i < APPLE_SART_MAX_ENTRIES; ++i) { + void *e_paddr; + u8 e_flags; + size_t e_sz; + + if (sart->protected_entries & (1 << i)) + continue; + 
+ sart->get_entry(sart, i, &e_flags, &e_paddr, &e_sz); + if (!e_flags) + continue; + if (e_paddr != paddr) + continue; + if (e_sz != sz) + continue; + + return sart->set_entry(sart, i, 0, NULL, 0); + } + + printf("sart: could not find entry to be removed\n"); + return false; +} diff --git a/tools/src/sart.h b/tools/src/sart.h new file mode 100644 index 0000000..37828c0 --- /dev/null +++ b/tools/src/sart.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef SART_H +#define SART_H + +#include "types.h" + +typedef struct sart_dev sart_dev_t; + +sart_dev_t *sart_init(const char *adt_path); +void sart_free(sart_dev_t *asc); + +bool sart_add_allowed_region(sart_dev_t *sart, void *paddr, size_t sz); +bool sart_remove_allowed_region(sart_dev_t *sart, void *paddr, size_t sz); + +#endif diff --git a/tools/src/sep.c b/tools/src/sep.c new file mode 100644 index 0000000..7a40fef --- /dev/null +++ b/tools/src/sep.c @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: MIT */ + +#include <string.h> + +#include "asc.h" +#include "sep.h" +#include "types.h" +#include "utils.h" + +#define SEP_MSG_EP GENMASK(7, 0) +#define SEP_MSG_CMD GENMASK(23, 16) +#define SEP_MSG_DATA GENMASK(63, 32) + +#define SEP_EP_ROM 0xff + +#define SEP_MSG_GETRAND 16 +#define SEP_REPLY_GETRAND 116 + +#define SEP_TIMEOUT 1000 + +static asc_dev_t *sep_asc = NULL; + +int sep_init(void) +{ + if (!sep_asc) + sep_asc = asc_init("/arm-io/sep"); + if (!sep_asc) + return -1; + return 0; +} + +size_t sep_get_random(void *buffer, size_t len) +{ + const struct asc_message msg_getrand = {.msg0 = FIELD_PREP(SEP_MSG_EP, SEP_EP_ROM) | + FIELD_PREP(SEP_MSG_CMD, SEP_MSG_GETRAND)}; + int ret; + size_t done = 0; + + ret = sep_init(); + if (ret) + return 0; + + while (len) { + struct asc_message reply; + u32 rng; + size_t copy; + + if (!asc_send(sep_asc, &msg_getrand)) + return done; + if (!asc_recv_timeout(sep_asc, &reply, SEP_TIMEOUT)) + return done; + if (FIELD_GET(SEP_MSG_CMD, reply.msg0) != SEP_REPLY_GETRAND) { + 
printf("SEP: unexpected getrand reply: %016lx\n", reply.msg0); + return done; + } + + rng = FIELD_GET(SEP_MSG_DATA, reply.msg0); + copy = sizeof(rng); + if (copy > len) + copy = len; + memcpy(buffer, &rng, copy); + done += copy; + len -= copy; + buffer += copy; + } + + return done; +} diff --git a/tools/src/sep.h b/tools/src/sep.h new file mode 100644 index 0000000..8d7d04a --- /dev/null +++ b/tools/src/sep.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef SEP_H +#define SEP_H + +#include "asc.h" +#include "types.h" + +int sep_init(void); +size_t sep_get_random(void *buffer, size_t len); + +#endif diff --git a/tools/src/smp.c b/tools/src/smp.c new file mode 100644 index 0000000..6ed522d --- /dev/null +++ b/tools/src/smp.c @@ -0,0 +1,296 @@ +/* SPDX-License-Identifier: MIT */ + +#include "smp.h" +#include "adt.h" +#include "cpu_regs.h" +#include "malloc.h" +#include "pmgr.h" +#include "soc.h" +#include "string.h" +#include "types.h" +#include "utils.h" + +#define CPU_START_OFF_T8103 0x54000 +#define CPU_START_OFF_T8112 0x34000 + +#define CPU_REG_CORE GENMASK(7, 0) +#define CPU_REG_CLUSTER GENMASK(10, 8) +#define CPU_REG_DIE GENMASK(14, 11) + +struct spin_table { + u64 mpidr; + u64 flag; + u64 target; + u64 args[4]; + u64 retval; +}; + +void *_reset_stack; + +#define DUMMY_STACK_SIZE 0x1000 +u8 dummy_stack[DUMMY_STACK_SIZE]; + +u8 *secondary_stacks[MAX_CPUS] = {dummy_stack}; + +static bool wfe_mode = false; + +static int target_cpu; +static struct spin_table spin_table[MAX_CPUS]; + +extern u8 _vectors_start[0]; + +void smp_secondary_entry(void) +{ + struct spin_table *me = &spin_table[target_cpu]; + + if (in_el2()) + msr(TPIDR_EL2, target_cpu); + else + msr(TPIDR_EL1, target_cpu); + + printf(" Index: %d (table: %p)\n\n", target_cpu, me); + + me->mpidr = mrs(MPIDR_EL1) & 0xFFFFFF; + + sysop("dmb sy"); + me->flag = 1; + sysop("dmb sy"); + u64 target; + + while (1) { + while (!(target = me->target)) { + if (wfe_mode) { + sysop("wfe"); + } else { + 
deep_wfi(); + msr(SYS_IMP_APL_IPI_SR_EL1, 1); + } + sysop("isb"); + } + sysop("dmb sy"); + me->flag++; + sysop("dmb sy"); + me->retval = ((u64(*)(u64 a, u64 b, u64 c, u64 d))target)(me->args[0], me->args[1], + me->args[2], me->args[3]); + sysop("dmb sy"); + me->target = 0; + sysop("dmb sy"); + } +} + +static void smp_start_cpu(int index, int die, int cluster, int core, u64 rvbar, u64 cpu_start_base) +{ + int i; + + if (index >= MAX_CPUS) + return; + + if (spin_table[index].flag) + return; + + printf("Starting CPU %d (%d:%d:%d)... ", index, die, cluster, core); + + memset(&spin_table[index], 0, sizeof(struct spin_table)); + + target_cpu = index; + secondary_stacks[index] = memalign(0x4000, SECONDARY_STACK_SIZE); + _reset_stack = secondary_stacks[index] + SECONDARY_STACK_SIZE; + + sysop("dmb sy"); + + write64(rvbar, (u64)_vectors_start); + + cpu_start_base += die * PMGR_DIE_OFFSET; + + // Some kind of system level startup/status bit + // Without this, IRQs don't work + write32(cpu_start_base + 0x4, 1 << (4 * cluster + core)); + + // Actually start the core + write32(cpu_start_base + 0x8 + 4 * cluster, 1 << core); + + for (i = 0; i < 500; i++) { + sysop("dmb ld"); + if (spin_table[index].flag) + break; + udelay(1000); + } + + if (i >= 500) + printf("Failed!\n"); + else + printf(" Started.\n"); + + _reset_stack = dummy_stack + DUMMY_STACK_SIZE; +} + +void smp_start_secondaries(void) +{ + printf("Starting secondary CPUs...\n"); + + int pmgr_path[8]; + u64 pmgr_reg; + + if (adt_path_offset_trace(adt, "/arm-io/pmgr", pmgr_path) < 0) { + printf("Error getting /arm-io/pmgr node\n"); + return; + } + if (adt_get_reg(adt, pmgr_path, "reg", 0, &pmgr_reg, NULL) < 0) { + printf("Error getting /arm-io/pmgr regs\n"); + return; + } + + int node = adt_path_offset(adt, "/cpus"); + if (node < 0) { + printf("Error getting /cpus node\n"); + return; + } + + int cpu_nodes[MAX_CPUS]; + u64 cpu_start_off; + + memset(cpu_nodes, 0, sizeof(cpu_nodes)); + + switch (chip_id) { + case T8103: + 
case T6000: + case T6001: + case T6002: + cpu_start_off = CPU_START_OFF_T8103; + break; + case T8112: + cpu_start_off = CPU_START_OFF_T8112; + break; + default: + printf("CPU start offset is unknown for this SoC!\n"); + return; + } + + ADT_FOREACH_CHILD(adt, node) + { + u32 cpu_id; + + if (ADT_GETPROP(adt, node, "cpu-id", &cpu_id) < 0) + continue; + if (cpu_id >= MAX_CPUS) { + printf("cpu-id %d exceeds max CPU count %d: increase MAX_CPUS\n", cpu_id, MAX_CPUS); + continue; + } + + cpu_nodes[cpu_id] = node; + } + + for (int i = 1; i < MAX_CPUS; i++) { + int node = cpu_nodes[i]; + + if (!node) + continue; + + u32 reg; + u64 cpu_impl_reg[2]; + if (ADT_GETPROP(adt, node, "reg", ®) < 0) + continue; + if (ADT_GETPROP_ARRAY(adt, node, "cpu-impl-reg", cpu_impl_reg) < 0) + continue; + + u8 core = FIELD_GET(CPU_REG_CORE, reg); + u8 cluster = FIELD_GET(CPU_REG_CLUSTER, reg); + u8 die = FIELD_GET(CPU_REG_DIE, reg); + + smp_start_cpu(i, die, cluster, core, cpu_impl_reg[0], pmgr_reg + cpu_start_off); + } + + spin_table[0].mpidr = mrs(MPIDR_EL1) & 0xFFFFFF; +} + +void smp_send_ipi(int cpu) +{ + if (cpu >= MAX_CPUS) + return; + + u64 mpidr = spin_table[cpu].mpidr; + msr(SYS_IMP_APL_IPI_RR_GLOBAL_EL1, (mpidr & 0xff) | ((mpidr & 0xff00) << 8)); +} + +void smp_call4(int cpu, void *func, u64 arg0, u64 arg1, u64 arg2, u64 arg3) +{ + if (cpu >= MAX_CPUS) + return; + + struct spin_table *target = &spin_table[cpu]; + + if (cpu == 0) + return; + + u64 flag = target->flag; + target->args[0] = arg0; + target->args[1] = arg1; + target->args[2] = arg2; + target->args[3] = arg3; + sysop("dmb sy"); + target->target = (u64)func; + sysop("dsb sy"); + + if (wfe_mode) + sysop("sev"); + else + smp_send_ipi(cpu); + + while (target->flag == flag) + sysop("dmb sy"); +} + +u64 smp_wait(int cpu) +{ + if (cpu >= MAX_CPUS) + return 0; + + struct spin_table *target = &spin_table[cpu]; + + while (target->target) + sysop("dmb sy"); + + return target->retval; +} + +void smp_set_wfe_mode(bool new_mode) +{ + 
wfe_mode = new_mode; + sysop("dsb sy"); + + for (int cpu = 1; cpu < MAX_CPUS; cpu++) + if (smp_is_alive(cpu)) + smp_send_ipi(cpu); + + sysop("sev"); +} + +bool smp_is_alive(int cpu) +{ + if (cpu >= MAX_CPUS) + return false; + + return spin_table[cpu].flag; +} + +uint64_t smp_get_mpidr(int cpu) +{ + if (cpu >= MAX_CPUS) + return 0; + + return spin_table[cpu].mpidr; +} + +u64 smp_get_release_addr(int cpu) +{ + struct spin_table *target = &spin_table[cpu]; + + if (cpu >= MAX_CPUS) + return 0; + + target->args[0] = 0; + target->args[1] = 0; + target->args[2] = 0; + target->args[3] = 0; + return (u64)&target->target; +} diff --git a/tools/src/smp.h b/tools/src/smp.h new file mode 100644 index 0000000..c802f3e --- /dev/null +++ b/tools/src/smp.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef __SMP_H__ +#define __SMP_H__ + +#include "types.h" +#include "utils.h" + +#define MAX_CPUS 20 + +#define SECONDARY_STACK_SIZE 0x10000 +extern u8 *secondary_stacks[MAX_CPUS]; + +void smp_secondary_entry(void); + +void smp_start_secondaries(void); + +#define smp_call0(i, f) smp_call4(i, f, 0, 0, 0, 0) +#define smp_call1(i, f, a) smp_call4(i, f, a, 0, 0, 0) +#define smp_call2(i, f, a, b) smp_call4(i, f, a, b, 0, 0) +#define smp_call3(i, f, a, b, c) smp_call4(i, f, a, b, c, 0) + +void smp_call4(int cpu, void *func, u64 arg0, u64 arg1, u64 arg2, u64 arg3); + +u64 smp_wait(int cpu); + +bool smp_is_alive(int cpu); +uint64_t smp_get_mpidr(int cpu); +u64 smp_get_release_addr(int cpu); +void smp_set_wfe_mode(bool new_mode); +void smp_send_ipi(int cpu); + +static inline int smp_id(void) +{ + if (in_el2()) + return mrs(TPIDR_EL2); + else + return mrs(TPIDR_EL1); +} + +#endif diff --git a/tools/src/soc.h b/tools/src/soc.h new file mode 100644 index 0000000..26ddddc --- /dev/null +++ b/tools/src/soc.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef __SOC_H__ +#define __SOC_H__ + +#include "../config.h" + +#define T8103 0x8103 +#define T8112 0x8112 +#define T6000 
0x6000 +#define T6001 0x6001 +#define T6002 0x6002 + +#ifdef TARGET + +#if TARGET == T8103 +#define EARLY_UART_BASE 0x235200000 +#elif TARGET == T6000 || TARGET == T6001 || TARGET == T6002 +#define EARLY_UART_BASE 0x39b200000 +#elif TARGET == T8112 +#define EARLY_UART_BASE 0x235200000 +#endif + +#endif +#endif diff --git a/tools/src/start.S b/tools/src/start.S new file mode 100644 index 0000000..b0051e6 --- /dev/null +++ b/tools/src/start.S @@ -0,0 +1,176 @@ +/* SPDX-License-Identifier: MIT */ + +#include "soc.h" + +#define UTRSTAT 0x010 +#define UTXH 0x020 + +.extern _start_c +.extern _stack_bot +.extern _v_sp0_sync +.extern _v_sp0_irq +.extern _v_sp0_fiq +.extern _v_sp0_serr +.extern _reset_stack +.extern _cpu_reset_c +.extern wdt_reboot + +.section .init, "ax" + +.align 11 +.globl _vectors_start +_vectors_start: + + mov x9, '0' + b cpu_reset + .align 7 + mov x9, '1' + b exc_unk + .align 7 + mov x9, '2' + b exc_unk + .align 7 + mov x9, '3' + b exc_unk + .align 7 + b _v_sp0_sync + .align 7 + b _v_sp0_irq + .align 7 + b _v_sp0_fiq + .align 7 + b _v_sp0_serr + .align 7 + b _v_sp0_sync + .align 7 + b _v_sp0_irq + .align 7 + b _v_sp0_fiq + .align 7 + b _v_sp0_serr + .align 7 + mov x9, 'p' + b exc_unk + .align 7 + mov x9, 'q' + b exc_unk + .align 7 + mov x9, 'r' + b exc_unk + .align 7 + mov x9, 's' + b exc_unk + .align 7 + +.globl _start +.type _start, @function +_start: + mov x19, x0 + + mov w0, 'm' + bl debug_putc + + adrp x1, _stack_bot + mov sp, x1 + + mov w0, '1' + bl debug_putc + + ldr x2, [sp, #-8] + + mov w0, 'n' + bl debug_putc + + adrp x0, _base + mov x20, x0 + adrp x1, _rela_start + add x1, x1, :lo12:_rela_start + adrp x2, _rela_end + add x2, x2, :lo12:_rela_end + bl apply_rela + + mov w0, '1' + bl debug_putc + mov w0, 0xd /* '\r', clang compat */ + bl debug_putc + mov w0, '\n' + bl debug_putc + + mov x0, x19 + mov x1, x20 + bl _start_c + b . 
+ +.globl exc_unk +.type exc_unk, @function +exc_unk: + mov w0, 0xd /* '\r', clang compat */ + bl debug_putc + mov w0, '\n' + bl debug_putc + mov w0, '!' + bl debug_putc + mov w0, 'E' + bl debug_putc + mov w0, 'x' + bl debug_putc + mov w0, 'C' + bl debug_putc + mov w0, ':' + bl debug_putc + mov w0, w9 + bl debug_putc + mov w0, '!' + bl debug_putc + mov w0, 0xd /* '\r', clang compat */ + bl debug_putc + mov w0, '\n' + bl debug_putc + b reboot + +.globl cpu_reset +.type cpu_reset, @function +cpu_reset: + mov w0, 'O' + bl debug_putc + + adrp x1, _reset_stack + add x1, x1, :lo12:_reset_stack + ldr x1, [x1] + mov sp, x1 + + ldr x2, [sp, #-8] + + mov w0, 'K' + bl debug_putc + + mov x0, sp + bl _cpu_reset_c + b . + +.globl debug_putc +.type debug_putc, @function +debug_putc: +#ifdef EARLY_UART_BASE + ldr x1, =EARLY_UART_BASE + +1: + ldr w2, [x1, UTRSTAT] + tst w2, #2 + beq 1b + str w0, [x1, UTXH] +#endif + ret + +.globl reboot +.type reboot, @function +reboot: + mrs x0, CurrentEL + cmp x0, #8 + beq 1f + hvc #0 +1: + bl wdt_reboot + b . 
+ +.pool diff --git a/tools/src/startup.c b/tools/src/startup.c new file mode 100644 index 0000000..1052707 --- /dev/null +++ b/tools/src/startup.c @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: MIT */ + +#include "chickens.h" +#include "exception.h" +#include "smp.h" +#include "string.h" +#include "types.h" +#include "uart.h" +#include "utils.h" +#include "xnuboot.h" + +u64 boot_args_addr; +struct boot_args cur_boot_args; +void *adt; + +struct rela_entry { + uint64_t off, type, addend; +}; + +void debug_putc(char c); +void m1n1_main(void); + +extern char _bss_start[0]; +extern char _bss_end[0]; + +#define R_AARCH64_RELATIVE 1027 + +void apply_rela(uint64_t base, struct rela_entry *rela_start, struct rela_entry *rela_end) +{ + struct rela_entry *e = rela_start; + + while (e < rela_end) { + switch (e->type) { + case R_AARCH64_RELATIVE: + *(u64 *)(base + e->off) = base + e->addend; + break; + default: + debug_putc('R'); + debug_putc('!'); + while (1) + ; + } + e++; + } +} + +void dump_boot_args(struct boot_args *ba) +{ + printf(" revision: %d\n", ba->revision); + printf(" version: %d\n", ba->version); + printf(" virt_base: 0x%lx\n", ba->virt_base); + printf(" phys_base: 0x%lx\n", ba->phys_base); + printf(" mem_size: 0x%lx\n", ba->mem_size); + printf(" top_of_kdata: 0x%lx\n", ba->top_of_kernel_data); + printf(" video:\n"); + printf(" base: 0x%lx\n", ba->video.base); + printf(" display: 0x%lx\n", ba->video.display); + printf(" stride: 0x%lx\n", ba->video.stride); + printf(" width: %lu\n", ba->video.width); + printf(" height: %lu\n", ba->video.height); + printf(" depth: %lubpp\n", ba->video.depth & 0xff); + printf(" density: %lu\n", ba->video.depth >> 16); + printf(" machine_type: %d\n", ba->machine_type); + printf(" devtree: %p\n", ba->devtree); + printf(" devtree_size: 0x%x\n", ba->devtree_size); + printf(" cmdline: %s\n", ba->cmdline); + printf(" boot_flags: 0x%lx\n", ba->boot_flags); + printf(" mem_size_act: 0x%lx\n", ba->mem_size_actual); +} + +void _start_c(void 
*boot_args, void *base) +{ + UNUSED(base); + + if (in_el2()) + msr(TPIDR_EL2, 0); + else + msr(TPIDR_EL1, 0); + + memset64(_bss_start, 0, _bss_end - _bss_start); + boot_args_addr = (u64)boot_args; + memcpy(&cur_boot_args, boot_args, sizeof(cur_boot_args)); + + adt = + (void *)(((u64)cur_boot_args.devtree) - cur_boot_args.virt_base + cur_boot_args.phys_base); + + int ret = uart_init(); + if (ret < 0) { + debug_putc('!'); + } + + uart_puts("Initializing"); + printf("CPU init (MIDR: 0x%lx)...\n", mrs(MIDR_EL1)); + const char *type = init_cpu(); + printf(" CPU: %s\n\n", type); + + printf("boot_args at %p\n", boot_args); + + dump_boot_args(&cur_boot_args); + printf("\n"); + + exception_initialize(); + m1n1_main(); +} + +/* Secondary SMP core boot */ +void _cpu_reset_c(void *stack) +{ + if (mrs(MPIDR_EL1) & 0xffffff) + uart_puts("RVBAR entry on secondary CPU"); + else + uart_puts("RVBAR entry on primary CPU"); + + printf("\n Stack base: %p\n", stack); + printf(" MPIDR: 0x%lx\n", mrs(MPIDR_EL1)); + const char *type = init_cpu(); + printf(" CPU: %s\n", type); + + exception_initialize(); + smp_secondary_entry(); +} diff --git a/tools/src/string.c b/tools/src/string.c new file mode 100644 index 0000000..318d0fc --- /dev/null +++ b/tools/src/string.c @@ -0,0 +1,209 @@ +/* SPDX-License-Identifier: MIT */ + +#include <stdbool.h> + +#include "string.h" + +// Routines based on The Public Domain C Library + +void *memcpy(void *s1, const void *s2, size_t n) +{ + char *dest = (char *)s1; + const char *src = (const char *)s2; + + while (n--) { + *dest++ = *src++; + } + + return s1; +} + +void *memmove(void *s1, const void *s2, size_t n) +{ + char *dest = (char *)s1; + const char *src = (const char *)s2; + + if (dest <= src) { + while (n--) { + *dest++ = *src++; + } + } else { + src += n; + dest += n; + + while (n--) { + *--dest = *--src; + } + } + + return s1; +} + +int memcmp(const void *s1, const void *s2, size_t n) +{ + const unsigned char *p1 = (const unsigned char *)s1; + const 
/* Freestanding string routines (based on The Public Domain C Library). */

/* Compare n bytes; returns <0, 0 or >0 like the standard memcmp. */
int memcmp(const void *s1, const void *s2, size_t n)
{
    const unsigned char *a = s1;
    const unsigned char *b = s2;

    for (; n != 0; --n, ++a, ++b) {
        if (*a != *b) {
            return *a - *b;
        }
    }

    return 0;
}

/* Fill n bytes at s with (unsigned char)c. */
void *memset(void *s, int c, size_t n)
{
    unsigned char *out = s;

    for (size_t i = 0; i < n; ++i) {
        out[i] = (unsigned char)c;
    }

    return s;
}

/* Find the first occurrence of (unsigned char)c in the first n bytes. */
void *memchr(const void *s, int c, size_t n)
{
    const unsigned char *p = s;

    for (; n != 0; --n, ++p) {
        if (*p == (unsigned char)c) {
            return (void *)p;
        }
    }

    return NULL;
}

/* Copy the NUL-terminated string s2 (including the NUL) into s1. */
char *strcpy(char *s1, const char *s2)
{
    char *ret = s1;

    while ((*s1++ = *s2++) != '\0') {
        /* EMPTY */
    }

    return ret;
}

/* Standard strncpy: copy at most n chars, NUL-pad the remainder.
   Note: does NOT guarantee termination when strlen(s2) >= n. */
char *strncpy(char *s1, const char *s2, size_t n)
{
    char *ret = s1;

    /* Copy until n is exhausted or the terminating NUL has been written.
       n is not decremented for the NUL itself, hence the "> 1" below. */
    while (n != 0 && (*s1++ = *s2++) != '\0') {
        --n;
    }

    while (n-- > 1) {
        *s1++ = '\0';
    }

    return ret;
}

/* Lexicographic comparison of two NUL-terminated strings. */
int strcmp(const char *s1, const char *s2)
{
    while (*s1 != '\0' && *s1 == *s2) {
        ++s1;
        ++s2;
    }

    return *(const unsigned char *)s1 - *(const unsigned char *)s2;
}

/* Lexicographic comparison of at most n characters. */
int strncmp(const char *s1, const char *s2, size_t n)
{
    for (; n != 0; --n, ++s1, ++s2) {
        if (*s1 == '\0' || *s1 != *s2) {
            return *(const unsigned char *)s1 - *(const unsigned char *)s2;
        }
    }

    return 0;
}

/* Length of a NUL-terminated string (NUL not counted). */
size_t strlen(const char *s)
{
    const char *p = s;

    while (*p != '\0') {
        ++p;
    }

    return (size_t)(p - s);
}

/* Like strlen, but never looks past the first n characters. */
size_t strnlen(const char *s, size_t n)
{
    size_t len = 0;

    while (len < n && s[len] != '\0') {
        ++len;
    }

    return len;
}

/* First occurrence of (char)c; the terminating NUL is searchable too. */
char *strchr(const char *s, int c)
{
    for (;; ++s) {
        if (*s == (char)c) {
            return (char *)s;
        }
        if (*s == '\0') {
            return NULL;
        }
    }
}

/* Last occurrence of (char)c; the terminating NUL is searchable too. */
char *strrchr(const char *s, int c)
{
    size_t i = strlen(s) + 1; /* include the terminating NUL in the scan */

    while (i-- > 0) {
        if (s[i] == (char)c) {
            return (char *)s + i;
        }
    }

    return NULL;
}
/*
 * Naive long parser: no overflow/error detection, but now matches the
 * ISO C atol() contract for well-formed input — atol(s) is defined as
 * strtol(s, NULL, 10), which skips leading whitespace and accepts an
 * optional '+' as well as '-'.  The original rejected " 42" and "+7".
 */
long atol(const char *s)
{
    long val = 0;
    bool neg = false;

    /* Skip leading whitespace (the isspace() set, per strtol). */
    while (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\v' || *s == '\f' ||
           *s == '\r')
        s++;

    /* Optional sign. */
    if (*s == '-') {
        neg = true;
        s++;
    } else if (*s == '+') {
        s++;
    }

    while (*s >= '0' && *s <= '9')
        val = (val * 10) + (*s++ - '0');

    if (neg)
        val = -val;

    return val;
}

/*
 * Adler-32 checksum, zlib-compatible.
 * Algorithm from the zlib source, (C) 1995-1998 Jean-loup Gailly and
 * Mark Adler; tinf port (c) 2003-2019 Joergen Ibsen (zlib-style license,
 * notice retained).
 */

#define A32_BASE 65521 /* largest prime below 2^16 */
#define A32_NMAX 5552  /* longest run that cannot overflow 32-bit sums */

/*
 * s1 is the running byte sum, s2 the running sum of s1 values.  Both are
 * reduced mod A32_BASE only every A32_NMAX bytes, which is the largest
 * block for which the 32-bit accumulators provably cannot overflow.
 */
unsigned int tinf_adler32(const void *data, unsigned int length)
{
    const unsigned char *buf = (const unsigned char *) data;

    unsigned int s1 = 1;
    unsigned int s2 = 0;

    while (length > 0) {
        int k = length < A32_NMAX ? length : A32_NMAX;
        int i;

        /* 16-way unrolled inner loop, as in zlib. */
        for (i = k / 16; i; --i, buf += 16) {
            s1 += buf[0];
            s2 += s1;
            s1 += buf[1];
            s2 += s1;
            s1 += buf[2];
            s2 += s1;
            s1 += buf[3];
            s2 += s1;
            s1 += buf[4];
            s2 += s1;
            s1 += buf[5];
            s2 += s1;
            s1 += buf[6];
            s2 += s1;
            s1 += buf[7];
            s2 += s1;

            s1 += buf[8];
            s2 += s1;
            s1 += buf[9];
            s2 += s1;
            s1 += buf[10];
            s2 += s1;
            s1 += buf[11];
            s2 += s1;
            s1 += buf[12];
            s2 += s1;
            s1 += buf[13];
            s2 += s1;
            s1 += buf[14];
            s2 += s1;
            s1 += buf[15];
            s2 += s1;
        }

        for (i = k % 16; i; --i) {
            s1 += *buf++;
            s2 += s1;
        }

        s1 %= A32_BASE;
        s2 %= A32_BASE;

        length -= k;
    }

    return (s2 << 16) | s1;
}
/*
 * CRC-32 (reflected polynomial 0xEDB88320), zlib-compatible.
 * Uses the 16-entry nibble-table variant: each input byte is folded in
 * as two 4-bit table lookups, trading a little speed for a tiny table.
 * Algorithm from the zlib source, (C) 1995-1998 Jean-loup Gailly and
 * Mark Adler; tinf port (c) 1998-2019 Joergen Ibsen (zlib-style license,
 * notice retained).
 */
static const unsigned int tinf_crc32tab[16] = {
    0x00000000, 0x1DB71064, 0x3B6E20C8, 0x26D930AC, 0x76DC4190,
    0x6B6B51F4, 0x4DB26158, 0x5005713C, 0xEDB88320, 0xF00F9344,
    0xD6D6A3E8, 0xCB61B38C, 0x9B64C2B0, 0x86D3D2D4, 0xA00AE278,
    0xBDBDF21C
};

unsigned int tinf_crc32(const void *data, unsigned int length)
{
    const unsigned char *bytes = (const unsigned char *) data;
    unsigned int crc = 0xFFFFFFFF;

    /* Historical shortcut for empty input; the result is 0 either way
       (0xFFFFFFFF ^ 0xFFFFFFFF), so behavior is unchanged. */
    if (length == 0) {
        return 0;
    }

    for (unsigned int pos = 0; pos < length; ++pos) {
        crc ^= bytes[pos];
        crc = tinf_crc32tab[crc & 0x0F] ^ (crc >> 4); /* low nibble */
        crc = tinf_crc32tab[crc & 0x0F] ^ (crc >> 4); /* high nibble */
    }

    return crc ^ 0xFFFFFFFF;
}
This notice may not be removed or altered from any source + * distribution. + */ + +#ifndef TINF_H_INCLUDED +#define TINF_H_INCLUDED + +#ifdef __cplusplus +extern "C" { +#endif + +#define TINF_VER_MAJOR 1 /**< Major version number */ +#define TINF_VER_MINOR 2 /**< Minor version number */ +#define TINF_VER_PATCH 1 /**< Patch version number */ +#define TINF_VER_STRING "1.2.1" /**< Version number as a string */ + +#ifndef TINFCC +# ifdef __WATCOMC__ +# define TINFCC __cdecl +# else +# define TINFCC +# endif +#endif + +/** + * Status codes returned. + * + * @see tinf_uncompress, tinf_gzip_uncompress, tinf_zlib_uncompress + */ +typedef enum { + TINF_OK = 0, /**< Success */ + TINF_DATA_ERROR = -3, /**< Input error */ + TINF_BUF_ERROR = -5 /**< Not enough room for output */ +} tinf_error_code; + +/** + * Initialize global data used by tinf. + * + * @deprecated No longer required, may be removed in a future version. + */ +void TINFCC tinf_init(void); + +/** + * Decompress `sourceLen` bytes of deflate data from `source` to `dest`. + * + * The variable `destLen` points to must contain the size of `dest` on entry, + * and will be set to the size of the decompressed data on success. + * + * Reads at most `sourceLen` bytes from `source`. + * Writes at most `*destLen` bytes to `dest`. + * + * @param dest pointer to where to place decompressed data + * @param destLen pointer to variable containing size of `dest` + * @param source pointer to compressed data + * @param sourceLen size of compressed data + * @return `TINF_OK` on success, error code on error + */ +int TINFCC tinf_uncompress(void *dest, unsigned int *destLen, + const void *source, unsigned int *sourceLen); + +/** + * Decompress `sourceLen` bytes of gzip data from `source` to `dest`. + * + * The variable `destLen` points to must contain the size of `dest` on entry, + * and will be set to the size of the decompressed data on success. + * + * Reads at most `sourceLen` bytes from `source`. 
+ * Writes at most `*destLen` bytes to `dest`. + * + * @param dest pointer to where to place decompressed data + * @param destLen pointer to variable containing size of `dest` + * @param source pointer to compressed data + * @param sourceLen size of compressed data + * @return `TINF_OK` on success, error code on error + */ +int TINFCC tinf_gzip_uncompress(void *dest, unsigned int *destLen, + const void *source, unsigned int *sourceLen); + +/** + * Decompress `sourceLen` bytes of zlib data from `source` to `dest`. + * + * The variable `destLen` points to must contain the size of `dest` on entry, + * and will be set to the size of the decompressed data on success. + * + * Reads at most `sourceLen` bytes from `source`. + * Writes at most `*destLen` bytes to `dest`. + * + * @param dest pointer to where to place decompressed data + * @param destLen pointer to variable containing size of `dest` + * @param source pointer to compressed data + * @param sourceLen size of compressed data + * @return `TINF_OK` on success, error code on error + */ +int TINFCC tinf_zlib_uncompress(void *dest, unsigned int *destLen, + const void *source, unsigned int *sourceLen); + +/** + * Compute Adler-32 checksum of `length` bytes starting at `data`. + * + * @param data pointer to data + * @param length size of data + * @return Adler-32 checksum + */ +unsigned int TINFCC tinf_adler32(const void *data, unsigned int length); + +/** + * Compute CRC32 checksum of `length` bytes starting at `data`. 
+ * + * @param data pointer to data + * @param length size of data + * @return CRC32 checksum + */ +unsigned int TINFCC tinf_crc32(const void *data, unsigned int length); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* TINF_H_INCLUDED */ diff --git a/tools/src/tinf/tinfgzip.c b/tools/src/tinf/tinfgzip.c new file mode 100644 index 0000000..ea07cd7 --- /dev/null +++ b/tools/src/tinf/tinfgzip.c @@ -0,0 +1,191 @@ +/* + * tinfgzip - tiny gzip decompressor + * + * Copyright (c) 2003-2019 Joergen Ibsen + * + * This version of tinfzlib was modified for use with m1n1. + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must + * not claim that you wrote the original software. If you use this + * software in a product, an acknowledgment in the product + * documentation would be appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must + * not be misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source + * distribution. 
+ */ + +#include "tinf.h" + +typedef enum { + FTEXT = 1, + FHCRC = 2, + FEXTRA = 4, + FNAME = 8, + FCOMMENT = 16 +} tinf_gzip_flag; + +static unsigned int read_le16(const unsigned char *p) +{ + return ((unsigned int) p[0]) + | ((unsigned int) p[1] << 8); +} + +static unsigned int read_le32(const unsigned char *p) +{ + return ((unsigned int) p[0]) + | ((unsigned int) p[1] << 8) + | ((unsigned int) p[2] << 16) + | ((unsigned int) p[3] << 24); +} + +int tinf_gzip_uncompress(void *dest, unsigned int *destLen, + const void *source, unsigned int *sourceLen) +{ + const unsigned char *src = (const unsigned char *) source; + unsigned char *dst = (unsigned char *) dest; + const unsigned char *start; + unsigned int dlen, crc32; + int res; + unsigned char flg; + unsigned int sourceDataLen = 0; + + /* -- Check header -- */ + + /* Check room for at least 10 byte header and 8 byte trailer */ + if (*sourceLen && *sourceLen < 18) { + return TINF_DATA_ERROR; + } + + /* Check id bytes */ + if (src[0] != 0x1F || src[1] != 0x8B) { + return TINF_DATA_ERROR; + } + + /* Check method is deflate */ + if (src[2] != 8) { + return TINF_DATA_ERROR; + } + + /* Get flag byte */ + flg = src[3]; + + /* Check that reserved bits are zero */ + if (flg & 0xE0) { + return TINF_DATA_ERROR; + } + + /* -- Find start of compressed data -- */ + + /* Skip base header of 10 bytes */ + start = src + 10; + + /* Skip extra data if present */ + if (flg & FEXTRA) { + unsigned int xlen = read_le16(start); + + if (*sourceLen && xlen > *sourceLen - 12) { + return TINF_DATA_ERROR; + } + + start += xlen + 2; + } + + /* Skip file name if present */ + if (flg & FNAME) { + do { + if (*sourceLen && start - src >= *sourceLen) { + return TINF_DATA_ERROR; + } + } while (*start++); + } + + /* Skip file comment if present */ + if (flg & FCOMMENT) { + do { + if (*sourceLen && start - src >= *sourceLen) { + return TINF_DATA_ERROR; + } + } while (*start++); + } + + /* Check header crc if present */ + if (flg & FHCRC) { + unsigned 
int hcrc; + + if (*sourceLen && start - src > *sourceLen - 2) { + return TINF_DATA_ERROR; + } + + hcrc = read_le16(start); + + if (hcrc != (tinf_crc32(src, start - src) & 0x0000FFFF)) { + return TINF_DATA_ERROR; + } + + start += 2; + } + + /* -- Get decompressed length if available -- */ + + if (*sourceLen) { + dlen = read_le32(&src[*sourceLen - 4]); + + if (dlen > *destLen) { + return TINF_BUF_ERROR; + } + } + + /* -- Check source length if available -- */ + + if (*sourceLen) { + if ((src + *sourceLen) - start < 8) { + return TINF_DATA_ERROR; + } + sourceDataLen = (src + *sourceLen) - start - 8; + } + + /* -- Decompress data -- */ + + res = tinf_uncompress(dst, destLen, start, &sourceDataLen); + + if (res != TINF_OK) { + return TINF_DATA_ERROR; + } + + sourceDataLen += (start - src) + 8; + + if (*sourceLen && *sourceLen != sourceDataLen) { + return TINF_DATA_ERROR; + } + + *sourceLen = sourceDataLen; + + /* -- Check decompressed length -- */ + + dlen = read_le32(&src[*sourceLen - 4]); + + if (*destLen != dlen) { + return TINF_DATA_ERROR; + } + + /* -- Check CRC32 checksum -- */ + + crc32 = read_le32(&src[*sourceLen - 8]); + + if (crc32 != tinf_crc32(dst, dlen)) { + return TINF_DATA_ERROR; + } + + return TINF_OK; +} diff --git a/tools/src/tinf/tinflate.c b/tools/src/tinf/tinflate.c new file mode 100644 index 0000000..c82526c --- /dev/null +++ b/tools/src/tinf/tinflate.c @@ -0,0 +1,648 @@ +/* + * tinflate - tiny inflate + * + * Copyright (c) 2003-2019 Joergen Ibsen + * + * This version of tinfzlib was modified for use with m1n1. + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. 
The origin of this software must not be misrepresented; you must + * not claim that you wrote the original software. If you use this + * software in a product, an acknowledgment in the product + * documentation would be appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must + * not be misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source + * distribution. + */ + +#include "tinf.h" + +#include <assert.h> +#include <limits.h> + +#if defined(UINT_MAX) && (UINT_MAX) < 0xFFFFFFFFUL +# error "tinf requires unsigned int to be at least 32-bit" +#endif + +/* -- Internal data structures -- */ + +struct tinf_tree { + unsigned short counts[16]; /* Number of codes with a given length */ + unsigned short symbols[288]; /* Symbols sorted by code */ + int max_sym; +}; + +struct tinf_data { + const unsigned char *source; + const unsigned char *source_end; + unsigned int tag; + int bitcount; + int overflow; + + unsigned char *dest_start; + unsigned char *dest; + unsigned char *dest_end; + + struct tinf_tree ltree; /* Literal/length tree */ + struct tinf_tree dtree; /* Distance tree */ +}; + +/* -- Utility functions -- */ + +static unsigned int read_le16(const unsigned char *p) +{ + return ((unsigned int) p[0]) + | ((unsigned int) p[1] << 8); +} + +/* Build fixed Huffman trees */ +static void tinf_build_fixed_trees(struct tinf_tree *lt, struct tinf_tree *dt) +{ + int i; + + /* Build fixed literal/length tree */ + for (i = 0; i < 16; ++i) { + lt->counts[i] = 0; + } + + lt->counts[7] = 24; + lt->counts[8] = 152; + lt->counts[9] = 112; + + for (i = 0; i < 24; ++i) { + lt->symbols[i] = 256 + i; + } + for (i = 0; i < 144; ++i) { + lt->symbols[24 + i] = i; + } + for (i = 0; i < 8; ++i) { + lt->symbols[24 + 144 + i] = 280 + i; + } + for (i = 0; i < 112; ++i) { + lt->symbols[24 + 144 + 8 + i] = 144 + i; + } + + lt->max_sym = 285; + + /* Build fixed distance tree */ + for (i = 0; 
i < 16; ++i) { + dt->counts[i] = 0; + } + + dt->counts[5] = 32; + + for (i = 0; i < 32; ++i) { + dt->symbols[i] = i; + } + + dt->max_sym = 29; +} + +/* Given an array of code lengths, build a tree */ +static int tinf_build_tree(struct tinf_tree *t, const unsigned char *lengths, + unsigned int num) +{ + unsigned short offs[16]; + unsigned int i, num_codes, available; + + assert(num <= 288); + + for (i = 0; i < 16; ++i) { + t->counts[i] = 0; + } + + t->max_sym = -1; + + /* Count number of codes for each non-zero length */ + for (i = 0; i < num; ++i) { + assert(lengths[i] <= 15); + + if (lengths[i]) { + t->max_sym = i; + t->counts[lengths[i]]++; + } + } + + /* Compute offset table for distribution sort */ + for (available = 1, num_codes = 0, i = 0; i < 16; ++i) { + unsigned int used = t->counts[i]; + + /* Check length contains no more codes than available */ + if (used > available) { + return TINF_DATA_ERROR; + } + available = 2 * (available - used); + + offs[i] = num_codes; + num_codes += used; + } + + /* + * Check all codes were used, or for the special case of only one + * code that it has length 1 + */ + if ((num_codes > 1 && available > 0) + || (num_codes == 1 && t->counts[1] != 1)) { + return TINF_DATA_ERROR; + } + + /* Fill in symbols sorted by code */ + for (i = 0; i < num; ++i) { + if (lengths[i]) { + t->symbols[offs[lengths[i]]++] = i; + } + } + + /* + * For the special case of only one code (which will be 0) add a + * code 1 which results in a symbol that is too large + */ + if (num_codes == 1) { + t->counts[1] = 2; + t->symbols[1] = t->max_sym + 1; + } + + return TINF_OK; +} + +/* -- Decode functions -- */ + +static void tinf_refill(struct tinf_data *d, int num) +{ + assert(num >= 0 && num <= 32); + + /* Read bytes until at least num bits available */ + while (d->bitcount < num) { + if (d->source != d->source_end) { + d->tag |= (unsigned int) *d->source++ << d->bitcount; + } + else { + d->overflow = 1; + } + d->bitcount += 8; + } + + assert(d->bitcount <= 
32); +} + +static unsigned int tinf_getbits_no_refill(struct tinf_data *d, int num) +{ + unsigned int bits; + + assert(num >= 0 && num <= d->bitcount); + + /* Get bits from tag */ + bits = d->tag & ((1UL << num) - 1); + + /* Remove bits from tag */ + d->tag >>= num; + d->bitcount -= num; + + return bits; +} + +/* Get num bits from source stream */ +static unsigned int tinf_getbits(struct tinf_data *d, int num) +{ + tinf_refill(d, num); + return tinf_getbits_no_refill(d, num); +} + +/* Read a num bit value from stream and add base */ +static unsigned int tinf_getbits_base(struct tinf_data *d, int num, int base) +{ + return base + (num ? tinf_getbits(d, num) : 0); +} + +/* Given a data stream and a tree, decode a symbol */ +static int tinf_decode_symbol(struct tinf_data *d, const struct tinf_tree *t) +{ + int base = 0, offs = 0; + int len; + + /* + * Get more bits while code index is above number of codes + * + * Rather than the actual code, we are computing the position of the + * code in the sorted order of codes, which is the index of the + * corresponding symbol. + * + * Conceptually, for each code length (level in the tree), there are + * counts[len] leaves on the left and internal nodes on the right. + * The index we have decoded so far is base + offs, and if that + * falls within the leaves we are done. Otherwise we adjust the range + * of offs and add one more bit to it. 
+ */ + for (len = 1; ; ++len) { + offs = 2 * offs + tinf_getbits(d, 1); + + assert(len <= 15); + + if (offs < t->counts[len]) { + break; + } + + base += t->counts[len]; + offs -= t->counts[len]; + } + + assert(base + offs >= 0 && base + offs < 288); + + return t->symbols[base + offs]; +} + +/* Given a data stream, decode dynamic trees from it */ +static int tinf_decode_trees(struct tinf_data *d, struct tinf_tree *lt, + struct tinf_tree *dt) +{ + unsigned char lengths[288 + 32]; + + /* Special ordering of code length codes */ + static const unsigned char clcidx[19] = { + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, + 11, 4, 12, 3, 13, 2, 14, 1, 15 + }; + + unsigned int hlit, hdist, hclen; + unsigned int i, num, length; + int res; + + /* Get 5 bits HLIT (257-286) */ + hlit = tinf_getbits_base(d, 5, 257); + + /* Get 5 bits HDIST (1-32) */ + hdist = tinf_getbits_base(d, 5, 1); + + /* Get 4 bits HCLEN (4-19) */ + hclen = tinf_getbits_base(d, 4, 4); + + /* + * The RFC limits the range of HLIT to 286, but lists HDIST as range + * 1-32, even though distance codes 30 and 31 have no meaning. While + * we could allow the full range of HLIT and HDIST to make it possible + * to decode the fixed trees with this function, we consider it an + * error here. 
+ * + * See also: https://github.com/madler/zlib/issues/82 + */ + if (hlit > 286 || hdist > 30) { + return TINF_DATA_ERROR; + } + + for (i = 0; i < 19; ++i) { + lengths[i] = 0; + } + + /* Read code lengths for code length alphabet */ + for (i = 0; i < hclen; ++i) { + /* Get 3 bits code length (0-7) */ + unsigned int clen = tinf_getbits(d, 3); + + lengths[clcidx[i]] = clen; + } + + /* Build code length tree (in literal/length tree to save space) */ + res = tinf_build_tree(lt, lengths, 19); + + if (res != TINF_OK) { + return res; + } + + /* Check code length tree is not empty */ + if (lt->max_sym == -1) { + return TINF_DATA_ERROR; + } + + /* Decode code lengths for the dynamic trees */ + for (num = 0; num < hlit + hdist; ) { + int sym = tinf_decode_symbol(d, lt); + + if (sym > lt->max_sym) { + return TINF_DATA_ERROR; + } + + switch (sym) { + case 16: + /* Copy previous code length 3-6 times (read 2 bits) */ + if (num == 0) { + return TINF_DATA_ERROR; + } + sym = lengths[num - 1]; + length = tinf_getbits_base(d, 2, 3); + break; + case 17: + /* Repeat code length 0 for 3-10 times (read 3 bits) */ + sym = 0; + length = tinf_getbits_base(d, 3, 3); + break; + case 18: + /* Repeat code length 0 for 11-138 times (read 7 bits) */ + sym = 0; + length = tinf_getbits_base(d, 7, 11); + break; + default: + /* Values 0-15 represent the actual code lengths */ + length = 1; + break; + } + + if (length > hlit + hdist - num) { + return TINF_DATA_ERROR; + } + + while (length--) { + lengths[num++] = sym; + } + } + + /* Check EOB symbol is present */ + if (lengths[256] == 0) { + return TINF_DATA_ERROR; + } + + /* Build dynamic trees */ + res = tinf_build_tree(lt, lengths, hlit); + + if (res != TINF_OK) { + return res; + } + + res = tinf_build_tree(dt, lengths + hlit, hdist); + + if (res != TINF_OK) { + return res; + } + + return TINF_OK; +} + +/* -- Block inflate functions -- */ + +/* Given a stream and two trees, inflate a block of data */ +static int tinf_inflate_block_data(struct 
tinf_data *d, struct tinf_tree *lt, + struct tinf_tree *dt) +{ + /* Extra bits and base tables for length codes */ + static const unsigned char length_bits[30] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, + 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5, 0, 127 + }; + + static const unsigned short length_base[30] = { + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, + 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, + 67, 83, 99, 115, 131, 163, 195, 227, 258, 0 + }; + + /* Extra bits and base tables for distance codes */ + static const unsigned char dist_bits[30] = { + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, + 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, 12, 13, 13 + }; + + static const unsigned short dist_base[30] = { + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, + 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, + 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577 + }; + + for (;;) { + int sym = tinf_decode_symbol(d, lt); + + /* Check for overflow in bit reader */ + if (d->overflow) { + return TINF_DATA_ERROR; + } + + if (sym < 256) { + if (d->dest == d->dest_end) { + return TINF_BUF_ERROR; + } + *d->dest++ = sym; + } + else { + int length, dist, offs; + int i; + + /* Check for end of block */ + if (sym == 256) { + return TINF_OK; + } + + /* Check sym is within range and distance tree is not empty */ + if (sym > lt->max_sym || sym - 257 > 28 || dt->max_sym == -1) { + return TINF_DATA_ERROR; + } + + sym -= 257; + + /* Possibly get more bits from length code */ + length = tinf_getbits_base(d, length_bits[sym], + length_base[sym]); + + dist = tinf_decode_symbol(d, dt); + + /* Check dist is within range */ + if (dist > dt->max_sym || dist > 29) { + return TINF_DATA_ERROR; + } + + /* Possibly get more bits from distance code */ + offs = tinf_getbits_base(d, dist_bits[dist], + dist_base[dist]); + + if (offs > d->dest - d->dest_start) { + return TINF_DATA_ERROR; + } + + if (d->dest_end - d->dest < length) { + return TINF_BUF_ERROR; + } + + /* Copy match */ + for (i = 0; i < length; ++i) { + 
d->dest[i] = d->dest[i - offs]; + } + + d->dest += length; + } + } +} + +/* Inflate an uncompressed block of data */ +static int tinf_inflate_uncompressed_block(struct tinf_data *d) +{ + unsigned int length, invlength; + + if (d->source_end && d->source_end - d->source < 4) { + return TINF_DATA_ERROR; + } + + /* Get length */ + length = read_le16(d->source); + + /* Get one's complement of length */ + invlength = read_le16(d->source + 2); + + /* Check length */ + if (length != (~invlength & 0x0000FFFF)) { + return TINF_DATA_ERROR; + } + + d->source += 4; + + if (d->source_end && d->source_end - d->source < length) { + return TINF_DATA_ERROR; + } + + if (d->dest_end - d->dest < length) { + return TINF_BUF_ERROR; + } + + /* Copy block */ + while (length--) { + *d->dest++ = *d->source++; + } + + /* Make sure we start next block on a byte boundary */ + d->tag = 0; + d->bitcount = 0; + + return TINF_OK; +} + +/* Inflate a block of data compressed with fixed Huffman trees */ +static int tinf_inflate_fixed_block(struct tinf_data *d) +{ + /* Build fixed Huffman trees */ + tinf_build_fixed_trees(&d->ltree, &d->dtree); + + /* Decode block using fixed trees */ + return tinf_inflate_block_data(d, &d->ltree, &d->dtree); +} + +/* Inflate a block of data compressed with dynamic Huffman trees */ +static int tinf_inflate_dynamic_block(struct tinf_data *d) +{ + /* Decode trees from stream */ + int res = tinf_decode_trees(d, &d->ltree, &d->dtree); + + if (res != TINF_OK) { + return res; + } + + /* Decode block using decoded trees */ + return tinf_inflate_block_data(d, &d->ltree, &d->dtree); +} + +/* -- Public functions -- */ + +/* Initialize global (static) data */ +void tinf_init(void) +{ + return; +} + +/* Inflate stream from source to dest */ +int tinf_uncompress(void *dest, unsigned int *destLen, + const void *source, unsigned int *sourceLen) +{ + struct tinf_data d; + int bfinal; + + /* Initialise data */ + d.source = (const unsigned char *) source; + if (sourceLen && *sourceLen) 
+ d.source_end = d.source + *sourceLen; + else + d.source_end = 0; + d.tag = 0; + d.bitcount = 0; + d.overflow = 0; + + d.dest = (unsigned char *) dest; + d.dest_start = d.dest; + d.dest_end = d.dest + *destLen; + + do { + unsigned int btype; + int res; + + /* Read final block flag */ + bfinal = tinf_getbits(&d, 1); + + /* Read block type (2 bits) */ + btype = tinf_getbits(&d, 2); + + /* Decompress block */ + switch (btype) { + case 0: + /* Decompress uncompressed block */ + res = tinf_inflate_uncompressed_block(&d); + break; + case 1: + /* Decompress block with fixed Huffman trees */ + res = tinf_inflate_fixed_block(&d); + break; + case 2: + /* Decompress block with dynamic Huffman trees */ + res = tinf_inflate_dynamic_block(&d); + break; + default: + res = TINF_DATA_ERROR; + break; + } + + if (res != TINF_OK) { + return res; + } + } while (!bfinal); + + /* Check for overflow in bit reader */ + if (d.overflow) { + return TINF_DATA_ERROR; + } + + if (sourceLen) { + unsigned int slen = d.source - (const unsigned char *)source; + if (!*sourceLen) + *sourceLen = slen; + else if (*sourceLen != slen) + return TINF_DATA_ERROR; + } + + *destLen = d.dest - d.dest_start; + return TINF_OK; +} + +/* clang -g -O1 -fsanitize=fuzzer,address -DTINF_FUZZING tinflate.c */ +#if defined(TINF_FUZZING) +#include <limits.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +unsigned char depacked[64 * 1024]; + +extern int +LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) +{ + if (size > UINT_MAX / 2) { return 0; } + unsigned int destLen = sizeof(depacked); + tinf_uncompress(depacked, &destLen, data, size); + return 0; +} +#endif diff --git a/tools/src/tinf/tinfzlib.c b/tools/src/tinf/tinfzlib.c new file mode 100644 index 0000000..6af07b8 --- /dev/null +++ b/tools/src/tinf/tinfzlib.c @@ -0,0 +1,99 @@ +/* + * tinfzlib - tiny zlib decompressor + * + * This version of tinfzlib was modified for use with m1n1. 
+ * + * Copyright (c) 2003-2019 Joergen Ibsen + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must + * not claim that you wrote the original software. If you use this + * software in a product, an acknowledgment in the product + * documentation would be appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must + * not be misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source + * distribution. + */ + +#include "tinf.h" + +static unsigned int read_be32(const unsigned char *p) +{ + return ((unsigned int) p[0] << 24) + | ((unsigned int) p[1] << 16) + | ((unsigned int) p[2] << 8) + | ((unsigned int) p[3]); +} + +int tinf_zlib_uncompress(void *dest, unsigned int *destLen, + const void *source, unsigned int *sourceLen) +{ + const unsigned char *src = (const unsigned char *) source; + unsigned char *dst = (unsigned char *) dest; + unsigned int a32; + int res; + unsigned char cmf, flg; + unsigned int sourceDataLen = sourceLen ? 
*sourceLen - 6 : 0; + + /* -- Check header -- */ + + /* Check room for at least 2 byte header and 4 byte trailer */ + if (*sourceLen && *sourceLen < 6) { + return TINF_DATA_ERROR; + } + + /* Get header bytes */ + cmf = src[0]; + flg = src[1]; + + /* Check checksum */ + if ((256 * cmf + flg) % 31) { + return TINF_DATA_ERROR; + } + + /* Check method is deflate */ + if ((cmf & 0x0F) != 8) { + return TINF_DATA_ERROR; + } + + /* Check window size is valid */ + if ((cmf >> 4) > 7) { + return TINF_DATA_ERROR; + } + + /* Check there is no preset dictionary */ + if (flg & 0x20) { + return TINF_DATA_ERROR; + } + + /* -- Decompress data -- */ + + res = tinf_uncompress(dst, destLen, src + 2, &sourceDataLen); + + if (res != TINF_OK) { + return TINF_DATA_ERROR; + } + + /* -- Check Adler-32 checksum -- */ + + a32 = read_be32(&src[sourceDataLen + 2]); + + if (a32 != tinf_adler32(dst, *destLen)) { + return TINF_DATA_ERROR; + } + + if (sourceLen) + *sourceLen = sourceDataLen + 6; + + return TINF_OK; +} diff --git a/tools/src/tps6598x.c b/tools/src/tps6598x.c new file mode 100644 index 0000000..fdb5e11 --- /dev/null +++ b/tools/src/tps6598x.c @@ -0,0 +1,172 @@ +/* SPDX-License-Identifier: MIT */ + +#include "tps6598x.h" +#include "adt.h" +#include "i2c.h" +#include "iodev.h" +#include "malloc.h" +#include "types.h" +#include "utils.h" + +#define TPS_REG_CMD1 0x08 +#define TPS_REG_DATA1 0x09 +#define TPS_REG_INT_EVENT1 0x14 +#define TPS_REG_INT_MASK1 0x16 +#define TPS_REG_INT_CLEAR1 0x18 +#define TPS_REG_POWER_STATE 0x20 +#define TPS_CMD_INVALID 0x21434d44 // !CMD + +struct tps6598x_dev { + i2c_dev_t *i2c; + u8 addr; +}; + +tps6598x_dev_t *tps6598x_init(const char *adt_node, i2c_dev_t *i2c) +{ + int adt_offset; + adt_offset = adt_path_offset(adt, adt_node); + if (adt_offset < 0) { + printf("tps6598x: Error getting %s node\n", adt_node); + return NULL; + } + + const u8 *iic_addr = adt_getprop(adt, adt_offset, "hpm-iic-addr", NULL); + if (iic_addr == NULL) { + printf("tps6598x: Error 
getting %s hpm-iic-addr\n.", adt_node); + return NULL; + } + + tps6598x_dev_t *dev = malloc(sizeof(*dev)); + if (!dev) + return NULL; + + dev->i2c = i2c; + dev->addr = *iic_addr; + return dev; +} + +void tps6598x_shutdown(tps6598x_dev_t *dev) +{ + free(dev); +} + +int tps6598x_command(tps6598x_dev_t *dev, const char *cmd, const u8 *data_in, size_t len_in, + u8 *data_out, size_t len_out) +{ + if (len_in) { + if (i2c_smbus_write(dev->i2c, dev->addr, TPS_REG_DATA1, data_in, len_in) < 0) + return -1; + } + + if (i2c_smbus_write(dev->i2c, dev->addr, TPS_REG_CMD1, (const u8 *)cmd, 4) < 0) + return -1; + + u32 cmd_status; + do { + if (i2c_smbus_read32(dev->i2c, dev->addr, TPS_REG_CMD1, &cmd_status)) + return -1; + if (cmd_status == TPS_CMD_INVALID) + return -1; + udelay(100); + } while (cmd_status != 0); + + if (len_out) { + if (i2c_smbus_read(dev->i2c, dev->addr, TPS_REG_DATA1, data_out, len_out) != + (ssize_t)len_out) + return -1; + } + + return 0; +} + +int tps6598x_disable_irqs(tps6598x_dev_t *dev, tps6598x_irq_state_t *state) +{ + size_t read; + int written; + static const u8 zeros[CD3218B12_IRQ_WIDTH] = {0x00}; + static const u8 ones[CD3218B12_IRQ_WIDTH] = {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF}; + + // store IntEvent 1 to restore it later + read = i2c_smbus_read(dev->i2c, dev->addr, TPS_REG_INT_MASK1, state->int_mask1, + sizeof(state->int_mask1)); + if (read != CD3218B12_IRQ_WIDTH) { + printf("tps6598x: reading TPS_REG_INT_MASK1 failed\n"); + return -1; + } + state->valid = 1; + + // mask interrupts and ack all interrupt flags + written = i2c_smbus_write(dev->i2c, dev->addr, TPS_REG_INT_CLEAR1, ones, sizeof(ones)); + if (written != sizeof(zeros)) { + printf("tps6598x: writing TPS_REG_INT_CLEAR1 failed, written: %d\n", written); + return -1; + } + written = i2c_smbus_write(dev->i2c, dev->addr, TPS_REG_INT_MASK1, zeros, sizeof(zeros)); + if (written != sizeof(ones)) { + printf("tps6598x: writing TPS_REG_INT_MASK1 failed, written: %d\n", written); + 
return -1; + } + +#ifdef DEBUG + u8 tmp[CD3218B12_IRQ_WIDTH] = {0x00}; + read = i2c_smbus_read(dev->i2c, dev->addr, TPS_REG_INT_MASK1, tmp, CD3218B12_IRQ_WIDTH); + if (read != CD3218B12_IRQ_WIDTH) + printf("tps6598x: failed verifcation, can't read TPS_REG_INT_MASK1\n"); + else { + printf("tps6598x: verify: TPS_REG_INT_MASK1 vs. saved IntMask1\n"); + hexdump(tmp, sizeof(tmp)); + hexdump(state->int_mask1, sizeof(state->int_mask1)); + } +#endif + return 0; +} + +int tps6598x_restore_irqs(tps6598x_dev_t *dev, tps6598x_irq_state_t *state) +{ + int written; + + written = i2c_smbus_write(dev->i2c, dev->addr, TPS_REG_INT_MASK1, state->int_mask1, + sizeof(state->int_mask1)); + if (written != sizeof(state->int_mask1)) { + printf("tps6598x: restoring TPS_REG_INT_MASK1 failed\n"); + return -1; + } + +#ifdef DEBUG + int read; + u8 tmp[CD3218B12_IRQ_WIDTH]; + read = i2c_smbus_read(dev->i2c, dev->addr, TPS_REG_INT_MASK1, tmp, sizeof(tmp)); + if (read != sizeof(tmp)) + printf("tps6598x: failed verifcation, can't read TPS_REG_INT_MASK1\n"); + else { + printf("tps6598x: verify saved IntMask1 vs. 
TPS_REG_INT_MASK1:\n"); + hexdump(state->int_mask1, sizeof(state->int_mask1)); + hexdump(tmp, sizeof(tmp)); + } +#endif + + return 0; +} + +int tps6598x_powerup(tps6598x_dev_t *dev) +{ + u8 power_state; + + if (i2c_smbus_read8(dev->i2c, dev->addr, TPS_REG_POWER_STATE, &power_state)) + return -1; + + if (power_state == 0) + return 0; + + const u8 data = 0; + tps6598x_command(dev, "SSPS", &data, 1, NULL, 0); + + if (i2c_smbus_read8(dev->i2c, dev->addr, TPS_REG_POWER_STATE, &power_state)) + return -1; + + if (power_state != 0) + return -1; + + return 0; +} diff --git a/tools/src/tps6598x.h b/tools/src/tps6598x.h new file mode 100644 index 0000000..9e6d26a --- /dev/null +++ b/tools/src/tps6598x.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef TPS6598X_H +#define TPS6598X_H + +#include "i2c.h" +#include "types.h" + +typedef struct tps6598x_dev tps6598x_dev_t; + +tps6598x_dev_t *tps6598x_init(const char *adt_path, i2c_dev_t *i2c); +void tps6598x_shutdown(tps6598x_dev_t *dev); + +int tps6598x_command(tps6598x_dev_t *dev, const char *cmd, const u8 *data_in, size_t len_in, + u8 *data_out, size_t len_out); +int tps6598x_powerup(tps6598x_dev_t *dev); + +#define CD3218B12_IRQ_WIDTH 9 + +typedef struct tps6598x_irq_state { + u8 int_mask1[CD3218B12_IRQ_WIDTH]; + bool valid; +} tps6598x_irq_state_t; + +int tps6598x_disable_irqs(tps6598x_dev_t *dev, tps6598x_irq_state_t *state); +int tps6598x_restore_irqs(tps6598x_dev_t *dev, tps6598x_irq_state_t *state); + +#endif diff --git a/tools/src/tunables.c b/tools/src/tunables.c new file mode 100644 index 0000000..ced789e --- /dev/null +++ b/tools/src/tunables.c @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: MIT */ + +#include "adt.h" +#include "tunables.h" +#include "types.h" +#include "utils.h" + +struct tunable_info { + int node_offset; + int node_path[8]; + const u32 *tunable_raw; + u32 tunable_len; +}; + +static int tunables_adt_find(const char *path, const char *prop, struct tunable_info *info, + u32 item_size) +{ + 
info->node_offset = adt_path_offset_trace(adt, path, info->node_path); + if (info->node_offset < 0) { + printf("tunable: unable to find ADT node %s.\n", path); + return -1; + } + + info->tunable_raw = adt_getprop(adt, info->node_offset, prop, &info->tunable_len); + if (info->tunable_raw == NULL || info->tunable_len == 0) { + printf("tunable: Error getting ADT node %s property %s .\n", path, prop); + return -1; + } + + if (info->tunable_len % item_size) { + printf("tunable: tunable length needs to be a multiply of %d but is %d\n", item_size, + info->tunable_len); + return -1; + } + + info->tunable_len /= item_size; + + return 0; +} + +struct tunable_global { + u32 reg_idx; + u32 offset; + u32 mask; + u32 value; +} PACKED; + +int tunables_apply_global(const char *path, const char *prop) +{ + struct tunable_info info; + + if (tunables_adt_find(path, prop, &info, sizeof(struct tunable_global)) < 0) + return -1; + + const struct tunable_global *tunables = (const struct tunable_global *)info.tunable_raw; + for (u32 i = 0; i < info.tunable_len; ++i) { + const struct tunable_global *tunable = &tunables[i]; + + u64 addr; + if (adt_get_reg(adt, info.node_path, "reg", tunable->reg_idx, &addr, NULL) < 0) { + printf("tunable: Error getting regs with index %d\n", tunable->reg_idx); + return -1; + } + + mask32(addr + tunable->offset, tunable->mask, tunable->value); + } + + return 0; +} + +struct tunable_local { + u32 offset; + u32 size; + u64 mask; + u64 value; +} PACKED; + +int tunables_apply_local_addr(const char *path, const char *prop, uintptr_t base) +{ + struct tunable_info info; + + if (tunables_adt_find(path, prop, &info, sizeof(struct tunable_local)) < 0) + return -1; + + const struct tunable_local *tunables = (const struct tunable_local *)info.tunable_raw; + for (u32 i = 0; i < info.tunable_len; ++i) { + const struct tunable_local *tunable = &tunables[i]; + + switch (tunable->size) { + case 1: + mask8(base + tunable->offset, tunable->mask, tunable->value); + break; + 
case 2: + mask16(base + tunable->offset, tunable->mask, tunable->value); + break; + case 4: + mask32(base + tunable->offset, tunable->mask, tunable->value); + break; + case 8: + mask64(base + tunable->offset, tunable->mask, tunable->value); + break; + default: + printf("tunable: unknown tunable size 0x%08x\n", tunable->size); + return -1; + } + } + return 0; +} + +int tunables_apply_local(const char *path, const char *prop, u32 reg_offset) +{ + struct tunable_info info; + + if (tunables_adt_find(path, prop, &info, sizeof(struct tunable_local)) < 0) + return -1; + + u64 base; + if (adt_get_reg(adt, info.node_path, "reg", reg_offset, &base, NULL) < 0) { + printf("tunable: Error getting regs\n"); + return -1; + } + + return tunables_apply_local_addr(path, prop, base); +} diff --git a/tools/src/tunables.h b/tools/src/tunables.h new file mode 100644 index 0000000..cf3091a --- /dev/null +++ b/tools/src/tunables.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef TUNABLES_H +#define TUNABLES_H + +#include "types.h" + +/* + * This function applies the tunables usually passed in the node "tunable". + * They usually apply to multiple entries from the "reg" node. + * + * Example usage for the USB DRD node: + * tunables_apply_global("/arm-io/usb-drd0", "tunable"); + */ +int tunables_apply_global(const char *path, const char *prop); + +/* + * This function applies the tunables specified in device-specific tunable properties. + * These only apply to a single MMIO region from the "reg" node which needs to + * be specified. 
+ * + * Example usage for two tunables from the USB DRD DART node: + * tunables_apply_local("/arm-io/dart-usb0", "dart-tunables-instance-0", 0); + * tunables_apply_local("/arm-io/dart-usb0", "dart-tunables-instance-1", 1); + * + */ +int tunables_apply_local(const char *path, const char *prop, u32 reg_idx); + +/* + * This functions does the same as tunables_apply_local except that it allows + * to specify the base address to which the tunables will be applied to instead + * of extracting it from the "regs" property. + * + * Example usage for two tunables for the USB DRD DART node: + * tunables_apply_local_addr("/arm-io/dart-usb0", "dart-tunables-instance-0", 0x382f00000); + * tunables_apply_local_addr("/arm-io/dart-usb0", "dart-tunables-instance-1", 0x382f80000); + */ +int tunables_apply_local_addr(const char *path, const char *prop, uintptr_t base); + +int tunables_apply_static(void); + +#endif diff --git a/tools/src/tunables_static.c b/tools/src/tunables_static.c new file mode 100644 index 0000000..e569e6b --- /dev/null +++ b/tools/src/tunables_static.c @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: MIT */ + +#include "tunables.h" +#include "adt.h" +#include "pmgr.h" +#include "soc.h" +#include "types.h" +#include "utils.h" + +/* + * These magic tunable sequences are hardcoded in various places in XNU, and are required for + * proper operation of various fabric features and other miscellanea. Without them, things tend + * to subtly break... 
+ */ + +struct entry { + u32 offset; + u32 clear; + u32 set; +}; + +struct entry t8103_agx_tunables[] = { + {0x30, 0xffffffff, 0x50014}, {0x34, 0xffffffff, 0xa003c}, + {0x400, 0x400103ff, 0x40010001}, {0x600, 0x1ffffff, 0x1ffffff}, + {0x738, 0x1ff01ff, 0x140034}, {0x798, 0x1ff01ff, 0x14003c}, + {0x800, 0x100, 0x100}, {-1, 0, 0}, +}; + +// TODO: check masks +struct entry t600x_agx_tunables[] = { + {0x0, 0x1, 0x1}, + {0x10, 0xfff0000, 0xd0000}, + {0x14, 0x3, 0x1}, + {0x18, 0x3, 0x1}, + {0x1c, 0x3, 0x3}, + {0x20, 0x3, 0x3}, + {0x24, 0x3, 0x3}, + {0x28, 0x3, 0x3}, + {0x2c, 0x3, 0x3}, + {0x400, 0x400103ff, 0x40010001}, + {0x600, 0x1ffffff, 0x1ffffff}, + {0x800, 0x100, 0x100}, + {-1, 0, 0}, +}; + +// TODO: check masks +struct entry t8112_agx_tunables[] = { + {0x0, 0x200, 0x200}, + {0x34, 0xffffffff, 0x50014}, + {0x38, 0xffffffff, 0xa003c}, + {0x400, 0xc00103ff, 0xc0010001}, + {0x600, 0x1ffffff, 0x1ffffff}, + {0x738, 0x1ff01ff, 0x14003c}, + {0x798, 0x1ff01ff, 0x14003c}, + {0x800, 0x100, 0x100}, + {-1, 0, 0}, +}; + +static void tunables_apply(u64 base, struct entry *entry) +{ + while (entry->offset != UINT32_MAX) { + mask32(base + entry->offset, entry->clear, entry->set); + entry++; + } +} + +int power_and_apply(const char *path, u64 base, struct entry *entries) +{ + if (pmgr_adt_power_enable(path) < 0) { + printf("tunables: Failed to enable power: %s\n", path); + return -1; + } + + tunables_apply(base, entries); + + if (pmgr_adt_power_disable(path) < 0) { + printf("tunables: Failed to disable power: %s\n", path); + return -1; + } + + return 0; +} + +int tunables_apply_static(void) +{ + int ret = 0; + + switch (chip_id) { + case T8103: + ret |= power_and_apply("/arm-io/sgx", 0x205000000, t8103_agx_tunables); + break; + case T8112: + ret |= power_and_apply("/arm-io/sgx", 0x205000000, t8112_agx_tunables); + break; + case T6000: + case T6001: + case T6002: + ret |= power_and_apply("/arm-io/sgx", 0x405000000, t600x_agx_tunables); + break; + default: + break; + } + + return ret 
? -1 : 0; +} diff --git a/tools/src/types.h b/tools/src/types.h new file mode 100644 index 0000000..6fd0789 --- /dev/null +++ b/tools/src/types.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef TYPES_H +#define TYPES_H + +#ifndef __ASSEMBLER__ + +#include <limits.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +typedef uint8_t u8; +typedef uint16_t u16; +typedef uint32_t u32; +typedef uint64_t u64; + +typedef int8_t s8; +typedef int16_t s16; +typedef int32_t s32; +typedef int64_t s64; + +typedef u64 uintptr_t; +typedef s64 ptrdiff_t; + +typedef s64 ssize_t; + +#endif + +#define UNUSED(x) (void)(x) +#define ALIGNED(x) __attribute__((aligned(x))) +#define PACKED __attribute__((packed)) + +#define STACK_ALIGN(type, name, cnt, alignment) \ + u8 _al__##name[((sizeof(type) * (cnt)) + (alignment) + \ + (((sizeof(type) * (cnt)) % (alignment)) > 0 \ + ? ((alignment) - ((sizeof(type) * (cnt)) % (alignment))) \ + : 0))]; \ + type *name = \ + (type *)(((u32)(_al__##name)) + ((alignment) - (((u32)(_al__##name)) & ((alignment)-1)))) + +#define HAVE_PTRDIFF_T 1 +#define HAVE_UINTPTR_T 1 +#define UPTRDIFF_T uintptr_t + +#define SZ_2K (1 << 11) +#define SZ_4K (1 << 12) +#define SZ_16K (1 << 14) +#define SZ_1M (1 << 20) +#define SZ_32M (1 << 25) + +#ifdef __ASSEMBLER__ + +#define sys_reg(op0, op1, CRn, CRm, op2) s##op0##_##op1##_c##CRn##_c##CRm##_##op2 + +#else + +#define sys_reg(op0, op1, CRn, CRm, op2) , _S, op0, op1, CRn, CRm, op2 + +#endif + +#endif diff --git a/tools/src/uart.c b/tools/src/uart.c new file mode 100644 index 0000000..67aa0e3 --- /dev/null +++ b/tools/src/uart.c @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: MIT */ + +#include <stdarg.h> + +#include "adt.h" +#include "iodev.h" +#include "types.h" +#include "uart.h" +#include "uart_regs.h" +#include "utils.h" +#include "vsprintf.h" + +#define UART_CLOCK 24000000 + +static u64 uart_base = 0; + +int uart_init(void) +{ + int path[8]; + int node = adt_path_offset_trace(adt, 
"/arm-io/uart0", path); + + if (node < 0) { + printf("!!! UART node not found!\n"); + return -1; + } + + if (adt_get_reg(adt, path, "reg", 0, &uart_base, NULL)) { + printf("!!! Failed to get UART reg property!\n"); + return -1; + } + + return 0; +} + +void uart_putbyte(u8 c) +{ + if (!uart_base) + return; + + while (!(read32(uart_base + UTRSTAT) & UTRSTAT_TXBE)) + ; + + write32(uart_base + UTXH, c); +} + +u8 uart_getbyte(void) +{ + if (!uart_base) + return 0; + + while (!(read32(uart_base + UTRSTAT) & UTRSTAT_RXD)) + ; + + return read32(uart_base + URXH); +} + +void uart_putchar(u8 c) +{ + if (c == '\n') + uart_putbyte('\r'); + + uart_putbyte(c); +} + +u8 uart_getchar(void) +{ + return uart_getbyte(); +} + +void uart_puts(const char *s) +{ + while (*s) + uart_putchar(*(s++)); + + uart_putchar('\n'); +} + +void uart_write(const void *buf, size_t count) +{ + const u8 *p = buf; + + while (count--) + uart_putbyte(*p++); +} + +size_t uart_read(void *buf, size_t count) +{ + u8 *p = buf; + size_t recvd = 0; + + while (count--) { + *p++ = uart_getbyte(); + recvd++; + } + + return recvd; +} + +void uart_setbaud(int baudrate) +{ + if (!uart_base) + return; + + uart_flush(); + write32(uart_base + UBRDIV, ((UART_CLOCK / baudrate + 7) / 16) - 1); +} + +void uart_flush(void) +{ + if (!uart_base) + return; + + while (!(read32(uart_base + UTRSTAT) & UTRSTAT_TXE)) + ; +} + +void uart_clear_irqs(void) +{ + if (!uart_base) + return; + + write32(uart_base + UTRSTAT, UTRSTAT_TXTHRESH | UTRSTAT_RXTHRESH | UTRSTAT_RXTO); +} + +int uart_printf(const char *fmt, ...) 
+{ + va_list args; + char buffer[512]; + int i; + + va_start(args, fmt); + i = vsnprintf(buffer, sizeof(buffer), fmt, args); + va_end(args); + + uart_write(buffer, min(i, (int)(sizeof(buffer) - 1))); + + return i; +} + +static bool uart_iodev_can_write(void *opaque) +{ + UNUSED(opaque); + return true; +} + +static ssize_t uart_iodev_can_read(void *opaque) +{ + UNUSED(opaque); + + if (!uart_base) + return 0; + + return (read32(uart_base + UTRSTAT) & UTRSTAT_RXD) ? 1 : 0; +} + +static ssize_t uart_iodev_read(void *opaque, void *buf, size_t len) +{ + UNUSED(opaque); + return uart_read(buf, len); +} + +static ssize_t uart_iodev_write(void *opaque, const void *buf, size_t len) +{ + UNUSED(opaque); + uart_write(buf, len); + return len; +} + +static struct iodev_ops iodev_uart_ops = { + .can_read = uart_iodev_can_read, + .can_write = uart_iodev_can_write, + .read = uart_iodev_read, + .write = uart_iodev_write, +}; + +struct iodev iodev_uart = { + .ops = &iodev_uart_ops, + .usage = USAGE_CONSOLE | USAGE_UARTPROXY, + .lock = SPINLOCK_INIT, +}; diff --git a/tools/src/uart.h b/tools/src/uart.h new file mode 100644 index 0000000..0b03f2d --- /dev/null +++ b/tools/src/uart.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef UART_H +#define UART_H + +#include "types.h" + +int uart_init(void); + +void uart_putbyte(u8 c); +u8 uart_getbyte(void); + +void uart_putchar(u8 c); +u8 uart_getchar(void); + +void uart_write(const void *buf, size_t count); +size_t uart_read(void *buf, size_t count); + +void uart_puts(const char *s); + +void uart_setbaud(int baudrate); + +void uart_flush(void); + +void uart_clear_irqs(void); + +int uart_printf(const char *fmt, ...) 
__attribute__((format(printf, 1, 2))); + +#endif diff --git a/tools/src/uart_regs.h b/tools/src/uart_regs.h new file mode 100644 index 0000000..bca1fe4 --- /dev/null +++ b/tools/src/uart_regs.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: MIT */ + +#define ULCON 0x000 +#define UCON 0x004 +#define UFCON 0x008 +#define UTRSTAT 0x010 +#define UFSTAT 0x018 +#define UTXH 0x020 +#define URXH 0x024 +#define UBRDIV 0x028 +#define UFRACVAL 0x02c + +#define UCON_TXTHRESH_ENA BIT(13) +#define UCON_RXTHRESH_ENA BIT(12) +#define UCON_RXTO_ENA BIT(9) +#define UCON_TXMODE GENMASK(3, 2) +#define UCON_RXMODE GENMASK(1, 0) + +#define UCON_MODE_OFF 0 +#define UCON_MODE_IRQ 1 + +#define UTRSTAT_RXTO BIT(9) +#define UTRSTAT_TXTHRESH BIT(5) +#define UTRSTAT_RXTHRESH BIT(4) +#define UTRSTAT_TXE BIT(2) +#define UTRSTAT_TXBE BIT(1) +#define UTRSTAT_RXD BIT(0) + +#define UFSTAT_TXFULL BIT(9) +#define UFSTAT_RXFULL BIT(8) +#define UFSTAT_TXCNT GENMASK(7, 4) +#define UFSTAT_RXCNT GENMASK(3, 0) diff --git a/tools/src/uartproxy.c b/tools/src/uartproxy.c new file mode 100644 index 0000000..fed9cc5 --- /dev/null +++ b/tools/src/uartproxy.c @@ -0,0 +1,317 @@ +/* SPDX-License-Identifier: MIT */ + +#include "uartproxy.h" +#include "assert.h" +#include "exception.h" +#include "iodev.h" +#include "proxy.h" +#include "string.h" +#include "types.h" +#include "utils.h" + +#define REQ_SIZE 64 + +typedef struct { + u32 _pad; + u32 type; + union { + ProxyRequest prequest; + struct { + u64 addr; + u64 size; + u32 dchecksum; + } mrequest; + u64 features; + }; + u32 checksum; +} UartRequest; + +#define REPLY_SIZE 36 + +typedef struct { + u32 type; + s32 status; + union { + ProxyReply preply; + struct { + u32 dchecksum; + } mreply; + struct uartproxy_msg_start start; + u64 features; + }; + u32 checksum; + u32 _dummy; // Not transferred +} UartReply; + +typedef struct { + u32 type; + u16 len; + u16 event_type; +} UartEventHdr; + +static_assert(sizeof(UartReply) == (REPLY_SIZE + 4), "Invalid UartReply size"); + 
+#define REQ_NOP 0x00AA55FF +#define REQ_PROXY 0x01AA55FF +#define REQ_MEMREAD 0x02AA55FF +#define REQ_MEMWRITE 0x03AA55FF +#define REQ_BOOT 0x04AA55FF +#define REQ_EVENT 0x05AA55FF + +#define ST_OK 0 +#define ST_BADCMD -1 +#define ST_INVAL -2 +#define ST_XFRERR -3 +#define ST_CSUMERR -4 + +#define PROXY_FEAT_DISABLE_DATA_CSUMS 0x01 +#define PROXY_FEAT_ALL (PROXY_FEAT_DISABLE_DATA_CSUMS) + +static u32 iodev_proxy_buffer[IODEV_MAX]; + +#define CHECKSUM_INIT 0xDEADBEEF +#define CHECKSUM_FINAL 0xADDEDBAD +#define CHECKSUM_SENTINEL 0xD0DECADE +#define DATA_END_SENTINEL 0xB0CACC10 + +static bool disable_data_csums = false; + +// I just totally pulled this out of my arse +// Noinline so that this can be bailed out by exc_guard = EXC_RETURN +// We assume this function does not use the stack +static u32 __attribute__((noinline)) checksum_block(void *start, u32 length, u32 init) +{ + u32 sum = init; + u8 *d = (u8 *)start; + + while (length--) { + sum *= 31337; + sum += (*d++) ^ 0x5A; + } + return sum; +} + +static inline u32 checksum_start(void *start, u32 length) +{ + return checksum_block(start, length, CHECKSUM_INIT); +} + +static inline u32 checksum_add(void *start, u32 length, u32 sum) +{ + return checksum_block(start, length, sum); +} + +static inline u32 checksum_finish(u32 sum) +{ + return sum ^ CHECKSUM_FINAL; +} + +static inline u32 checksum(void *start, u32 length) +{ + return checksum_finish(checksum_start(start, length)); +} + +static u64 data_checksum(void *start, u32 length) +{ + if (disable_data_csums) { + return CHECKSUM_SENTINEL; + } + + return checksum(start, length); +} + +iodev_id_t uartproxy_iodev; + +int uartproxy_run(struct uartproxy_msg_start *start) +{ + int ret; + int running = 1; + size_t bytes; + u64 checksum_val; + u64 enabled_features = 0; + + iodev_id_t iodev = IODEV_MAX; + + UartRequest request; + UartReply reply = {REQ_BOOT}; + if (!start) { + // Startup notification only goes out via UART + reply.checksum = checksum(&reply, REPLY_SIZE - 
4); + iodev_write(IODEV_UART, &reply, REPLY_SIZE); + } else { + // Exceptions / hooks keep the current iodev + iodev = uartproxy_iodev; + reply.start = *start; + reply.checksum = checksum(&reply, REPLY_SIZE - 4); + iodev_write(iodev, &reply, REPLY_SIZE); + } + + while (running) { + if (!start) { + // Look for commands from any iodev on startup + for (iodev = 0; iodev < IODEV_MAX;) { + u8 b; + if ((iodev_get_usage(iodev) & USAGE_UARTPROXY)) { + iodev_handle_events(iodev); + if (iodev_can_read(iodev) && iodev_read(iodev, &b, 1) == 1) { + iodev_proxy_buffer[iodev] >>= 8; + iodev_proxy_buffer[iodev] |= b << 24; + if ((iodev_proxy_buffer[iodev] & 0xffffff) == 0xAA55FF) + break; + } + } + iodev++; + if (iodev == IODEV_MAX) + iodev = 0; + } + } else { + // Stick to the current iodev for exceptions + do { + u8 b; + iodev_handle_events(iodev); + if (iodev_read(iodev, &b, 1) != 1) { + printf("Proxy: iodev read failed, exiting.\n"); + return -1; + } + iodev_proxy_buffer[iodev] >>= 8; + iodev_proxy_buffer[iodev] |= b << 24; + } while ((iodev_proxy_buffer[iodev] & 0xffffff) != 0xAA55FF); + } + + memset(&request, 0, sizeof(request)); + request.type = iodev_proxy_buffer[iodev]; + bytes = iodev_read(iodev, (&request.type) + 1, REQ_SIZE - 4); + if (bytes != REQ_SIZE - 4) + continue; + + if (checksum(&(request.type), REQ_SIZE - 4) != request.checksum) { + memset(&reply, 0, sizeof(reply)); + reply.type = request.type; + reply.status = ST_CSUMERR; + reply.checksum = checksum(&reply, REPLY_SIZE - 4); + iodev_write(iodev, &reply, REPLY_SIZE); + continue; + } + + memset(&reply, 0, sizeof(reply)); + reply.type = request.type; + reply.status = ST_OK; + + uartproxy_iodev = iodev; + + switch (request.type) { + case REQ_NOP: + enabled_features = request.features & PROXY_FEAT_ALL; + if (iodev == IODEV_UART) { + // Don't allow disabling checksums on UART + enabled_features &= ~PROXY_FEAT_DISABLE_DATA_CSUMS; + } + + disable_data_csums = enabled_features & PROXY_FEAT_DISABLE_DATA_CSUMS; + 
reply.features = enabled_features; + break; + case REQ_PROXY: + ret = proxy_process(&request.prequest, &reply.preply); + if (ret != 0) + running = 0; + if (ret < 0) + printf("Proxy req error: %d\n", ret); + break; + case REQ_MEMREAD: + if (request.mrequest.size == 0) + break; + exc_count = 0; + exc_guard = GUARD_RETURN; + checksum_val = data_checksum((void *)request.mrequest.addr, request.mrequest.size); + exc_guard = GUARD_OFF; + if (exc_count) + reply.status = ST_XFRERR; + reply.mreply.dchecksum = checksum_val; + break; + case REQ_MEMWRITE: + exc_count = 0; + exc_guard = GUARD_SKIP; + if (request.mrequest.size != 0) { + // Probe for exception guard + // We can't do the whole buffer easily, because we'd drop UART data + write8(request.mrequest.addr, 0); + write8(request.mrequest.addr + request.mrequest.size - 1, 0); + } + exc_guard = GUARD_OFF; + if (exc_count) { + reply.status = ST_XFRERR; + break; + } + bytes = iodev_read(iodev, (void *)request.mrequest.addr, request.mrequest.size); + if (bytes != request.mrequest.size) { + reply.status = ST_XFRERR; + break; + } + checksum_val = data_checksum((void *)request.mrequest.addr, request.mrequest.size); + reply.mreply.dchecksum = checksum_val; + if (reply.mreply.dchecksum != request.mrequest.dchecksum) { + reply.status = ST_XFRERR; + break; + } + if (disable_data_csums) { + // Check the sentinel that should be present after the data + u32 sentinel = 0; + bytes = iodev_read(iodev, &sentinel, sizeof(sentinel)); + if (bytes != sizeof(sentinel) || sentinel != DATA_END_SENTINEL) { + reply.status = ST_XFRERR; + break; + } + } + break; + default: + reply.status = ST_BADCMD; + break; + } + sysop("dsb sy"); + sysop("isb"); + reply.checksum = checksum(&reply, REPLY_SIZE - 4); + iodev_lock(uartproxy_iodev); + iodev_queue(iodev, &reply, REPLY_SIZE); + + if ((request.type == REQ_MEMREAD) && (reply.status == ST_OK)) { + iodev_queue(iodev, (void *)request.mrequest.addr, request.mrequest.size); + + if (disable_data_csums) { + // Since 
there is no checksum, put a sentinel after the data so the receiver + // can check that no packets were lost. + u32 sentinel = DATA_END_SENTINEL; + + iodev_queue(iodev, &sentinel, sizeof(sentinel)); + } + } + + iodev_unlock(uartproxy_iodev); + // Flush all queued data + iodev_write(iodev, NULL, 0); + iodev_flush(iodev); + } + + return ret; +} + +void uartproxy_send_event(u16 event_type, void *data, u16 length) +{ + UartEventHdr hdr; + u32 csum; + + hdr.type = REQ_EVENT; + hdr.len = length; + hdr.event_type = event_type; + + if (disable_data_csums) { + csum = CHECKSUM_SENTINEL; + } else { + csum = checksum_start(&hdr, sizeof(UartEventHdr)); + csum = checksum_finish(checksum_add(data, length, csum)); + } + iodev_lock(uartproxy_iodev); + iodev_queue(uartproxy_iodev, &hdr, sizeof(UartEventHdr)); + iodev_queue(uartproxy_iodev, data, length); + iodev_write(uartproxy_iodev, &csum, sizeof(csum)); + iodev_unlock(uartproxy_iodev); +} diff --git a/tools/src/uartproxy.h b/tools/src/uartproxy.h new file mode 100644 index 0000000..23ddd67 --- /dev/null +++ b/tools/src/uartproxy.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef __UARTPROXY_H__ +#define __UARTPROXY_H__ + +#include "iodev.h" + +extern iodev_id_t uartproxy_iodev; + +typedef enum _uartproxy_start_reason_t { + START_BOOT, + START_EXCEPTION, + START_EXCEPTION_LOWER, + START_HV, +} uartproxy_boot_reason_t; + +typedef enum _uartproxy_exc_code_t { + EXC_SYNC, + EXC_IRQ, + EXC_FIQ, + EXC_SERROR, +} uartproxy_exc_code_t; + +typedef enum _uartproxy_exc_ret_t { + EXC_RET_UNHANDLED = 1, + EXC_RET_HANDLED = 2, + EXC_EXIT_GUEST = 3, +} uartproxy_exc_ret_t; + +typedef enum _uartproxy_event_type_t { + EVT_MMIOTRACE = 1, + EVT_IRQTRACE = 2, +} uartproxy_event_type_t; + +struct uartproxy_msg_start { + u32 reason; + u32 code; + void *info; + void *reserved; +}; + +int uartproxy_run(struct uartproxy_msg_start *start); +void uartproxy_send_event(u16 event_type, void *data, u16 length); + +#endif diff --git 
a/tools/src/usb.c b/tools/src/usb.c new file mode 100644 index 0000000..1f516a1 --- /dev/null +++ b/tools/src/usb.c @@ -0,0 +1,343 @@ +/* SPDX-License-Identifier: MIT */ + +#include "usb.h" +#include "adt.h" +#include "dart.h" +#include "i2c.h" +#include "iodev.h" +#include "malloc.h" +#include "pmgr.h" +#include "tps6598x.h" +#include "types.h" +#include "usb_dwc3.h" +#include "usb_dwc3_regs.h" +#include "utils.h" +#include "vsprintf.h" + +struct usb_drd_regs { + uintptr_t drd_regs; + uintptr_t drd_regs_unk3; + uintptr_t atc; +}; + +#if USB_IODEV_COUNT > 100 +#error "USB_IODEV_COUNT is limited to 100 to prevent overflow in ADT path names" +#endif + +// length of the format string is is used as buffer size +// limits the USB instance numbers to reasonable 2 digits +#define FMT_DART_PATH "/arm-io/dart-usb%u" +#define FMT_DART_MAPPER_PATH "/arm-io/dart-usb%u/mapper-usb%u" +#define FMT_ATC_PATH "/arm-io/atc-phy%u" +#define FMT_DRD_PATH "/arm-io/usb-drd%u" +#define FMT_HPM_PATH "/arm-io/i2c0/hpmBusManager/hpm%u" + +static tps6598x_irq_state_t tps6598x_irq_state[USB_IODEV_COUNT]; +static bool usb_is_initialized = false; + +static dart_dev_t *usb_dart_init(u32 idx) +{ + int mapper_offset; + char path[sizeof(FMT_DART_MAPPER_PATH)]; + + snprintf(path, sizeof(path), FMT_DART_MAPPER_PATH, idx, idx); + mapper_offset = adt_path_offset(adt, path); + if (mapper_offset < 0) { + // Device not present + return NULL; + } + + u32 dart_idx; + if (ADT_GETPROP(adt, mapper_offset, "reg", &dart_idx) < 0) { + printf("usb: Error getting DART %s device index/\n", path); + return NULL; + } + + snprintf(path, sizeof(path), FMT_DART_PATH, idx); + return dart_init_adt(path, 1, dart_idx, false); +} + +static int usb_drd_get_regs(u32 idx, struct usb_drd_regs *regs) +{ + int adt_drd_path[8]; + int adt_drd_offset; + int adt_phy_path[8]; + int adt_phy_offset; + char phy_path[sizeof(FMT_ATC_PATH)]; + char drd_path[sizeof(FMT_DRD_PATH)]; + + snprintf(drd_path, sizeof(drd_path), FMT_DRD_PATH, idx); + 
adt_drd_offset = adt_path_offset_trace(adt, drd_path, adt_drd_path); + if (adt_drd_offset < 0) { + // Nonexistent device + return -1; + } + + snprintf(phy_path, sizeof(phy_path), FMT_ATC_PATH, idx); + adt_phy_offset = adt_path_offset_trace(adt, phy_path, adt_phy_path); + if (adt_phy_offset < 0) { + printf("usb: Error getting phy node %s\n", phy_path); + return -1; + } + + if (adt_get_reg(adt, adt_phy_path, "reg", 0, ®s->atc, NULL) < 0) { + printf("usb: Error getting reg with index 0 for %s.\n", phy_path); + return -1; + } + if (adt_get_reg(adt, adt_drd_path, "reg", 0, ®s->drd_regs, NULL) < 0) { + printf("usb: Error getting reg with index 0 for %s.\n", drd_path); + return -1; + } + if (adt_get_reg(adt, adt_drd_path, "reg", 3, ®s->drd_regs_unk3, NULL) < 0) { + printf("usb: Error getting reg with index 3 for %s.\n", drd_path); + return -1; + } + + return 0; +} + +int usb_phy_bringup(u32 idx) +{ + char path[24]; + + if (idx >= USB_IODEV_COUNT) + return -1; + + struct usb_drd_regs usb_regs; + if (usb_drd_get_regs(idx, &usb_regs) < 0) + return -1; + + snprintf(path, sizeof(path), FMT_ATC_PATH, idx); + if (pmgr_adt_power_enable(path) < 0) + return -1; + + snprintf(path, sizeof(path), FMT_DART_PATH, idx); + if (pmgr_adt_power_enable(path) < 0) + return -1; + + snprintf(path, sizeof(path), FMT_DRD_PATH, idx); + if (pmgr_adt_power_enable(path) < 0) + return -1; + + write32(usb_regs.atc + 0x08, 0x01c1000f); + write32(usb_regs.atc + 0x04, 0x00000003); + write32(usb_regs.atc + 0x04, 0x00000000); + write32(usb_regs.atc + 0x1c, 0x008c0813); + write32(usb_regs.atc + 0x00, 0x00000002); + + write32(usb_regs.drd_regs_unk3 + 0x0c, 0x00000002); + write32(usb_regs.drd_regs_unk3 + 0x0c, 0x00000022); + write32(usb_regs.drd_regs_unk3 + 0x1c, 0x00000021); + write32(usb_regs.drd_regs_unk3 + 0x20, 0x00009332); + + return 0; +} + +dwc3_dev_t *usb_iodev_bringup(u32 idx) +{ + dart_dev_t *usb_dart = usb_dart_init(idx); + if (!usb_dart) + return NULL; + + struct usb_drd_regs usb_reg; + if 
(usb_drd_get_regs(idx, &usb_reg) < 0) + return NULL; + + return usb_dwc3_init(usb_reg.drd_regs, usb_dart); +} + +#define USB_IODEV_WRAPPER(name, pipe) \ + static ssize_t usb_##name##_can_read(void *dev) \ + { \ + return usb_dwc3_can_read(dev, pipe); \ + } \ + \ + static bool usb_##name##_can_write(void *dev) \ + { \ + return usb_dwc3_can_write(dev, pipe); \ + } \ + \ + static ssize_t usb_##name##_read(void *dev, void *buf, size_t count) \ + { \ + return usb_dwc3_read(dev, pipe, buf, count); \ + } \ + \ + static ssize_t usb_##name##_write(void *dev, const void *buf, size_t count) \ + { \ + return usb_dwc3_write(dev, pipe, buf, count); \ + } \ + \ + static ssize_t usb_##name##_queue(void *dev, const void *buf, size_t count) \ + { \ + return usb_dwc3_queue(dev, pipe, buf, count); \ + } \ + \ + static void usb_##name##_handle_events(void *dev) \ + { \ + usb_dwc3_handle_events(dev); \ + } \ + \ + static void usb_##name##_flush(void *dev) \ + { \ + usb_dwc3_flush(dev, pipe); \ + } + +USB_IODEV_WRAPPER(0, CDC_ACM_PIPE_0) +USB_IODEV_WRAPPER(1, CDC_ACM_PIPE_1) + +static struct iodev_ops iodev_usb_ops = { + .can_read = usb_0_can_read, + .can_write = usb_0_can_write, + .read = usb_0_read, + .write = usb_0_write, + .queue = usb_0_queue, + .flush = usb_0_flush, + .handle_events = usb_0_handle_events, +}; + +static struct iodev_ops iodev_usb_sec_ops = { + .can_read = usb_1_can_read, + .can_write = usb_1_can_write, + .read = usb_1_read, + .write = usb_1_write, + .queue = usb_1_queue, + .flush = usb_1_flush, + .handle_events = usb_1_handle_events, +}; + +struct iodev iodev_usb_vuart = { + .ops = &iodev_usb_sec_ops, + .usage = 0, + .lock = SPINLOCK_INIT, +}; + +static tps6598x_dev_t *hpm_init(i2c_dev_t *i2c, const char *hpm_path) +{ + tps6598x_dev_t *tps = tps6598x_init(hpm_path, i2c); + if (!tps) { + printf("usb: tps6598x_init failed for %s.\n", hpm_path); + return NULL; + } + + if (tps6598x_powerup(tps) < 0) { + printf("usb: tps6598x_powerup failed for %s.\n", hpm_path); + 
tps6598x_shutdown(tps); + return NULL; + } + + return tps; +} + +void usb_init(void) +{ + char hpm_path[sizeof(FMT_HPM_PATH)]; + + if (usb_is_initialized) + return; + + i2c_dev_t *i2c = i2c_init("/arm-io/i2c0"); + if (!i2c) { + printf("usb: i2c init failed.\n"); + return; + } + + for (u32 idx = 0; idx < USB_IODEV_COUNT; ++idx) { + snprintf(hpm_path, sizeof(hpm_path), FMT_HPM_PATH, idx); + if (adt_path_offset(adt, hpm_path) < 0) + continue; // device not present + tps6598x_dev_t *tps = hpm_init(i2c, hpm_path); + if (!tps) { + printf("usb: failed to init hpm%d\n", idx); + continue; + } + + if (tps6598x_disable_irqs(tps, &tps6598x_irq_state[idx])) + printf("usb: unable to disable IRQ masks for hpm%d\n", idx); + + tps6598x_shutdown(tps); + } + + i2c_shutdown(i2c); + + for (int idx = 0; idx < USB_IODEV_COUNT; ++idx) + usb_phy_bringup(idx); /* Fails on missing devices, just continue */ + + usb_is_initialized = true; +} + +void usb_hpm_restore_irqs(bool force) +{ + char hpm_path[sizeof(FMT_HPM_PATH)]; + + i2c_dev_t *i2c = i2c_init("/arm-io/i2c0"); + if (!i2c) { + printf("usb: i2c init failed.\n"); + return; + } + + for (u32 idx = 0; idx < USB_IODEV_COUNT; ++idx) { + if (iodev_get_usage(IODEV_USB0 + idx) && !force) + continue; + + if (tps6598x_irq_state[idx].valid) { + snprintf(hpm_path, sizeof(hpm_path), FMT_HPM_PATH, idx); + if (adt_path_offset(adt, hpm_path) < 0) + continue; // device not present + tps6598x_dev_t *tps = hpm_init(i2c, hpm_path); + if (!tps) + continue; + + if (tps6598x_restore_irqs(tps, &tps6598x_irq_state[idx])) + printf("usb: unable to restore IRQ masks for hpm%d\n", idx); + + tps6598x_shutdown(tps); + } + } + + i2c_shutdown(i2c); +} + +void usb_iodev_init(void) +{ + for (int i = 0; i < USB_IODEV_COUNT; i++) { + dwc3_dev_t *opaque; + struct iodev *usb_iodev; + + opaque = usb_iodev_bringup(i); + if (!opaque) + continue; + + usb_iodev = memalign(SPINLOCK_ALIGN, sizeof(*usb_iodev)); + if (!usb_iodev) + continue; + + usb_iodev->ops = &iodev_usb_ops; + 
usb_iodev->opaque = opaque; + usb_iodev->usage = USAGE_CONSOLE | USAGE_UARTPROXY; + spin_init(&usb_iodev->lock); + + iodev_register_device(IODEV_USB0 + i, usb_iodev); + printf("USB%d: initialized at %p\n", i, opaque); + } +} + +void usb_iodev_shutdown(void) +{ + for (int i = 0; i < USB_IODEV_COUNT; i++) { + struct iodev *usb_iodev = iodev_unregister_device(IODEV_USB0 + i); + if (!usb_iodev) + continue; + + printf("USB%d: shutdown\n", i); + usb_dwc3_shutdown(usb_iodev->opaque); + free(usb_iodev); + } +} + +void usb_iodev_vuart_setup(iodev_id_t iodev) +{ + if (iodev < IODEV_USB0 || iodev >= IODEV_USB0 + USB_IODEV_COUNT) + return; + + iodev_usb_vuart.opaque = iodev_get_opaque(iodev); +} diff --git a/tools/src/usb.h b/tools/src/usb.h new file mode 100644 index 0000000..1ba859a --- /dev/null +++ b/tools/src/usb.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef USB_H +#define USB_H + +#include "iodev.h" +#include "types.h" +#include "usb_dwc3.h" + +dwc3_dev_t *usb_bringup(u32 idx); + +void usb_init(void); +void usb_hpm_restore_irqs(bool force); +void usb_iodev_init(void); +void usb_iodev_shutdown(void); +void usb_iodev_vuart_setup(iodev_id_t iodev); + +#endif diff --git a/tools/src/usb_dwc3.c b/tools/src/usb_dwc3.c new file mode 100644 index 0000000..de05c95 --- /dev/null +++ b/tools/src/usb_dwc3.c @@ -0,0 +1,1416 @@ +/* SPDX-License-Identifier: MIT */ + +/* + * Useful references: + * - TI KeyStone II Architecture Universal Serial Bus 3.0 (USB 3.0) User's Guide + * Literature Number: SPRUHJ7A, https://www.ti.com/lit/ug/spruhj7a/spruhj7a.pdf + * - https://www.beyondlogic.org/usbnutshell/usb1.shtml + */ + +#include "../build/build_tag.h" + +#include "usb_dwc3.h" +#include "dart.h" +#include "malloc.h" +#include "memory.h" +#include "ringbuffer.h" +#include "string.h" +#include "types.h" +#include "usb_dwc3_regs.h" +#include "usb_types.h" +#include "utils.h" + +#define MAX_ENDPOINTS 16 +#define CDC_BUFFER_SIZE SZ_1M + +#define usb_debug_printf(fmt, ...) 
debug_printf("usb-dwc3@%lx: " fmt, dev->regs, ##__VA_ARGS__) + +#define STRING_DESCRIPTOR_LANGUAGES 0 +#define STRING_DESCRIPTOR_MANUFACTURER 1 +#define STRING_DESCRIPTOR_PRODUCT 2 +#define STRING_DESCRIPTOR_SERIAL 3 + +#define CDC_DEVICE_CLASS 0x02 + +#define CDC_USB_VID 0x1209 +#define CDC_USB_PID 0x316d + +#define CDC_INTERFACE_CLASS 0x02 +#define CDC_INTERFACE_CLASS_DATA 0x0a +#define CDC_INTERFACE_SUBCLASS_ACM 0x02 +#define CDC_INTERFACE_PROTOCOL_NONE 0x00 +#define CDC_INTERFACE_PROTOCOL_AT 0x01 + +#define DWC3_SCRATCHPAD_SIZE SZ_16K +#define TRB_BUFFER_SIZE SZ_16K +#define XFER_BUFFER_SIZE (SZ_16K * MAX_ENDPOINTS * 2) +#define PAD_BUFFER_SIZE SZ_16K + +#define TRBS_PER_EP (TRB_BUFFER_SIZE / (MAX_ENDPOINTS * sizeof(struct dwc3_trb))) +#define XFER_BUFFER_BYTES_PER_EP (XFER_BUFFER_SIZE / MAX_ENDPOINTS) + +#define XFER_SIZE SZ_16K + +#define SCRATCHPAD_IOVA 0xbeef0000 +#define EVENT_BUFFER_IOVA 0xdead0000 +#define XFER_BUFFER_IOVA 0xbabe0000 +#define TRB_BUFFER_IOVA 0xf00d0000 + +/* these map to the control endpoint 0x00/0x80 */ +#define USB_LEP_CTRL_OUT 0 +#define USB_LEP_CTRL_IN 1 + +/* maps to interrupt endpoint 0x81 */ +#define USB_LEP_CDC_INTR_IN 3 + +/* these map to physical endpoints 0x02 and 0x82 */ +#define USB_LEP_CDC_BULK_OUT 4 +#define USB_LEP_CDC_BULK_IN 5 + +/* maps to interrupt endpoint 0x83 */ +#define USB_LEP_CDC_INTR_IN_2 7 + +/* these map to physical endpoints 0x04 and 0x84 */ +#define USB_LEP_CDC_BULK_OUT_2 8 +#define USB_LEP_CDC_BULK_IN_2 9 + +/* content doesn't matter at all, this is the setting linux writes by default */ +static const u8 cdc_default_line_coding[] = {0x80, 0x25, 0x00, 0x00, 0x00, 0x00, 0x08}; + +enum ep0_state { + USB_DWC3_EP0_STATE_IDLE, + USB_DWC3_EP0_STATE_SETUP_HANDLE, + USB_DWC3_EP0_STATE_DATA_SEND, + USB_DWC3_EP0_STATE_DATA_RECV, + USB_DWC3_EP0_STATE_DATA_SEND_DONE, + USB_DWC3_EP0_STATE_DATA_RECV_DONE, + USB_DWC3_EP0_STATE_DATA_RECV_STATUS, + USB_DWC3_EP0_STATE_DATA_RECV_STATUS_DONE, + 
USB_DWC3_EP0_STATE_DATA_SEND_STATUS, + USB_DWC3_EP0_STATE_DATA_SEND_STATUS_DONE +}; + +typedef struct dwc3_dev { + /* USB DRD */ + uintptr_t regs; + dart_dev_t *dart; + + enum ep0_state ep0_state; + const void *ep0_buffer; + u32 ep0_buffer_len; + void *ep0_read_buffer; + u32 ep0_read_buffer_len; + + void *evtbuffer; + u32 evt_buffer_offset; + + void *scratchpad; + void *xferbuffer; + struct dwc3_trb *trbs; + + struct { + bool xfer_in_progress; + bool zlp_pending; + + void *xfer_buffer; + uintptr_t xfer_buffer_iova; + + struct dwc3_trb *trb; + uintptr_t trb_iova; + } endpoints[MAX_ENDPOINTS]; + + struct { + ringbuffer_t *host2device; + ringbuffer_t *device2host; + u8 ep_intr; + u8 ep_in; + u8 ep_out; + bool ready; + /* USB ACM CDC serial */ + u8 cdc_line_coding[7]; + } pipe[CDC_ACM_PIPE_MAX]; + +} dwc3_dev_t; + +static const struct usb_string_descriptor str_manufacturer = + make_usb_string_descriptor("Asahi Linux"); +static const struct usb_string_descriptor str_product = + make_usb_string_descriptor("m1n1 uartproxy " BUILD_TAG); +static const struct usb_string_descriptor str_serial = make_usb_string_descriptor("P-0"); + +static const struct usb_string_descriptor_languages str_langs = { + .bLength = sizeof(str_langs) + 2, + .bDescriptorType = USB_STRING_DESCRIPTOR, + .wLANGID = {USB_LANGID_EN_US}, +}; + +struct cdc_dev_desc { + const struct usb_configuration_descriptor configuration; + const struct usb_interface_descriptor interface_management; + const struct cdc_union_functional_descriptor cdc_union_func; + const struct usb_endpoint_descriptor endpoint_notification; + const struct usb_interface_descriptor interface_data; + const struct usb_endpoint_descriptor endpoint_data_in; + const struct usb_endpoint_descriptor endpoint_data_out; + const struct usb_interface_descriptor sec_interface_management; + const struct cdc_union_functional_descriptor sec_cdc_union_func; + const struct usb_endpoint_descriptor sec_endpoint_notification; + const struct 
usb_interface_descriptor sec_interface_data; + const struct usb_endpoint_descriptor sec_endpoint_data_in; + const struct usb_endpoint_descriptor sec_endpoint_data_out; +} PACKED; + +static const struct usb_device_descriptor usb_cdc_device_descriptor = { + .bLength = sizeof(struct usb_device_descriptor), + .bDescriptorType = USB_DEVICE_DESCRIPTOR, + .bcdUSB = 0x0200, + .bDeviceClass = CDC_DEVICE_CLASS, + .bDeviceSubClass = 0, // unused + .bDeviceProtocol = 0, // unused + .bMaxPacketSize0 = 64, + .idVendor = CDC_USB_VID, + .idProduct = CDC_USB_PID, + .bcdDevice = 0x0100, + .iManufacturer = STRING_DESCRIPTOR_MANUFACTURER, + .iProduct = STRING_DESCRIPTOR_PRODUCT, + .iSerialNumber = STRING_DESCRIPTOR_SERIAL, + .bNumConfigurations = 1, +}; + +static const struct cdc_dev_desc cdc_configuration_descriptor = { + .configuration = + { + .bLength = sizeof(cdc_configuration_descriptor.configuration), + .bDescriptorType = USB_CONFIGURATION_DESCRIPTOR, + .wTotalLength = sizeof(cdc_configuration_descriptor), + .bNumInterfaces = 4, + .bConfigurationValue = 1, + .iConfiguration = 0, + .bmAttributes = USB_CONFIGURATION_ATTRIBUTE_RES1 | USB_CONFIGURATION_SELF_POWERED, + .bMaxPower = 250, + + }, + .interface_management = + { + .bLength = sizeof(cdc_configuration_descriptor.interface_management), + .bDescriptorType = USB_INTERFACE_DESCRIPTOR, + .bInterfaceNumber = 0, + .bAlternateSetting = 0, + .bNumEndpoints = 1, + .bInterfaceClass = CDC_INTERFACE_CLASS, + .bInterfaceSubClass = CDC_INTERFACE_SUBCLASS_ACM, + .bInterfaceProtocol = CDC_INTERFACE_PROTOCOL_NONE, + .iInterface = 0, + + }, + .cdc_union_func = + { + .bFunctionLength = sizeof(cdc_configuration_descriptor.cdc_union_func), + .bDescriptorType = USB_CDC_INTERFACE_FUNCTIONAL_DESCRIPTOR, + .bDescriptorSubtype = USB_CDC_UNION_SUBTYPE, + .bControlInterface = 0, + .bDataInterface = 1, + }, + /* + * we never use this endpoint, but it should exist and always be idle. 
+ * it needs to exist in the descriptor though to make hosts correctly recognize + * us as a ACM CDC device. + */ + .endpoint_notification = + { + .bLength = sizeof(cdc_configuration_descriptor.endpoint_notification), + .bDescriptorType = USB_ENDPOINT_DESCRIPTOR, + .bEndpointAddress = USB_ENDPOINT_ADDR_IN(1), + .bmAttributes = USB_ENDPOINT_ATTR_TYPE_INTERRUPT, + .wMaxPacketSize = 64, + .bInterval = 10, + + }, + .interface_data = + { + .bLength = sizeof(cdc_configuration_descriptor.interface_data), + .bDescriptorType = USB_INTERFACE_DESCRIPTOR, + .bInterfaceNumber = 1, + .bAlternateSetting = 0, + .bNumEndpoints = 2, + .bInterfaceClass = CDC_INTERFACE_CLASS_DATA, + .bInterfaceSubClass = 0, // unused + .bInterfaceProtocol = 0, // unused + .iInterface = 0, + }, + .endpoint_data_in = + { + .bLength = sizeof(cdc_configuration_descriptor.endpoint_data_in), + .bDescriptorType = USB_ENDPOINT_DESCRIPTOR, + .bEndpointAddress = USB_ENDPOINT_ADDR_OUT(2), + .bmAttributes = USB_ENDPOINT_ATTR_TYPE_BULK, + .wMaxPacketSize = 512, + .bInterval = 10, + }, + .endpoint_data_out = + { + .bLength = sizeof(cdc_configuration_descriptor.endpoint_data_out), + .bDescriptorType = USB_ENDPOINT_DESCRIPTOR, + .bEndpointAddress = USB_ENDPOINT_ADDR_IN(2), + .bmAttributes = USB_ENDPOINT_ATTR_TYPE_BULK, + .wMaxPacketSize = 512, + .bInterval = 10, + }, + + /* + * CDC ACM interface for virtual uart + */ + + .sec_interface_management = + { + .bLength = sizeof(cdc_configuration_descriptor.sec_interface_management), + .bDescriptorType = USB_INTERFACE_DESCRIPTOR, + .bInterfaceNumber = 2, + .bAlternateSetting = 0, + .bNumEndpoints = 1, + .bInterfaceClass = CDC_INTERFACE_CLASS, + .bInterfaceSubClass = CDC_INTERFACE_SUBCLASS_ACM, + .bInterfaceProtocol = CDC_INTERFACE_PROTOCOL_NONE, + .iInterface = 0, + + }, + .sec_cdc_union_func = + { + .bFunctionLength = sizeof(cdc_configuration_descriptor.sec_cdc_union_func), + .bDescriptorType = USB_CDC_INTERFACE_FUNCTIONAL_DESCRIPTOR, + .bDescriptorSubtype = 
USB_CDC_UNION_SUBTYPE, + .bControlInterface = 2, + .bDataInterface = 3, + }, + /* + * we never use this endpoint, but it should exist and always be idle. + * it needs to exist in the descriptor though to make hosts correctly recognize + * us as a ACM CDC device. + */ + .sec_endpoint_notification = + { + .bLength = sizeof(cdc_configuration_descriptor.sec_endpoint_notification), + .bDescriptorType = USB_ENDPOINT_DESCRIPTOR, + .bEndpointAddress = USB_ENDPOINT_ADDR_IN(3), + .bmAttributes = USB_ENDPOINT_ATTR_TYPE_INTERRUPT, + .wMaxPacketSize = 64, + .bInterval = 10, + + }, + .sec_interface_data = + { + .bLength = sizeof(cdc_configuration_descriptor.sec_interface_data), + .bDescriptorType = USB_INTERFACE_DESCRIPTOR, + .bInterfaceNumber = 3, + .bAlternateSetting = 0, + .bNumEndpoints = 2, + .bInterfaceClass = CDC_INTERFACE_CLASS_DATA, + .bInterfaceSubClass = 0, // unused + .bInterfaceProtocol = 0, // unused + .iInterface = 0, + }, + .sec_endpoint_data_in = + { + .bLength = sizeof(cdc_configuration_descriptor.sec_endpoint_data_in), + .bDescriptorType = USB_ENDPOINT_DESCRIPTOR, + .bEndpointAddress = USB_ENDPOINT_ADDR_OUT(4), + .bmAttributes = USB_ENDPOINT_ATTR_TYPE_BULK, + .wMaxPacketSize = 512, + .bInterval = 10, + }, + .sec_endpoint_data_out = + { + .bLength = sizeof(cdc_configuration_descriptor.sec_endpoint_data_out), + .bDescriptorType = USB_ENDPOINT_DESCRIPTOR, + .bEndpointAddress = USB_ENDPOINT_ADDR_IN(4), + .bmAttributes = USB_ENDPOINT_ATTR_TYPE_BULK, + .wMaxPacketSize = 512, + .bInterval = 10, + }, +}; + +static const struct usb_device_qualifier_descriptor usb_cdc_device_qualifier_descriptor = { + .bLength = sizeof(struct usb_device_qualifier_descriptor), + .bDescriptorType = USB_DEVICE_QUALIFIER_DESCRIPTOR, + .bcdUSB = 0x0200, + .bDeviceClass = CDC_DEVICE_CLASS, + .bDeviceSubClass = 0, // unused + .bDeviceProtocol = 0, // unused + .bMaxPacketSize0 = 64, + .bNumConfigurations = 0, +}; + +static const char *devt_names[] = { + "DisconnEvt", "USBRst", "ConnectDone", 
"ULStChng", "WkUpEvt", "Reserved", "EOPF", + "SOF", "Reserved", "ErrticErr", "CmdCmplt", "EvntOverflow", "VndrDevTstRcved"}; +static const char *depvt_names[] = { + "Reserved", + "XferComplete", + "XferInProgress", + "XferNotReady", + "RxTxFifoEvt (IN->Underrun, OUT->Overrun)", + "Reserved", + "StreamEvt", + "EPCmdCmplt", +}; + +static const char *ep0_state_names[] = { + "STATE_IDLE", + "STATE_SETUP_HANDLE", + "STATE_DATA_SEND", + "STATE_DATA_RECV", + "STATE_DATA_SEND_DONE", + "STATE_DATA_RECV_DONE", + "STATE_DATA_RECV_STATUS", + "STATE_DATA_RECV_STATUS_DONE", + "STATE_DATA_SEND_STATUS", + "STATE_DATA_SEND_STATUS_DONE", +}; + +static u8 ep_to_num(u8 epno) +{ + return (epno << 1) | (epno >> 7); +} + +static int usb_dwc3_command(dwc3_dev_t *dev, u32 command, u32 par) +{ + write32(dev->regs + DWC3_DGCMDPAR, par); + write32(dev->regs + DWC3_DGCMD, command | DWC3_DGCMD_CMDACT); + + if (poll32(dev->regs + DWC3_DGCMD, DWC3_DGCMD_CMDACT, 0, 1000)) { + usb_debug_printf("timeout while waiting for DWC3_DGCMD_CMDACT to clear.\n"); + return -1; + } + + return DWC3_DGCMD_STATUS(read32(dev->regs + DWC3_DGCMD)); +} + +static int usb_dwc3_ep_command(dwc3_dev_t *dev, u8 ep, u32 command, u32 par0, u32 par1, u32 par2) +{ + write32(dev->regs + DWC3_DEPCMDPAR0(ep), par0); + write32(dev->regs + DWC3_DEPCMDPAR1(ep), par1); + write32(dev->regs + DWC3_DEPCMDPAR2(ep), par2); + write32(dev->regs + DWC3_DEPCMD(ep), command | DWC3_DEPCMD_CMDACT); + + if (poll32(dev->regs + DWC3_DEPCMD(ep), DWC3_DEPCMD_CMDACT, 0, 1000)) { + usb_debug_printf("timeout while waiting for DWC3_DEPCMD_CMDACT to clear.\n"); + return -1; + } + + return DWC3_DEPCMD_STATUS(read32(dev->regs + DWC3_DEPCMD(ep))); +} + +static int usb_dwc3_ep_configure(dwc3_dev_t *dev, u8 ep, u8 type, u32 max_packet_len) +{ + u32 param0, param1; + + param0 = DWC3_DEPCFG_EP_TYPE(type) | DWC3_DEPCFG_MAX_PACKET_SIZE(max_packet_len); + if (type != DWC3_DEPCMD_TYPE_CONTROL) + param0 |= DWC3_DEPCFG_FIFO_NUMBER(ep); + + param1 = + 
DWC3_DEPCFG_XFER_COMPLETE_EN | DWC3_DEPCFG_XFER_NOT_READY_EN | DWC3_DEPCFG_EP_NUMBER(ep); + + if (usb_dwc3_ep_command(dev, ep, DWC3_DEPCMD_SETEPCONFIG, param0, param1, 0)) { + usb_debug_printf("cannot issue DWC3_DEPCMD_SETEPCONFIG for EP %d.\n", ep); + return -1; + } + + if (usb_dwc3_ep_command(dev, ep, DWC3_DEPCMD_SETTRANSFRESOURCE, 1, 0, 0)) { + usb_debug_printf("cannot issue DWC3_DEPCMD_SETTRANSFRESOURCE EP %d.\n", ep); + return -1; + } + + return 0; +} + +static int usb_dwc3_ep_start_transfer(dwc3_dev_t *dev, u8 ep, uintptr_t trb_iova) +{ + if (dev->endpoints[ep].xfer_in_progress) { + usb_debug_printf( + "Tried to start a transfer for ep 0x%02x while another transfer is ongoing.\n", ep); + return -1; + } + + dma_wmb(); + int ret = + usb_dwc3_ep_command(dev, ep, DWC3_DEPCMD_STARTTRANSFER, trb_iova >> 32, (u32)trb_iova, 0); + if (ret) { + usb_debug_printf("cannot issue DWC3_DEPCMD_STARTTRANSFER for EP %d: %d.\n", ep, ret); + return ret; + } + + dev->endpoints[ep].xfer_in_progress = true; + return 0; +} + +static uintptr_t usb_dwc3_init_trb(dwc3_dev_t *dev, u8 ep, struct dwc3_trb **trb) +{ + struct dwc3_trb *next_trb = dev->endpoints[ep].trb; + + if (trb) + *trb = next_trb; + + next_trb->ctrl = DWC3_TRB_CTRL_HWO | DWC3_TRB_CTRL_ISP_IMI | DWC3_TRB_CTRL_LST; + next_trb->size = DWC3_TRB_SIZE_LENGTH(0); + next_trb->bph = 0; + next_trb->bpl = dev->endpoints[ep].xfer_buffer_iova; + + return dev->endpoints[ep].trb_iova; +} + +static int usb_dwc3_run_data_trb(dwc3_dev_t *dev, u8 ep, u32 data_len) +{ + struct dwc3_trb *trb; + uintptr_t trb_iova = usb_dwc3_init_trb(dev, ep, &trb); + + trb->ctrl |= DWC3_TRBCTL_CONTROL_DATA; + trb->size = DWC3_TRB_SIZE_LENGTH(data_len); + + return usb_dwc3_ep_start_transfer(dev, ep, trb_iova); +} + +static int usb_dwc3_start_setup_phase(dwc3_dev_t *dev) +{ + struct dwc3_trb *trb; + uintptr_t trb_iova = usb_dwc3_init_trb(dev, USB_LEP_CTRL_OUT, &trb); + + trb->ctrl |= DWC3_TRBCTL_CONTROL_SETUP; + trb->size = DWC3_TRB_SIZE_LENGTH(sizeof(union 
usb_setup_packet)); + return usb_dwc3_ep_start_transfer(dev, USB_LEP_CTRL_OUT, trb_iova); +} + +static int usb_dwc3_start_status_phase(dwc3_dev_t *dev, u8 ep) +{ + struct dwc3_trb *trb; + uintptr_t trb_iova = usb_dwc3_init_trb(dev, ep, &trb); + + trb->ctrl |= DWC3_TRBCTL_CONTROL_STATUS2; + trb->size = DWC3_TRB_SIZE_LENGTH(0); + + return usb_dwc3_ep_start_transfer(dev, ep, trb_iova); +} + +static int usb_dwc3_ep0_start_data_send_phase(dwc3_dev_t *dev) +{ + if (dev->ep0_buffer_len > XFER_BUFFER_BYTES_PER_EP) { + usb_debug_printf("Cannot xfer more than %d bytes but was requested to xfer %d on ep 1\n", + XFER_BUFFER_BYTES_PER_EP, dev->ep0_buffer_len); + return -1; + } + + memset(dev->endpoints[USB_LEP_CTRL_IN].xfer_buffer, 0, 64); + memcpy(dev->endpoints[USB_LEP_CTRL_IN].xfer_buffer, dev->ep0_buffer, dev->ep0_buffer_len); + + return usb_dwc3_run_data_trb(dev, USB_LEP_CTRL_IN, dev->ep0_buffer_len); +} + +static int usb_dwc3_ep0_start_data_recv_phase(dwc3_dev_t *dev) +{ + if (dev->ep0_buffer_len > XFER_BUFFER_BYTES_PER_EP) { + usb_debug_printf("Cannot xfer more than %d bytes but was requested to xfer %d on ep 0\n", + XFER_BUFFER_BYTES_PER_EP, dev->ep0_buffer_len); + return -1; + } + + memset(dev->endpoints[USB_LEP_CTRL_OUT].xfer_buffer, 0, 64); + + return usb_dwc3_run_data_trb(dev, USB_LEP_CTRL_OUT, 64); +} + +static void usb_dwc3_ep_set_stall(dwc3_dev_t *dev, u8 ep, u8 stall) +{ + if (stall) + usb_dwc3_ep_command(dev, ep, DWC3_DEPCMD_SETSTALL, 0, 0, 0); + else + usb_dwc3_ep_command(dev, ep, DWC3_DEPCMD_CLEARSTALL, 0, 0, 0); +} + +static void usb_cdc_get_string_descriptor(u32 index, const void **descriptor, u16 *descriptor_len) +{ + switch (index) { + case STRING_DESCRIPTOR_LANGUAGES: + *descriptor = &str_langs; + *descriptor_len = str_langs.bLength; + break; + case STRING_DESCRIPTOR_MANUFACTURER: + *descriptor = &str_manufacturer; + *descriptor_len = str_manufacturer.bLength; + break; + case STRING_DESCRIPTOR_PRODUCT: + *descriptor = &str_product; + *descriptor_len = 
str_product.bLength; + break; + case STRING_DESCRIPTOR_SERIAL: + *descriptor = &str_serial; + *descriptor_len = str_serial.bLength; + break; + default: + *descriptor = NULL; + *descriptor_len = 0; + } +} + +static int +usb_dwc3_handle_ep0_get_descriptor(dwc3_dev_t *dev, + const struct usb_setup_packet_get_descriptor *get_descriptor) +{ + const void *descriptor = NULL; + u16 descriptor_len = 0; + + switch (get_descriptor->type) { + case USB_DEVICE_DESCRIPTOR: + descriptor = &usb_cdc_device_descriptor; + descriptor_len = usb_cdc_device_descriptor.bLength; + break; + case USB_CONFIGURATION_DESCRIPTOR: + descriptor = &cdc_configuration_descriptor; + descriptor_len = cdc_configuration_descriptor.configuration.wTotalLength; + break; + case USB_STRING_DESCRIPTOR: + usb_cdc_get_string_descriptor(get_descriptor->index, &descriptor, &descriptor_len); + break; + case USB_DEVICE_QUALIFIER_DESCRIPTOR: + descriptor = &usb_cdc_device_qualifier_descriptor; + descriptor_len = usb_cdc_device_qualifier_descriptor.bLength; + break; + default: + usb_debug_printf("Unknown descriptor type: %d\n", get_descriptor->type); + break; + } + + if (descriptor) { + dev->ep0_buffer = descriptor; + dev->ep0_buffer_len = min(get_descriptor->wLength, descriptor_len); + return 0; + } else { + return -1; + } +} + +static void usb_dwc3_ep0_handle_standard_device(dwc3_dev_t *dev, + const union usb_setup_packet *setup) +{ + switch (setup->raw.bRequest) { + case USB_REQUEST_SET_ADDRESS: + mask32(dev->regs + DWC3_DCFG, DWC3_DCFG_DEVADDR_MASK, + DWC3_DCFG_DEVADDR(setup->set_address.address)); + dev->ep0_state = USB_DWC3_EP0_STATE_DATA_SEND_STATUS; + break; + + case USB_REQUEST_SET_CONFIGURATION: + switch (setup->set_configuration.configuration) { + case 0: + clear32(dev->regs + DWC3_DALEPENA, DWC3_DALEPENA_EP(USB_LEP_CDC_BULK_OUT)); + clear32(dev->regs + DWC3_DALEPENA, DWC3_DALEPENA_EP(USB_LEP_CDC_BULK_IN)); + clear32(dev->regs + DWC3_DALEPENA, DWC3_DALEPENA_EP(USB_LEP_CDC_INTR_IN)); + clear32(dev->regs + 
DWC3_DALEPENA, DWC3_DALEPENA_EP(USB_LEP_CDC_BULK_OUT_2)); + clear32(dev->regs + DWC3_DALEPENA, DWC3_DALEPENA_EP(USB_LEP_CDC_BULK_IN_2)); + clear32(dev->regs + DWC3_DALEPENA, DWC3_DALEPENA_EP(USB_LEP_CDC_INTR_IN_2)); + dev->ep0_state = USB_DWC3_EP0_STATE_DATA_SEND_STATUS; + for (int i = 0; i < CDC_ACM_PIPE_MAX; i++) + dev->pipe[i].ready = false; + break; + case 1: + /* we've already configured these endpoints so that we just need to enable them + * here */ + set32(dev->regs + DWC3_DALEPENA, DWC3_DALEPENA_EP(USB_LEP_CDC_BULK_OUT)); + set32(dev->regs + DWC3_DALEPENA, DWC3_DALEPENA_EP(USB_LEP_CDC_BULK_IN)); + set32(dev->regs + DWC3_DALEPENA, DWC3_DALEPENA_EP(USB_LEP_CDC_INTR_IN)); + set32(dev->regs + DWC3_DALEPENA, DWC3_DALEPENA_EP(USB_LEP_CDC_BULK_OUT_2)); + set32(dev->regs + DWC3_DALEPENA, DWC3_DALEPENA_EP(USB_LEP_CDC_BULK_IN_2)); + set32(dev->regs + DWC3_DALEPENA, DWC3_DALEPENA_EP(USB_LEP_CDC_INTR_IN_2)); + dev->ep0_state = USB_DWC3_EP0_STATE_DATA_SEND_STATUS; + break; + default: + usb_dwc3_ep_set_stall(dev, 0, 1); + dev->ep0_state = USB_DWC3_EP0_STATE_IDLE; + break; + } + break; + + case USB_REQUEST_GET_DESCRIPTOR: + if (usb_dwc3_handle_ep0_get_descriptor(dev, &setup->get_descriptor) < 0) { + usb_dwc3_ep_set_stall(dev, 0, 1); + dev->ep0_state = USB_DWC3_EP0_STATE_IDLE; + } else { + dev->ep0_state = USB_DWC3_EP0_STATE_DATA_SEND; + } + break; + + case USB_REQUEST_GET_STATUS: { + static const u16 device_status = 0x0001; // self-powered + dev->ep0_buffer = &device_status; + dev->ep0_buffer_len = 2; + dev->ep0_state = USB_DWC3_EP0_STATE_DATA_SEND; + break; + } + + default: + usb_dwc3_ep_set_stall(dev, 0, 1); + dev->ep0_state = USB_DWC3_EP0_STATE_IDLE; + usb_debug_printf("unsupported SETUP packet\n"); + } +} + +static void usb_dwc3_ep0_handle_standard_interface(dwc3_dev_t *dev, + const union usb_setup_packet *setup) +{ + switch (setup->raw.bRequest) { + case USB_REQUEST_GET_STATUS: { + static const u16 device_status = 0x0000; // reserved + dev->ep0_buffer = 
&device_status; + dev->ep0_buffer_len = 2; + dev->ep0_state = USB_DWC3_EP0_STATE_DATA_SEND; + break; + } + default: + usb_dwc3_ep_set_stall(dev, 0, 1); + dev->ep0_state = USB_DWC3_EP0_STATE_IDLE; + usb_debug_printf("unsupported SETUP packet\n"); + } +} + +static void usb_dwc3_ep0_handle_standard_endpoint(dwc3_dev_t *dev, + const union usb_setup_packet *setup) +{ + switch (setup->raw.bRequest) { + case USB_REQUEST_GET_STATUS: { + static const u16 device_status = 0x0000; // reserved + dev->ep0_buffer = &device_status; + dev->ep0_buffer_len = 2; + dev->ep0_state = USB_DWC3_EP0_STATE_DATA_SEND; + break; + } + case USB_REQUEST_CLEAR_FEATURE: { + switch (setup->feature.wFeatureSelector) { + case USB_FEATURE_ENDPOINT_HALT: + usb_debug_printf("Host cleared EP 0x%x stall\n", setup->feature.wEndpoint); + usb_dwc3_ep_set_stall(dev, ep_to_num(setup->feature.wEndpoint), 0); + usb_dwc3_start_status_phase(dev, USB_LEP_CTRL_IN); + dev->ep0_state = USB_DWC3_EP0_STATE_DATA_SEND_STATUS_DONE; + break; + default: + usb_dwc3_ep_set_stall(dev, 0, 1); + dev->ep0_state = USB_DWC3_EP0_STATE_IDLE; + usb_debug_printf("unsupported CLEAR FEATURE: 0x%x\n", + setup->feature.wFeatureSelector); + break; + } + break; + } + default: + usb_dwc3_ep_set_stall(dev, 0, 1); + dev->ep0_state = USB_DWC3_EP0_STATE_IDLE; + usb_debug_printf("unsupported SETUP packet\n"); + } +} + +static void usb_dwc3_ep0_handle_standard(dwc3_dev_t *dev, const union usb_setup_packet *setup) +{ + switch (setup->raw.bmRequestType & USB_REQUEST_TYPE_RECIPIENT_MASK) { + case USB_REQUEST_TYPE_RECIPIENT_DEVICE: + usb_dwc3_ep0_handle_standard_device(dev, setup); + break; + + case USB_REQUEST_TYPE_RECIPIENT_INTERFACE: + usb_dwc3_ep0_handle_standard_interface(dev, setup); + break; + + case USB_REQUEST_TYPE_RECIPIENT_ENDPOINT: + usb_dwc3_ep0_handle_standard_endpoint(dev, setup); + break; + + default: + usb_dwc3_ep_set_stall(dev, 0, 1); + dev->ep0_state = USB_DWC3_EP0_STATE_IDLE; + usb_debug_printf("unimplemented request recipient\n"); + 
} +} + +static void usb_dwc3_ep0_handle_class(dwc3_dev_t *dev, const union usb_setup_packet *setup) +{ + int pipe = setup->raw.wIndex / 2; + + switch (setup->raw.bRequest) { + case USB_REQUEST_CDC_GET_LINE_CODING: + dev->ep0_buffer_len = min(setup->raw.wLength, sizeof(dev->pipe[pipe].cdc_line_coding)); + dev->ep0_buffer = dev->pipe[pipe].cdc_line_coding; + dev->ep0_state = USB_DWC3_EP0_STATE_DATA_SEND; + break; + + case USB_REQUEST_CDC_SET_CTRL_LINE_STATE: + if (setup->raw.wValue & 1) { // DTR + dev->pipe[pipe].ready = false; + usb_debug_printf("ACM device opened\n"); + dev->pipe[pipe].ready = true; + } else { + dev->pipe[pipe].ready = false; + usb_debug_printf("ACM device closed\n"); + } + usb_dwc3_start_status_phase(dev, USB_LEP_CTRL_IN); + dev->ep0_state = USB_DWC3_EP0_STATE_DATA_SEND_STATUS_DONE; + break; + + case USB_REQUEST_CDC_SET_LINE_CODING: + dev->ep0_read_buffer = dev->pipe[pipe].cdc_line_coding; + dev->ep0_read_buffer_len = + min(setup->raw.wLength, sizeof(dev->pipe[pipe].cdc_line_coding)); + dev->ep0_state = USB_DWC3_EP0_STATE_DATA_RECV; + break; + + default: + usb_dwc3_ep_set_stall(dev, 0, 1); + dev->ep0_state = USB_DWC3_EP0_STATE_IDLE; + usb_debug_printf("unsupported SETUP packet\n"); + } +} + +static void usb_dwc3_ep0_handle_setup(dwc3_dev_t *dev) +{ + const union usb_setup_packet *setup = dev->endpoints[0].xfer_buffer; + + switch (setup->raw.bmRequestType & USB_REQUEST_TYPE_MASK) { + case USB_REQUEST_TYPE_STANDARD: + usb_dwc3_ep0_handle_standard(dev, setup); + break; + case USB_REQUEST_TYPE_CLASS: + usb_dwc3_ep0_handle_class(dev, setup); + break; + default: + usb_debug_printf("unsupported request type\n"); + usb_dwc3_ep_set_stall(dev, 0, 1); + dev->ep0_state = USB_DWC3_EP0_STATE_IDLE; + } +} + +static void usb_dwc3_ep0_handle_xfer_done(dwc3_dev_t *dev, const struct dwc3_event_depevt event) +{ + switch (dev->ep0_state) { + case USB_DWC3_EP0_STATE_SETUP_HANDLE: + usb_dwc3_ep0_handle_setup(dev); + break; + + case 
USB_DWC3_EP0_STATE_DATA_RECV_STATUS_DONE: + case USB_DWC3_EP0_STATE_DATA_SEND_STATUS_DONE: + usb_dwc3_start_setup_phase(dev); + dev->ep0_state = USB_DWC3_EP0_STATE_SETUP_HANDLE; + break; + + case USB_DWC3_EP0_STATE_DATA_SEND_DONE: + dev->ep0_state = USB_DWC3_EP0_STATE_DATA_RECV_STATUS; + break; + + case USB_DWC3_EP0_STATE_DATA_RECV_DONE: + memcpy(dev->ep0_read_buffer, dev->endpoints[event.endpoint_number].xfer_buffer, + dev->ep0_read_buffer_len); + dev->ep0_state = USB_DWC3_EP0_STATE_DATA_SEND_STATUS; + break; + + case USB_DWC3_EP0_STATE_IDLE: + default: + usb_debug_printf("invalid state in usb_dwc3_ep0_handle_xfer_done: %d, %s\n", + dev->ep0_state, ep0_state_names[dev->ep0_state]); + usb_dwc3_ep_set_stall(dev, 0, 1); + dev->ep0_state = USB_DWC3_EP0_STATE_IDLE; + } +} + +static void usb_dwc3_ep0_handle_xfer_not_ready(dwc3_dev_t *dev, + const struct dwc3_event_depevt event) +{ + switch (dev->ep0_state) { + case USB_DWC3_EP0_STATE_IDLE: + usb_dwc3_start_setup_phase(dev); + dev->ep0_state = USB_DWC3_EP0_STATE_SETUP_HANDLE; + break; + + case USB_DWC3_EP0_STATE_DATA_SEND: + if (usb_dwc3_ep0_start_data_send_phase(dev)) + usb_debug_printf("cannot start xtrl xfer data phase for EP 1.\n"); + dev->ep0_state = USB_DWC3_EP0_STATE_DATA_SEND_DONE; + break; + + case USB_DWC3_EP0_STATE_DATA_RECV: + if (usb_dwc3_ep0_start_data_recv_phase(dev)) + usb_debug_printf("cannot start xtrl xfer data phase for EP 0.\n"); + dev->ep0_state = USB_DWC3_EP0_STATE_DATA_RECV_DONE; + break; + + case USB_DWC3_EP0_STATE_DATA_RECV_STATUS: + usb_dwc3_start_status_phase(dev, USB_LEP_CTRL_OUT); + dev->ep0_state = USB_DWC3_EP0_STATE_DATA_RECV_STATUS_DONE; + break; + + case USB_DWC3_EP0_STATE_DATA_SEND_STATUS: + usb_dwc3_start_status_phase(dev, USB_LEP_CTRL_IN); + dev->ep0_state = USB_DWC3_EP0_STATE_DATA_SEND_STATUS_DONE; + break; + + default: + usb_debug_printf( + "invalid state in usb_dwc3_ep0_handle_xfer_not_ready: %d, %s for ep %d (%x)\n", + dev->ep0_state, ep0_state_names[dev->ep0_state], 
event.endpoint_number, + event.endpoint_event); + usb_dwc3_ep_set_stall(dev, 0, 1); + dev->ep0_state = USB_DWC3_EP0_STATE_IDLE; + } +} + +ringbuffer_t *usb_dwc3_cdc_get_ringbuffer(dwc3_dev_t *dev, u8 endpoint_number) +{ + switch (endpoint_number) { + case USB_LEP_CDC_BULK_IN: + return dev->pipe[CDC_ACM_PIPE_0].device2host; + case USB_LEP_CDC_BULK_OUT: + return dev->pipe[CDC_ACM_PIPE_0].host2device; + case USB_LEP_CDC_BULK_IN_2: + return dev->pipe[CDC_ACM_PIPE_1].device2host; + case USB_LEP_CDC_BULK_OUT_2: + return dev->pipe[CDC_ACM_PIPE_1].host2device; + default: + return NULL; + } +} + +static void usb_dwc3_cdc_start_bulk_out_xfer(dwc3_dev_t *dev, u8 endpoint_number) +{ + struct dwc3_trb *trb; + uintptr_t trb_iova; + + if (dev->endpoints[endpoint_number].xfer_in_progress) + return; + + ringbuffer_t *host2device = usb_dwc3_cdc_get_ringbuffer(dev, endpoint_number); + if (!host2device) + return; + + if (ringbuffer_get_free(host2device) < XFER_SIZE) + return; + + memset(dev->endpoints[endpoint_number].xfer_buffer, 0xaa, XFER_SIZE); + trb_iova = usb_dwc3_init_trb(dev, endpoint_number, &trb); + trb->ctrl |= DWC3_TRBCTL_NORMAL; + trb->size = DWC3_TRB_SIZE_LENGTH(XFER_SIZE); + + usb_dwc3_ep_start_transfer(dev, endpoint_number, trb_iova); + dev->endpoints[endpoint_number].xfer_in_progress = true; +} + +static void usb_dwc3_cdc_start_bulk_in_xfer(dwc3_dev_t *dev, u8 endpoint_number) +{ + struct dwc3_trb *trb; + uintptr_t trb_iova; + + if (dev->endpoints[endpoint_number].xfer_in_progress) + return; + + ringbuffer_t *device2host = usb_dwc3_cdc_get_ringbuffer(dev, endpoint_number); + if (!device2host) + return; + + size_t len = + ringbuffer_read(dev->endpoints[endpoint_number].xfer_buffer, XFER_SIZE, device2host); + + if (!len && !dev->endpoints[endpoint_number].zlp_pending) + return; + + trb_iova = usb_dwc3_init_trb(dev, endpoint_number, &trb); + trb->ctrl |= DWC3_TRBCTL_NORMAL; + trb->size = DWC3_TRB_SIZE_LENGTH(len); + + usb_dwc3_ep_start_transfer(dev, endpoint_number, 
trb_iova); + dev->endpoints[endpoint_number].xfer_in_progress = true; + dev->endpoints[endpoint_number].zlp_pending = (len % 512) == 0; +} + +static void usb_dwc3_cdc_handle_bulk_out_xfer_done(dwc3_dev_t *dev, + const struct dwc3_event_depevt event) +{ + ringbuffer_t *host2device = usb_dwc3_cdc_get_ringbuffer(dev, event.endpoint_number); + if (!host2device) + return; + size_t len = min(XFER_SIZE, ringbuffer_get_free(host2device)); + ringbuffer_write(dev->endpoints[event.endpoint_number].xfer_buffer, + len - dev->endpoints[event.endpoint_number].trb->size, host2device); +} + +static void usb_dwc3_handle_event_ep(dwc3_dev_t *dev, const struct dwc3_event_depevt event) +{ + if (event.endpoint_event == DWC3_DEPEVT_XFERCOMPLETE) { + dev->endpoints[event.endpoint_number].xfer_in_progress = false; + + switch (event.endpoint_number) { + case USB_LEP_CTRL_IN: + case USB_LEP_CTRL_OUT: + return usb_dwc3_ep0_handle_xfer_done(dev, event); + case USB_LEP_CDC_INTR_IN: // [[fallthrough]] + case USB_LEP_CDC_INTR_IN_2: + return; + case USB_LEP_CDC_BULK_IN: // [[fallthrough]] + case USB_LEP_CDC_BULK_IN_2: + return; + case USB_LEP_CDC_BULK_OUT: // [[fallthrough]] + case USB_LEP_CDC_BULK_OUT_2: + return usb_dwc3_cdc_handle_bulk_out_xfer_done(dev, event); + } + } else if (event.endpoint_event == DWC3_DEPEVT_XFERNOTREADY) { + /* + * this might be a bug, we sometimes get spurious events like these here. 
+ * ignoring them works just fine though + */ + if (dev->endpoints[event.endpoint_number].xfer_in_progress) + return; + + switch (event.endpoint_number) { + case USB_LEP_CTRL_IN: + case USB_LEP_CTRL_OUT: + return usb_dwc3_ep0_handle_xfer_not_ready(dev, event); + case USB_LEP_CDC_INTR_IN: // [[fallthrough]] + case USB_LEP_CDC_INTR_IN_2: + return; + case USB_LEP_CDC_BULK_IN: // [[fallthrough]] + case USB_LEP_CDC_BULK_IN_2: + return usb_dwc3_cdc_start_bulk_in_xfer(dev, event.endpoint_number); + case USB_LEP_CDC_BULK_OUT: // [[fallthrough]] + case USB_LEP_CDC_BULK_OUT_2: + return usb_dwc3_cdc_start_bulk_out_xfer(dev, event.endpoint_number); + } + } + + usb_debug_printf("unhandled EP %02x event: %s (0x%02x) (%d)\n", event.endpoint_number, + depvt_names[event.endpoint_event], event.endpoint_event, + dev->endpoints[event.endpoint_number].xfer_in_progress); + usb_dwc3_ep_set_stall(dev, event.endpoint_event, 1); +} + +static void usb_dwc3_handle_event_usbrst(dwc3_dev_t *dev) +{ + /* clear STALL mode for all endpoints */ + dev->endpoints[0].xfer_in_progress = false; + for (int i = 1; i < MAX_ENDPOINTS; ++i) { + dev->endpoints[i].xfer_in_progress = false; + memset(dev->endpoints[i].xfer_buffer, 0, XFER_BUFFER_BYTES_PER_EP); + memset(dev->endpoints[i].trb, 0, TRBS_PER_EP * sizeof(struct dwc3_trb)); + usb_dwc3_ep_set_stall(dev, i, 0); + } + + /* set device address back to zero */ + mask32(dev->regs + DWC3_DCFG, DWC3_DCFG_DEVADDR_MASK, DWC3_DCFG_DEVADDR(0)); + + /* only keep control endpoints enabled */ + write32(dev->regs + DWC3_DALEPENA, DWC3_DALEPENA_EP(0) | DWC3_DALEPENA_EP(1)); +} + +static void usb_dwc3_handle_event_connect_done(dwc3_dev_t *dev) +{ + u32 speed = read32(dev->regs + DWC3_DSTS) & DWC3_DSTS_CONNECTSPD; + + if (speed != DWC3_DSTS_HIGHSPEED) { + usb_debug_printf( + "WARNING: we only support high speed right now but %02x was requested in DSTS\n", + speed); + } + + usb_dwc3_start_setup_phase(dev); + dev->ep0_state = USB_DWC3_EP0_STATE_SETUP_HANDLE; +} + +static 
void usb_dwc3_handle_event_dev(dwc3_dev_t *dev, const struct dwc3_event_devt event) +{ + usb_debug_printf("device event: %s (0x%02x)\n", devt_names[event.type], event.type); + switch (event.type) { + case DWC3_DEVT_USBRST: + usb_dwc3_handle_event_usbrst(dev); + break; + case DWC3_DEVT_CONNECTDONE: + usb_dwc3_handle_event_connect_done(dev); + break; + default: + usb_debug_printf("unhandled device event: %s (0x%02x)\n", devt_names[event.type], + event.type); + } +} + +static void usb_dwc3_handle_event(dwc3_dev_t *dev, const union dwc3_event event) +{ + if (!event.type.is_devspec) + usb_dwc3_handle_event_ep(dev, event.depevt); + else if (event.type.type == DWC3_EVENT_TYPE_DEV) + usb_dwc3_handle_event_dev(dev, event.devt); + else + usb_debug_printf("unknown event %08x\n", event.raw); +} + +void usb_dwc3_handle_events(dwc3_dev_t *dev) +{ + if (!dev) + return; + + u32 n_events = read32(dev->regs + DWC3_GEVNTCOUNT(0)) / sizeof(union dwc3_event); + if (n_events == 0) + return; + + dma_rmb(); + + const union dwc3_event *evtbuffer = dev->evtbuffer; + for (u32 i = 0; i < n_events; ++i) { + usb_dwc3_handle_event(dev, evtbuffer[dev->evt_buffer_offset]); + + dev->evt_buffer_offset = + (dev->evt_buffer_offset + 1) % (DWC3_EVENT_BUFFERS_SIZE / sizeof(union dwc3_event)); + } + + write32(dev->regs + DWC3_GEVNTCOUNT(0), sizeof(union dwc3_event) * n_events); +} + +dwc3_dev_t *usb_dwc3_init(uintptr_t regs, dart_dev_t *dart) +{ + /* sanity check */ + u32 snpsid = read32(regs + DWC3_GSNPSID); + if ((snpsid & DWC3_GSNPSID_MASK) != 0x33310000) { + debug_printf("no DWC3 core found at 0x%lx: %08x\n", regs, snpsid); + return NULL; + } + + dwc3_dev_t *dev = malloc(sizeof(*dev)); + if (!dev) + return NULL; + + memset(dev, 0, sizeof(*dev)); + for (int i = 0; i < CDC_ACM_PIPE_MAX; i++) + memcpy(dev->pipe[i].cdc_line_coding, cdc_default_line_coding, + sizeof(cdc_default_line_coding)); + + dev->regs = regs; + dev->dart = dart; + + /* allocate and map dma buffers */ + dev->evtbuffer = 
memalign(SZ_16K, max(DWC3_EVENT_BUFFERS_SIZE, SZ_16K)); + if (!dev->evtbuffer) + goto error; + + dev->scratchpad = memalign(SZ_16K, max(DWC3_SCRATCHPAD_SIZE, SZ_16K)); + if (!dev->scratchpad) + goto error; + + dev->trbs = memalign(SZ_16K, TRB_BUFFER_SIZE); + if (!dev->trbs) + goto error; + + dev->xferbuffer = memalign(SZ_16K, XFER_BUFFER_SIZE); + if (!dev->xferbuffer) + goto error; + + memset(dev->evtbuffer, 0xaa, max(DWC3_EVENT_BUFFERS_SIZE, SZ_16K)); + memset(dev->scratchpad, 0, max(DWC3_SCRATCHPAD_SIZE, SZ_16K)); + memset(dev->xferbuffer, 0, XFER_BUFFER_SIZE); + memset(dev->trbs, 0, TRB_BUFFER_SIZE); + + if (dart_map(dev->dart, EVENT_BUFFER_IOVA, dev->evtbuffer, + max(DWC3_EVENT_BUFFERS_SIZE, SZ_16K))) + goto error; + if (dart_map(dev->dart, SCRATCHPAD_IOVA, dev->scratchpad, max(DWC3_SCRATCHPAD_SIZE, SZ_16K))) + goto error; + if (dart_map(dev->dart, TRB_BUFFER_IOVA, dev->trbs, TRB_BUFFER_SIZE)) + goto error; + if (dart_map(dev->dart, XFER_BUFFER_IOVA, dev->xferbuffer, XFER_BUFFER_SIZE)) + goto error; + + /* prepare endpoint buffers */ + for (int i = 0; i < MAX_ENDPOINTS; ++i) { + u32 xferbuffer_offset = i * XFER_BUFFER_BYTES_PER_EP; + dev->endpoints[i].xfer_buffer = dev->xferbuffer + xferbuffer_offset; + dev->endpoints[i].xfer_buffer_iova = XFER_BUFFER_IOVA + xferbuffer_offset; + + u32 trb_offset = i * TRBS_PER_EP; + dev->endpoints[i].trb = &dev->trbs[i * TRBS_PER_EP]; + dev->endpoints[i].trb_iova = TRB_BUFFER_IOVA + trb_offset * sizeof(struct dwc3_trb); + } + + /* reset the device side of the controller */ + set32(dev->regs + DWC3_DCTL, DWC3_DCTL_CSFTRST); + if (poll32(dev->regs + DWC3_DCTL, DWC3_DCTL_CSFTRST, 0, 1000)) { + usb_debug_printf("timeout while waiting for DWC3_DCTL_CSFTRST to clear.\n"); + goto error; + } + + /* soft reset the core and phy */ + set32(dev->regs + DWC3_GCTL, DWC3_GCTL_CORESOFTRESET); + set32(dev->regs + DWC3_GUSB3PIPECTL(0), DWC3_GUSB3PIPECTL_PHYSOFTRST); + set32(dev->regs + DWC3_GUSB2PHYCFG(0), DWC3_GUSB2PHYCFG_PHYSOFTRST); + 
mdelay(100); + clear32(dev->regs + DWC3_GUSB3PIPECTL(0), DWC3_GUSB3PIPECTL_PHYSOFTRST); + clear32(dev->regs + DWC3_GUSB2PHYCFG(0), DWC3_GUSB2PHYCFG_PHYSOFTRST); + mdelay(100); + clear32(dev->regs + DWC3_GCTL, DWC3_GCTL_CORESOFTRESET); + mdelay(100); + + /* disable unused features */ + clear32(dev->regs + DWC3_GCTL, DWC3_GCTL_SCALEDOWN_MASK | DWC3_GCTL_DISSCRAMBLE); + + /* switch to device-only mode */ + mask32(dev->regs + DWC3_GCTL, DWC3_GCTL_PRTCAPDIR(DWC3_GCTL_PRTCAP_OTG), + DWC3_GCTL_PRTCAPDIR(DWC3_GCTL_PRTCAP_DEVICE)); + + /* stick to USB 2.0 high speed for now */ + mask32(dev->regs + DWC3_DCFG, DWC3_DCFG_SPEED_MASK, DWC3_DCFG_HIGHSPEED); + + /* setup scratchpad at SCRATCHPAD_IOVA */ + if (usb_dwc3_command(dev, DWC3_DGCMD_SET_SCRATCHPAD_ADDR_LO, SCRATCHPAD_IOVA)) { + usb_debug_printf("DWC3_DGCMD_SET_SCRATCHPAD_ADDR_LO failed."); + goto error; + } + if (usb_dwc3_command(dev, DWC3_DGCMD_SET_SCRATCHPAD_ADDR_HI, 0)) { + usb_debug_printf("DWC3_DGCMD_SET_SCRATCHPAD_ADDR_HI failed."); + goto error; + } + + /* setup a single event buffer at EVENT_BUFFER_IOVA */ + write32(dev->regs + DWC3_GEVNTADRLO(0), EVENT_BUFFER_IOVA); + write32(dev->regs + DWC3_GEVNTADRHI(0), 0); + write32(dev->regs + DWC3_GEVNTSIZ(0), DWC3_EVENT_BUFFERS_SIZE); + write32(dev->regs + DWC3_GEVNTCOUNT(0), 0); + + /* enable connect, disconnect and reset events */ + write32(dev->regs + DWC3_DEVTEN, + DWC3_DEVTEN_DISCONNEVTEN | DWC3_DEVTEN_USBRSTEN | DWC3_DEVTEN_CONNECTDONEEN); + + if (usb_dwc3_ep_command(dev, 0, DWC3_DEPCMD_DEPSTARTCFG, 0, 0, 0)) { + usb_debug_printf("cannot issue initial DWC3_DEPCMD_DEPSTARTCFG.\n"); + goto error; + } + + /* prepare control endpoint 0 IN and OUT */ + if (usb_dwc3_ep_configure(dev, USB_LEP_CTRL_OUT, DWC3_DEPCMD_TYPE_CONTROL, 64)) + goto error; + if (usb_dwc3_ep_configure(dev, USB_LEP_CTRL_IN, DWC3_DEPCMD_TYPE_CONTROL, 64)) + goto error; + + /* prepare CDC ACM interfaces */ + + dev->pipe[CDC_ACM_PIPE_0].ep_intr = USB_LEP_CDC_INTR_IN; + dev->pipe[CDC_ACM_PIPE_0].ep_in = 
USB_LEP_CDC_BULK_IN; + dev->pipe[CDC_ACM_PIPE_0].ep_out = USB_LEP_CDC_BULK_OUT; + + dev->pipe[CDC_ACM_PIPE_1].ep_intr = USB_LEP_CDC_INTR_IN_2; + dev->pipe[CDC_ACM_PIPE_1].ep_in = USB_LEP_CDC_BULK_IN_2; + dev->pipe[CDC_ACM_PIPE_1].ep_out = USB_LEP_CDC_BULK_OUT_2; + + for (int i = 0; i < CDC_ACM_PIPE_MAX; i++) { + dev->pipe[i].host2device = ringbuffer_alloc(CDC_BUFFER_SIZE); + if (!dev->pipe[i].host2device) + goto error; + dev->pipe[i].device2host = ringbuffer_alloc(CDC_BUFFER_SIZE); + if (!dev->pipe[i].device2host) + goto error; + + /* prepare INTR endpoint so that we don't have to reconfigure this device later */ + if (usb_dwc3_ep_configure(dev, dev->pipe[i].ep_intr, DWC3_DEPCMD_TYPE_INTR, 64)) + goto error; + + /* prepare BULK endpoints so that we don't have to reconfigure this device later */ + if (usb_dwc3_ep_configure(dev, dev->pipe[i].ep_in, DWC3_DEPCMD_TYPE_BULK, 512)) + goto error; + if (usb_dwc3_ep_configure(dev, dev->pipe[i].ep_out, DWC3_DEPCMD_TYPE_BULK, 512)) + goto error; + } + + /* prepare first control transfer */ + dev->ep0_state = USB_DWC3_EP0_STATE_IDLE; + + /* only enable control endpoints for now */ + write32(dev->regs + DWC3_DALEPENA, + DWC3_DALEPENA_EP(USB_LEP_CTRL_IN) | DWC3_DALEPENA_EP(USB_LEP_CTRL_OUT)); + + /* and finally kick the device controller to go live! 
*/ + set32(dev->regs + DWC3_DCTL, DWC3_DCTL_RUN_STOP); + + return dev; + +error: + usb_dwc3_shutdown(dev); + return NULL; +} + +void usb_dwc3_shutdown(dwc3_dev_t *dev) +{ + for (int i = 0; i < CDC_ACM_PIPE_MAX; i++) + dev->pipe[i].ready = false; + + /* stop all ongoing transfers */ + for (int i = 1; i < MAX_ENDPOINTS; ++i) { + if (!dev->endpoints[i].xfer_in_progress) + continue; + + if (usb_dwc3_ep_command(dev, i, DWC3_DEPCMD_ENDTRANSFER, 0, 0, 0)) + usb_debug_printf("cannot issue DWC3_DEPCMD_ENDTRANSFER for EP %02x.\n", i); + } + + /* disable events and all endpoints and stop the device controller */ + write32(dev->regs + DWC3_DEVTEN, 0); + write32(dev->regs + DWC3_DALEPENA, 0); + clear32(dev->regs + DWC3_DCTL, DWC3_DCTL_RUN_STOP); + + /* wait until the controller is shut down */ + if (poll32(dev->regs + DWC3_DSTS, DWC3_DSTS_DEVCTRLHLT, DWC3_DSTS_DEVCTRLHLT, 1000)) + usb_debug_printf("timeout while waiting for DWC3_DSTS_DEVCTRLHLT during shutdown.\n"); + + /* reset the device side of the controller just to be safe */ + set32(dev->regs + DWC3_DCTL, DWC3_DCTL_CSFTRST); + if (poll32(dev->regs + DWC3_DCTL, DWC3_DCTL_CSFTRST, 0, 1000)) + usb_debug_printf("timeout while waiting for DWC3_DCTL_CSFTRST to clear during shutdown.\n"); + + /* unmap and free dma buffers */ + dart_unmap(dev->dart, TRB_BUFFER_IOVA, TRB_BUFFER_SIZE); + dart_unmap(dev->dart, XFER_BUFFER_IOVA, XFER_BUFFER_SIZE); + dart_unmap(dev->dart, SCRATCHPAD_IOVA, max(DWC3_SCRATCHPAD_SIZE, SZ_16K)); + dart_unmap(dev->dart, EVENT_BUFFER_IOVA, max(DWC3_EVENT_BUFFERS_SIZE, SZ_16K)); + + free(dev->evtbuffer); + free(dev->scratchpad); + free(dev->xferbuffer); + free(dev->trbs); + for (int i = 0; i < CDC_ACM_PIPE_MAX; i++) { + ringbuffer_free(dev->pipe[i].device2host); + ringbuffer_free(dev->pipe[i].host2device); + } + + if (dev->dart) + dart_shutdown(dev->dart); + free(dev); +} + +u8 usb_dwc3_getbyte(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe) +{ + ringbuffer_t *host2device = dev->pipe[pipe].host2device; + if 
(!host2device) + return 0; + + u8 ep = dev->pipe[pipe].ep_out; + + u8 c; + while (ringbuffer_read(&c, 1, host2device) < 1) { + usb_dwc3_handle_events(dev); + usb_dwc3_cdc_start_bulk_out_xfer(dev, ep); + } + return c; +} + +void usb_dwc3_putbyte(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe, u8 byte) +{ + ringbuffer_t *device2host = dev->pipe[pipe].device2host; + if (!device2host) + return; + + u8 ep = dev->pipe[pipe].ep_in; + + while (ringbuffer_write(&byte, 1, device2host) < 1) { + usb_dwc3_handle_events(dev); + usb_dwc3_cdc_start_bulk_in_xfer(dev, ep); + } +} + +size_t usb_dwc3_queue(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe, const void *buf, size_t count) +{ + const u8 *p = buf; + size_t wrote, sent = 0; + + if (!dev || !dev->pipe[pipe].ready) + return 0; + + ringbuffer_t *device2host = dev->pipe[pipe].device2host; + if (!device2host) + return 0; + + u8 ep = dev->pipe[pipe].ep_in; + + while (count) { + wrote = ringbuffer_write(p, count, device2host); + count -= wrote; + p += wrote; + sent += wrote; + if (count) { + usb_dwc3_handle_events(dev); + usb_dwc3_cdc_start_bulk_in_xfer(dev, ep); + } + } + + return sent; +} + +size_t usb_dwc3_write(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe, const void *buf, size_t count) +{ + if (!dev) + return -1; + + u8 ep = dev->pipe[pipe].ep_in; + size_t ret = usb_dwc3_queue(dev, pipe, buf, count); + + usb_dwc3_cdc_start_bulk_in_xfer(dev, ep); + + return ret; +} + +size_t usb_dwc3_read(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe, void *buf, size_t count) +{ + u8 *p = buf; + size_t read, recvd = 0; + + if (!dev || !dev->pipe[pipe].ready) + return 0; + + ringbuffer_t *host2device = dev->pipe[pipe].host2device; + if (!host2device) + return 0; + + u8 ep = dev->pipe[pipe].ep_out; + + while (count) { + read = ringbuffer_read(p, count, host2device); + count -= read; + p += read; + recvd += read; + usb_dwc3_handle_events(dev); + usb_dwc3_cdc_start_bulk_out_xfer(dev, ep); + } + + return recvd; +} + +ssize_t usb_dwc3_can_read(dwc3_dev_t *dev, 
cdc_acm_pipe_id_t pipe) +{ + if (!dev || !dev->pipe[pipe].ready) + return 0; + + ringbuffer_t *host2device = dev->pipe[pipe].host2device; + if (!host2device) + return 0; + + return ringbuffer_get_used(host2device); +} + +bool usb_dwc3_can_write(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe) +{ + (void)pipe; + if (!dev) + return false; + + return dev->pipe[pipe].ready; +} + +void usb_dwc3_flush(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe) +{ + if (!dev || !dev->pipe[pipe].ready) + return; + + ringbuffer_t *device2host = dev->pipe[pipe].device2host; + if (!device2host) + return; + + u8 ep = dev->pipe[pipe].ep_in; + + while (ringbuffer_get_used(device2host) != 0 || dev->endpoints[ep].xfer_in_progress) { + usb_dwc3_handle_events(dev); + } +} diff --git a/tools/src/usb_dwc3.h b/tools/src/usb_dwc3.h new file mode 100644 index 0000000..6b23c7c --- /dev/null +++ b/tools/src/usb_dwc3.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef USB_DWC3_H +#define USB_DWC3_H + +#include "dart.h" +#include "types.h" + +typedef struct dwc3_dev dwc3_dev_t; + +typedef enum _cdc_acm_pipe_id_t { + CDC_ACM_PIPE_0, + CDC_ACM_PIPE_1, + CDC_ACM_PIPE_MAX +} cdc_acm_pipe_id_t; + +dwc3_dev_t *usb_dwc3_init(uintptr_t regs, dart_dev_t *dart); +void usb_dwc3_shutdown(dwc3_dev_t *dev); + +void usb_dwc3_handle_events(dwc3_dev_t *dev); + +ssize_t usb_dwc3_can_read(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe); +bool usb_dwc3_can_write(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe); + +u8 usb_dwc3_getbyte(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe); +void usb_dwc3_putbyte(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe, u8 byte); + +size_t usb_dwc3_read(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe, void *buf, size_t count); +size_t usb_dwc3_write(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe, const void *buf, size_t count); +size_t usb_dwc3_queue(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe, const void *buf, size_t count); +void usb_dwc3_flush(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe); + +#endif diff --git a/tools/src/usb_dwc3_regs.h 
b/tools/src/usb_dwc3_regs.h new file mode 100644 index 0000000..9c3d9ca --- /dev/null +++ b/tools/src/usb_dwc3_regs.h @@ -0,0 +1,625 @@ +/** + * core.h - DesignWare USB3 DRD Core Header + * linux commit 7bc5a6ba369217e0137833f5955cf0b0f08b0712 before + * the switch to GPLv2 only + * + * Copyright (C) 2010-2011 Texas Instruments Incorporated - http://www.ti.com + * + * Authors: Felipe Balbi <balbi@ti.com>, + * Sebastian Andrzej Siewior <bigeasy@linutronix.de> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the above-listed copyright holders may not be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2, as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __DRIVERS_USB_DWC3_CORE_H +#define __DRIVERS_USB_DWC3_CORE_H + +#include "types.h" + +/* Global constants */ +#define DWC3_EP0_BOUNCE_SIZE 512 +#define DWC3_ENDPOINTS_NUM 32 +#define DWC3_XHCI_RESOURCES_NUM 2 + +#define DWC3_EVENT_SIZE 4 /* bytes */ +#define DWC3_EVENT_MAX_NUM 64 /* 2 events/endpoint */ +#define DWC3_EVENT_BUFFERS_SIZE (DWC3_EVENT_SIZE * DWC3_EVENT_MAX_NUM) +#define DWC3_EVENT_TYPE_MASK 0xfe + +#define DWC3_EVENT_TYPE_DEV 0 +#define DWC3_EVENT_TYPE_CARKIT 3 +#define DWC3_EVENT_TYPE_I2C 4 + +#define DWC3_DEVICE_EVENT_DISCONNECT 0 +#define DWC3_DEVICE_EVENT_RESET 1 +#define DWC3_DEVICE_EVENT_CONNECT_DONE 2 +#define DWC3_DEVICE_EVENT_LINK_STATUS_CHANGE 3 +#define DWC3_DEVICE_EVENT_WAKEUP 4 +#define DWC3_DEVICE_EVENT_HIBER_REQ 5 +#define DWC3_DEVICE_EVENT_EOPF 6 +#define DWC3_DEVICE_EVENT_SOF 7 +#define DWC3_DEVICE_EVENT_ERRATIC_ERROR 9 +#define DWC3_DEVICE_EVENT_CMD_CMPL 10 +#define DWC3_DEVICE_EVENT_OVERFLOW 11 + +#define DWC3_GEVNTCOUNT_MASK 0xfffc +#define DWC3_GSNPSID_MASK 0xffff0000 +#define DWC3_GSNPSREV_MASK 0xffff + +/* DWC3 registers memory space boundries */ +#define DWC3_XHCI_REGS_START 0x0 +#define DWC3_XHCI_REGS_END 0x7fff +#define DWC3_GLOBALS_REGS_START 0xc100 +#define DWC3_GLOBALS_REGS_END 0xc6ff +#define DWC3_DEVICE_REGS_START 0xc700 +#define DWC3_DEVICE_REGS_END 0xcbff +#define DWC3_OTG_REGS_START 0xcc00 +#define DWC3_OTG_REGS_END 0xccff + +/* Global Registers */ +#define DWC3_GSBUSCFG0 
0xc100 +#define DWC3_GSBUSCFG1 0xc104 +#define DWC3_GTXTHRCFG 0xc108 +#define DWC3_GRXTHRCFG 0xc10c +#define DWC3_GCTL 0xc110 +#define DWC3_GEVTEN 0xc114 +#define DWC3_GSTS 0xc118 +#define DWC3_GSNPSID 0xc120 +#define DWC3_GGPIO 0xc124 +#define DWC3_GUID 0xc128 +#define DWC3_GUCTL 0xc12c +#define DWC3_GBUSERRADDR0 0xc130 +#define DWC3_GBUSERRADDR1 0xc134 +#define DWC3_GPRTBIMAP0 0xc138 +#define DWC3_GPRTBIMAP1 0xc13c +#define DWC3_GHWPARAMS0 0xc140 +#define DWC3_GHWPARAMS1 0xc144 +#define DWC3_GHWPARAMS2 0xc148 +#define DWC3_GHWPARAMS3 0xc14c +#define DWC3_GHWPARAMS4 0xc150 +#define DWC3_GHWPARAMS5 0xc154 +#define DWC3_GHWPARAMS6 0xc158 +#define DWC3_GHWPARAMS7 0xc15c +#define DWC3_GDBGFIFOSPACE 0xc160 +#define DWC3_GDBGLTSSM 0xc164 +#define DWC3_GPRTBIMAP_HS0 0xc180 +#define DWC3_GPRTBIMAP_HS1 0xc184 +#define DWC3_GPRTBIMAP_FS0 0xc188 +#define DWC3_GPRTBIMAP_FS1 0xc18c + +#define DWC3_GUSB2PHYCFG(n) (0xc200 + (n * 0x04)) +#define DWC3_GUSB2I2CCTL(n) (0xc240 + (n * 0x04)) + +#define DWC3_GUSB2PHYACC(n) (0xc280 + (n * 0x04)) + +#define DWC3_GUSB3PIPECTL(n) (0xc2c0 + (n * 0x04)) + +#define DWC3_GTXFIFOSIZ(n) (0xc300 + (n * 0x04)) +#define DWC3_GRXFIFOSIZ(n) (0xc380 + (n * 0x04)) + +#define DWC3_GEVNTADRLO(n) (0xc400 + (n * 0x10)) +#define DWC3_GEVNTADRHI(n) (0xc404 + (n * 0x10)) +#define DWC3_GEVNTSIZ(n) (0xc408 + (n * 0x10)) +#define DWC3_GEVNTCOUNT(n) (0xc40c + (n * 0x10)) + +#define DWC3_GHWPARAMS8 0xc600 + +/* Device Registers */ +#define DWC3_DCFG 0xc700 +#define DWC3_DCTL 0xc704 +#define DWC3_DEVTEN 0xc708 +#define DWC3_DSTS 0xc70c +#define DWC3_DGCMDPAR 0xc710 +#define DWC3_DGCMD 0xc714 +#define DWC3_DALEPENA 0xc720 +#define DWC3_DEPCMDPAR2(n) (0xc800 + (n * 0x10)) +#define DWC3_DEPCMDPAR1(n) (0xc804 + (n * 0x10)) +#define DWC3_DEPCMDPAR0(n) (0xc808 + (n * 0x10)) +#define DWC3_DEPCMD(n) (0xc80c + (n * 0x10)) + +/* OTG Registers */ +#define DWC3_OCFG 0xcc00 +#define DWC3_OCTL 0xcc04 +#define DWC3_OEVT 0xcc08 +#define DWC3_OEVTEN 0xcc0C +#define DWC3_OSTS 0xcc10 
+ +/* Bit fields */ + +/* Global Configuration Register */ +#define DWC3_GCTL_PWRDNSCALE(n) ((n) << 19) +#define DWC3_GCTL_U2RSTECN (1 << 16) +#define DWC3_GCTL_RAMCLKSEL(x) (((x)&DWC3_GCTL_CLK_MASK) << 6) +#define DWC3_GCTL_CLK_BUS (0) +#define DWC3_GCTL_CLK_PIPE (1) +#define DWC3_GCTL_CLK_PIPEHALF (2) +#define DWC3_GCTL_CLK_MASK (3) + +#define DWC3_GCTL_PRTCAP(n) (((n) & (3 << 12)) >> 12) +#define DWC3_GCTL_PRTCAPDIR(n) ((n) << 12) +#define DWC3_GCTL_PRTCAP_HOST 1 +#define DWC3_GCTL_PRTCAP_DEVICE 2 +#define DWC3_GCTL_PRTCAP_OTG 3 + +#define DWC3_GCTL_CORESOFTRESET (1 << 11) +#define DWC3_GCTL_SCALEDOWN(n) ((n) << 4) +#define DWC3_GCTL_SCALEDOWN_MASK DWC3_GCTL_SCALEDOWN(3) +#define DWC3_GCTL_DISSCRAMBLE (1 << 3) +#define DWC3_GCTL_GBLHIBERNATIONEN (1 << 1) +#define DWC3_GCTL_DSBLCLKGTNG (1 << 0) + +/* Global USB2 PHY Configuration Register */ +#define DWC3_GUSB2PHYCFG_PHYSOFTRST (1 << 31) +#define DWC3_GUSB2PHYCFG_SUSPHY (1 << 6) + +/* Global USB3 PIPE Control Register */ +#define DWC3_GUSB3PIPECTL_PHYSOFTRST (1 << 31) +#define DWC3_GUSB3PIPECTL_SUSPHY (1 << 17) + +/* Global TX Fifo Size Register */ +#define DWC3_GTXFIFOSIZ_TXFDEF(n) ((n)&0xffff) +#define DWC3_GTXFIFOSIZ_TXFSTADDR(n) ((n)&0xffff0000) + +/* Global HWPARAMS1 Register */ +#define DWC3_GHWPARAMS1_EN_PWROPT(n) (((n) & (3 << 24)) >> 24) +#define DWC3_GHWPARAMS1_EN_PWROPT_NO 0 +#define DWC3_GHWPARAMS1_EN_PWROPT_CLK 1 +#define DWC3_GHWPARAMS1_EN_PWROPT_HIB 2 +#define DWC3_GHWPARAMS1_PWROPT(n) ((n) << 24) +#define DWC3_GHWPARAMS1_PWROPT_MASK DWC3_GHWPARAMS1_PWROPT(3) + +/* Global HWPARAMS4 Register */ +#define DWC3_GHWPARAMS4_HIBER_SCRATCHBUFS(n) (((n) & (0x0f << 13)) >> 13) +#define DWC3_MAX_HIBER_SCRATCHBUFS 15 + +/* Device Configuration Register */ +#define DWC3_DCFG_LPM_CAP (1 << 22) +#define DWC3_DCFG_DEVADDR(addr) ((addr) << 3) +#define DWC3_DCFG_DEVADDR_MASK DWC3_DCFG_DEVADDR(0x7f) + +#define DWC3_DCFG_SPEED_MASK (7 << 0) +#define DWC3_DCFG_SUPERSPEED (4 << 0) +#define DWC3_DCFG_HIGHSPEED (0 << 0) 
+#define DWC3_DCFG_FULLSPEED2 (1 << 0) +#define DWC3_DCFG_LOWSPEED (2 << 0) +#define DWC3_DCFG_FULLSPEED1 (3 << 0) + +#define DWC3_DCFG_LPM_CAP (1 << 22) + +/* Device Control Register */ +#define DWC3_DCTL_RUN_STOP (1 << 31) +#define DWC3_DCTL_CSFTRST (1 << 30) +#define DWC3_DCTL_LSFTRST (1 << 29) + +#define DWC3_DCTL_HIRD_THRES_MASK (0x1f << 24) +#define DWC3_DCTL_HIRD_THRES(n) ((n) << 24) + +#define DWC3_DCTL_APPL1RES (1 << 23) + +/* These apply for core versions 1.87a and earlier */ +#define DWC3_DCTL_TRGTULST_MASK (0x0f << 17) +#define DWC3_DCTL_TRGTULST(n) ((n) << 17) +#define DWC3_DCTL_TRGTULST_U2 (DWC3_DCTL_TRGTULST(2)) +#define DWC3_DCTL_TRGTULST_U3 (DWC3_DCTL_TRGTULST(3)) +#define DWC3_DCTL_TRGTULST_SS_DIS (DWC3_DCTL_TRGTULST(4)) +#define DWC3_DCTL_TRGTULST_RX_DET (DWC3_DCTL_TRGTULST(5)) +#define DWC3_DCTL_TRGTULST_SS_INACT (DWC3_DCTL_TRGTULST(6)) + +/* These apply for core versions 1.94a and later */ +#define DWC3_DCTL_KEEP_CONNECT (1 << 19) +#define DWC3_DCTL_L1_HIBER_EN (1 << 18) +#define DWC3_DCTL_CRS (1 << 17) +#define DWC3_DCTL_CSS (1 << 16) + +#define DWC3_DCTL_INITU2ENA (1 << 12) +#define DWC3_DCTL_ACCEPTU2ENA (1 << 11) +#define DWC3_DCTL_INITU1ENA (1 << 10) +#define DWC3_DCTL_ACCEPTU1ENA (1 << 9) +#define DWC3_DCTL_TSTCTRL_MASK (0xf << 1) + +#define DWC3_DCTL_ULSTCHNGREQ_MASK (0x0f << 5) +#define DWC3_DCTL_ULSTCHNGREQ(n) (((n) << 5) & DWC3_DCTL_ULSTCHNGREQ_MASK) + +#define DWC3_DCTL_ULSTCHNG_NO_ACTION (DWC3_DCTL_ULSTCHNGREQ(0)) +#define DWC3_DCTL_ULSTCHNG_SS_DISABLED (DWC3_DCTL_ULSTCHNGREQ(4)) +#define DWC3_DCTL_ULSTCHNG_RX_DETECT (DWC3_DCTL_ULSTCHNGREQ(5)) +#define DWC3_DCTL_ULSTCHNG_SS_INACTIVE (DWC3_DCTL_ULSTCHNGREQ(6)) +#define DWC3_DCTL_ULSTCHNG_RECOVERY (DWC3_DCTL_ULSTCHNGREQ(8)) +#define DWC3_DCTL_ULSTCHNG_COMPLIANCE (DWC3_DCTL_ULSTCHNGREQ(10)) +#define DWC3_DCTL_ULSTCHNG_LOOPBACK (DWC3_DCTL_ULSTCHNGREQ(11)) + +/* Device Event Enable Register */ +#define DWC3_DEVTEN_VNDRDEVTSTRCVEDEN (1 << 12) +#define DWC3_DEVTEN_EVNTOVERFLOWEN (1 << 11) 
+#define DWC3_DEVTEN_CMDCMPLTEN (1 << 10) +#define DWC3_DEVTEN_ERRTICERREN (1 << 9) +#define DWC3_DEVTEN_SOFEN (1 << 7) +#define DWC3_DEVTEN_EOPFEN (1 << 6) +#define DWC3_DEVTEN_HIBERNATIONREQEVTEN (1 << 5) +#define DWC3_DEVTEN_WKUPEVTEN (1 << 4) +#define DWC3_DEVTEN_ULSTCNGEN (1 << 3) +#define DWC3_DEVTEN_CONNECTDONEEN (1 << 2) +#define DWC3_DEVTEN_USBRSTEN (1 << 1) +#define DWC3_DEVTEN_DISCONNEVTEN (1 << 0) + +/* Device Status Register */ +#define DWC3_DSTS_DCNRD (1 << 29) + +/* This applies for core versions 1.87a and earlier */ +#define DWC3_DSTS_PWRUPREQ (1 << 24) + +/* These apply for core versions 1.94a and later */ +#define DWC3_DSTS_RSS (1 << 25) +#define DWC3_DSTS_SSS (1 << 24) + +#define DWC3_DSTS_COREIDLE (1 << 23) +#define DWC3_DSTS_DEVCTRLHLT (1 << 22) + +#define DWC3_DSTS_USBLNKST_MASK (0x0f << 18) +#define DWC3_DSTS_USBLNKST(n) (((n)&DWC3_DSTS_USBLNKST_MASK) >> 18) + +#define DWC3_DSTS_RXFIFOEMPTY (1 << 17) + +#define DWC3_DSTS_SOFFN_MASK (0x3fff << 3) +#define DWC3_DSTS_SOFFN(n) (((n)&DWC3_DSTS_SOFFN_MASK) >> 3) + +#define DWC3_DSTS_CONNECTSPD (7 << 0) + +#define DWC3_DSTS_SUPERSPEED (4 << 0) +#define DWC3_DSTS_HIGHSPEED (0 << 0) +#define DWC3_DSTS_FULLSPEED2 (1 << 0) +#define DWC3_DSTS_LOWSPEED (2 << 0) +#define DWC3_DSTS_FULLSPEED1 (3 << 0) + +/* Device Generic Command Register */ +#define DWC3_DGCMD_SET_LMP 0x01 +#define DWC3_DGCMD_SET_PERIODIC_PAR 0x02 +#define DWC3_DGCMD_XMIT_FUNCTION 0x03 + +/* These apply for core versions 1.94a and later */ +#define DWC3_DGCMD_SET_SCRATCHPAD_ADDR_LO 0x04 +#define DWC3_DGCMD_SET_SCRATCHPAD_ADDR_HI 0x05 + +#define DWC3_DGCMD_SELECTED_FIFO_FLUSH 0x09 +#define DWC3_DGCMD_ALL_FIFO_FLUSH 0x0a +#define DWC3_DGCMD_SET_ENDPOINT_NRDY 0x0c +#define DWC3_DGCMD_RUN_SOC_BUS_LOOPBACK 0x10 + +#define DWC3_DGCMD_STATUS(n) (((n) >> 15) & 1) +#define DWC3_DGCMD_CMDACT (1 << 10) +#define DWC3_DGCMD_CMDIOC (1 << 8) + +/* Device Generic Command Parameter Register */ +#define DWC3_DGCMDPAR_FORCE_LINKPM_ACCEPT (1 << 0) +#define 
DWC3_DGCMDPAR_FIFO_NUM(n) ((n) << 0) +#define DWC3_DGCMDPAR_RX_FIFO (0 << 5) +#define DWC3_DGCMDPAR_TX_FIFO (1 << 5) +#define DWC3_DGCMDPAR_LOOPBACK_DIS (0 << 0) +#define DWC3_DGCMDPAR_LOOPBACK_ENA (1 << 0) + +/* Device Endpoint Command Register */ +#define DWC3_DEPCMD_PARAM_SHIFT 16 +#define DWC3_DEPCMD_PARAM(x) ((x) << DWC3_DEPCMD_PARAM_SHIFT) +#define DWC3_DEPCMD_GET_RSC_IDX(x) (((x) >> DWC3_DEPCMD_PARAM_SHIFT) & 0x7f) +#define DWC3_DEPCMD_STATUS(x) (((x) >> 15) & 1) +#define DWC3_DEPCMD_HIPRI_FORCERM (1 << 11) +#define DWC3_DEPCMD_CMDACT (1 << 10) +#define DWC3_DEPCMD_CMDIOC (1 << 8) + +#define DWC3_DEPCMD_DEPSTARTCFG (0x09 << 0) +#define DWC3_DEPCMD_ENDTRANSFER (0x08 << 0) +#define DWC3_DEPCMD_UPDATETRANSFER (0x07 << 0) +#define DWC3_DEPCMD_STARTTRANSFER (0x06 << 0) +#define DWC3_DEPCMD_CLEARSTALL (0x05 << 0) +#define DWC3_DEPCMD_SETSTALL (0x04 << 0) +/* This applies for core versions 1.90a and earlier */ +#define DWC3_DEPCMD_GETSEQNUMBER (0x03 << 0) +/* This applies for core versions 1.94a and later */ +#define DWC3_DEPCMD_GETEPSTATE (0x03 << 0) +#define DWC3_DEPCMD_SETTRANSFRESOURCE (0x02 << 0) +#define DWC3_DEPCMD_SETEPCONFIG (0x01 << 0) + +/* The EP number goes 0..31 so ep0 is always out and ep1 is always in */ +#define DWC3_DALEPENA_EP(n) (1 << n) + +#define DWC3_DEPCMD_TYPE_CONTROL 0 +#define DWC3_DEPCMD_TYPE_ISOC 1 +#define DWC3_DEPCMD_TYPE_BULK 2 +#define DWC3_DEPCMD_TYPE_INTR 3 + +#define DWC3_EVENT_PENDING BIT(0) + +#define DWC3_EP_FLAG_STALLED (1 << 0) +#define DWC3_EP_FLAG_WEDGED (1 << 1) + +#define DWC3_EP_DIRECTION_TX true +#define DWC3_EP_DIRECTION_RX false + +#define DWC3_TRB_NUM 32 +#define DWC3_TRB_MASK (DWC3_TRB_NUM - 1) + +#define DWC3_EP_ENABLED (1 << 0) +#define DWC3_EP_STALL (1 << 1) +#define DWC3_EP_WEDGE (1 << 2) +#define DWC3_EP_BUSY (1 << 4) +#define DWC3_EP_PENDING_REQUEST (1 << 5) +#define DWC3_EP_MISSED_ISOC (1 << 6) + +/* This last one is specific to EP0 */ +#define DWC3_EP0_DIR_IN (1 << 31) + +enum dwc3_link_state { + /* In 
SuperSpeed */ + DWC3_LINK_STATE_U0 = 0x00, /* in HS, means ON */ + DWC3_LINK_STATE_U1 = 0x01, + DWC3_LINK_STATE_U2 = 0x02, /* in HS, means SLEEP */ + DWC3_LINK_STATE_U3 = 0x03, /* in HS, means SUSPEND */ + DWC3_LINK_STATE_SS_DIS = 0x04, + DWC3_LINK_STATE_RX_DET = 0x05, /* in HS, means Early Suspend */ + DWC3_LINK_STATE_SS_INACT = 0x06, + DWC3_LINK_STATE_POLL = 0x07, + DWC3_LINK_STATE_RECOV = 0x08, + DWC3_LINK_STATE_HRESET = 0x09, + DWC3_LINK_STATE_CMPLY = 0x0a, + DWC3_LINK_STATE_LPBK = 0x0b, + DWC3_LINK_STATE_RESET = 0x0e, + DWC3_LINK_STATE_RESUME = 0x0f, + DWC3_LINK_STATE_MASK = 0x0f, +}; + +/* TRB Length, PCM and Status */ +#define DWC3_TRB_SIZE_MASK (0x00ffffff) +#define DWC3_TRB_SIZE_LENGTH(n) ((n)&DWC3_TRB_SIZE_MASK) +#define DWC3_TRB_SIZE_PCM1(n) (((n)&0x03) << 24) +#define DWC3_TRB_SIZE_TRBSTS(n) (((n) & (0x0f << 28)) >> 28) + +#define DWC3_TRBSTS_OK 0 +#define DWC3_TRBSTS_MISSED_ISOC 1 +#define DWC3_TRBSTS_SETUP_PENDING 2 +#define DWC3_TRB_STS_XFER_IN_PROG 4 + +/* TRB Control */ +#define DWC3_TRB_CTRL_HWO (1 << 0) +#define DWC3_TRB_CTRL_LST (1 << 1) +#define DWC3_TRB_CTRL_CHN (1 << 2) +#define DWC3_TRB_CTRL_CSP (1 << 3) +#define DWC3_TRB_CTRL_TRBCTL(n) (((n)&0x3f) << 4) +#define DWC3_TRB_CTRL_ISP_IMI (1 << 10) +#define DWC3_TRB_CTRL_IOC (1 << 11) +#define DWC3_TRB_CTRL_SID_SOFN(n) (((n)&0xffff) << 14) + +#define DWC3_TRBCTL_NORMAL DWC3_TRB_CTRL_TRBCTL(1) +#define DWC3_TRBCTL_CONTROL_SETUP DWC3_TRB_CTRL_TRBCTL(2) +#define DWC3_TRBCTL_CONTROL_STATUS2 DWC3_TRB_CTRL_TRBCTL(3) +#define DWC3_TRBCTL_CONTROL_STATUS3 DWC3_TRB_CTRL_TRBCTL(4) +#define DWC3_TRBCTL_CONTROL_DATA DWC3_TRB_CTRL_TRBCTL(5) +#define DWC3_TRBCTL_ISOCHRONOUS_FIRST DWC3_TRB_CTRL_TRBCTL(6) +#define DWC3_TRBCTL_ISOCHRONOUS DWC3_TRB_CTRL_TRBCTL(7) +#define DWC3_TRBCTL_LINK_TRB DWC3_TRB_CTRL_TRBCTL(8) + +/** + * struct dwc3_trb - transfer request block (hw format) + * @bpl: DW0-3 + * @bph: DW4-7 + * @size: DW8-B + * @trl: DWC-F + */ +struct dwc3_trb { + u32 bpl; + u32 bph; + u32 size; + u32 ctrl; +} 
PACKED; + +/* HWPARAMS0 */ +#define DWC3_MODE(n) ((n)&0x7) + +#define DWC3_MODE_DEVICE 0 +#define DWC3_MODE_HOST 1 +#define DWC3_MODE_DRD 2 +#define DWC3_MODE_HUB 3 + +#define DWC3_MDWIDTH(n) (((n)&0xff00) >> 8) + +/* HWPARAMS1 */ +#define DWC3_NUM_INT(n) (((n) & (0x3f << 15)) >> 15) + +/* HWPARAMS3 */ +#define DWC3_NUM_IN_EPS_MASK (0x1f << 18) +#define DWC3_NUM_EPS_MASK (0x3f << 12) +#define DWC3_NUM_EPS(p) (((p)->hwparams3 & (DWC3_NUM_EPS_MASK)) >> 12) +#define DWC3_NUM_IN_EPS(p) (((p)->hwparams3 & (DWC3_NUM_IN_EPS_MASK)) >> 18) + +/* HWPARAMS7 */ +#define DWC3_RAM1_DEPTH(n) ((n)&0xffff) + +#define DWC3_REVISION_173A 0x5533173a +#define DWC3_REVISION_175A 0x5533175a +#define DWC3_REVISION_180A 0x5533180a +#define DWC3_REVISION_183A 0x5533183a +#define DWC3_REVISION_185A 0x5533185a +#define DWC3_REVISION_187A 0x5533187a +#define DWC3_REVISION_188A 0x5533188a +#define DWC3_REVISION_190A 0x5533190a +#define DWC3_REVISION_194A 0x5533194a +#define DWC3_REVISION_200A 0x5533200a +#define DWC3_REVISION_202A 0x5533202a +#define DWC3_REVISION_210A 0x5533210a +#define DWC3_REVISION_220A 0x5533220a +#define DWC3_REVISION_230A 0x5533230a +#define DWC3_REVISION_240A 0x5533240a +#define DWC3_REVISION_250A 0x5533250a + +/* -------------------------------------------------------------------------- */ + +/* -------------------------------------------------------------------------- */ + +struct dwc3_event_type { + u32 is_devspec : 1; + u32 type : 7; + u32 reserved8_31 : 24; +} PACKED; + +#define DWC3_DEPEVT_XFERCOMPLETE 0x01 +#define DWC3_DEPEVT_XFERINPROGRESS 0x02 +#define DWC3_DEPEVT_XFERNOTREADY 0x03 +#define DWC3_DEPEVT_RXTXFIFOEVT 0x04 +#define DWC3_DEPEVT_STREAMEVT 0x06 +#define DWC3_DEPEVT_EPCMDCMPLT 0x07 + +/** + * struct dwc3_event_depvt - Device Endpoint Events + * @one_bit: indicates this is an endpoint event (not used) + * @endpoint_number: number of the endpoint + * @endpoint_event: The event we have: + * 0x00 - Reserved + * 0x01 - XferComplete + * 0x02 - 
XferInProgress + * 0x03 - XferNotReady + * 0x04 - RxTxFifoEvt (IN->Underrun, OUT->Overrun) + * 0x05 - Reserved + * 0x06 - StreamEvt + * 0x07 - EPCmdCmplt + * @reserved11_10: Reserved, don't use. + * @status: Indicates the status of the event. Refer to databook for + * more information. + * @parameters: Parameters of the current event. Refer to databook for + * more information. + */ +struct dwc3_event_depevt { + u32 one_bit : 1; + u32 endpoint_number : 5; + u32 endpoint_event : 4; + u32 reserved11_10 : 2; + u32 status : 4; + +/* Within XferNotReady */ +#define DEPEVT_STATUS_TRANSFER_ACTIVE (1 << 3) + +/* Within XferComplete */ +#define DEPEVT_STATUS_BUSERR (1 << 0) +#define DEPEVT_STATUS_SHORT (1 << 1) +#define DEPEVT_STATUS_IOC (1 << 2) +#define DEPEVT_STATUS_LST (1 << 3) + +/* Stream event only */ +#define DEPEVT_STREAMEVT_FOUND 1 +#define DEPEVT_STREAMEVT_NOTFOUND 2 + +/* Control-only Status */ +#define DEPEVT_STATUS_CONTROL_DATA 1 +#define DEPEVT_STATUS_CONTROL_STATUS 2 + + u32 parameters : 16; +} PACKED; + +#define DWC3_DEVT_DISCONN 0x00 +#define DWC3_DEVT_USBRST 0x01 +#define DWC3_DEVT_CONNECTDONE 0x02 +#define DWC3_DEVT_ULSTCHNG 0x03 +#define DWC3_DEVT_WKUPEVT 0x04 +#define DWC3_DEVT_EOPF 0x06 +#define DWC3_DEVT_SOF 0x07 +#define DWC3_DEVT_ERRTICERR 0x09 +#define DWC3_DEVT_CMDCMPLT 0x0a +#define DWC3_DEVT_EVNTOVERFLOW 0x0b +#define DWC3_DEVT_VNDRDEVTSTRCVED 0x0c + +/** + * struct dwc3_event_devt - Device Events + * @one_bit: indicates this is a non-endpoint event (not used) + * @device_event: indicates it's a device event. Should read as 0x00 + * @type: indicates the type of device event. 
+ * 0 - DisconnEvt + * 1 - USBRst + * 2 - ConnectDone + * 3 - ULStChng + * 4 - WkUpEvt + * 5 - Reserved + * 6 - EOPF + * 7 - SOF + * 8 - Reserved + * 9 - ErrticErr + * 10 - CmdCmplt + * 11 - EvntOverflow + * 12 - VndrDevTstRcved + * @reserved15_12: Reserved, not used + * @event_info: Information about this event + * @reserved31_24: Reserved, not used + */ +struct dwc3_event_devt { + u32 one_bit : 1; + u32 device_event : 7; + u32 type : 4; + u32 reserved15_12 : 4; + u32 event_info : 8; + u32 reserved31_24 : 8; +} PACKED; + +/** + * struct dwc3_event_gevt - Other Core Events + * @one_bit: indicates this is a non-endpoint event (not used) + * @device_event: indicates it's (0x03) Carkit or (0x04) I2C event. + * @phy_port_number: self-explanatory + * @reserved31_12: Reserved, not used. + */ +struct dwc3_event_gevt { + u32 one_bit : 1; + u32 device_event : 7; + u32 phy_port_number : 4; + u32 reserved31_12 : 20; +} PACKED; + +union dwc3_event { + u32 raw; + struct dwc3_event_type type; + struct dwc3_event_depevt depevt; + struct dwc3_event_devt devt; + struct dwc3_event_gevt gevt; +}; + +#define DWC3_DEPCFG_EP_TYPE(n) (((n)&0x3) << 1) +#define DWC3_DEPCFG_EP_NUMBER(n) (((n)&0x1f) << 25) +#define DWC3_DEPCFG_FIFO_NUMBER(n) (((n)&0xf) << 17) +#define DWC3_DEPCFG_MAX_PACKET_SIZE(n) (((n)&0x7ff) << 3) + +#define DWC3_DEPCFG_INT_NUM(n) (((n)&0x1f) << 0) +#define DWC3_DEPCFG_XFER_COMPLETE_EN BIT(8) +#define DWC3_DEPCFG_XFER_IN_PROGRESS_EN BIT(9) +#define DWC3_DEPCFG_XFER_NOT_READY_EN BIT(10) +#define DWC3_DEPCFG_FIFO_ERROR_EN BIT(11) +#define DWC3_DEPCFG_STREAM_EVENT_EN BIT(13) +#define DWC3_DEPCFG_BINTERVAL_M1(n) (((n)&0xff) << 16) +#define DWC3_DEPCFG_STREAM_CAPABLE BIT(24) +#define DWC3_DEPCFG_EP_NUMBER(n) (((n)&0x1f) << 25) +#define DWC3_DEPCFG_BULK_BASED BIT(30) +#define DWC3_DEPCFG_FIFO_BASED BIT(31) + +#endif /* __DRIVERS_USB_DWC3_CORE_H */ diff --git a/tools/src/usb_types.h b/tools/src/usb_types.h new file mode 100644 index 0000000..2571a1a --- /dev/null +++ 
b/tools/src/usb_types.h @@ -0,0 +1,209 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef USB_TYPES_H +#define USB_TYPES_H + +#include "types.h" + +#define USB_REQUEST_TYPE_DIRECTION_SHIFT 7 +#define USB_REQUEST_TYPE_DIRECTION(d) ((d) << USB_REQUEST_TYPE_DIRECTION_SHIFT) +#define USB_REQUEST_TYPE_DIRECTION_HOST2DEVICE 0 +#define USB_REQUEST_TYPE_DIRECTION_DEVICE2HOST 1 + +#define USB_REQUEST_TYPE_SHIFT 5 +#define USB_REQUEST_TYPE(t) ((t) << USB_REQUEST_TYPE_SHIFT) +#define USB_REQUEST_TYPE_STANDARD USB_REQUEST_TYPE(0b00) +#define USB_REQUEST_TYPE_CLASS USB_REQUEST_TYPE(0b01) +#define USB_REQUEST_TYPE_VENDOR USB_REQUEST_TYPE(0b10) +#define USB_REQUEST_TYPE_MASK USB_REQUEST_TYPE(0b11) + +#define USB_REQUEST_TYPE_RECIPIENT_DEVICE 0 +#define USB_REQUEST_TYPE_RECIPIENT_INTERFACE 1 +#define USB_REQUEST_TYPE_RECIPIENT_ENDPOINT 2 +#define USB_REQUEST_TYPE_RECIPIENT_OTHER 3 +#define USB_REQUEST_TYPE_RECIPIENT_MASK 0b11 + +#define USB_REQUEST_GET_STATUS 0x00 +#define USB_REQUEST_CLEAR_FEATURE 0x01 +#define USB_REQUEST_SET_FEATURE 0x03 +#define USB_REQUEST_SET_ADDRESS 0x05 +#define USB_REQUEST_GET_DESCRIPTOR 0x06 +#define USB_REQUEST_SET_DESCRIPTOR 0x07 +#define USB_REQUEST_GET_CONFIGURATION 0x08 +#define USB_REQUEST_SET_CONFIGURATION 0x09 + +#define USB_EP_REQUEST_CLEAR_FEATURE 0x01 +#define USB_EP_REQUEST_SET_FEATURE 0x03 + +#define USB_FEATURE_ENDPOINT_HALT 0x00 + +#define USB_REQUEST_CDC_SET_LINE_CODING 0x20 +#define USB_REQUEST_CDC_GET_LINE_CODING 0x21 +#define USB_REQUEST_CDC_SET_CTRL_LINE_STATE 0x22 + +struct usb_setup_packet_raw { + u8 bmRequestType; + u8 bRequest; + u16 wValue; + u16 wIndex; + u16 wLength; +} PACKED; + +struct usb_setup_packet_get_descriptor { + u8 bmRequestType; + u8 bRequest; + u8 index; + u8 type; + u16 language; + u16 wLength; +} PACKED; + +struct usb_set_packet_set_address { + u8 bmRequestType; + u8 bRequest; + u16 address; + u16 zero0; + u16 zero1; +} PACKED; + +struct usb_set_packet_set_configuration { + u8 bmRequestType; + u8 bRequest; + u16 
configuration; + u16 zero0; + u16 zero1; +} PACKED; + +struct usb_setup_packet_feature { + u8 bmRequestType; + u8 bRequest; + u16 wFeatureSelector; + u16 wEndpoint; + u16 wLength; +} PACKED; + +union usb_setup_packet { + struct usb_setup_packet_raw raw; + struct usb_setup_packet_get_descriptor get_descriptor; + struct usb_set_packet_set_address set_address; + struct usb_set_packet_set_configuration set_configuration; + struct usb_setup_packet_feature feature; +}; + +#define USB_DEVICE_DESCRIPTOR 0x01 +#define USB_CONFIGURATION_DESCRIPTOR 0x02 +#define USB_STRING_DESCRIPTOR 0x03 +#define USB_INTERFACE_DESCRIPTOR 0x04 +#define USB_ENDPOINT_DESCRIPTOR 0x05 +#define USB_DEVICE_QUALIFIER_DESCRIPTOR 0x06 +#define USB_OTHER_SPEED_CONFIGURATION_DESCRIPTOR 0x07 + +#define USB_CDC_INTERFACE_FUNCTIONAL_DESCRIPTOR 0x24 +#define USB_CDC_UNION_SUBTYPE 0x06 + +#define USB_CONFIGURATION_SELF_POWERED 0x40 +#define USB_CONFIGURATION_ATTRIBUTE_RES1 0x80 + +#define USB_ENDPOINT_ADDR_IN(ep) (0x80 | (ep)) +#define USB_ENDPOINT_ADDR_OUT(ep) (0x00 | (ep)) + +#define USB_ENDPOINT_ATTR_TYPE_CONTROL 0b00 +#define USB_ENDPOINT_ATTR_TYPE_ISOCHRONOUS 0b01 +#define USB_ENDPOINT_ATTR_TYPE_BULK 0b10 +#define USB_ENDPOINT_ATTR_TYPE_INTERRUPT 0b11 + +#define USB_LANGID_EN_US 0x0409 + +struct usb_device_descriptor { + u8 bLength; + u8 bDescriptorType; + u16 bcdUSB; + u8 bDeviceClass; + u8 bDeviceSubClass; + u8 bDeviceProtocol; + u8 bMaxPacketSize0; + u16 idVendor; + u16 idProduct; + u16 bcdDevice; + u8 iManufacturer; + u8 iProduct; + u8 iSerialNumber; + u8 bNumConfigurations; +} PACKED; + +struct usb_configuration_descriptor { + u8 bLength; + u8 bDescriptorType; + u16 wTotalLength; + u8 bNumInterfaces; + u8 bConfigurationValue; + u8 iConfiguration; + u8 bmAttributes; + u8 bMaxPower; +} PACKED; + +struct usb_interface_descriptor { + u8 bLength; + u8 bDescriptorType; + u8 bInterfaceNumber; + u8 bAlternateSetting; + u8 bNumEndpoints; + u8 bInterfaceClass; + u8 bInterfaceSubClass; + u8 
bInterfaceProtocol; + u8 iInterface; +} PACKED; + +struct usb_endpoint_descriptor { + u8 bLength; + u8 bDescriptorType; + u8 bEndpointAddress; + u8 bmAttributes; + u16 wMaxPacketSize; + u8 bInterval; +} PACKED; + +struct usb_string_descriptor { + u8 bLength; + u8 bDescriptorType; + u16 bString[]; +} PACKED; + +struct usb_string_descriptor_languages { + u8 bLength; + u8 bDescriptorType; + u16 wLANGID[]; +} PACKED; + +struct cdc_union_functional_descriptor { + u8 bFunctionLength; + u8 bDescriptorType; + u8 bDescriptorSubtype; + u8 bControlInterface; + u8 bDataInterface; +} PACKED; + +struct usb_device_qualifier_descriptor { + u8 bLength; + u8 bDescriptorType; + u16 bcdUSB; + u8 bDeviceClass; + u8 bDeviceSubClass; + u8 bDeviceProtocol; + u8 bMaxPacketSize0; + u8 bNumConfigurations; + u8 bReserved; +} PACKED; + +/* + * this macro is required because we need to convert any string literals + * to UTF16 and because we need to calculate the correct total size of the + * string descriptor. + */ +#define make_usb_string_descriptor(str) \ + { \ + .bLength = sizeof(struct usb_string_descriptor) + sizeof(u##str), \ + .bDescriptorType = USB_STRING_DESCRIPTOR, .bString = u##str \ + } + +#endif diff --git a/tools/src/utils.c b/tools/src/utils.c new file mode 100644 index 0000000..2343476 --- /dev/null +++ b/tools/src/utils.c @@ -0,0 +1,182 @@ +/* SPDX-License-Identifier: MIT */ + +#include <assert.h> +#include <stdarg.h> + +#include "utils.h" +#include "iodev.h" +#include "smp.h" +#include "types.h" +#include "vsprintf.h" +#include "xnuboot.h" + +static char ascii(char s) +{ + if (s < 0x20) + return '.'; + if (s > 0x7E) + return '.'; + return s; +} + +void hexdump(const void *d, size_t len) +{ + u8 *data; + size_t i, off; + data = (u8 *)d; + for (off = 0; off < len; off += 16) { + printf("%08lx ", off); + for (i = 0; i < 16; i++) { + if ((i + off) >= len) + printf(" "); + else + printf("%02x ", data[off + i]); + } + + printf(" "); + for (i = 0; i < 16; i++) { + if ((i + off) >= 
len) + printf(" "); + else + printf("%c", ascii(data[off + i])); + } + printf("\n"); + } +} + +void regdump(u64 addr, size_t len) +{ + u64 i, off; + for (off = 0; off < len; off += 32) { + printf("%016lx ", addr + off); + for (i = 0; i < 32; i += 4) { + printf("%08x ", read32(addr + off + i)); + } + printf("\n"); + } +} + +int snprintf(char *buffer, size_t size, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i = vsnprintf(buffer, size, fmt, args); + va_end(args); + return i; +} + +int debug_printf(const char *fmt, ...) +{ + va_list args; + char buffer[512]; + int i; + + va_start(args, fmt); + i = vsnprintf(buffer, sizeof(buffer), fmt, args); + va_end(args); + + iodev_console_write(buffer, min(i, (int)(sizeof(buffer) - 1))); + + return i; +} + +void __assert_fail(const char *assertion, const char *file, unsigned int line, const char *function) +{ + printf("Assertion failed: '%s' on %s:%d:%s\n", assertion, file, line, function); + flush_and_reboot(); +} + +void udelay(u32 d) +{ + u64 delay = ((u64)d) * mrs(CNTFRQ_EL0) / 1000000; + u64 val = mrs(CNTPCT_EL0); + while ((mrs(CNTPCT_EL0) - val) < delay) + ; + sysop("isb"); +} + +u64 ticks_to_msecs(u64 ticks) +{ + // NOTE: only accurate if freq is even kHz + return ticks / (mrs(CNTFRQ_EL0) / 1000); +} + +u64 ticks_to_usecs(u64 ticks) +{ + // NOTE: only accurate if freq is even MHz + return ticks / (mrs(CNTFRQ_EL0) / 1000000); +} + +u64 timeout_calculate(u32 usec) +{ + u64 delay = ((u64)usec) * mrs(CNTFRQ_EL0) / 1000000; + return mrs(CNTPCT_EL0) + delay; +} + +bool timeout_expired(u64 timeout) +{ + bool expired = mrs(CNTPCT_EL0) > timeout; + sysop("isb"); + return expired; +} + +void flush_and_reboot(void) +{ + iodev_console_flush(); + reboot(); +} + +void spin_init(spinlock_t *lock) +{ + lock->lock = -1; + lock->count = 0; +} + +void spin_lock(spinlock_t *lock) +{ + s64 tmp; + s64 me = smp_id(); + if (__atomic_load_n(&lock->lock, __ATOMIC_ACQUIRE) == me) { + lock->count++; + return; + } + + 
__asm__ volatile("1:\n" + "mov\t%0, -1\n" + "2:\n" + "\tcasa\t%0, %2, %1\n" + "\tcmn\t%0, 1\n" + "\tbeq\t3f\n" + "\tldxr\t%0, %1\n" + "\tcmn\t%0, 1\n" + "\tbeq\t2b\n" + "\twfe\n" + "\tb\t1b\n" + "3:" + : "=&r"(tmp), "+m"(lock->lock) + : "r"(me) + : "cc", "memory"); + + assert(__atomic_load_n(&lock->lock, __ATOMIC_RELAXED) == me); + lock->count++; +} + +void spin_unlock(spinlock_t *lock) +{ + s64 me = smp_id(); + assert(__atomic_load_n(&lock->lock, __ATOMIC_RELAXED) == me); + assert(lock->count > 0); + if (!--lock->count) + __atomic_store_n(&lock->lock, -1L, __ATOMIC_RELEASE); +} + +bool is_heap(void *addr) +{ + u64 p = (u64)addr; + u64 top_of_kernel_data = (u64)cur_boot_args.top_of_kernel_data; + u64 top_of_ram = cur_boot_args.mem_size + cur_boot_args.phys_base; + + return p > top_of_kernel_data && p < top_of_ram; +} diff --git a/tools/src/utils.h b/tools/src/utils.h new file mode 100644 index 0000000..1d053d2 --- /dev/null +++ b/tools/src/utils.h @@ -0,0 +1,444 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef UTILS_H +#define UTILS_H + +#include "types.h" + +#define printf(...) debug_printf(__VA_ARGS__) + +#ifdef DEBUG +#define dprintf(...) debug_printf(__VA_ARGS__) +#else +#define dprintf(...) \ + do { \ + } while (0) +#endif + +#define ARRAY_SIZE(s) (sizeof(s) / sizeof((s)[0])) + +#define BIT(x) (1UL << (x)) +#define MASK(x) (BIT(x) - 1) +#define GENMASK(msb, lsb) ((BIT((msb + 1) - (lsb)) - 1) << (lsb)) +#define _FIELD_LSB(field) ((field) & ~(field - 1)) +#define FIELD_PREP(field, val) ((val) * (_FIELD_LSB(field))) +#define FIELD_GET(field, val) (((val) & (field)) / _FIELD_LSB(field)) + +#define ALIGN_UP(x, a) (((x) + ((a)-1)) & ~((a)-1)) +#define ALIGN_DOWN(x, a) ((x) & ~((a)-1)) + +#define min(a, b) (((a) < (b)) ? (a) : (b)) +#define max(a, b) (((a) > (b)) ? 
(a) : (b)) + +#define USEC_PER_SEC 1000000L + +static inline u64 read64(u64 addr) +{ + u64 data; + __asm__ volatile("ldr\t%0, [%1]" : "=r"(data) : "r"(addr) : "memory"); + return data; +} + +static inline void write64(u64 addr, u64 data) +{ + __asm__ volatile("str\t%0, [%1]" : : "r"(data), "r"(addr) : "memory"); +} + +static inline u64 set64(u64 addr, u64 set) +{ + u64 data; + __asm__ volatile("ldr\t%0, [%1]\n" + "\torr\t%0, %0, %2\n" + "\tstr\t%0, [%1]" + : "=&r"(data) + : "r"(addr), "r"(set) + : "memory"); + return data; +} + +static inline u64 clear64(u64 addr, u64 clear) +{ + u64 data; + __asm__ volatile("ldr\t%0, [%1]\n" + "\tbic\t%0, %0, %2\n" + "\tstr\t%0, [%1]" + : "=&r"(data) + : "r"(addr), "r"(clear) + : "memory"); + return data; +} + +static inline u64 mask64(u64 addr, u64 clear, u64 set) +{ + u64 data; + __asm__ volatile("ldr\t%0, [%1]\n" + "\tbic\t%0, %0, %3\n" + "\torr\t%0, %0, %2\n" + "\tstr\t%0, [%1]" + : "=&r"(data) + : "r"(addr), "r"(set), "r"(clear) + : "memory"); + return data; +} + +static inline u64 writeread64(u64 addr, u64 data) +{ + write64(addr, data); + return read64(addr); +} + +static inline u32 read32(u64 addr) +{ + u32 data; + __asm__ volatile("ldr\t%w0, [%1]" : "=r"(data) : "r"(addr) : "memory"); + return data; +} + +static inline void write32(u64 addr, u32 data) +{ + __asm__ volatile("str\t%w0, [%1]" : : "r"(data), "r"(addr) : "memory"); +} + +static inline u32 writeread32(u64 addr, u32 data) +{ + write32(addr, data); + return read32(addr); +} + +static inline u32 set32(u64 addr, u32 set) +{ + u32 data; + __asm__ volatile("ldr\t%w0, [%1]\n" + "\torr\t%w0, %w0, %w2\n" + "\tstr\t%w0, [%1]" + : "=&r"(data) + : "r"(addr), "r"(set) + : "memory"); + return data; +} + +static inline u32 clear32(u64 addr, u32 clear) +{ + u32 data; + __asm__ volatile("ldr\t%w0, [%1]\n" + "\tbic\t%w0, %w0, %w2\n" + "\tstr\t%w0, [%1]" + : "=&r"(data) + : "r"(addr), "r"(clear) + : "memory"); + return data; +} + +static inline u32 mask32(u64 addr, u32 clear, u32 
set) +{ + u32 data; + __asm__ volatile("ldr\t%w0, [%1]\n" + "\tbic\t%w0, %w0, %w3\n" + "\torr\t%w0, %w0, %w2\n" + "\tstr\t%w0, [%1]" + : "=&r"(data) + : "r"(addr), "r"(set), "r"(clear) + : "memory"); + return data; +} + +static inline u16 read16(u64 addr) +{ + u32 data; + __asm__ volatile("ldrh\t%w0, [%1]" : "=r"(data) : "r"(addr) : "memory"); + return data; +} + +static inline void write16(u64 addr, u16 data) +{ + __asm__ volatile("strh\t%w0, [%1]" : : "r"(data), "r"(addr) : "memory"); +} + +static inline u16 set16(u64 addr, u16 set) +{ + u16 data; + __asm__ volatile("ldrh\t%w0, [%1]\n" + "\torr\t%w0, %w0, %w2\n" + "\tstrh\t%w0, [%1]" + : "=&r"(data) + : "r"(addr), "r"(set) + : "memory" + + ); + return data; +} + +static inline u16 clear16(u64 addr, u16 clear) +{ + u16 data; + __asm__ volatile("ldrh\t%w0, [%1]\n" + "\tbic\t%w0, %w0, %w2\n" + "\tstrh\t%w0, [%1]" + : "=&r"(data) + : "r"(addr), "r"(clear) + : "memory"); + return data; +} + +static inline u16 mask16(u64 addr, u16 clear, u16 set) +{ + u16 data; + __asm__ volatile("ldrh\t%w0, [%1]\n" + "\tbic\t%w0, %w0, %w3\n" + "\torr\t%w0, %w0, %w2\n" + "\tstrh\t%w0, [%1]" + : "=&r"(data) + : "r"(addr), "r"(set), "r"(clear) + : "memory"); + return data; +} + +static inline u16 writeread16(u64 addr, u16 data) +{ + write16(addr, data); + return read16(addr); +} + +static inline u8 read8(u64 addr) +{ + u32 data; + __asm__ volatile("ldrb\t%w0, [%1]" : "=r"(data) : "r"(addr) : "memory"); + return data; +} + +static inline void write8(u64 addr, u8 data) +{ + __asm__ volatile("strb\t%w0, [%1]" : : "r"(data), "r"(addr) : "memory"); +} + +static inline u8 set8(u64 addr, u8 set) +{ + u8 data; + __asm__ volatile("ldrb\t%w0, [%1]\n" + "\torr\t%w0, %w0, %w2\n" + "\tstrb\t%w0, [%1]" + : "=&r"(data) + : "r"(addr), "r"(set) + : "memory"); + return data; +} + +static inline u8 clear8(u64 addr, u8 clear) +{ + u8 data; + __asm__ volatile("ldrb\t%w0, [%1]\n" + "\tbic\t%w0, %w0, %w2\n" + "\tstrb\t%w0, [%1]" + : "=&r"(data) + : "r"(addr), 
"r"(clear) + : "memory"); + return data; +} + +static inline u8 mask8(u64 addr, u8 clear, u8 set) +{ + u8 data; + __asm__ volatile("ldrb\t%w0, [%1]\n" + "\tbic\t%w0, %w0, %w3\n" + "\torr\t%w0, %w0, %w2\n" + "\tstrb\t%w0, [%1]" + : "=&r"(data) + : "r"(addr), "r"(set), "r"(clear) + : "memory"); + return data; +} + +static inline u8 writeread8(u64 addr, u8 data) +{ + write8(addr, data); + return read8(addr); +} + +static inline void write64_lo_hi(u64 addr, u64 val) +{ + write32(addr, val); + write32(addr + 4, val >> 32); +} + +#define _concat(a, _1, b, ...) a##b + +#define _sr_tkn_S(_0, _1, op0, op1, CRn, CRm, op2) s##op0##_##op1##_c##CRn##_c##CRm##_##op2 + +#define _sr_tkn(a) a + +#define sr_tkn(...) _concat(_sr_tkn, __VA_ARGS__, )(__VA_ARGS__) + +#define __mrs(reg) \ + ({ \ + u64 val; \ + __asm__ volatile("mrs\t%0, " #reg : "=r"(val)); \ + val; \ + }) +#define _mrs(reg) __mrs(reg) + +#define __msr(reg, val) \ + ({ \ + u64 __val = (u64)val; \ + __asm__ volatile("msr\t" #reg ", %0" : : "r"(__val)); \ + }) +#define _msr(reg, val) __msr(reg, val) + +#define mrs(reg) _mrs(sr_tkn(reg)) +#define msr(reg, val) _msr(sr_tkn(reg), val) +#define msr_sync(reg, val) \ + ({ \ + _msr(sr_tkn(reg), val); \ + sysop("isb"); \ + }) + +#define reg_clr(reg, bits) _msr(sr_tkn(reg), _mrs(sr_tkn(reg)) & ~(bits)) +#define reg_set(reg, bits) _msr(sr_tkn(reg), _mrs(sr_tkn(reg)) | bits) +#define reg_mask(reg, clr, set) _msr(sr_tkn(reg), (_mrs(sr_tkn(reg)) & ~(clr)) | set) + +#define reg_clr_sync(reg, bits) \ + ({ \ + reg_clr(sr_tkn(reg), bits); \ + sysop("isb"); \ + }) +#define reg_set_sync(reg, bits) \ + ({ \ + reg_set(sr_tkn(reg), bits); \ + sysop("isb"); \ + }) +#define reg_mask_sync(reg, clr, set) \ + ({ \ + reg_mask(sr_tkn(reg), clr, set); \ + sysop("isb"); \ + }) + +#define sysop(op) __asm__ volatile(op ::: "memory") + +#define cacheop(op, val) ({ __asm__ volatile(op ", %0" : : "r"(val) : "memory"); }) + +#define ic_ialluis() sysop("ic ialluis") +#define ic_iallu() sysop("ic iallu") 
+#define ic_iavau(p) cacheop("ic ivau", p) +#define dc_ivac(p) cacheop("dc ivac", p) +#define dc_isw(p) cacheop("dc isw", p) +#define dc_csw(p) cacheop("dc csw", p) +#define dc_cisw(p) cacheop("dc cisw", p) +#define dc_zva(p) cacheop("dc zva", p) +#define dc_cvac(p) cacheop("dc cvac", p) +#define dc_cvau(p) cacheop("dc cvau", p) +#define dc_civac(p) cacheop("dc civac", p) + +#define dma_mb() sysop("dmb osh") +#define dma_rmb() sysop("dmb oshld") +#define dma_wmb() sysop("dmb oshst") + +static inline int is_ecore(void) +{ + return !(mrs(MPIDR_EL1) & (1 << 16)); +} + +static inline int in_el2(void) +{ + return (mrs(CurrentEL) >> 2) == 2; +} + +static inline int is_primary_core(void) +{ + return mrs(MPIDR_EL1) == 0x80000000; +} + +extern char _base[]; +extern char _rodata_end[]; +extern char _end[]; +extern char _payload_start[]; +extern char _payload_end[]; + +/* + * These functions are guaranteed to copy by reading from src and writing to dst + * in <n>-bit units If size is not aligned, the remaining bytes are not copied + */ +void memcpy128(void *dst, void *src, size_t size); +void memset64(void *dst, u64 value, size_t size); +void memcpy64(void *dst, void *src, size_t size); +void memset32(void *dst, u32 value, size_t size); +void memcpy32(void *dst, void *src, size_t size); +void memset16(void *dst, u16 value, size_t size); +void memcpy16(void *dst, void *src, size_t size); +void memset8(void *dst, u8 value, size_t size); +void memcpy8(void *dst, void *src, size_t size); + +void get_simd_state(void *state); +void put_simd_state(void *state); + +void hexdump(const void *d, size_t len); +void regdump(u64 addr, size_t len); +int snprintf(char *str, size_t size, const char *fmt, ...); +int debug_printf(const char *fmt, ...) 
__attribute__((format(printf, 1, 2))); +void udelay(u32 d); + +static inline u64 get_ticks(void) +{ + return mrs(CNTPCT_EL0); +} +u64 ticks_to_msecs(u64 ticks); +u64 ticks_to_usecs(u64 ticks); + +void reboot(void) __attribute__((noreturn)); +void flush_and_reboot(void) __attribute__((noreturn)); + +u64 timeout_calculate(u32 usec); +bool timeout_expired(u64 timeout); + +#define SPINLOCK_ALIGN 64 + +typedef struct { + s64 lock; + int count; +} spinlock_t ALIGNED(SPINLOCK_ALIGN); + +#define SPINLOCK_INIT \ + { \ + -1, 0 \ + } +#define DECLARE_SPINLOCK(n) spinlock_t n = SPINLOCK_INIT; + +void spin_init(spinlock_t *lock); +void spin_lock(spinlock_t *lock); +void spin_unlock(spinlock_t *lock); + +#define mdelay(m) udelay((m)*1000) + +#define panic(fmt, ...) \ + do { \ + debug_printf(fmt, ##__VA_ARGS__); \ + flush_and_reboot(); \ + } while (0) + +static inline int poll32(u64 addr, u32 mask, u32 target, u32 timeout) +{ + while (--timeout > 0) { + u32 value = read32(addr) & mask; + if (value == target) + return 0; + udelay(1); + } + + return -1; +} + +typedef u64(generic_func)(u64, u64, u64, u64, u64); + +struct vector_args { + generic_func *entry; + u64 args[5]; + bool restore_logo; +}; + +extern u32 board_id, chip_id; + +extern struct vector_args next_stage; + +void deep_wfi(void); + +bool is_heap(void *addr); + +#endif diff --git a/tools/src/utils_asm.S b/tools/src/utils_asm.S new file mode 100644 index 0000000..7fe1cea --- /dev/null +++ b/tools/src/utils_asm.S @@ -0,0 +1,182 @@ +/* SPDX-License-Identifier: MIT */ + +#include "cpu_regs.h" + +.text + +.globl memcpy128 +.type memcpy128, @function +memcpy128: + ands x2, x2, #~15 + beq 2f +1: ldp x3, x4, [x1], #16 + stp x3, x4, [x0], #16 + subs x2, x2, #16 + bne 1b +2: + ret + +.globl memcpy64 +.type memcpy64, @function +memcpy64: + ands x2, x2, #~7 + beq 2f +1: ldr x3, [x1], #8 + str x3, [x0], #8 + subs x2, x2, #8 + bne 1b +2: + ret + +.globl memset64 +.type memset64, @function +memset64: + ands x2, x2, #~7 + beq 2f +1: str 
x1, [x0], #8 + subs x2, x2, #8 + bne 1b +2: + ret + +.globl memcpy32 +.type memcpy32, @function +memcpy32: + ands x2, x2, #~3 + beq 2f +1: ldr w3, [x1], #4 + str w3, [x0], #4 + subs x2, x2, #4 + bne 1b +2: + ret + +.globl memset32 +.type memset32, @function +memset32: + ands x2, x2, #~3 + beq 2f +1: str w1, [x0], #4 + subs x2, x2, #4 + bne 1b +2: + ret + +.globl memcpy16 +.type memcpy16, @function +memcpy16: + ands x2, x2, #~1 + beq 2f +1: ldrh w3, [x1], #2 + strh w3, [x0], #2 + subs x2, x2, #2 + bne 1b +2: + ret + +.globl memset16 +.type memset16, @function +memset16: + ands x2, x2, #~1 + beq 2f +1: strh w1, [x0], #2 + subs x2, x2, #2 + bne 1b +2: + ret + +.globl memcpy8 +.type memcpy8, @function +memcpy8: + cmp x2, #0 + beq 2f +1: ldrb w3, [x1], #1 + strb w3, [x0], #1 + subs x2, x2, #1 + bne 1b +2: + ret + +.globl memset8 +.type memset8, @function +memset8: + cmp x2, #0 + beq 2f +1: strb w1, [x0], #1 + subs x2, x2, #1 + bne 1b +2: + ret + +.globl get_simd_state +.type get_simd_state, @function +get_simd_state: + stp q0, q1, [x0], #32 + stp q2, q3, [x0], #32 + stp q4, q5, [x0], #32 + stp q6, q7, [x0], #32 + stp q8, q9, [x0], #32 + stp q10, q11, [x0], #32 + stp q12, q13, [x0], #32 + stp q14, q15, [x0], #32 + stp q16, q17, [x0], #32 + stp q18, q19, [x0], #32 + stp q20, q21, [x0], #32 + stp q22, q23, [x0], #32 + stp q24, q25, [x0], #32 + stp q26, q27, [x0], #32 + stp q28, q29, [x0], #32 + stp q30, q31, [x0], #32 + ret + +.globl put_simd_state +.type put_simd_state, @function +put_simd_state: + ldp q0, q1, [x0], #32 + ldp q2, q3, [x0], #32 + ldp q4, q5, [x0], #32 + ldp q6, q7, [x0], #32 + ldp q8, q9, [x0], #32 + ldp q10, q11, [x0], #32 + ldp q12, q13, [x0], #32 + ldp q14, q15, [x0], #32 + ldp q16, q17, [x0], #32 + ldp q18, q19, [x0], #32 + ldp q20, q21, [x0], #32 + ldp q22, q23, [x0], #32 + ldp q24, q25, [x0], #32 + ldp q26, q27, [x0], #32 + ldp q28, q29, [x0], #32 + ldp q30, q31, [x0], #32 + ret + +.globl deep_wfi +.type deep_wfi, @function +deep_wfi: + str x30, [sp, 
#-16]! + stp x28, x29, [sp, #-16]! + stp x26, x27, [sp, #-16]! + stp x24, x25, [sp, #-16]! + stp x22, x23, [sp, #-16]! + stp x20, x21, [sp, #-16]! + stp x18, x19, [sp, #-16]! + + mrs x0, SYS_IMP_APL_CYC_OVRD + orr x0, x0, #(3L << 24) + msr SYS_IMP_APL_CYC_OVRD, x0 + + wfi + + mrs x0, SYS_IMP_APL_CYC_OVRD + bic x0, x0, #(1L << 24) + msr SYS_IMP_APL_CYC_OVRD, x0 + + ldp x18, x19, [sp], #16 + ldp x20, x21, [sp], #16 + ldp x22, x23, [sp], #16 + ldp x24, x25, [sp], #16 + ldp x26, x27, [sp], #16 + ldp x28, x29, [sp], #16 + ldr x30, [sp], #16 + + ret diff --git a/tools/src/vsprintf.c b/tools/src/vsprintf.c new file mode 100644 index 0000000..daa5d29 --- /dev/null +++ b/tools/src/vsprintf.c @@ -0,0 +1,703 @@ +/* + * Copyright (c) 1995 Patrick Powell. + * + * This code is based on code written by Patrick Powell <papowell@astart.com>. + * It may be used for any purpose as long as this notice remains intact on all + * source code distributions. + */ + +/* + * Copyright (c) 2008 Holger Weiss. + * + * This version of the code is maintained by Holger Weiss <holger@jhweiss.de>. + * My changes to the code may freely be used, modified and/or redistributed for + * any purpose. It would be nice if additions and fixes to this file (including + * trivial code cleanups) would be sent back in order to let me include them in + * the version available at <http://www.jhweiss.de/software/snprintf.html>. + * However, this is not a requirement for using or redistributing (possibly + * modified) versions of this file, nor is leaving this notice intact mandatory. + */ + +/* + * History + * + * 2009-03-05 Hector Martin "marcan" <marcan@marcansoft.com> + * + * Hacked up and removed a lot of stuff including floating-point support, + * a bunch of ifs and defines, locales, and tests + * + * 2008-01-20 Holger Weiss <holger@jhweiss.de> for C99-snprintf 1.1: + * + * Fixed the detection of infinite floating point values on IRIX (and + * possibly other systems) and applied another few minor cleanups. 
+ * + * 2008-01-06 Holger Weiss <holger@jhweiss.de> for C99-snprintf 1.0: + * + * Added a lot of new features, fixed many bugs, and incorporated various + * improvements done by Andrew Tridgell <tridge@samba.org>, Russ Allbery + * <rra@stanford.edu>, Hrvoje Niksic <hniksic@xemacs.org>, Damien Miller + * <djm@mindrot.org>, and others for the Samba, INN, Wget, and OpenSSH + * projects. The additions include: support the "e", "E", "g", "G", and + * "F" conversion specifiers (and use conversion style "f" or "F" for the + * still unsupported "a" and "A" specifiers); support the "hh", "ll", "j", + * "t", and "z" length modifiers; support the "#" flag and the (non-C99) + * "'" flag; use localeconv(3) (if available) to get both the current + * locale's decimal point character and the separator between groups of + * digits; fix the handling of various corner cases of field width and + * precision specifications; fix various floating point conversion bugs; + * handle infinite and NaN floating point values; don't attempt to write to + * the output buffer (which may be NULL) if a size of zero was specified; + * check for integer overflow of the field width, precision, and return + * values and during the floating point conversion; use the OUTCHAR() macro + * instead of a function for better performance; provide asprintf(3) and + * vasprintf(3) functions; add new test cases. The replacement functions + * have been renamed to use an "rpl_" prefix, the function calls in the + * main project (and in this file) must be redefined accordingly for each + * replacement function which is needed (by using Autoconf or other means). + * Various other minor improvements have been applied and the coding style + * was cleaned up for consistency. + * + * 2007-07-23 Holger Weiss <holger@jhweiss.de> for Mutt 1.5.13: + * + * C99 compliant snprintf(3) and vsnprintf(3) functions return the number + * of characters that would have been written to a sufficiently sized + * buffer (excluding the '\0'). 
The original code simply returned the + * length of the resulting output string, so that's been fixed. + * + * 1998-03-05 Michael Elkins <me@mutt.org> for Mutt 0.90.8: + * + * The original code assumed that both snprintf(3) and vsnprintf(3) were + * missing. Some systems only have snprintf(3) but not vsnprintf(3), so + * the code is now broken down under HAVE_SNPRINTF and HAVE_VSNPRINTF. + * + * 1998-01-27 Thomas Roessler <roessler@does-not-exist.org> for Mutt 0.89i: + * + * The PGP code was using unsigned hexadecimal formats. Unfortunately, + * unsigned formats simply didn't work. + * + * 1997-10-22 Brandon Long <blong@fiction.net> for Mutt 0.87.1: + * + * Ok, added some minimal floating point support, which means this probably + * requires libm on most operating systems. Don't yet support the exponent + * (e,E) and sigfig (g,G). Also, fmtint() was pretty badly broken, it just + * wasn't being exercised in ways which showed it, so that's been fixed. + * Also, formatted the code to Mutt conventions, and removed dead code left + * over from the original. Also, there is now a builtin-test, run with: + * gcc -DTEST_SNPRINTF -o snprintf snprintf.c -lm && ./snprintf + * + * 2996-09-15 Brandon Long <blong@fiction.net> for Mutt 0.43: + * + * This was ugly. It is still ugly. I opted out of floating point + * numbers, but the formatter understands just about everything from the + * normal C string format, at least as far as I can tell from the Solaris + * 2.5 printf(3S) man page. + */ + +#include <stdarg.h> + +#include "types.h" + +#define VA_START(ap, last) va_start(ap, last) +#define VA_SHIFT(ap, value, type) /* No-op for ANSI C. */ + +#define ULLONG unsigned long long +#define UINTMAX_T unsigned long +#define LLONG long +#define INTMAX_T long + +/* Support for uintptr_t. 
*/ +#ifndef UINTPTR_T +#if HAVE_UINTPTR_T || defined(uintptr_t) +#define UINTPTR_T uintptr_t +#else +#define UINTPTR_T unsigned long int +#endif /* HAVE_UINTPTR_T || defined(uintptr_t) */ +#endif /* !defined(UINTPTR_T) */ + +/* Support for ptrdiff_t. */ +#ifndef PTRDIFF_T +#if HAVE_PTRDIFF_T || defined(ptrdiff_t) +#define PTRDIFF_T ptrdiff_t +#else +#define PTRDIFF_T long int +#endif /* HAVE_PTRDIFF_T || defined(ptrdiff_t) */ +#endif /* !defined(PTRDIFF_T) */ + +/* + * We need an unsigned integer type corresponding to ptrdiff_t (cf. C99: + * 7.19.6.1, 7). However, we'll simply use PTRDIFF_T and convert it to an + * unsigned type if necessary. This should work just fine in practice. + */ +#ifndef UPTRDIFF_T +#define UPTRDIFF_T PTRDIFF_T +#endif /* !defined(UPTRDIFF_T) */ + +/* + * We need a signed integer type corresponding to size_t (cf. C99: 7.19.6.1, 7). + * However, we'll simply use size_t and convert it to a signed type if + * necessary. This should work just fine in practice. + */ +#ifndef SSIZE_T +#define SSIZE_T size_t +#endif /* !defined(SSIZE_T) */ + +/* + * Buffer size to hold the octal string representation of UINT128_MAX without + * nul-termination ("3777777777777777777777777777777777777777777"). + */ +#ifdef MAX_CONVERT_LENGTH +#undef MAX_CONVERT_LENGTH +#endif /* defined(MAX_CONVERT_LENGTH) */ +#define MAX_CONVERT_LENGTH 43 + +/* Format read states. */ +#define PRINT_S_DEFAULT 0 +#define PRINT_S_FLAGS 1 +#define PRINT_S_WIDTH 2 +#define PRINT_S_DOT 3 +#define PRINT_S_PRECISION 4 +#define PRINT_S_MOD 5 +#define PRINT_S_CONV 6 + +/* Format flags. */ +#define PRINT_F_MINUS (1 << 0) +#define PRINT_F_PLUS (1 << 1) +#define PRINT_F_SPACE (1 << 2) +#define PRINT_F_NUM (1 << 3) +#define PRINT_F_ZERO (1 << 4) +#define PRINT_F_QUOTE (1 << 5) +#define PRINT_F_UP (1 << 6) +#define PRINT_F_UNSIGNED (1 << 7) +#define PRINT_F_TYPE_G (1 << 8) +#define PRINT_F_TYPE_E (1 << 9) + +/* Conversion flags. 
*/ +#define PRINT_C_CHAR 1 +#define PRINT_C_SHORT 2 +#define PRINT_C_LONG 3 +#define PRINT_C_LLONG 4 +// #define PRINT_C_LDOUBLE 5 +#define PRINT_C_SIZE 6 +#define PRINT_C_PTRDIFF 7 +#define PRINT_C_INTMAX 8 + +#ifndef MAX +#define MAX(x, y) ((x >= y) ? x : y) +#endif /* !defined(MAX) */ +#ifndef CHARTOINT +#define CHARTOINT(ch) (ch - '0') +#endif /* !defined(CHARTOINT) */ +#ifndef ISDIGIT +#define ISDIGIT(ch) ('0' <= (unsigned char)ch && (unsigned char)ch <= '9') +#endif /* !defined(ISDIGIT) */ + +#define OUTCHAR(str, len, size, ch) \ + do { \ + if (len + 1 < size) \ + str[len] = ch; \ + (len)++; \ + } while (/* CONSTCOND */ 0) + +static void fmtstr(char *, size_t *, size_t, const char *, int, int, int); +static void fmtint(char *, size_t *, size_t, INTMAX_T, int, int, int, int); +static void printsep(char *, size_t *, size_t); +static int getnumsep(int); +static int convert(UINTMAX_T, char *, size_t, int, int); + +int vsnprintf(char *str, size_t size, const char *format, va_list args) +{ + INTMAX_T value; + unsigned char cvalue; + const char *strvalue; + INTMAX_T *intmaxptr; + PTRDIFF_T *ptrdiffptr; + SSIZE_T *sizeptr; + LLONG *llongptr; + long int *longptr; + int *intptr; + short int *shortptr; + signed char *charptr; + size_t len = 0; + int overflow = 0; + int base = 0; + int cflags = 0; + int flags = 0; + int width = 0; + int precision = -1; + int state = PRINT_S_DEFAULT; + char ch = *format++; + + /* + * C99 says: "If `n' is zero, nothing is written, and `s' may be a null + * pointer." (7.19.6.5, 2) We're forgiving and allow a NULL pointer + * even if a size larger than zero was specified. At least NetBSD's + * snprintf(3) does the same, as well as other versions of this file. + * (Though some of these versions will write to a non-NULL buffer even + * if a size of zero was specified, which violates the standard.) 
+ */ + if (str == NULL && size != 0) + size = 0; + + while (ch != '\0') + switch (state) { + case PRINT_S_DEFAULT: + if (ch == '%') + state = PRINT_S_FLAGS; + else + OUTCHAR(str, len, size, ch); + ch = *format++; + break; + case PRINT_S_FLAGS: + switch (ch) { + case '-': + flags |= PRINT_F_MINUS; + ch = *format++; + break; + case '+': + flags |= PRINT_F_PLUS; + ch = *format++; + break; + case ' ': + flags |= PRINT_F_SPACE; + ch = *format++; + break; + case '#': + flags |= PRINT_F_NUM; + ch = *format++; + break; + case '0': + flags |= PRINT_F_ZERO; + ch = *format++; + break; + case '\'': /* SUSv2 flag (not in C99). */ + flags |= PRINT_F_QUOTE; + ch = *format++; + break; + default: + state = PRINT_S_WIDTH; + break; + } + break; + case PRINT_S_WIDTH: + if (ISDIGIT(ch)) { + ch = CHARTOINT(ch); + if (width > (INT_MAX - ch) / 10) { + overflow = 1; + goto out; + } + width = 10 * width + ch; + ch = *format++; + } else if (ch == '*') { + /* + * C99 says: "A negative field width argument is + * taken as a `-' flag followed by a positive + * field width." (7.19.6.1, 5) + */ + if ((width = va_arg(args, int)) < 0) { + flags |= PRINT_F_MINUS; + width = -width; + } + ch = *format++; + state = PRINT_S_DOT; + } else + state = PRINT_S_DOT; + break; + case PRINT_S_DOT: + if (ch == '.') { + state = PRINT_S_PRECISION; + ch = *format++; + } else + state = PRINT_S_MOD; + break; + case PRINT_S_PRECISION: + if (precision == -1) + precision = 0; + if (ISDIGIT(ch)) { + ch = CHARTOINT(ch); + if (precision > (INT_MAX - ch) / 10) { + overflow = 1; + goto out; + } + precision = 10 * precision + ch; + ch = *format++; + } else if (ch == '*') { + /* + * C99 says: "A negative precision argument is + * taken as if the precision were omitted." 
+ * (7.19.6.1, 5) + */ + if ((precision = va_arg(args, int)) < 0) + precision = -1; + ch = *format++; + state = PRINT_S_MOD; + } else + state = PRINT_S_MOD; + break; + case PRINT_S_MOD: + switch (ch) { + case 'h': + ch = *format++; + if (ch == 'h') { /* It's a char. */ + ch = *format++; + cflags = PRINT_C_CHAR; + } else + cflags = PRINT_C_SHORT; + break; + case 'l': + ch = *format++; + if (ch == 'l') { /* It's a long long. */ + ch = *format++; + cflags = PRINT_C_LLONG; + } else + cflags = PRINT_C_LONG; + break; + case 'j': + cflags = PRINT_C_INTMAX; + ch = *format++; + break; + case 't': + cflags = PRINT_C_PTRDIFF; + ch = *format++; + break; + case 'z': + cflags = PRINT_C_SIZE; + ch = *format++; + break; + } + state = PRINT_S_CONV; + break; + case PRINT_S_CONV: + switch (ch) { + case 'd': + /* FALLTHROUGH */ + case 'i': + switch (cflags) { + case PRINT_C_CHAR: + value = (signed char)va_arg(args, int); + break; + case PRINT_C_SHORT: + value = (short int)va_arg(args, int); + break; + case PRINT_C_LONG: + value = va_arg(args, long int); + break; + case PRINT_C_LLONG: + value = va_arg(args, LLONG); + break; + case PRINT_C_SIZE: + value = va_arg(args, SSIZE_T); + break; + case PRINT_C_INTMAX: + value = va_arg(args, INTMAX_T); + break; + case PRINT_C_PTRDIFF: + value = va_arg(args, PTRDIFF_T); + break; + default: + value = va_arg(args, int); + break; + } + fmtint(str, &len, size, value, 10, width, precision, flags); + break; + case 'X': + flags |= PRINT_F_UP; + /* FALLTHROUGH */ + case 'x': + base = 16; + /* FALLTHROUGH */ + case 'o': + if (base == 0) + base = 8; + /* FALLTHROUGH */ + case 'u': + if (base == 0) + base = 10; + flags |= PRINT_F_UNSIGNED; + switch (cflags) { + case PRINT_C_CHAR: + value = (unsigned char)va_arg(args, unsigned int); + break; + case PRINT_C_SHORT: + value = (unsigned short int)va_arg(args, unsigned int); + break; + case PRINT_C_LONG: + value = va_arg(args, unsigned long int); + break; + case PRINT_C_LLONG: + value = va_arg(args, ULLONG); + 
break; + case PRINT_C_SIZE: + value = va_arg(args, size_t); + break; + case PRINT_C_INTMAX: + value = va_arg(args, UINTMAX_T); + break; + case PRINT_C_PTRDIFF: + value = va_arg(args, UPTRDIFF_T); + break; + default: + value = va_arg(args, unsigned int); + break; + } + fmtint(str, &len, size, value, base, width, precision, flags); + break; + case 'c': + cvalue = va_arg(args, int); + OUTCHAR(str, len, size, cvalue); + break; + case 's': + strvalue = va_arg(args, char *); + fmtstr(str, &len, size, strvalue, width, precision, flags); + break; + case 'p': + /* + * C99 says: "The value of the pointer is + * converted to a sequence of printing + * characters, in an implementation-defined + * manner." (C99: 7.19.6.1, 8) + */ + if ((strvalue = va_arg(args, void *)) == NULL) + /* + * We use the glibc format. BSD prints + * "0x0", SysV "0". + */ + fmtstr(str, &len, size, "(nil)", width, -1, flags); + else { + /* + * We use the BSD/glibc format. SysV + * omits the "0x" prefix (which we emit + * using the PRINT_F_NUM flag). + */ + flags |= PRINT_F_NUM; + flags |= PRINT_F_UNSIGNED; + fmtint(str, &len, size, (UINTPTR_T)strvalue, 16, width, precision, + flags); + } + break; + case 'n': + switch (cflags) { + case PRINT_C_CHAR: + charptr = va_arg(args, signed char *); + *charptr = len; + break; + case PRINT_C_SHORT: + shortptr = va_arg(args, short int *); + *shortptr = len; + break; + case PRINT_C_LONG: + longptr = va_arg(args, long int *); + *longptr = len; + break; + case PRINT_C_LLONG: + llongptr = va_arg(args, LLONG *); + *llongptr = len; + break; + case PRINT_C_SIZE: + /* + * C99 says that with the "z" length + * modifier, "a following `n' conversion + * specifier applies to a pointer to a + * signed integer type corresponding to + * size_t argument." 
(7.19.6.1, 7) + */ + sizeptr = va_arg(args, SSIZE_T *); + *sizeptr = len; + break; + case PRINT_C_INTMAX: + intmaxptr = va_arg(args, INTMAX_T *); + *intmaxptr = len; + break; + case PRINT_C_PTRDIFF: + ptrdiffptr = va_arg(args, PTRDIFF_T *); + *ptrdiffptr = len; + break; + default: + intptr = va_arg(args, int *); + *intptr = len; + break; + } + break; + case '%': /* Print a "%" character verbatim. */ + OUTCHAR(str, len, size, ch); + break; + default: /* Skip other characters. */ + break; + } + ch = *format++; + state = PRINT_S_DEFAULT; + base = cflags = flags = width = 0; + precision = -1; + break; + } +out: + if (len < size) + str[len] = '\0'; + else if (size > 0) + str[size - 1] = '\0'; + + if (overflow || len >= INT_MAX) { + return -1; + } + return (int)len; +} + +static void fmtstr(char *str, size_t *len, size_t size, const char *value, int width, int precision, + int flags) +{ + int padlen, strln; /* Amount to pad. */ + int noprecision = (precision == -1); + + if (value == NULL) /* We're forgiving. */ + value = "(null)"; + + /* If a precision was specified, don't read the string past it. */ + for (strln = 0; value[strln] != '\0' && (noprecision || strln < precision); strln++) + continue; + + if ((padlen = width - strln) < 0) + padlen = 0; + if (flags & PRINT_F_MINUS) /* Left justify. */ + padlen = -padlen; + + while (padlen > 0) { /* Leading spaces. */ + OUTCHAR(str, *len, size, ' '); + padlen--; + } + while (*value != '\0' && (noprecision || precision-- > 0)) { + OUTCHAR(str, *len, size, *value); + value++; + } + while (padlen < 0) { /* Trailing spaces. */ + OUTCHAR(str, *len, size, ' '); + padlen++; + } +} + +static void fmtint(char *str, size_t *len, size_t size, INTMAX_T value, int base, int width, + int precision, int flags) +{ + UINTMAX_T uvalue; + char iconvert[MAX_CONVERT_LENGTH]; + char sign = 0; + char hexprefix = 0; + int spadlen = 0; /* Amount to space pad. */ + int zpadlen = 0; /* Amount to zero pad. 
*/ + int pos; + int separators = (flags & PRINT_F_QUOTE); + int noprecision = (precision == -1); + + if (flags & PRINT_F_UNSIGNED) + uvalue = value; + else { + uvalue = (value >= 0) ? value : -value; + if (value < 0) + sign = '-'; + else if (flags & PRINT_F_PLUS) /* Do a sign. */ + sign = '+'; + else if (flags & PRINT_F_SPACE) + sign = ' '; + } + + pos = convert(uvalue, iconvert, sizeof(iconvert), base, flags & PRINT_F_UP); + + if (flags & PRINT_F_NUM && uvalue != 0) { + /* + * C99 says: "The result is converted to an `alternative form'. + * For `o' conversion, it increases the precision, if and only + * if necessary, to force the first digit of the result to be a + * zero (if the value and precision are both 0, a single 0 is + * printed). For `x' (or `X') conversion, a nonzero result has + * `0x' (or `0X') prefixed to it." (7.19.6.1, 6) + */ + switch (base) { + case 8: + if (precision <= pos) + precision = pos + 1; + break; + case 16: + hexprefix = (flags & PRINT_F_UP) ? 'X' : 'x'; + break; + } + } + + if (separators) /* Get the number of group separators we'll print. */ + separators = getnumsep(pos); + + zpadlen = precision - pos - separators; + spadlen = width /* Minimum field width. */ + - separators /* Number of separators. */ + - MAX(precision, pos) /* Number of integer digits. */ + - ((sign != 0) ? 1 : 0) /* Will we print a sign? */ + - ((hexprefix != 0) ? 2 : 0); /* Will we print a prefix? */ + + if (zpadlen < 0) + zpadlen = 0; + if (spadlen < 0) + spadlen = 0; + + /* + * C99 says: "If the `0' and `-' flags both appear, the `0' flag is + * ignored. For `d', `i', `o', `u', `x', and `X' conversions, if a + * precision is specified, the `0' flag is ignored." (7.19.6.1, 6) + */ + if (flags & PRINT_F_MINUS) /* Left justify. */ + spadlen = -spadlen; + else if (flags & PRINT_F_ZERO && noprecision) { + zpadlen += spadlen; + spadlen = 0; + } + while (spadlen > 0) { /* Leading spaces. */ + OUTCHAR(str, *len, size, ' '); + spadlen--; + } + if (sign != 0) /* Sign. 
*/ + OUTCHAR(str, *len, size, sign); + if (hexprefix != 0) { /* A "0x" or "0X" prefix. */ + OUTCHAR(str, *len, size, '0'); + OUTCHAR(str, *len, size, hexprefix); + } + while (zpadlen > 0) { /* Leading zeros. */ + OUTCHAR(str, *len, size, '0'); + zpadlen--; + } + while (pos > 0) { /* The actual digits. */ + pos--; + OUTCHAR(str, *len, size, iconvert[pos]); + if (separators > 0 && pos > 0 && pos % 3 == 0) + printsep(str, len, size); + } + while (spadlen < 0) { /* Trailing spaces. */ + OUTCHAR(str, *len, size, ' '); + spadlen++; + } +} + +static void printsep(char *str, size_t *len, size_t size) +{ + OUTCHAR(str, *len, size, ','); +} + +static int getnumsep(int digits) +{ + int separators = (digits - ((digits % 3 == 0) ? 1 : 0)) / 3; + return separators; +} + +static int convert(UINTMAX_T value, char *buf, size_t size, int base, int caps) +{ + const char *digits = caps ? "0123456789ABCDEF" : "0123456789abcdef"; + size_t pos = 0; + + /* We return an unterminated buffer with the digits in reverse order. 
*/ + do { + buf[pos++] = digits[value % base]; + value /= base; + } while (value != 0 && pos < size); + + return (int)pos; +} + +int vsprintf(char *buf, const char *fmt, va_list args) +{ + return vsnprintf(buf, INT_MAX, fmt, args); +} diff --git a/tools/src/vsprintf.h b/tools/src/vsprintf.h new file mode 100644 index 0000000..cff6c93 --- /dev/null +++ b/tools/src/vsprintf.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef VSPRINTF_H +#define VSPRINTF_H + +#include <stdarg.h> + +int vsprintf(char *buf, const char *fmt, va_list args); +int vsnprintf(char *buf, size_t size, const char *fmt, va_list args); + +#endif diff --git a/tools/src/wdt.c b/tools/src/wdt.c new file mode 100644 index 0000000..a3ebe3a --- /dev/null +++ b/tools/src/wdt.c @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: MIT */ + +#include "wdt.h" +#include "adt.h" +#include "types.h" +#include "utils.h" + +#define WDT_COUNT 0x10 +#define WDT_ALARM 0x14 +#define WDT_CTL 0x1c + +static u64 wdt_base = 0; + +void wdt_disable(void) +{ + int path[8]; + int node = adt_path_offset_trace(adt, "/arm-io/wdt", path); + + if (node < 0) { + printf("WDT node not found!\n"); + return; + } + + if (adt_get_reg(adt, path, "reg", 0, &wdt_base, NULL)) { + printf("Failed to get WDT reg property!\n"); + return; + } + + printf("WDT registers @ 0x%lx\n", wdt_base); + + write32(wdt_base + WDT_CTL, 0); + + printf("WDT disabled\n"); +} + +void wdt_reboot(void) +{ + if (!wdt_base) + return; + + write32(wdt_base + WDT_ALARM, 0x100000); + write32(wdt_base + WDT_COUNT, 0); + write32(wdt_base + WDT_CTL, 4); +} diff --git a/tools/src/wdt.h b/tools/src/wdt.h new file mode 100644 index 0000000..6a48601 --- /dev/null +++ b/tools/src/wdt.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef __WDT_H__ +#define __WDT_H__ + +void wdt_disable(void); +void wdt_reboot(void); + +#endif diff --git a/tools/src/xnuboot.h b/tools/src/xnuboot.h new file mode 100644 index 0000000..32623b3 --- /dev/null +++ b/tools/src/xnuboot.h 
/* SPDX-License-Identifier: MIT */

#ifndef XNUBOOT_H
#define XNUBOOT_H

/* Fixed size, in bytes, of the command-line buffer inside struct boot_args. */
#define CMDLINE_LENGTH 608

/*
 * Framebuffer description embedded in boot_args.
 * NOTE(review): field semantics are not established by this file; they appear
 * to mirror Apple's iBoot Boot_Video layout — confirm against the bootloader
 * (xnu pexpert boot.h) before relying on individual field meanings.
 */
struct boot_video {
    u64 base;    /* framebuffer base address — presumably physical; verify */
    u64 display; /* display id/flags — TODO confirm semantics */
    u64 stride;  /* bytes per row — presumably; verify against producer */
    u64 width;
    u64 height;
    u64 depth;   /* bits per pixel — presumably; verify */
};

/*
 * Boot-argument block handed over by the bootloader.
 * NOTE(review): this is a binary interface — the field order, widths, and
 * total size must match exactly what the boot environment writes. Do not
 * reorder, resize, or insert fields.
 */
struct boot_args {
    u16 revision;                 /* structure revision */
    u16 version;                  /* structure version */
    u64 virt_base;                /* presumably kernel virtual base — confirm */
    u64 phys_base;                /* presumably kernel physical base — confirm */
    u64 mem_size;
    u64 top_of_kernel_data;
    struct boot_video video;      /* framebuffer info, see above */
    u32 machine_type;
    void *devtree;                /* device tree blob pointer */
    u32 devtree_size;             /* device tree blob size in bytes */
    char cmdline[CMDLINE_LENGTH]; /* kernel command line */
    u64 boot_flags;               /* TODO confirm bit meanings */
    u64 mem_size_actual;
};

/* Defined elsewhere: raw address of the boot_args block as received. */
extern u64 boot_args_addr;
/* Defined elsewhere: local copy of the boot arguments. */
extern struct boot_args cur_boot_args;

#endif
