author    magh <magh@maghmogh.com>    2023-03-06 18:44:55 -0600
committer magh <magh@maghmogh.com>    2023-03-06 18:44:55 -0600
commit    e80d9d8871b325a04b18f90a9ea4bb7fd148fb25 (patch)
tree      79dbdb8506b7ff1e92549188d1b94cfc0b3503ae /tools/src
add m1n1 (HEAD, master)
Diffstat (limited to 'tools/src')
-rw-r--r--  tools/src/adt.c | 375
-rw-r--r--  tools/src/adt.h | 109
-rw-r--r--  tools/src/afk.c | 545
-rw-r--r--  tools/src/afk.h | 17
-rw-r--r--  tools/src/aic.c | 153
-rw-r--r--  tools/src/aic.h | 40
-rw-r--r--  tools/src/aic_regs.h | 53
-rw-r--r--  tools/src/arm_cpu_regs.h | 338
-rw-r--r--  tools/src/asc.c | 126
-rw-r--r--  tools/src/asc.h | 30
-rw-r--r--  tools/src/chainload.c | 148
-rw-r--r--  tools/src/chainload.h | 11
-rw-r--r--  tools/src/chainload_asm.S | 20
-rw-r--r--  tools/src/chickens.c | 118
-rw-r--r--  tools/src/chickens.h | 8
-rw-r--r--  tools/src/chickens_avalanche.c | 50
-rw-r--r--  tools/src/chickens_blizzard.c | 18
-rw-r--r--  tools/src/chickens_firestorm.c | 113
-rw-r--r--  tools/src/chickens_icestorm.c | 30
-rw-r--r--  tools/src/clk.c | 36
-rw-r--r--  tools/src/clk.h | 8
-rw-r--r--  tools/src/cpu_regs.h | 290
-rw-r--r--  tools/src/cpufreq.c | 120
-rw-r--r--  tools/src/cpufreq.h | 8
-rw-r--r--  tools/src/dapf.c | 137
-rw-r--r--  tools/src/dapf.h | 9
-rw-r--r--  tools/src/dart.c | 714
-rw-r--r--  tools/src/dart.h | 33
-rw-r--r--  tools/src/dcp.c | 92
-rw-r--r--  tools/src/dcp.h | 22
-rw-r--r--  tools/src/dcp_iboot.c | 224
-rw-r--r--  tools/src/dcp_iboot.h | 111
-rw-r--r--  tools/src/devicetree.c | 69
-rw-r--r--  tools/src/devicetree.h | 22
-rw-r--r--  tools/src/display.c | 514
-rw-r--r--  tools/src/display.h | 21
-rw-r--r--  tools/src/dlmalloc/malloc.c | 6286
-rw-r--r--  tools/src/dlmalloc/malloc_config.h | 37
-rw-r--r--  tools/src/exception.c | 388
-rw-r--r--  tools/src/exception.h | 55
-rw-r--r--  tools/src/exception_asm.S | 231
-rw-r--r--  tools/src/fb.c | 415
-rw-r--r--  tools/src/fb.h | 57
-rw-r--r--  tools/src/firmware.c | 82
-rw-r--r--  tools/src/firmware.h | 37
-rw-r--r--  tools/src/gxf.c | 114
-rw-r--r--  tools/src/gxf.h | 22
-rw-r--r--  tools/src/gxf_asm.S | 246
-rw-r--r--  tools/src/heapblock.c | 44
-rw-r--r--  tools/src/heapblock.h | 13
-rw-r--r--  tools/src/hv.c | 329
-rw-r--r--  tools/src/hv.h | 112
-rw-r--r--  tools/src/hv_aic.c | 95
-rw-r--r--  tools/src/hv_asm.S | 196
-rw-r--r--  tools/src/hv_exc.c | 515
-rw-r--r--  tools/src/hv_virtio.c | 308
-rw-r--r--  tools/src/hv_vm.c | 1278
-rw-r--r--  tools/src/hv_vuart.c | 125
-rw-r--r--  tools/src/hv_wdt.c | 130
-rw-r--r--  tools/src/i2c.c | 216
-rw-r--r--  tools/src/i2c.h | 22
-rw-r--r--  tools/src/iodev.c | 319
-rw-r--r--  tools/src/iodev.h | 63
-rw-r--r--  tools/src/iova.c | 233
-rw-r--r--  tools/src/iova.h | 18
-rw-r--r--  tools/src/kboot.c | 1937
-rw-r--r--  tools/src/kboot.h | 25
-rw-r--r--  tools/src/kboot_gpu.c | 452
-rw-r--r--  tools/src/libfdt/fdt.c | 327
-rw-r--r--  tools/src/libfdt/fdt.h | 66
-rw-r--r--  tools/src/libfdt/fdt_addresses.c | 101
-rw-r--r--  tools/src/libfdt/fdt_empty_tree.c | 38
-rw-r--r--  tools/src/libfdt/fdt_overlay.c | 882
-rw-r--r--  tools/src/libfdt/fdt_ro.c | 859
-rw-r--r--  tools/src/libfdt/fdt_rw.c | 492
-rw-r--r--  tools/src/libfdt/fdt_strerror.c | 59
-rw-r--r--  tools/src/libfdt/fdt_sw.c | 384
-rw-r--r--  tools/src/libfdt/fdt_wip.c | 94
-rw-r--r--  tools/src/libfdt/libfdt.h | 2080
-rw-r--r--  tools/src/libfdt/libfdt_env.h | 95
-rw-r--r--  tools/src/libfdt/libfdt_internal.h | 173
-rw-r--r--  tools/src/main.c | 205
-rw-r--r--  tools/src/math/exp2f_data.c | 42
-rw-r--r--  tools/src/math/exp2f_data.h | 22
-rw-r--r--  tools/src/math/expf.c | 83
-rw-r--r--  tools/src/math/libm.h | 271
-rw-r--r--  tools/src/mcc.c | 271
-rw-r--r--  tools/src/mcc.h | 19
-rw-r--r--  tools/src/memory.c | 566
-rw-r--r--  tools/src/memory.h | 88
-rw-r--r--  tools/src/memory_asm.S | 166
-rw-r--r--  tools/src/minilzlib/dictbuf.c | 155
-rw-r--r--  tools/src/minilzlib/inputbuf.c | 144
-rw-r--r--  tools/src/minilzlib/lzma2dec.c | 228
-rw-r--r--  tools/src/minilzlib/lzma2dec.h | 91
-rw-r--r--  tools/src/minilzlib/lzmadec.c | 627
-rw-r--r--  tools/src/minilzlib/lzmadec.h | 114
-rw-r--r--  tools/src/minilzlib/minlzlib.h | 88
-rw-r--r--  tools/src/minilzlib/minlzma.h | 33
-rw-r--r--  tools/src/minilzlib/rangedec.c | 395
-rw-r--r--  tools/src/minilzlib/xzstream.c | 547
-rw-r--r--  tools/src/minilzlib/xzstream.h | 123
-rw-r--r--  tools/src/nvme.c | 505
-rw-r--r--  tools/src/nvme.h | 14
-rw-r--r--  tools/src/payload.c | 281
-rw-r--r--  tools/src/payload.h | 8
-rw-r--r--  tools/src/pcie.c | 388
-rw-r--r--  tools/src/pcie.h | 9
-rw-r--r--  tools/src/pmgr.c | 358
-rw-r--r--  tools/src/pmgr.h | 24
-rw-r--r--  tools/src/proxy.c | 575
-rw-r--r--  tools/src/proxy.h | 183
-rw-r--r--  tools/src/ringbuffer.c | 81
-rw-r--r--  tools/src/ringbuffer.h | 22
-rw-r--r--  tools/src/rtkit.c | 710
-rw-r--r--  tools/src/rtkit.h | 43
-rw-r--r--  tools/src/sart.c | 219
-rw-r--r--  tools/src/sart.h | 16
-rw-r--r--  tools/src/sep.c | 68
-rw-r--r--  tools/src/sep.h | 12
-rw-r--r--  tools/src/smp.c | 296
-rw-r--r--  tools/src/smp.h | 41
-rw-r--r--  tools/src/soc.h | 25
-rw-r--r--  tools/src/start.S | 176
-rw-r--r--  tools/src/startup.c | 121
-rw-r--r--  tools/src/string.c | 209
-rw-r--r--  tools/src/tinf/adler32.c | 95
-rw-r--r--  tools/src/tinf/crc32.c | 57
-rw-r--r--  tools/src/tinf/tinf.h | 142
-rw-r--r--  tools/src/tinf/tinfgzip.c | 191
-rw-r--r--  tools/src/tinf/tinflate.c | 648
-rw-r--r--  tools/src/tinf/tinfzlib.c | 99
-rw-r--r--  tools/src/tps6598x.c | 172
-rw-r--r--  tools/src/tps6598x.h | 28
-rw-r--r--  tools/src/tunables.c | 124
-rw-r--r--  tools/src/tunables.h | 42
-rw-r--r--  tools/src/tunables_static.c | 105
-rw-r--r--  tools/src/types.h | 62
-rw-r--r--  tools/src/uart.c | 180
-rw-r--r--  tools/src/uart.h | 29
-rw-r--r--  tools/src/uart_regs.h | 32
-rw-r--r--  tools/src/uartproxy.c | 317
-rw-r--r--  tools/src/uartproxy.h | 45
-rw-r--r--  tools/src/usb.c | 343
-rw-r--r--  tools/src/usb.h | 18
-rw-r--r--  tools/src/usb_dwc3.c | 1416
-rw-r--r--  tools/src/usb_dwc3.h | 33
-rw-r--r--  tools/src/usb_dwc3_regs.h | 625
-rw-r--r--  tools/src/usb_types.h | 209
-rw-r--r--  tools/src/utils.c | 182
-rw-r--r--  tools/src/utils.h | 444
-rw-r--r--  tools/src/utils_asm.S | 182
-rw-r--r--  tools/src/vsprintf.c | 703
-rw-r--r--  tools/src/vsprintf.h | 11
-rw-r--r--  tools/src/wdt.c | 44
-rw-r--r--  tools/src/wdt.h | 9
-rw-r--r--  tools/src/xnuboot.h | 36
157 files changed, 40067 insertions, 0 deletions
diff --git a/tools/src/adt.c b/tools/src/adt.c
new file mode 100644
index 0000000..4189974
--- /dev/null
+++ b/tools/src/adt.c
@@ -0,0 +1,375 @@
+/* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) */
+
+#include "adt.h"
+#include "string.h"
+
+/* This API is designed to match libfdt's read-only API */
+
+#define ADT_CHECK_HEADER(adt) \
+ { \
+ int err; \
+ if ((err = adt_check_header(adt)) != 0) \
+ return err; \
+ }
+
+// #define DEBUG
+
+#ifdef DEBUG
+#include "utils.h"
+#define dprintf printf
+#else
+#define dprintf(...) \
+ do { \
+ } while (0)
+#endif
+
+int _adt_check_node_offset(const void *adt, int offset)
+{
+ if ((offset < 0) || (offset % ADT_ALIGN))
+ return -ADT_ERR_BADOFFSET;
+
+ const struct adt_node_hdr *node = ADT_NODE(adt, offset);
+
+ // Sanity check
+ if (node->property_count > 2048 || !node->property_count || node->child_count > 2048)
+ return -ADT_ERR_BADOFFSET;
+
+ return 0;
+}
+
+int _adt_check_prop_offset(const void *adt, int offset)
+{
+ if ((offset < 0) || (offset % ADT_ALIGN))
+ return -ADT_ERR_BADOFFSET;
+
+ const struct adt_property *prop = ADT_PROP(adt, offset);
+
+ if (prop->size & 0x7ff00000) // up to 1MB properties
+ return -ADT_ERR_BADOFFSET;
+
+ return 0;
+}
+
+int adt_check_header(const void *adt)
+{
+ return _adt_check_node_offset(adt, 0);
+}
+
+static int _adt_string_eq(const char *a, const char *b, size_t len)
+{
+ return (strlen(a) == len) && (memcmp(a, b, len) == 0);
+}
+
+static int _adt_nodename_eq(const char *a, const char *b, size_t len)
+{
+ if (memcmp(a, b, len) != 0)
+ return 0;
+
+ if (a[len] == '\0')
+ return 1;
+ else if (!memchr(b, '@', len) && (a[len] == '@'))
+ return 1;
+ else
+ return 0;
+}
+
+const struct adt_property *adt_get_property_namelen(const void *adt, int offset, const char *name,
+ size_t namelen)
+{
+ dprintf("adt_get_property_namelen(%p, %d, \"%s\", %u)\n", adt, offset, name, namelen);
+
+ ADT_FOREACH_PROPERTY(adt, offset, prop)
+ {
+ dprintf(" off=0x%x name=\"%s\"\n", offset, prop->name);
+ if (_adt_string_eq(prop->name, name, namelen))
+ return prop;
+ }
+
+ return NULL;
+}
+
+const struct adt_property *adt_get_property(const void *adt, int nodeoffset, const char *name)
+{
+ return adt_get_property_namelen(adt, nodeoffset, name, strlen(name));
+}
+
+const void *adt_getprop_namelen(const void *adt, int nodeoffset, const char *name, size_t namelen,
+ u32 *lenp)
+{
+ const struct adt_property *prop;
+
+ prop = adt_get_property_namelen(adt, nodeoffset, name, namelen);
+
+ if (!prop)
+ return NULL;
+
+ if (lenp)
+ *lenp = prop->size;
+
+ return prop->value;
+}
+
+const void *adt_getprop_by_offset(const void *adt, int offset, const char **namep, u32 *lenp)
+{
+ const struct adt_property *prop;
+
+ prop = adt_get_property_by_offset(adt, offset);
+ if (!prop)
+ return NULL;
+
+ if (namep)
+ *namep = prop->name;
+ if (lenp)
+ *lenp = prop->size;
+ return prop->value;
+}
+
+const void *adt_getprop(const void *adt, int nodeoffset, const char *name, u32 *lenp)
+{
+ return adt_getprop_namelen(adt, nodeoffset, name, strlen(name), lenp);
+}
+
+int adt_setprop(void *adt, int nodeoffset, const char *name, void *value, size_t len)
+{
+ u32 plen;
+ void *prop = (void *)adt_getprop(adt, nodeoffset, name, &plen);
+ if (!prop)
+ return -ADT_ERR_NOTFOUND;
+
+ if (len != plen)
+ return -ADT_ERR_BADLENGTH;
+
+ memcpy(prop, value, len);
+ return len;
+}
+
+int adt_getprop_copy(const void *adt, int nodeoffset, const char *name, void *out, size_t len)
+{
+ u32 plen;
+
+ const void *p = adt_getprop(adt, nodeoffset, name, &plen);
+
+ if (!p)
+ return -ADT_ERR_NOTFOUND;
+
+ if (plen != len)
+ return -ADT_ERR_BADLENGTH;
+
+ memcpy(out, p, len);
+ return len;
+}
+
+int adt_first_child_offset(const void *adt, int offset)
+{
+ const struct adt_node_hdr *node = ADT_NODE(adt, offset);
+
+ u32 cnt = node->property_count;
+ offset = adt_first_property_offset(adt, offset);
+
+ while (cnt--) {
+ offset = adt_next_property_offset(adt, offset);
+ }
+
+ return offset;
+}
+
+int adt_next_sibling_offset(const void *adt, int offset)
+{
+ const struct adt_node_hdr *node = ADT_NODE(adt, offset);
+
+ u32 cnt = node->child_count;
+ offset = adt_first_child_offset(adt, offset);
+
+ while (cnt--) {
+ offset = adt_next_sibling_offset(adt, offset);
+ }
+
+ return offset;
+}
+
+int adt_subnode_offset_namelen(const void *adt, int offset, const char *name, size_t namelen)
+{
+ ADT_CHECK_HEADER(adt);
+
+ ADT_FOREACH_CHILD(adt, offset)
+ {
+ const char *cname = adt_get_name(adt, offset);
+
+ if (_adt_nodename_eq(cname, name, namelen))
+ return offset;
+ }
+
+ return -ADT_ERR_NOTFOUND;
+}
+
+int adt_subnode_offset(const void *adt, int parentoffset, const char *name)
+{
+ return adt_subnode_offset_namelen(adt, parentoffset, name, strlen(name));
+}
+
+int adt_path_offset(const void *adt, const char *path)
+{
+ return adt_path_offset_trace(adt, path, NULL);
+}
+
+int adt_path_offset_trace(const void *adt, const char *path, int *offsets)
+{
+ const char *end = path + strlen(path);
+ const char *p = path;
+ int offset = 0;
+
+ ADT_CHECK_HEADER(adt);
+
+ while (*p) {
+ const char *q;
+
+ while (*p == '/')
+ p++;
+ if (!*p)
+ break;
+ q = strchr(p, '/');
+ if (!q)
+ q = end;
+
+ offset = adt_subnode_offset_namelen(adt, offset, p, q - p);
+ if (offset < 0)
+ break;
+
+ if (offsets)
+ *offsets++ = offset;
+
+ p = q;
+ }
+
+ if (offsets)
+ *offsets++ = 0;
+
+ return offset;
+}
+
+const char *adt_get_name(const void *adt, int nodeoffset)
+{
+ return adt_getprop(adt, nodeoffset, "name", NULL);
+}
+
+static void get_cells(u64 *dst, const u32 **src, int cells)
+{
+ *dst = 0;
+ for (int i = 0; i < cells; i++)
+ *dst |= ((u64) * ((*src)++)) << (32 * i);
+}
+
+int adt_get_reg(const void *adt, int *path, const char *prop, int idx, u64 *paddr, u64 *psize)
+{
+ int cur = 0;
+
+ if (!*path)
+ return -ADT_ERR_BADOFFSET;
+
+ while (path[cur + 1])
+ cur++;
+
+ int node = path[cur];
+ int parent = cur > 0 ? path[cur - 1] : 0;
+ u32 a_cells = 2, s_cells = 1;
+
+ ADT_GETPROP(adt, parent, "#address-cells", &a_cells);
+ ADT_GETPROP(adt, parent, "#size-cells", &s_cells);
+
+ dprintf("adt_get_reg: node '%s' @ %d, parent @ %d, address-cells=%d size-cells=%d idx=%d\n",
+ adt_get_name(adt, node), node, parent, a_cells, s_cells, idx);
+
+ if (a_cells < 1 || a_cells > 2 || s_cells > 2) {
+ dprintf("bad n-cells\n");
+        return -ADT_ERR_BADNCELLS;
+ }
+
+ u32 reg_len = 0;
+ const u32 *reg = adt_getprop(adt, node, prop, &reg_len);
+
+ if (!reg || !reg_len) {
+ dprintf("reg not found or empty\n");
+ return -ADT_ERR_NOTFOUND;
+ }
+
+ if (reg_len < (idx + 1) * (a_cells + s_cells) * 4) {
+ dprintf("bad reg property length %d\n", reg_len);
+ return -ADT_ERR_BADVALUE;
+ }
+
+ reg += idx * (a_cells + s_cells);
+
+ u64 addr, size = 0;
+ get_cells(&addr, &reg, a_cells);
+ get_cells(&size, &reg, s_cells);
+
+ dprintf(" addr=0x%lx size=0x%lx\n", addr, size);
+
+ while (parent) {
+ cur--;
+ node = parent;
+ parent = cur > 0 ? path[cur - 1] : 0;
+
+ dprintf(" walking up to %s\n", adt_get_name(adt, node));
+
+ u32 ranges_len;
+ const u32 *ranges = adt_getprop(adt, node, "ranges", &ranges_len);
+ if (!ranges)
+ break;
+
+ u32 pa_cells = 2, ps_cells = 1;
+ ADT_GETPROP(adt, parent, "#address-cells", &pa_cells);
+ ADT_GETPROP(adt, parent, "#size-cells", &ps_cells);
+
+ dprintf(" translate range to address-cells=%d size-cells=%d\n", pa_cells, ps_cells);
+
+ if (pa_cells < 1 || pa_cells > 2 || ps_cells > 2)
+            return -ADT_ERR_BADNCELLS;
+
+ int range_cnt = ranges_len / (4 * (pa_cells + a_cells + s_cells));
+
+ while (range_cnt--) {
+ u64 c_addr, p_addr, c_size;
+ get_cells(&c_addr, &ranges, a_cells);
+ get_cells(&p_addr, &ranges, pa_cells);
+ get_cells(&c_size, &ranges, s_cells);
+
+ dprintf(" ranges %lx %lx %lx\n", c_addr, p_addr, c_size);
+
+ if (addr >= c_addr && (addr + size) <= (c_addr + c_size)) {
+ dprintf(" translate %lx", addr);
+ addr = addr - c_addr + p_addr;
+ dprintf(" -> %lx\n", addr);
+ break;
+ }
+ }
+
+ a_cells = pa_cells;
+ s_cells = ps_cells;
+ }
+
+ if (paddr)
+ *paddr = addr;
+ if (psize)
+ *psize = size;
+
+ return 0;
+}
+
+bool adt_is_compatible(const void *adt, int nodeoffset, const char *compat)
+{
+ u32 len;
+ const char *list = adt_getprop(adt, nodeoffset, "compatible", &len);
+ if (!list)
+ return false;
+
+ const char *end = list + len;
+
+ while (list != end) {
+ if (!strcmp(list, compat))
+ return true;
+ list += strlen(list) + 1;
+ }
+
+ return false;
+}
diff --git a/tools/src/adt.h b/tools/src/adt.h
new file mode 100644
index 0000000..6373c86
--- /dev/null
+++ b/tools/src/adt.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) */
+
+#ifndef XDT_H
+#define XDT_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "types.h"
+
+#define ADT_ERR_NOTFOUND 1
+#define ADT_ERR_BADOFFSET 4
+#define ADT_ERR_BADPATH 5
+#define ADT_ERR_BADNCELLS 14
+#define ADT_ERR_BADVALUE 15
+#define ADT_ERR_BADLENGTH 20
+
+#define ADT_ALIGN 4
+
+extern void *adt;
+
+struct adt_property {
+ char name[32];
+ u32 size;
+ u8 value[];
+};
+
+struct adt_node_hdr {
+ u32 property_count;
+ u32 child_count;
+};
+
+#define ADT_NODE(adt, offset) ((const struct adt_node_hdr *)(((u8 *)(adt)) + (offset)))
+#define ADT_PROP(adt, offset) ((const struct adt_property *)(((u8 *)(adt)) + (offset)))
+#define ADT_SIZE(node) ((node)->size & 0x7fffffff)
+
+/* This API is designed to match libfdt's read-only API */
+
+/* Basic sanity check */
+int adt_check_header(const void *adt);
+
+static inline int adt_get_property_count(const void *adt, int offset)
+{
+ return ADT_NODE(adt, offset)->property_count;
+}
+
+static inline int adt_first_property_offset(const void *adt, int offset)
+{
+ UNUSED(adt);
+ return offset + sizeof(struct adt_node_hdr);
+}
+
+static inline int adt_next_property_offset(const void *adt, int offset)
+{
+ const struct adt_property *prop = ADT_PROP(adt, offset);
+ return offset + sizeof(struct adt_property) + ((prop->size + ADT_ALIGN - 1) & ~(ADT_ALIGN - 1));
+}
+
+static inline const struct adt_property *adt_get_property_by_offset(const void *adt, int offset)
+{
+ return ADT_PROP(adt, offset);
+}
+
+static inline int adt_get_child_count(const void *adt, int offset)
+{
+ return ADT_NODE(adt, offset)->child_count;
+}
+
+int adt_first_child_offset(const void *adt, int offset);
+int adt_next_sibling_offset(const void *adt, int offset);
+
+int adt_subnode_offset_namelen(const void *adt, int parentoffset, const char *name, size_t namelen);
+int adt_subnode_offset(const void *adt, int parentoffset, const char *name);
+int adt_path_offset(const void *adt, const char *path);
+int adt_path_offset_trace(const void *adt, const char *path, int *offsets);
+
+const char *adt_get_name(const void *adt, int nodeoffset);
+const struct adt_property *adt_get_property_namelen(const void *adt, int nodeoffset,
+ const char *name, size_t namelen);
+const struct adt_property *adt_get_property(const void *adt, int nodeoffset, const char *name);
+const void *adt_getprop_by_offset(const void *adt, int offset, const char **namep, u32 *lenp);
+const void *adt_getprop_namelen(const void *adt, int nodeoffset, const char *name, size_t namelen,
+ u32 *lenp);
+const void *adt_getprop(const void *adt, int nodeoffset, const char *name, u32 *lenp);
+int adt_setprop(void *adt, int nodeoffset, const char *name, void *value, size_t len);
+int adt_getprop_copy(const void *adt, int nodeoffset, const char *name, void *out, size_t len);
+
+#define ADT_GETPROP(adt, nodeoffset, name, val) \
+ adt_getprop_copy(adt, nodeoffset, name, (val), sizeof(*(val)))
+
+#define ADT_GETPROP_ARRAY(adt, nodeoffset, name, arr) \
+ adt_getprop_copy(adt, nodeoffset, name, (arr), sizeof(arr))
+
+int adt_get_reg(const void *adt, int *path, const char *prop, int idx, u64 *addr, u64 *size);
+bool adt_is_compatible(const void *adt, int nodeoffset, const char *compat);
+
+#define ADT_FOREACH_CHILD(adt, node) \
+ for (int _child_count = adt_get_child_count(adt, node); _child_count; _child_count = 0) \
+ for (node = adt_first_child_offset(adt, node); _child_count--; \
+ node = adt_next_sibling_offset(adt, node))
+
+#define ADT_FOREACH_PROPERTY(adt, node, prop) \
+ for (int _prop_count = adt_get_property_count(adt, node), \
+ _poff = adt_first_property_offset(adt, node); \
+ _prop_count; _prop_count = 0) \
+ for (const struct adt_property *prop = ADT_PROP(adt, _poff); _prop_count--; \
+ prop = ADT_PROP(adt, _poff = adt_next_property_offset(adt, _poff)))
+
+#endif
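
A minimal usage sketch for the API above (not part of this commit; the node path is illustrative). This mirrors how aic.c and asc.c later in this diff consume the ADT: trace a path, translate the "reg" entry through the parent bus ranges, then read raw properties libfdt-style:

    int path[8];
    int node = adt_path_offset_trace(adt, "/arm-io/uart0", path);
    if (node < 0)
        return node;

    // Translate reg entry 0 into a physical base address and size
    u64 base, size;
    if (adt_get_reg(adt, path, "reg", 0, &base, &size) == 0)
        printf("uart0: MMIO at 0x%lx (+0x%lx)\n", base, size);

    // Raw property access; lenp returns the property size in bytes
    u32 len;
    const char *compat = adt_getprop(adt, node, "compatible", &len);
    if (compat)
        printf("uart0: compatible \"%s\"\n", compat);
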
diff --git a/tools/src/afk.c b/tools/src/afk.c
new file mode 100644
index 0000000..7191a21
--- /dev/null
+++ b/tools/src/afk.c
@@ -0,0 +1,545 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "afk.h"
+#include "assert.h"
+#include "malloc.h"
+#include "string.h"
+#include "utils.h"
+
+struct afk_rb_hdr {
+ u32 bufsz;
+ u32 unk;
+ u32 _pad1[14];
+ u32 rptr;
+ u32 _pad2[15];
+ u32 wptr;
+ u32 _pad3[15];
+};
+
+struct afk_rb {
+ bool ready;
+ struct afk_rb_hdr *hdr;
+ u32 rptr;
+ void *buf;
+ size_t bufsz;
+};
+
+enum EPICType {
+ TYPE_NOTIFY = 0,
+ TYPE_COMMAND = 3,
+ TYPE_REPLY = 4,
+ TYPE_NOTIFY_ACK = 8,
+};
+
+enum EPICCategory {
+ CAT_REPORT = 0x00,
+ CAT_NOTIFY = 0x10,
+ CAT_REPLY = 0x20,
+ CAT_COMMAND = 0x30,
+};
+
+enum EPICMessage {
+ CODE_ANNOUNCE = 0x30,
+};
+
+struct afk_qe {
+ u32 magic;
+ u32 size;
+ u32 channel;
+ u32 type;
+ u8 data[];
+};
+
+struct epic_hdr {
+ u8 version;
+ u16 seq;
+ u8 _pad;
+ u32 unk;
+ u64 timestamp;
+} PACKED;
+
+struct epic_sub_hdr {
+ u32 length;
+ u8 version;
+ u8 category;
+ u16 code;
+ u64 timestamp;
+ u16 seq;
+ u16 unk;
+ u32 unk2;
+} PACKED;
+
+struct epic_announce {
+ char name[32];
+ u8 props[];
+} PACKED;
+
+struct epic_cmd {
+ u32 retcode;
+ u64 rxbuf;
+ u64 txbuf;
+ u32 rxlen;
+ u32 txlen;
+} PACKED;
+
+struct afk_epic_ep {
+ int ep;
+ rtkit_dev_t *rtk;
+
+ struct rtkit_buffer buf;
+ u16 tag;
+
+ struct afk_rb tx;
+ struct afk_rb rx;
+
+ struct rtkit_buffer txbuf;
+ struct rtkit_buffer rxbuf;
+
+ bool started;
+};
+
+enum RBEP_MSG {
+ RBEP_INIT = 0x80,
+ RBEP_INIT_ACK = 0xa0,
+ RBEP_GETBUF = 0x89,
+ RBEP_GETBUF_ACK = 0xa1,
+ RBEP_INIT_TX = 0x8a,
+ RBEP_INIT_RX = 0x8b,
+ RBEP_START = 0xa3,
+ RBEP_START_ACK = 0x86,
+ RBEP_SEND = 0xa2,
+ RBEP_RECV = 0x85,
+ RBEP_SHUTDOWN = 0xc0,
+ RBEP_SHUTDOWN_ACK = 0xc1,
+};
+
+#define BLOCK_SHIFT 6
+#define QE_MAGIC ' POI'
+
+#define RBEP_TYPE GENMASK(63, 48)
+
+#define GETBUF_SIZE GENMASK(31, 16)
+#define GETBUF_TAG GENMASK(15, 0)
+#define GETBUF_ACK_DVA GENMASK(47, 0)
+
+#define INITRB_OFFSET GENMASK(47, 32)
+#define INITRB_SIZE GENMASK(31, 16)
+#define INITRB_TAG GENMASK(15, 0)
+
+#define SEND_WPTR GENMASK(31, 0)
+
+bool afk_rb_init(afk_epic_ep_t *epic, struct afk_rb *rb, u64 base, u64 size)
+{
+ rb->hdr = epic->buf.bfr + base;
+
+ if (rb->hdr->bufsz + sizeof(*rb->hdr) != size) {
+ printf("AFK: ring buffer size mismatch\n");
+ return false;
+ }
+
+ rb->buf = rb->hdr + 1;
+ rb->bufsz = rb->hdr->bufsz;
+ rb->ready = true;
+
+ return true;
+}
+
+static int afk_epic_poll(afk_epic_ep_t *epic)
+{
+ int ret;
+ struct rtkit_message msg;
+
+ while ((ret = rtkit_recv(epic->rtk, &msg)) == 0)
+ ;
+
+ if (ret < 0) {
+ printf("EPIC: rtkit_recv failed!\n");
+ return ret;
+ }
+
+ if (msg.ep != epic->ep) {
+ printf("EPIC: received message for unexpected endpoint %d\n", msg.ep);
+ return 0;
+ }
+
+ int type = FIELD_GET(RBEP_TYPE, msg.msg);
+ u64 base, size, tag;
+ switch (type) {
+ case RBEP_INIT_ACK:
+ break;
+
+ case RBEP_GETBUF:
+ size = FIELD_GET(GETBUF_SIZE, msg.msg) << BLOCK_SHIFT;
+ epic->tag = FIELD_GET(GETBUF_TAG, msg.msg);
+ if (!rtkit_alloc_buffer(epic->rtk, &epic->buf, size)) {
+ printf("EPIC: failed to allocate buffer\n");
+ return -1;
+ }
+ msg.msg = (FIELD_PREP(RBEP_TYPE, RBEP_GETBUF_ACK) |
+ FIELD_PREP(GETBUF_ACK_DVA, epic->buf.dva));
+ if (!rtkit_send(epic->rtk, &msg)) {
+ printf("EPIC: failed to send buffer address\n");
+ return -1;
+ }
+ break;
+
+ case RBEP_INIT_TX:
+ case RBEP_INIT_RX:
+ base = FIELD_GET(INITRB_OFFSET, msg.msg) << BLOCK_SHIFT;
+ size = FIELD_GET(INITRB_SIZE, msg.msg) << BLOCK_SHIFT;
+ tag = FIELD_GET(INITRB_TAG, msg.msg);
+ if (tag != epic->tag) {
+ printf("EPIC: wrong tag (0x%x != 0x%lx)\n", epic->tag, tag);
+ return -1;
+ }
+
+ struct afk_rb *rb;
+ if (type == RBEP_INIT_RX)
+ rb = &epic->rx;
+ else
+ rb = &epic->tx;
+
+ if (!afk_rb_init(epic, rb, base, size))
+ return -1;
+
+ if (epic->rx.ready && epic->tx.ready) {
+ msg.msg = FIELD_PREP(RBEP_TYPE, RBEP_START);
+ if (!rtkit_send(epic->rtk, &msg)) {
+ printf("EPIC: failed to send start\n");
+ return -1;
+ }
+ }
+ break;
+
+ case RBEP_RECV:
+ return 1;
+
+ case RBEP_START_ACK:
+ epic->started = true;
+ break;
+
+ case RBEP_SHUTDOWN_ACK:
+ epic->started = false;
+ break;
+
+ default:
+ printf("EPIC: received unknown message type 0x%x\n", type);
+        return 0;
+ }
+
+ return 0;
+}
+
+static int afk_epic_rx(afk_epic_ep_t *epic, struct afk_qe **qe)
+{
+ int ret;
+ struct afk_rb *rb = &epic->rx;
+
+ u32 rptr = rb->hdr->rptr;
+
+ while (rptr == rb->hdr->wptr) {
+ do {
+ ret = afk_epic_poll(epic);
+ if (ret < 0)
+ return ret;
+ } while (ret == 0);
+ dma_rmb();
+ }
+
+ struct afk_qe *hdr = rb->buf + rptr;
+
+ if (hdr->magic != QE_MAGIC) {
+ printf("EPIC: bad queue entry magic!\n");
+ return -1;
+ }
+
+ if (rptr + hdr->size > rb->bufsz) {
+ rptr = 0;
+ hdr = rb->buf + rptr;
+ if (hdr->magic != QE_MAGIC) {
+ printf("EPIC: bad queue entry magic!\n");
+ return -1;
+ }
+ rb->hdr->rptr = rptr;
+ }
+
+ *qe = hdr;
+
+ return 1;
+}
+
+static int afk_epic_tx(afk_epic_ep_t *epic, u32 channel, u32 type, void *data, size_t size)
+{
+ struct afk_rb *rb = &epic->tx;
+
+ u32 rptr = rb->hdr->rptr;
+ u32 wptr = rb->hdr->wptr;
+ struct afk_qe *hdr = rb->buf + wptr;
+
+ if (wptr < rptr && (wptr + sizeof(struct afk_qe) > rptr)) {
+ printf("EPIC: TX ring buffer is full\n");
+ return -1;
+ }
+
+ hdr->magic = QE_MAGIC;
+ hdr->channel = channel;
+ hdr->type = type;
+ hdr->size = size;
+
+ wptr += sizeof(struct afk_qe);
+
+ if (size > rb->bufsz - wptr) {
+ if (rptr < sizeof(struct afk_qe)) {
+ printf("EPIC: TX ring buffer is full\n");
+ return -1;
+ }
+ *(struct afk_qe *)rb->buf = *hdr;
+ hdr = rb->buf;
+ wptr = sizeof(struct afk_qe);
+ }
+
+ if (wptr < rptr && (wptr + size > rptr)) {
+ printf("EPIC: TX ring buffer is full\n");
+ return -1;
+ }
+
+ wptr += size;
+ wptr = ALIGN_UP(wptr, 1 << BLOCK_SHIFT);
+
+ memcpy(hdr + 1, data, size);
+
+ dma_mb();
+ rb->hdr->wptr = wptr;
+ dma_wmb();
+
+ struct rtkit_message msg = {
+ epic->ep,
+ FIELD_PREP(RBEP_TYPE, RBEP_SEND) | FIELD_PREP(SEND_WPTR, wptr),
+ };
+
+ if (!rtkit_send(epic->rtk, &msg)) {
+ printf("EPIC: failed to send TX WPTR message\n");
+ return -1;
+ }
+
+ return 1;
+}
+
+static void afk_epic_rx_ack(afk_epic_ep_t *epic)
+{
+ struct afk_rb *rb = &epic->rx;
+ u32 rptr = rb->hdr->rptr;
+ struct afk_qe *hdr = rb->buf + rptr;
+
+ if (hdr->magic != QE_MAGIC) {
+ printf("EPIC: bad queue entry magic!\n");
+ }
+
+ dma_mb();
+
+ rptr = ALIGN_UP(rptr + sizeof(*hdr) + hdr->size, 1 << BLOCK_SHIFT);
+    assert(rptr <= rb->bufsz);
+ if (rptr == rb->bufsz)
+ rptr = 0;
+ rb->hdr->rptr = rptr;
+}
+
+int afk_epic_command(afk_epic_ep_t *epic, int channel, u16 code, void *txbuf, size_t txsize,
+ void *rxbuf, size_t *rxsize)
+{
+ struct {
+ struct epic_hdr hdr;
+ struct epic_sub_hdr sub;
+ struct epic_cmd cmd;
+ } PACKED msg;
+
+ assert(txsize <= epic->txbuf.sz);
+ assert(!rxsize || *rxsize <= epic->rxbuf.sz);
+
+ memset(&msg, 0, sizeof(msg));
+
+ msg.hdr.version = 2;
+ msg.hdr.seq = 0;
+ msg.sub.length = sizeof(msg.cmd);
+ msg.sub.version = 3;
+ msg.sub.category = CAT_COMMAND;
+ msg.sub.code = code;
+ msg.sub.seq = 0;
+ msg.cmd.txbuf = epic->txbuf.dva;
+ msg.cmd.txlen = txsize;
+ msg.cmd.rxbuf = epic->rxbuf.dva;
+ msg.cmd.rxlen = rxsize ? *rxsize : 0;
+
+ memcpy(epic->txbuf.bfr, txbuf, txsize);
+
+ int ret = afk_epic_tx(epic, channel, TYPE_COMMAND, &msg, sizeof msg);
+ if (ret < 0) {
+ printf("EPIC: failed to transmit command\n");
+ return ret;
+ }
+
+ struct afk_qe *rmsg;
+ struct epic_cmd *rcmd;
+
+ while (true) {
+ ret = afk_epic_rx(epic, &rmsg);
+ if (ret < 0)
+ return ret;
+
+ if (rmsg->type != TYPE_REPLY && rmsg->type != TYPE_NOTIFY) {
+ printf("EPIC: got unexpected message type %d during command\n", rmsg->type);
+ afk_epic_rx_ack(epic);
+ continue;
+ }
+
+ struct epic_hdr *hdr = (void *)(rmsg + 1);
+ struct epic_sub_hdr *sub = (void *)(hdr + 1);
+
+ if (sub->category != CAT_REPLY || sub->code != code) {
+ printf("EPIC: got unexpected message %02x:%04x during command\n", sub->category,
+ sub->code);
+ afk_epic_rx_ack(epic);
+ continue;
+ }
+
+ rcmd = (void *)(sub + 1);
+ break;
+ }
+
+ if (rcmd->retcode != 0) {
+ printf("EPIC: IOP returned 0x%x\n", rcmd->retcode);
+ afk_epic_rx_ack(epic);
+ return rcmd->retcode; // should be negative already
+ }
+
+    assert(!rxsize || *rxsize >= rcmd->rxlen);
+    if (rxsize)
+        *rxsize = rcmd->rxlen;
+
+ if (rxsize && *rxsize && rcmd->rxbuf)
+ memcpy(rxbuf, epic->rxbuf.bfr, *rxsize);
+
+ afk_epic_rx_ack(epic);
+
+ return 0;
+}
+
+afk_epic_ep_t *afk_epic_init(rtkit_dev_t *rtk, int endpoint)
+{
+ afk_epic_ep_t *epic = malloc(sizeof(afk_epic_ep_t));
+ if (!epic)
+ return NULL;
+
+ memset(epic, 0, sizeof(*epic));
+ epic->ep = endpoint;
+ epic->rtk = rtk;
+
+ if (!rtkit_start_ep(rtk, endpoint)) {
+ printf("EPIC: failed to start endpoint %d\n", endpoint);
+ goto err;
+ }
+
+ struct rtkit_message msg = {endpoint, FIELD_PREP(RBEP_TYPE, RBEP_INIT)};
+ if (!rtkit_send(rtk, &msg)) {
+ printf("EPIC: failed to send init message\n");
+ goto err;
+ }
+
+ while (!epic->started) {
+ int ret = afk_epic_poll(epic);
+ if (ret < 0)
+ break;
+ else if (ret > 0)
+ printf("EPIC: received unexpected message during init\n");
+ }
+
+ return epic;
+
+err:
+ free(epic);
+ return NULL;
+}
+
+int afk_epic_shutdown(afk_epic_ep_t *epic)
+{
+ struct rtkit_message msg = {epic->ep, FIELD_PREP(RBEP_TYPE, RBEP_SHUTDOWN)};
+ if (!rtkit_send(epic->rtk, &msg)) {
+ printf("EPIC: failed to send shutdown message\n");
+ return -1;
+ }
+
+ while (epic->started) {
+ int ret = afk_epic_poll(epic);
+ if (ret < 0)
+ break;
+ }
+
+ rtkit_free_buffer(epic->rtk, &epic->buf);
+ rtkit_free_buffer(epic->rtk, &epic->rxbuf);
+ rtkit_free_buffer(epic->rtk, &epic->txbuf);
+
+ free(epic);
+ return 0;
+}
+
+int afk_epic_start_interface(afk_epic_ep_t *epic, char *name, size_t txsize, size_t rxsize)
+{
+ int channel = -1;
+ struct afk_qe *msg;
+ struct epic_announce *announce;
+
+ for (int tries = 0; tries < 20; tries += 1) {
+
+ int ret = afk_epic_rx(epic, &msg);
+ if (ret < 0)
+ return ret;
+
+ if (msg->type != TYPE_NOTIFY) {
+ printf("EPIC: got unexpected message type %d during iface start\n", msg->type);
+ afk_epic_rx_ack(epic);
+ continue;
+ }
+
+ struct epic_hdr *hdr = (void *)(msg + 1);
+ struct epic_sub_hdr *sub = (void *)(hdr + 1);
+
+ if (sub->category != CAT_REPORT || sub->code != CODE_ANNOUNCE) {
+ printf("EPIC: got unexpected message %02x:%04x during iface start\n", sub->category,
+ sub->code);
+ afk_epic_rx_ack(epic);
+ continue;
+ }
+
+ announce = (void *)(sub + 1);
+
+ if (strncmp(name, announce->name, sizeof(announce->name))) {
+ printf("EPIC: ignoring channel %d: %s\n", msg->channel, announce->name);
+ afk_epic_rx_ack(epic);
+ continue;
+ }
+
+ channel = msg->channel;
+ break;
+ }
+
+ if (channel == -1) {
+ printf("EPIC: too many unexpected messages, giving up\n");
+ return -1;
+ }
+
+ if (!rtkit_alloc_buffer(epic->rtk, &epic->rxbuf, rxsize)) {
+ printf("EPIC: failed to allocate rx buffer\n");
+ return -1;
+ }
+
+ if (!rtkit_alloc_buffer(epic->rtk, &epic->txbuf, txsize)) {
+ printf("EPIC: failed to allocate tx buffer\n");
+ return -1;
+ }
+
+ printf("EPIC: started interface %d (%s)\n", msg->channel, announce->name);
+
+ afk_epic_rx_ack(epic);
+
+ return channel;
+}
diff --git a/tools/src/afk.h b/tools/src/afk.h
new file mode 100644
index 0000000..e76ade3
--- /dev/null
+++ b/tools/src/afk.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef DCP_AFK_H
+#define DCP_AFK_H
+
+#include "rtkit.h"
+
+typedef struct afk_epic_ep afk_epic_ep_t;
+
+afk_epic_ep_t *afk_epic_init(rtkit_dev_t *rtkit, int endpoint);
+int afk_epic_shutdown(afk_epic_ep_t *epic);
+
+int afk_epic_start_interface(afk_epic_ep_t *epic, char *name, size_t insize, size_t outsize);
+int afk_epic_command(afk_epic_ep_t *epic, int channel, u16 code, void *txbuf, size_t txsize,
+ void *rxbuf, size_t *rxsize);
+
+#endif
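
A rough lifecycle sketch for this endpoint API (not part of the commit; the endpoint number, interface name, command code, and buffer sizes are illustrative placeholders, and rtk is an already-initialized RTKit device handle):

    afk_epic_ep_t *epic = afk_epic_init(rtk, 0x20); // hypothetical endpoint
    if (!epic)
        return -1;

    // Waits for the IOP's announce report and allocates the shared tx/rx buffers
    int channel = afk_epic_start_interface(epic, "some-service", SZ_16K, SZ_16K);
    if (channel < 0) {
        afk_epic_shutdown(epic);
        return -1;
    }

    u8 req[16] = {0}, reply[64];
    size_t rxsize = sizeof(reply);
    // Round-trips a CAT_COMMAND message and copies the reply out of rxbuf
    int ret = afk_epic_command(epic, channel, 0x8 /* hypothetical code */, req, sizeof(req),
                               reply, &rxsize);

    afk_epic_shutdown(epic);
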
diff --git a/tools/src/aic.c b/tools/src/aic.c
new file mode 100644
index 0000000..6974aac
--- /dev/null
+++ b/tools/src/aic.c
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "aic.h"
+#include "adt.h"
+#include "aic_regs.h"
+#include "assert.h"
+#include "utils.h"
+
+#define MASK_REG(x) (4 * ((x) >> 5))
+#define MASK_BIT(x) BIT((x)&GENMASK(4, 0))
+
+static struct aic aic1 = {
+ .version = 1,
+ .nr_die = 1,
+ .max_die = 1,
+ .regs =
+ {
+ .reg_size = AIC_REG_SIZE,
+ .event = AIC_EVENT,
+ .tgt_cpu = AIC_TARGET_CPU,
+ .sw_set = AIC_SW_SET,
+ .sw_clr = AIC_SW_CLR,
+ .mask_set = AIC_MASK_SET,
+ .mask_clr = AIC_MASK_CLR,
+ },
+};
+
+static struct aic aic2 = {
+ .version = 2,
+ .regs =
+ {
+ .config = AIC2_IRQ_CFG,
+ },
+};
+
+struct aic *aic;
+
+static int aic2_init(int node)
+{
+ int ret = ADT_GETPROP(adt, node, "aic-iack-offset", &aic->regs.event);
+ if (ret < 0) {
+ printf("AIC: failed to get property aic-iack-offset\n");
+ return ret;
+ }
+
+ u32 info1 = read32(aic->base + AIC2_INFO1);
+ aic->nr_die = FIELD_GET(AIC2_INFO1_LAST_DIE, info1) + 1;
+ aic->nr_irq = FIELD_GET(AIC2_INFO1_NR_IRQ, info1);
+
+ u32 info3 = read32(aic->base + AIC2_INFO3);
+ aic->max_die = FIELD_GET(AIC2_INFO3_MAX_DIE, info3);
+ aic->max_irq = FIELD_GET(AIC2_INFO3_MAX_IRQ, info3);
+
+ if (aic->nr_die > AIC_MAX_DIES) {
+ printf("AIC: more dies than supported: %u\n", aic->max_die);
+ return -1;
+ }
+
+ if (aic->max_irq > AIC_MAX_HW_NUM) {
+ printf("AIC: more IRQs than supported: %u\n", aic->max_irq);
+ return -1;
+ }
+
+ const u64 start_off = aic->regs.config;
+ u64 off = start_off + sizeof(u32) * aic->max_irq; /* IRQ_CFG */
+
+ aic->regs.sw_set = off;
+ off += sizeof(u32) * (aic->max_irq >> 5); /* SW_SET */
+ aic->regs.sw_clr = off;
+ off += sizeof(u32) * (aic->max_irq >> 5); /* SW_CLR */
+ aic->regs.mask_set = off;
+ off += sizeof(u32) * (aic->max_irq >> 5); /* MASK_SET */
+ aic->regs.mask_clr = off;
+ off += sizeof(u32) * (aic->max_irq >> 5); /* MASK_CLR */
+ off += sizeof(u32) * (aic->max_irq >> 5); /* HW_STATE */
+
+ aic->die_stride = off - start_off;
+ aic->regs.reg_size = aic->regs.event + 4;
+
+ printf("AIC: AIC2 with %u/%u dies, %u/%u IRQs, reg_size:%05lx die_stride:%05x\n", aic->nr_die,
+ aic->max_die, aic->nr_irq, aic->max_irq, aic->regs.reg_size, aic->die_stride);
+
+ u32 ext_intr_config_len;
+ const u8 *ext_intr_config = adt_getprop(adt, node, "aic-ext-intr-cfg", &ext_intr_config_len);
+
+ if (ext_intr_config) {
+ printf("AIC: Configuring %d external interrupts\n", ext_intr_config_len / 3);
+ for (u32 i = 0; i < ext_intr_config_len; i += 3) {
+ u8 die = ext_intr_config[i + 1] >> 4;
+ u16 irq = ext_intr_config[i] | ((ext_intr_config[i + 1] & 0xf) << 8);
+ u8 target = ext_intr_config[i + 2];
+ assert(die < aic->nr_die);
+ assert(irq < aic->nr_irq);
+ mask32(aic->base + aic->regs.config + die * aic->die_stride + 4 * irq,
+ AIC2_IRQ_CFG_TARGET, FIELD_PREP(AIC2_IRQ_CFG_TARGET, target));
+ }
+ }
+
+ return 0;
+}
+
+void aic_init(void)
+{
+ int path[8];
+ int node = adt_path_offset_trace(adt, "/arm-io/aic", path);
+
+ if (node < 0) {
+ printf("AIC node not found!\n");
+ return;
+ }
+
+ if (adt_is_compatible(adt, node, "aic,1")) {
+ aic = &aic1;
+ } else if (adt_is_compatible(adt, node, "aic,2")) {
+ aic = &aic2;
+ } else {
+ printf("AIC: Error: Unsupported version\n");
+ return;
+ }
+
+ if (adt_get_reg(adt, path, "reg", 0, &aic->base, NULL)) {
+ printf("Failed to get AIC reg property!\n");
+ return;
+ }
+
+ if (aic->version == 1) {
+ printf("AIC: Version 1 @ 0x%lx\n", aic->base);
+ aic->nr_irq = FIELD_GET(AIC_INFO_NR_HW, read32(aic->base + AIC_INFO));
+ aic->max_irq = AIC1_MAX_IRQ;
+ } else if (aic->version == 2) {
+ printf("AIC: Version 2 @ 0x%lx\n", aic->base);
+ int ret = aic2_init(node);
+ if (ret < 0)
+ aic = NULL;
+ }
+}
+
+void aic_set_sw(int irq, bool active)
+{
+ u32 die = irq / aic->max_irq;
+ irq = irq % aic->max_irq;
+ if (active)
+ write32(aic->base + aic->regs.sw_set + die * aic->die_stride + MASK_REG(irq),
+ MASK_BIT(irq));
+ else
+ write32(aic->base + aic->regs.sw_clr + die * aic->die_stride + MASK_REG(irq),
+ MASK_BIT(irq));
+}
+
+uint32_t aic_ack(void)
+{
+ return read32(aic->base + aic->regs.event);
+}
diff --git a/tools/src/aic.h b/tools/src/aic.h
new file mode 100644
index 0000000..1f401b1
--- /dev/null
+++ b/tools/src/aic.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef AIC_H
+#define AIC_H
+
+#include "types.h"
+
+#define AIC_MAX_DIES 4
+
+struct aic_regs {
+ uint64_t reg_size;
+ uint64_t event;
+ uint64_t tgt_cpu;
+ uint64_t config;
+ uint64_t sw_set;
+ uint64_t sw_clr;
+ uint64_t mask_set;
+ uint64_t mask_clr;
+};
+
+struct aic {
+ uint64_t base;
+ uint32_t version;
+
+ uint32_t nr_irq;
+ uint32_t nr_die;
+ uint32_t max_irq;
+ uint32_t max_die;
+ uint32_t die_stride;
+
+ struct aic_regs regs;
+};
+
+extern struct aic *aic;
+
+void aic_init(void);
+void aic_set_sw(int irq, bool active);
+uint32_t aic_ack(void);
+
+#endif
diff --git a/tools/src/aic_regs.h b/tools/src/aic_regs.h
new file mode 100644
index 0000000..8cc360b
--- /dev/null
+++ b/tools/src/aic_regs.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: MIT */
+
+#define AIC_REG_SIZE 0x8000
+#define AIC_INFO 0x0004
+#define AIC_WHOAMI 0x2000
+#define AIC_EVENT 0x2004
+#define AIC_IPI_SEND 0x2008
+#define AIC_IPI_ACK 0x200c
+#define AIC_IPI_MASK_SET 0x2024
+#define AIC_IPI_MASK_CLR 0x2028
+#define AIC_TARGET_CPU 0x3000
+#define AIC_SW_SET 0x4000
+#define AIC_SW_CLR 0x4080
+#define AIC_MASK_SET 0x4100
+#define AIC_MASK_CLR 0x4180
+
+#define AIC_CPU_IPI_SET(cpu) (0x5008 + ((cpu) << 7))
+#define AIC_CPU_IPI_CLR(cpu) (0x500c + ((cpu) << 7))
+#define AIC_CPU_IPI_MASK_SET(cpu) (0x5024 + ((cpu) << 7))
+#define AIC_CPU_IPI_MASK_CLR(cpu) (0x5028 + ((cpu) << 7))
+
+#define AIC2_INFO1 0x0004
+#define AIC2_INFO2 0x0008
+#define AIC2_INFO3 0x000c
+#define AIC2_LATENCY 0x0204
+#define AIC2_IRQ_CFG 0x2000
+
+#define AIC2_IRQ_CFG_TARGET GENMASK(3, 0)
+
+#define AIC_INFO_NR_HW GENMASK(15, 0)
+
+#define AIC2_INFO1_NR_IRQ GENMASK(15, 0)
+#define AIC2_INFO1_LAST_DIE GENMASK(27, 24)
+
+#define AIC2_INFO3_MAX_IRQ GENMASK(15, 0)
+#define AIC2_INFO3_MAX_DIE GENMASK(27, 24)
+
+#define AIC_EVENT_DIE GENMASK(31, 24)
+#define AIC_EVENT_TYPE GENMASK(23, 16)
+#define AIC_EVENT_NUM GENMASK(15, 0)
+
+#define AIC_EVENT_TYPE_HW 1
+#define AIC_EVENT_TYPE_IPI 4
+#define AIC_EVENT_IPI_OTHER 1
+#define AIC_EVENT_IPI_SELF 2
+
+#define AIC_IPI_SEND_CPU(cpu) BIT(cpu)
+
+#define AIC_IPI_OTHER BIT(0)
+#define AIC_IPI_SELF BIT(31)
+
+#define AIC1_MAX_IRQ 0x400
+#define AIC_MAX_HW_NUM (0x80 * 32) // max_irq of the M1 Max
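
For reference, an event word read from aic_ack() decomposes under the masks above like this (a sketch; FIELD_GET is the usual bitfield helper from utils.h, as used in aic.c):

    u32 event = aic_ack();
    u32 die  = FIELD_GET(AIC_EVENT_DIE, event);
    u32 type = FIELD_GET(AIC_EVENT_TYPE, event);
    u32 num  = FIELD_GET(AIC_EVENT_NUM, event);

    if (type == AIC_EVENT_TYPE_HW)
        printf("HW IRQ %u on die %u\n", num, die);
    else if (type == AIC_EVENT_TYPE_IPI)
        printf("IPI (%s)\n", num == AIC_EVENT_IPI_SELF ? "self" : "other");
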
diff --git a/tools/src/arm_cpu_regs.h b/tools/src/arm_cpu_regs.h
new file mode 100644
index 0000000..06cc919
--- /dev/null
+++ b/tools/src/arm_cpu_regs.h
@@ -0,0 +1,338 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "types.h"
+
+#define SYS_ACTLR_EL1 sys_reg(3, 0, 1, 0, 1)
+#define SYS_ACTLR_EL2 sys_reg(3, 4, 1, 0, 1)
+#define SYS_ACTLR_EL3 sys_reg(3, 6, 1, 0, 1)
+
+#define SYS_CNTHCTL_EL2 sys_reg(3, 4, 14, 1, 0)
+// HCR_EL2.E2H == 1
+#define CNTHCTL_EVNTIS BIT(17)
+#define CNTHCTL_EL1NVVCT BIT(16)
+#define CNTHCTL_EL1NVPCT BIT(15)
+#define CNTHCTL_EL1TVCT BIT(14)
+#define CNTHCTL_EL1TVT BIT(13)
+#define CNTHCTL_ECV BIT(12)
+#define CNTHCTL_EL1PTEN BIT(11)
+#define CNTHCTL_EL1PCTEN BIT(10)
+#define CNTHCTL_EL0PTEN BIT(9)
+#define CNTHCTL_EL0VTEN BIT(8)
+#define CNTHCTL_EVNTI GENMASK(7, 4)
+#define CNTHCTL_EVNTDIR BIT(3)
+#define CNTHCTL_EVNTEN BIT(2)
+#define CNTHCTL_EL0VCTEN BIT(1)
+#define CNTHCTL_EL0PCTEN BIT(0)
+
+#define SYS_CNTV_CTL_EL0 sys_reg(3, 3, 14, 3, 1)
+#define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1)
+#define SYS_CNTHV_CTL_EL2 sys_reg(3, 4, 14, 3, 1)
+#define SYS_CNTHP_CTL_EL2 sys_reg(3, 4, 14, 2, 1)
+#define CNTx_CTL_ISTATUS BIT(2)
+#define CNTx_CTL_IMASK BIT(1)
+#define CNTx_CTL_ENABLE BIT(0)
+
+#define SYS_ESR_EL2 sys_reg(3, 4, 5, 2, 0)
+#define ESR_ISS2 GENMASK(36, 32)
+#define ESR_EC GENMASK(31, 26)
+#define ESR_IL BIT(25)
+#define ESR_ISS GENMASK(24, 0)
+
+#define ESR_EC_UNKNOWN 0b000000
+#define ESR_EC_WFI 0b000001
+#define ESR_EC_FP_TRAP 0b000111
+#define ESR_EC_PAUTH_TRAP 0b001000
+#define ESR_EC_LS64 0b001010
+#define ESR_EC_BTI 0b001101
+#define ESR_EC_ILLEGAL 0b001110
+#define ESR_EC_SVC 0b010101
+#define ESR_EC_HVC 0b010110
+#define ESR_EC_SMC 0b010111
+#define ESR_EC_MSR 0b011000
+#define ESR_EC_SVE 0b011001
+#define ESR_EC_PAUTH_FAIL 0b011100
+#define ESR_EC_IABORT_LOWER 0b100000
+#define ESR_EC_IABORT 0b100001
+#define ESR_EC_PC_ALIGN 0b100010
+#define ESR_EC_DABORT_LOWER 0b100100
+#define ESR_EC_DABORT 0b100101
+#define ESR_EC_SP_ALIGN 0b100110
+#define ESR_EC_FP_EXC 0b101100
+#define ESR_EC_SERROR 0b101111
+#define ESR_EC_BKPT_LOWER 0b110000
+#define ESR_EC_BKPT 0b110001
+#define ESR_EC_SSTEP_LOWER 0b110010
+#define ESR_EC_SSTEP 0b110011
+#define ESR_EC_WATCH_LOWER 0b110100
+#define ESR_EC_WATCH 0b110101
+#define ESR_EC_BRK 0b111100
+
+#define ESR_ISS_DABORT_ISV BIT(24)
+#define ESR_ISS_DABORT_SAS GENMASK(23, 22)
+#define ESR_ISS_DABORT_SSE BIT(21)
+#define ESR_ISS_DABORT_SRT GENMASK(20, 16)
+#define ESR_ISS_DABORT_SF BIT(15)
+#define ESR_ISS_DABORT_AR BIT(14)
+#define ESR_ISS_DABORT_VNCR BIT(13)
+#define ESR_ISS_DABORT_SET GENMASK(12, 11)
+#define ESR_ISS_DABORT_LSR GENMASK(12, 11)
+#define ESR_ISS_DABORT_FnV BIT(10)
+#define ESR_ISS_DABORT_EA BIT(9)
+#define ESR_ISS_DABORT_CM BIT(8)
+#define ESR_ISS_DABORT_S1PTR BIT(7)
+#define ESR_ISS_DABORT_WnR BIT(6)
+#define ESR_ISS_DABORT_DFSC GENMASK(5, 0)
+
+#define SAS_8B 0
+#define SAS_16B 1
+#define SAS_32B 2
+#define SAS_64B 3
+
+#define ESR_ISS_MSR_OP0 GENMASK(21, 20)
+#define ESR_ISS_MSR_OP0_SHIFT 20
+#define ESR_ISS_MSR_OP2 GENMASK(19, 17)
+#define ESR_ISS_MSR_OP2_SHIFT 17
+#define ESR_ISS_MSR_OP1 GENMASK(16, 14)
+#define ESR_ISS_MSR_OP1_SHIFT 14
+#define ESR_ISS_MSR_CRn GENMASK(13, 10)
+#define ESR_ISS_MSR_CRn_SHIFT 10
+#define ESR_ISS_MSR_Rt GENMASK(9, 5)
+#define ESR_ISS_MSR_CRm GENMASK(4, 1)
+#define ESR_ISS_MSR_CRm_SHIFT 1
+#define ESR_ISS_MSR_DIR BIT(0)
+
+#define SYS_HCR_EL2 sys_reg(3, 4, 1, 1, 0)
+#define HCR_TWEDEL GENMASK(63, 60)
+#define HCR_TWEDEn BIT(59)
+#define HCR_TID5 BIT(58)
+#define HCR_DCT BIT(57)
+#define HCR_ATA BIT(56)
+#define HCR_TTLBOS BIT(55)
+#define HCR_TTLBIS BIT(54)
+#define HCR_EnSCXT BIT(53)
+#define HCR_TOCU BIT(52)
+#define HCR_AMVOFFEN BIT(51)
+#define HCR_TICAB BIT(50)
+#define HCR_TID4 BIT(49)
+#define HCR_FIEN BIT(47)
+#define HCR_FWB BIT(46)
+#define HCR_NV2 BIT(45)
+#define HCR_AT BIT(44)
+#define HCR_NV1 BIT(43)
+#define HCR_NV BIT(42)
+#define HCR_API BIT(41)
+#define HCR_APK BIT(40)
+#define HCR_MIOCNCE BIT(38)
+#define HCR_TEA BIT(37)
+#define HCR_TERR BIT(36)
+#define HCR_TLOR BIT(35)
+#define HCR_E2H BIT(34)
+#define HCR_ID BIT(33)
+#define HCR_CD BIT(32)
+#define HCR_RW BIT(31)
+#define HCR_TRVM BIT(30)
+#define HCR_HCD BIT(29)
+#define HCR_TDZ BIT(28)
+#define HCR_TGE BIT(27)
+#define HCR_TVM BIT(26)
+#define HCR_TTLB BIT(25)
+#define HCR_TPU BIT(24)
+#define HCR_TPCP BIT(23)
+#define HCR_TPC BIT(23)
+#define HCR_TSW BIT(22)
+#define HCR_TACR BIT(21)
+#define HCR_TIDCP BIT(20)
+#define HCR_TSC BIT(19)
+#define HCR_TID3 BIT(18)
+#define HCR_TID2 BIT(17)
+#define HCR_TID1 BIT(16)
+#define HCR_TID0 BIT(15)
+#define HCR_TWE BIT(14)
+#define HCR_TWI BIT(13)
+#define HCR_DC BIT(12)
+#define HCR_BSU GENMASK(11, 10)
+#define HCR_FB BIT(9)
+#define HCR_VSE BIT(8)
+#define HCR_VI BIT(7)
+#define HCR_VF BIT(6)
+#define HCR_AMO BIT(5)
+#define HCR_IMO BIT(4)
+#define HCR_FMO BIT(3)
+#define HCR_PTW BIT(2)
+#define HCR_SWIO BIT(1)
+#define HCR_VM BIT(0)
+
+#define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0)
+#define ID_AA64MMFR0_ECV GENMASK(63, 60)
+#define ID_AA64MMFR0_FGT GENMASK(59, 56)
+#define ID_AA64MMFR0_ExS GENMASK(47, 44)
+#define ID_AA64MMFR0_TGran4_2 GENMASK(43, 40)
+#define ID_AA64MMFR0_TGran64_2 GENMASK(39, 36)
+#define ID_AA64MMFR0_TGran16_2 GENMASK(35, 32)
+#define ID_AA64MMFR0_TGran4 GENMASK(31, 28)
+#define ID_AA64MMFR0_TGran64 GENMASK(27, 24)
+#define ID_AA64MMFR0_TGran16 GENMASK(23, 20)
+#define ID_AA64MMFR0_BigEndEL0 GENMASK(19, 16)
+#define ID_AA64MMFR0_SNSMem GENMASK(15, 12)
+#define ID_AA64MMFR0_BigEnd GENMASK(11, 8)
+#define ID_AA64MMFR0_ASIDBits GENMASK(7, 4)
+#define ID_AA64MMFR0_PARange GENMASK(3, 0)
+
+#define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0)
+// AArch64-PAR_EL1.F == 0b0
+#define PAR_ATTR GENMASK(63, 56)
+#define PAR_PA GENMASK(51, 12)
+#define PAR_NS BIT(9)
+#define PAR_SH GENMASK(8, 7)
+#define PAR_F BIT(0)
+// AArch64-PAR_EL1.F == 0b1
+#define PAR_S BIT(9)
+#define PAR_PTW BIT(8)
+#define PAR_FST GENMASK(6, 1)
+
+#define SYS_SCTLR_EL1 sys_reg(3, 0, 1, 0, 0)
+#define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0)
+#define SCTLR_EPAN BIT(57)
+#define SCTLR_EnALS BIT(56)
+#define SCTLR_EnAS0 BIT(55)
+#define SCTLR_EnASR BIT(54)
+#define SCTLR_TWEDEL GENMASK(49, 46)
+#define SCTLR_TWEDEn BIT(45)
+#define SCTLR_DSSBS BIT(44)
+#define SCTLR_ATA BIT(43)
+#define SCTLR_ATA0 BIT(42)
+#define SCTLR_TCF GENMASK(41, 40)
+#define SCTLR_TCF0 GENMASK(39, 38)
+#define SCTLR_ITFSB BIT(37)
+#define SCTLR_BT1 BIT(36)
+#define SCTLR_BT0 BIT(35)
+#define SCTLR_EnIA BIT(31)
+#define SCTLR_EnIB BIT(30)
+#define SCTLR_LSMAOE BIT(29)
+#define SCTLR_nTLSMD BIT(28)
+#define SCTLR_EnDA BIT(27)
+#define SCTLR_UCI BIT(26)
+#define SCTLR_EE BIT(25)
+#define SCTLR_E0E BIT(24)
+#define SCTLR_SPAN BIT(23)
+#define SCTLR_EIS BIT(22)
+#define SCTLR_IESB BIT(21)
+#define SCTLR_TSCXT BIT(20)
+#define SCTLR_WXN BIT(19)
+#define SCTLR_nTWE BIT(18)
+#define SCTLR_nTWI BIT(16)
+#define SCTLR_UCT BIT(15)
+#define SCTLR_DZE BIT(14)
+#define SCTLR_EnDB BIT(13)
+#define SCTLR_I BIT(12)
+#define SCTLR_EOS BIT(11)
+#define SCTLR_EnRCTX BIT(10)
+#define SCTLR_UMA BIT(9)
+#define SCTLR_SED BIT(8)
+#define SCTLR_ITD BIT(7)
+#define SCTLR_nAA BIT(6)
+#define SCTLR_CP15BEN BIT(5)
+#define SCTLR_SA0 BIT(4)
+#define SCTLR_SA BIT(3)
+#define SCTLR_C BIT(2)
+#define SCTLR_A BIT(1)
+#define SCTLR_M BIT(0)
+
+#define SYS_SPSR_EL1 sys_reg(3, 0, 4, 0, 0)
+#define SYS_SPSR_EL12 sys_reg(3, 5, 4, 0, 0)
+#define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0)
+// exception taken from AArch64
+#define SPSR_N BIT(31)
+#define SPSR_Z BIT(30)
+#define SPSR_C BIT(29)
+#define SPSR_V BIT(28)
+#define SPSR_TCO BIT(25)
+#define SPSR_DIT BIT(24)
+#define SPSR_UAO BIT(23)
+#define SPSR_PAN BIT(22)
+#define SPSR_SS BIT(21)
+#define SPSR_IL BIT(20)
+#define SPSR_SSBS BIT(12)
+#define SPSR_BTYPE GENMASK(11, 10)
+#define SPSR_D BIT(9)
+#define SPSR_A BIT(8)
+#define SPSR_I BIT(7)
+#define SPSR_F BIT(6)
+#define SPSR_M GENMASK(4, 0)
+
+#define SYS_TCR_EL1 sys_reg(3, 0, 2, 0, 2)
+#define TCR_DS BIT(59)
+#define TCR_TCMA1 BIT(58)
+#define TCR_TCMA0 BIT(57)
+#define TCR_E0PD1 BIT(56)
+#define TCR_E0PD0 BIT(55)
+#define TCR_NFD1 BIT(54)
+#define TCR_NFD0 BIT(53)
+#define TCR_TBID1 BIT(52)
+#define TCR_TBID0 BIT(51)
+#define TCR_HWU162 BIT(50)
+#define TCR_HWU161 BIT(49)
+#define TCR_HWU160 BIT(48)
+#define TCR_HWU159 BIT(47)
+#define TCR_HWU062 BIT(46)
+#define TCR_HWU061 BIT(45)
+#define TCR_HWU060 BIT(44)
+#define TCR_HWU059 BIT(43)
+#define TCR_HPD1 BIT(42)
+#define TCR_HPD0 BIT(41)
+#define TCR_HD BIT(40)
+#define TCR_HA BIT(39)
+#define TCR_TBI1 BIT(38)
+#define TCR_TBI0 BIT(37)
+#define TCR_AS BIT(36)
+#define TCR_IPS GENMASK(34, 32)
+#define TCR_IPS_1TB 0b010UL
+#define TCR_IPS_4TB 0b011UL
+#define TCR_IPS_16TB 0b100UL
+#define TCR_TG1 GENMASK(31, 30)
+#define TCR_TG1_16K 0b01UL
+#define TCR_SH1 GENMASK(29, 28)
+#define TCR_SH1_IS 0b11UL
+#define TCR_ORGN1 GENMASK(27, 26)
+#define TCR_ORGN1_WBWA 0b01UL
+#define TCR_IRGN1 GENMASK(25, 24)
+#define TCR_IRGN1_WBWA 0b01UL
+#define TCR_EPD1 BIT(23)
+#define TCR_A1 BIT(22)
+#define TCR_T1SZ GENMASK(21, 16)
+#define TCR_T1SZ_48BIT 16UL
+#define TCR_TG0 GENMASK(15, 14)
+#define TCR_TG0_16K 0b10UL
+#define TCR_SH0 GENMASK(13, 12)
+#define TCR_SH0_IS 0b11UL
+#define TCR_ORGN0 GENMASK(11, 10)
+#define TCR_ORGN0_WBWA 0b01UL
+#define TCR_IRGN0 GENMASK(9, 8)
+#define TCR_IRGN0_WBWA 0b01UL
+#define TCR_EPD0 BIT(7)
+#define TCR_T0SZ GENMASK(5, 0)
+#define TCR_T0SZ_48BIT 16UL
+
+#define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2)
+// Profile(A)
+#define VTCR_SL2 BIT(33)
+#define VTCR_DS BIT(32)
+#define VTCR_NSA BIT(30)
+#define VTCR_NSW BIT(29)
+#define VTCR_HWU62 BIT(28)
+#define VTCR_HWU61 BIT(27)
+#define VTCR_HWU60 BIT(26)
+#define VTCR_HWU59 BIT(25)
+#define VTCR_HD BIT(22)
+#define VTCR_HA BIT(21)
+#define VTCR_VS BIT(19)
+#define VTCR_PS GENMASK(18, 16)
+#define VTCR_TG0 GENMASK(15, 14)
+#define VTCR_SH0 GENMASK(13, 12)
+#define VTCR_ORGN0 GENMASK(11, 10)
+#define VTCR_IRGN0 GENMASK(9, 8)
+#define VTCR_SL0 GENMASK(7, 6)
+#define VTCR_T0SZ GENMASK(5, 0)
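
These definitions pair with the same FIELD_GET helper as everywhere else in the tree. As a sketch (not from this commit, and assuming the tree's mrs() macro accepts the SYS_* aliases as it does in chickens.c), classifying a lower-EL data abort from ESR_EL2 looks like:

    u64 esr = mrs(SYS_ESR_EL2);

    if (FIELD_GET(ESR_EC, esr) == ESR_EC_DABORT_LOWER) {
        bool wnr = esr & ESR_ISS_DABORT_WnR;           // write, not read
        int sas  = FIELD_GET(ESR_ISS_DABORT_SAS, esr); // SAS_8B..SAS_64B
        printf("data abort: %s, %d bytes\n", wnr ? "write" : "read", 1 << sas);
    }
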
diff --git a/tools/src/asc.c b/tools/src/asc.c
new file mode 100644
index 0000000..67c9d46
--- /dev/null
+++ b/tools/src/asc.c
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "adt.h"
+#include "asc.h"
+#include "malloc.h"
+#include "utils.h"
+
+#define ASC_CPU_CONTROL 0x44
+#define ASC_CPU_CONTROL_START 0x10
+
+#define ASC_MBOX_CONTROL_FULL BIT(16)
+#define ASC_MBOX_CONTROL_EMPTY BIT(17)
+
+#define ASC_MBOX_A2I_CONTROL 0x110
+#define ASC_MBOX_A2I_SEND0 0x800
+#define ASC_MBOX_A2I_SEND1 0x808
+#define ASC_MBOX_A2I_RECV0 0x810
+#define ASC_MBOX_A2I_RECV1 0x818
+
+#define ASC_MBOX_I2A_CONTROL 0x114
+#define ASC_MBOX_I2A_SEND0 0x820
+#define ASC_MBOX_I2A_SEND1 0x828
+#define ASC_MBOX_I2A_RECV0 0x830
+#define ASC_MBOX_I2A_RECV1 0x838
+
+struct asc_dev {
+ uintptr_t cpu_base;
+ uintptr_t base;
+ int iop_node;
+};
+
+asc_dev_t *asc_init(const char *path)
+{
+ int asc_path[8];
+ int node = adt_path_offset_trace(adt, path, asc_path);
+ if (node < 0) {
+ printf("asc: Error getting ASC node %s\n", path);
+ return NULL;
+ }
+
+ u64 base;
+ if (adt_get_reg(adt, asc_path, "reg", 0, &base, NULL) < 0) {
+ printf("asc: Error getting ASC %s base address.\n", path);
+ return NULL;
+ }
+
+ asc_dev_t *asc = malloc(sizeof(*asc));
+ if (!asc)
+ return NULL;
+
+ asc->iop_node = adt_first_child_offset(adt, node);
+ asc->cpu_base = base;
+ asc->base = base + 0x8000;
+
+ clear32(base + ASC_CPU_CONTROL, ASC_CPU_CONTROL_START);
+ return asc;
+}
+
+void asc_free(asc_dev_t *asc)
+{
+ free(asc);
+}
+
+int asc_get_iop_node(asc_dev_t *asc)
+{
+ return asc->iop_node;
+}
+
+void asc_cpu_start(asc_dev_t *asc)
+{
+ set32(asc->cpu_base + ASC_CPU_CONTROL, ASC_CPU_CONTROL_START);
+}
+
+void asc_cpu_stop(asc_dev_t *asc)
+{
+ clear32(asc->cpu_base + ASC_CPU_CONTROL, ASC_CPU_CONTROL_START);
+}
+
+bool asc_can_recv(asc_dev_t *asc)
+{
+ return !(read32(asc->base + ASC_MBOX_I2A_CONTROL) & ASC_MBOX_CONTROL_EMPTY);
+}
+
+bool asc_recv(asc_dev_t *asc, struct asc_message *msg)
+{
+ if (!asc_can_recv(asc))
+ return false;
+
+ msg->msg0 = read64(asc->base + ASC_MBOX_I2A_RECV0);
+ msg->msg1 = (u32)read64(asc->base + ASC_MBOX_I2A_RECV1);
+ dma_rmb();
+
+ // printf("received msg: %lx %x\n", msg->msg0, msg->msg1);
+
+ return true;
+}
+
+bool asc_recv_timeout(asc_dev_t *asc, struct asc_message *msg, u32 delay_usec)
+{
+ u64 timeout = timeout_calculate(delay_usec);
+ while (!timeout_expired(timeout)) {
+ if (asc_recv(asc, msg))
+ return true;
+ }
+ return false;
+}
+
+bool asc_can_send(asc_dev_t *asc)
+{
+ return !(read32(asc->base + ASC_MBOX_A2I_CONTROL) & ASC_MBOX_CONTROL_FULL);
+}
+
+bool asc_send(asc_dev_t *asc, const struct asc_message *msg)
+{
+ if (poll32(asc->base + ASC_MBOX_A2I_CONTROL, ASC_MBOX_CONTROL_FULL, 0, 200000)) {
+ printf("asc: A2I mailbox full for 200ms. Is the ASC stuck?");
+ return false;
+ }
+
+ dma_wmb();
+ write64(asc->base + ASC_MBOX_A2I_SEND0, msg->msg0);
+ write64(asc->base + ASC_MBOX_A2I_SEND1, msg->msg1);
+
+ // printf("sent msg: %lx %x\n", msg->msg0, msg->msg1);
+ return true;
+}
diff --git a/tools/src/asc.h b/tools/src/asc.h
new file mode 100644
index 0000000..0aac349
--- /dev/null
+++ b/tools/src/asc.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef ASC_H
+#define ASC_H
+
+#include "types.h"
+
+struct asc_message {
+ u64 msg0;
+ u32 msg1;
+};
+
+typedef struct asc_dev asc_dev_t;
+
+asc_dev_t *asc_init(const char *path);
+void asc_free(asc_dev_t *asc);
+
+int asc_get_iop_node(asc_dev_t *asc);
+
+void asc_cpu_start(asc_dev_t *asc);
+void asc_cpu_stop(asc_dev_t *asc);
+
+bool asc_can_recv(asc_dev_t *asc);
+bool asc_can_send(asc_dev_t *asc);
+
+bool asc_recv(asc_dev_t *asc, struct asc_message *msg);
+bool asc_recv_timeout(asc_dev_t *asc, struct asc_message *msg, u32 delay_usec);
+bool asc_send(asc_dev_t *asc, const struct asc_message *msg);
+
+#endif
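
A short usage sketch for the mailbox API (not part of the commit; the ADT path and message payload are illustrative):

    asc_dev_t *asc = asc_init("/arm-io/some-asc"); // hypothetical node
    if (!asc)
        return -1;

    asc_cpu_start(asc);

    struct asc_message msg = {.msg0 = 0x11, .msg1 = 0}; // placeholder payload
    if (asc_send(asc, &msg) && asc_recv_timeout(asc, &msg, 200000))
        printf("IOP replied: 0x%lx 0x%x\n", msg.msg0, msg.msg1);

    asc_cpu_stop(asc);
    asc_free(asc);
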
diff --git a/tools/src/chainload.c b/tools/src/chainload.c
new file mode 100644
index 0000000..1dd7c68
--- /dev/null
+++ b/tools/src/chainload.c
@@ -0,0 +1,148 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "../build/build_cfg.h"
+
+#include "chainload.h"
+#include "adt.h"
+#include "malloc.h"
+#include "memory.h"
+#include "nvme.h"
+#include "string.h"
+#include "types.h"
+#include "utils.h"
+#include "xnuboot.h"
+
+#ifdef CHAINLOADING
+int rust_load_image(const char *spec, void **image, size_t *size);
+#endif
+
+extern u8 _chainload_stub_start[];
+extern u8 _chainload_stub_end[];
+
+int chainload_image(void *image, size_t size, char **vars, size_t var_cnt)
+{
+ u64 new_base = (u64)_base;
+ size_t image_size = size;
+
+ printf("chainload: Preparing image...\n");
+
+ // m1n1 variables
+ for (size_t i = 0; i < var_cnt; i++)
+ image_size += strlen(vars[i]) + 1;
+
+ // pad to end payload
+ image_size += 4;
+ image_size = ALIGN_UP(image_size, SZ_16K);
+
+ // SEPFW
+ size_t sepfw_off = image_size;
+
+ int anode = adt_path_offset(adt, "/chosen/memory-map");
+ if (anode < 0) {
+ printf("chainload: /chosen/memory-map not found\n");
+ return -1;
+ }
+ u64 sepfw[2];
+ if (ADT_GETPROP_ARRAY(adt, anode, "SEPFW", sepfw) < 0) {
+ printf("chainload: Failed to find SEPFW\n");
+ return -1;
+ }
+
+ image_size += sepfw[1];
+ image_size = ALIGN_UP(image_size, SZ_16K);
+
+ // Bootargs
+ size_t bootargs_off = image_size;
+ const size_t bootargs_size = SZ_16K;
+ image_size += bootargs_size;
+
+ printf("chainload: Total image size: 0x%lx\n", image_size);
+
+ size_t stub_size = _chainload_stub_end - _chainload_stub_start;
+
+    void *new_image = malloc(image_size + stub_size);
+    if (!new_image) {
+        printf("chainload: Failed to allocate image buffer\n");
+        return -1;
+    }
+
+    // Copy m1n1
+    memcpy(new_image, image, size);
+
+ // Add vars
+ u8 *p = new_image + size;
+ for (size_t i = 0; i < var_cnt; i++) {
+ size_t len = strlen(vars[i]);
+
+ memcpy(p, vars[i], len);
+ p[len] = '\n';
+ p += len + 1;
+ }
+
+ // Add end padding
+ memset(p, 0, 4);
+
+ // Copy SEPFW
+ memcpy(new_image + sepfw_off, (void *)sepfw[0], sepfw[1]);
+
+ // Adjust ADT SEPFW address
+ sepfw[0] = new_base + sepfw_off;
+ if (adt_setprop(adt, anode, "SEPFW", &sepfw, sizeof(sepfw)) < 0) {
+ printf("chainload: Failed to set SEPFW prop\n");
+ free(new_image);
+ return -1;
+ }
+
+ // Copy bootargs
+ struct boot_args *new_boot_args = new_image + bootargs_off;
+ *new_boot_args = cur_boot_args;
+ new_boot_args->top_of_kernel_data = new_base + image_size;
+
+ // Copy chainload stub
+ void *stub = new_image + image_size;
+ memcpy(stub, _chainload_stub_start, stub_size);
+ dc_cvau_range(stub, stub_size);
+ ic_ivau_range(stub, stub_size);
+
+ // Set up next stage
+ next_stage.entry = stub;
+ next_stage.args[0] = new_base + bootargs_off;
+ next_stage.args[1] = (u64)new_image;
+ next_stage.args[2] = new_base;
+ next_stage.args[3] = image_size;
+ next_stage.args[4] = new_base + 0x800; // m1n1 entrypoint
+ next_stage.restore_logo = false;
+
+ return 0;
+}
+
+#ifdef CHAINLOADING
+
+int chainload_load(const char *spec, char **vars, size_t var_cnt)
+{
+ void *image;
+ size_t size;
+ int ret;
+
+ if (!nvme_init()) {
+ printf("chainload: NVME init failed\n");
+ return -1;
+ }
+
+ ret = rust_load_image(spec, &image, &size);
+ nvme_shutdown();
+ if (ret < 0)
+ return ret;
+
+ return chainload_image(image, size, vars, var_cnt);
+}
+
+#else
+
+int chainload_load(const char *spec, char **vars, size_t var_cnt)
+{
+ UNUSED(spec);
+ UNUSED(vars);
+ UNUSED(var_cnt);
+
+ printf("Chainloading files not supported in this build!\n");
+ return -1;
+}
+
+#endif
diff --git a/tools/src/chainload.h b/tools/src/chainload.h
new file mode 100644
index 0000000..206f482
--- /dev/null
+++ b/tools/src/chainload.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef __CHAINLOAD_H__
+#define __CHAINLOAD_H__
+
+#include "types.h"
+
+int chainload_image(void *base, size_t size, char **vars, size_t var_cnt);
+int chainload_load(const char *spec, char **vars, size_t var_cnt);
+
+#endif
diff --git a/tools/src/chainload_asm.S b/tools/src/chainload_asm.S
new file mode 100644
index 0000000..361ec8f
--- /dev/null
+++ b/tools/src/chainload_asm.S
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+
+.text
+
+.globl _chainload_stub_start
+.globl _chainload_stub_end
+.type _chainload_stub_start, @function
+
+_chainload_stub_start:
+1:
+ ldp x5, x6, [x1], #16
+ stp x5, x6, [x2]
+ dc cvau, x2
+ ic ivau, x2
+ add x2, x2, #16
+ sub x3, x3, #16
+ cbnz x3, 1b
+
+ br x4
+_chainload_stub_end:
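
The stub relocates the new image over memory that may include the running m1n1, so it runs from a copy placed past the destination and keeps the caches coherent as it goes. In rough C terms (a sketch for readability, not compiled code; x0 carries the boot-args pointer straight through to the new image):

    void chainload_stub(u64 args, u8 *src, u8 *dst, u64 count, void (*entry)(u64))
    {
        do {
            memcpy(dst, src, 16);   // ldp x5, x6 / stp x5, x6
            dc_cvau_range(dst, 16); // clean D-cache to point of unification
            ic_ivau_range(dst, 16); // invalidate stale I-cache for the new code
            src += 16;
            dst += 16;
            count -= 16;
        } while (count);
        entry(args); // br x4
    }
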
diff --git a/tools/src/chickens.c b/tools/src/chickens.c
new file mode 100644
index 0000000..68a7eee
--- /dev/null
+++ b/tools/src/chickens.c
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "chickens.h"
+#include "cpu_regs.h"
+#include "uart.h"
+#include "utils.h"
+
+/* Part IDs in MIDR_EL1 */
+#define MIDR_PART_T8101_ICESTORM 0x20
+#define MIDR_PART_T8101_FIRESTORM 0x21
+#define MIDR_PART_T8103_ICESTORM 0x22
+#define MIDR_PART_T8103_FIRESTORM 0x23
+#define MIDR_PART_T6000_ICESTORM 0x24
+#define MIDR_PART_T6000_FIRESTORM 0x25
+#define MIDR_PART_T6001_ICESTORM 0x28
+#define MIDR_PART_T6001_FIRESTORM 0x29
+#define MIDR_PART_T8110_BLIZZARD 0x30
+#define MIDR_PART_T8110_AVALANCHE 0x31
+#define MIDR_PART_T8112_BLIZZARD 0x32
+#define MIDR_PART_T8112_AVALANCHE 0x33
+
+#define MIDR_REV_LOW GENMASK(3, 0)
+#define MIDR_PART GENMASK(15, 4)
+#define MIDR_REV_HIGH GENMASK(23, 20)
+
+void init_m1_icestorm(void);
+void init_t8103_firestorm(int rev);
+void init_t6000_firestorm(int rev);
+void init_t6001_firestorm(int rev);
+
+void init_m2_blizzard(void);
+void init_t8112_avalanche(int rev);
+
+const char *init_cpu(void)
+{
+ const char *cpu = "Unknown";
+
+ msr(OSLAR_EL1, 0);
+
+ /* This is performed unconditionally on all cores (necessary?) */
+ if (is_ecore())
+ reg_set(SYS_IMP_APL_EHID4, HID4_DISABLE_DC_MVA | HID4_DISABLE_DC_SW_L2_OPS);
+ else
+ reg_set(SYS_IMP_APL_HID4, HID4_DISABLE_DC_MVA | HID4_DISABLE_DC_SW_L2_OPS);
+
+ uint64_t midr = mrs(MIDR_EL1);
+ int part = FIELD_GET(MIDR_PART, midr);
+ int rev = (FIELD_GET(MIDR_REV_HIGH, midr) << 4) | FIELD_GET(MIDR_REV_LOW, midr);
+
+ printf(" CPU part: 0x%x rev: 0x%x\n", part, rev);
+
+ switch (part) {
+ case MIDR_PART_T8103_FIRESTORM:
+ cpu = "M1 Firestorm";
+ init_t8103_firestorm(rev);
+ break;
+
+ case MIDR_PART_T6000_FIRESTORM:
+ cpu = "M1 Pro Firestorm";
+ init_t6000_firestorm(rev);
+ break;
+
+ case MIDR_PART_T6001_FIRESTORM:
+ cpu = "M1 Max Firestorm";
+ init_t6001_firestorm(rev);
+ break;
+
+ case MIDR_PART_T8103_ICESTORM:
+ cpu = "M1 Icestorm";
+ init_m1_icestorm();
+ break;
+
+ case MIDR_PART_T6000_ICESTORM:
+ cpu = "M1 Pro Icestorm";
+ init_m1_icestorm();
+ break;
+
+ case MIDR_PART_T6001_ICESTORM:
+ cpu = "M1 Max Icestorm";
+ init_m1_icestorm();
+ break;
+
+ case MIDR_PART_T8112_AVALANCHE:
+ cpu = "M2 Avalanche";
+ init_t8112_avalanche(rev);
+ break;
+
+ case MIDR_PART_T8112_BLIZZARD:
+ cpu = "M2 Blizzard";
+ init_m2_blizzard();
+ break;
+
+ default:
+ uart_puts(" Unknown CPU type");
+ break;
+ }
+
+ int core = mrs(MPIDR_EL1) & 0xff;
+
+ // Unknown, related to SMP?
+ msr(s3_4_c15_c5_0, core);
+ msr(SYS_IMP_APL_AMX_CTL_EL1, 0x100);
+
+ // Enable IRQs (at least necessary on t600x)
+ msr(s3_4_c15_c10_4, 0);
+
+ sysop("isb");
+
+ /* Unmask external IRQs, set WFI mode to up (2) */
+ reg_mask(SYS_IMP_APL_CYC_OVRD,
+ CYC_OVRD_FIQ_MODE_MASK | CYC_OVRD_IRQ_MODE_MASK | CYC_OVRD_WFI_MODE_MASK,
+ CYC_OVRD_FIQ_MODE(0) | CYC_OVRD_IRQ_MODE(0) | CYC_OVRD_WFI_MODE(2));
+
+ /* Enable branch prediction state retention across ACC sleep */
+ reg_mask(SYS_IMP_APL_ACC_CFG, ACC_CFG_BP_SLEEP_MASK, ACC_CFG_BP_SLEEP(3));
+
+ return cpu;
+}
diff --git a/tools/src/chickens.h b/tools/src/chickens.h
new file mode 100644
index 0000000..c1cb5a6
--- /dev/null
+++ b/tools/src/chickens.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef __CHICKENS_H__
+#define __CHICKENS_H__
+
+const char *init_cpu(void);
+
+#endif
diff --git a/tools/src/chickens_avalanche.c b/tools/src/chickens_avalanche.c
new file mode 100644
index 0000000..faf7a6b
--- /dev/null
+++ b/tools/src/chickens_avalanche.c
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "cpu_regs.h"
+#include "utils.h"
+
+static void init_common_avalanche(void)
+{
+ reg_mask(SYS_IMP_APL_HID1, HID1_ZCL_RF_MISPREDICT_THRESHOLD_MASK,
+ HID1_ZCL_RF_MISPREDICT_THRESHOLD(1));
+ reg_mask(SYS_IMP_APL_HID1, HID1_ZCL_RF_RESTART_THRESHOLD_MASK,
+ HID1_ZCL_RF_RESTART_THRESHOLD(3));
+
+ reg_set(SYS_IMP_APL_HID11, HID11_DISABLE_LD_NT_WIDGET);
+
+ reg_set(SYS_IMP_APL_HID9, HID9_TSO_ALLOW_DC_ZVA_WC | HID9_AVL_UNK17);
+
+ // "configure dummy cycles to work around incorrect temp sensor readings on
+ // NEX power gating" (maybe)
+ reg_mask(SYS_IMP_APL_HID13,
+ HID13_POST_OFF_CYCLES_MASK | HID13_POST_ON_CYCLES_MASK | HID13_PRE_CYCLES_MASK |
+ HID13_GROUP0_FF1_DELAY_MASK | HID13_GROUP0_FF2_DELAY_MASK |
+ HID13_GROUP0_FF3_DELAY_MASK | HID13_GROUP0_FF4_DELAY_MASK |
+ HID13_GROUP0_FF5_DELAY_MASK | HID13_GROUP0_FF6_DELAY_MASK |
+ HID13_GROUP0_FF7_DELAY_MASK | HID13_RESET_CYCLES_MASK,
+ HID13_POST_OFF_CYCLES(8) | HID13_POST_ON_CYCLES(8) | HID13_PRE_CYCLES(1) |
+ HID13_GROUP0_FF1_DELAY(4) | HID13_GROUP0_FF2_DELAY(4) | HID13_GROUP0_FF3_DELAY(4) |
+ HID13_GROUP0_FF4_DELAY(4) | HID13_GROUP0_FF5_DELAY(4) | HID13_GROUP0_FF6_DELAY(4) |
+ HID13_GROUP0_FF7_DELAY(4) | HID13_RESET_CYCLES(0));
+
+ reg_mask(SYS_IMP_APL_HID26, HID26_GROUP1_OFFSET_MASK | HID26_GROUP2_OFFSET_MASK,
+ HID26_GROUP1_OFFSET(26) | HID26_GROUP2_OFFSET(31));
+ reg_mask(SYS_IMP_APL_HID27, HID27_GROUP3_OFFSET_MASK, HID27_GROUP3_OFFSET(31));
+}
+
+static void init_m2_avalanche(void)
+{
+ init_common_avalanche();
+
+ reg_mask(SYS_IMP_APL_HID3, HID3_DEV_PCIE_THROTTLE_LIMIT_MASK, HID3_DEV_PCIE_THROTTLE_LIMIT(60));
+ reg_set(SYS_IMP_APL_HID3, HID3_DEV_PCIE_THROTTLE_ENABLE);
+ reg_set(SYS_IMP_APL_HID18, HID18_AVL_UNK27 | HID18_AVL_UNK29);
+ reg_set(SYS_IMP_APL_HID16, HID16_AVL_UNK12);
+}
+
+void init_t8112_avalanche(int rev)
+{
+ UNUSED(rev);
+
+ init_m2_avalanche();
+}
diff --git a/tools/src/chickens_blizzard.c b/tools/src/chickens_blizzard.c
new file mode 100644
index 0000000..8b88b6c
--- /dev/null
+++ b/tools/src/chickens_blizzard.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "cpu_regs.h"
+#include "utils.h"
+
+static void init_common_blizzard(void)
+{
+ reg_set(SYS_IMP_APL_EHID0, EHID0_BLI_UNK32);
+}
+
+void init_m2_blizzard(void)
+{
+ init_common_blizzard();
+
+ reg_mask(SYS_IMP_APL_EHID9, EHID9_DEV_2_THROTTLE_LIMIT_MASK, EHID9_DEV_2_THROTTLE_LIMIT(60));
+ reg_set(SYS_IMP_APL_EHID9, EHID9_DEV_2_THROTTLE_ENABLE);
+ reg_set(SYS_IMP_APL_EHID18, EHID18_BLZ_UNK34);
+}
diff --git a/tools/src/chickens_firestorm.c b/tools/src/chickens_firestorm.c
new file mode 100644
index 0000000..7754820
--- /dev/null
+++ b/tools/src/chickens_firestorm.c
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "cpu_regs.h"
+#include "utils.h"
+
+static void init_common_firestorm(void)
+{
+ reg_set(SYS_IMP_APL_HID0, HID0_SAME_PG_POWER_OPTIMIZATION);
+
+ // Disable SMC trapping to EL2
+ reg_clr(SYS_IMP_APL_HID1, HID1_TRAP_SMC);
+
+ reg_clr(SYS_IMP_APL_HID3, HID3_DEV_PCIE_THROTTLE_ENABLE | HID3_DISABLE_ARBITER_FIX_BIF_CRD);
+
+ // "Post-silicon tuning of STNT widget contiguous counter threshold"
+ reg_mask(SYS_IMP_APL_HID4, HID4_STNT_COUNTER_THRESHOLD_MASK, HID4_STNT_COUNTER_THRESHOLD(3));
+
+ // "Sibling Merge in LLC can cause UC load to violate ARM Memory Ordering Rules."
+ reg_set(SYS_IMP_APL_HID5, HID5_DISABLE_FILL_2C_MERGE);
+
+ reg_set(SYS_IMP_APL_HID9, HID9_TSO_ALLOW_DC_ZVA_WC);
+
+ reg_set(SYS_IMP_APL_HID11, HID11_DISABLE_LD_NT_WIDGET);
+
+ // "configure dummy cycles to work around incorrect temp sensor readings on
+ // NEX power gating"
+ reg_mask(SYS_IMP_APL_HID13, HID13_PRE_CYCLES_MASK, HID13_PRE_CYCLES(4));
+
+ // Best bit names...
+ // Maybe: "RF bank and Multipass conflict forward progress widget does not
+ // handle 3+ cycle livelock"
+ reg_set(SYS_IMP_APL_HID16, HID16_SPAREBIT0 | HID16_SPAREBIT3 | HID16_ENABLE_MPX_PICK_45 |
+ HID16_ENABLE_MP_CYCLONE_7);
+}
+
+static void init_m1_firestorm(void)
+{
+ init_common_firestorm();
+
+ // "Cross-beat Crypto(AES/PMUL) ICache fusion is not disabled for branch
+ // uncondtional "recoded instruction."
+ reg_set(SYS_IMP_APL_HID0, HID0_FETCH_WIDTH_DISABLE | HID0_CACHE_FUSION_DISABLE);
+
+ reg_set(SYS_IMP_APL_HID7, HID7_FORCE_NONSPEC_IF_STEPPING |
+ HID7_FORCE_NONSPEC_IF_SPEC_FLUSH_POINTER_INVALID_AND_MP_VALID);
+
+ reg_mask(SYS_IMP_APL_HID7, HID7_FORCE_NONSPEC_TARGET_TIMER_SEL_MASK,
+ HID7_FORCE_NONSPEC_TARGET_TIMER_SEL(3));
+
+ reg_set(SYS_IMP_APL_HID9, HID9_TSO_SERIALIZE_VLD_MICROOPS | HID9_FIX_BUG_51667805);
+
+ reg_set(SYS_IMP_APL_HID18, HID18_HVC_SPECULATION_DISABLE);
+
+ reg_clr(SYS_IMP_APL_HID21, HID21_ENABLE_LDREX_FILL_REPLY);
+}
+
+void init_t8103_firestorm(int rev)
+{
+ init_m1_firestorm();
+
+ reg_mask(SYS_IMP_APL_HID6, HID6_UP_CRD_TKN_INIT_C2_MASK, HID6_UP_CRD_TKN_INIT_C2(0));
+
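+    /*
+     * rev packs the MIDR variant (high nibble) and revision (low nibble);
+     * 0x10 and 0x11 thus presumably correspond to the B0 and B1 steppings.
+     */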
+ if (rev >= 0x10) {
+ reg_set(SYS_IMP_APL_HID4,
+ HID4_ENABLE_LFSR_STALL_LOAD_PIPE_2_ISSUE | HID4_ENABLE_LFSR_STALL_STQ_REPLAY);
+
+ reg_set(SYS_IMP_APL_HID9, HID9_FIX_BUG_55719865);
+ reg_set(SYS_IMP_APL_HID11, HID11_ENABLE_FIX_UC_55719865);
+ }
+
+ if (rev == 0x11)
+ reg_set(SYS_IMP_APL_HID1, HID1_ENABLE_MDSB_STALL_PIPELINE_ECO | HID1_ENABLE_BR_KILL_LIMIT);
+
+ if (rev >= 0x11)
+ reg_set(SYS_IMP_APL_HID18, HID18_SPAREBIT17);
+}
+
+void init_t6000_firestorm(int rev)
+{
+ init_m1_firestorm();
+
+ reg_set(SYS_IMP_APL_HID9, HID9_FIX_BUG_55719865);
+ reg_set(SYS_IMP_APL_HID11, HID11_ENABLE_FIX_UC_55719865);
+
+ if (rev >= 0x10) {
+ reg_set(SYS_IMP_APL_HID1, HID1_ENABLE_MDSB_STALL_PIPELINE_ECO | HID1_ENABLE_BR_KILL_LIMIT);
+
+ reg_set(SYS_IMP_APL_HID4,
+ HID4_ENABLE_LFSR_STALL_LOAD_PIPE_2_ISSUE | HID4_ENABLE_LFSR_STALL_STQ_REPLAY);
+
+ reg_set(SYS_IMP_APL_HID18, HID18_SPAREBIT17);
+ }
+}
+
+void init_t6001_firestorm(int rev)
+{
+ init_m1_firestorm();
+
+ reg_set(SYS_IMP_APL_HID1, HID1_ENABLE_MDSB_STALL_PIPELINE_ECO);
+
+ reg_set(SYS_IMP_APL_HID4,
+ HID4_ENABLE_LFSR_STALL_LOAD_PIPE_2_ISSUE | HID4_ENABLE_LFSR_STALL_STQ_REPLAY);
+
+ reg_set(SYS_IMP_APL_HID9, HID9_FIX_BUG_55719865);
+
+ reg_set(SYS_IMP_APL_HID11, HID11_ENABLE_FIX_UC_55719865);
+
+ if (rev >= 0x10) {
+ reg_set(SYS_IMP_APL_HID1, HID1_ENABLE_BR_KILL_LIMIT);
+
+ reg_set(SYS_IMP_APL_HID18, HID18_SPAREBIT17);
+ }
+}
diff --git a/tools/src/chickens_icestorm.c b/tools/src/chickens_icestorm.c
new file mode 100644
index 0000000..bc0cfb8
--- /dev/null
+++ b/tools/src/chickens_icestorm.c
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "cpu_regs.h"
+#include "utils.h"
+
+static void init_common_icestorm(void)
+{
+ // "Sibling Merge in LLC can cause UC load to violate ARM Memory Ordering Rules."
+ reg_set(SYS_IMP_APL_HID5, HID5_DISABLE_FILL_2C_MERGE);
+
+ reg_clr(SYS_IMP_APL_EHID9, EHID9_DEV_2_THROTTLE_ENABLE);
+
+ // "Prevent store-to-load forwarding for UC memory to avoid barrier ordering
+ // violation"
+ reg_set(SYS_IMP_APL_EHID10, HID10_FORCE_WAIT_STATE_DRAIN_UC | HID10_DISABLE_ZVA_TEMPORAL_TSO);
+
+ // Disable SMC trapping to EL2
+ reg_clr(SYS_IMP_APL_EHID20, EHID20_TRAP_SMC);
+}
+
+void init_m1_icestorm(void)
+{
+ init_common_icestorm();
+
+ reg_set(SYS_IMP_APL_EHID20, EHID20_FORCE_NONSPEC_IF_OLDEST_REDIR_VALID_AND_OLDER |
+ EHID20_FORCE_NONSPEC_IF_SPEC_FLUSH_POINTER_NE_BLK_RTR_POINTER);
+
+ reg_mask(SYS_IMP_APL_EHID20, EHID20_FORCE_NONSPEC_TARGETED_TIMER_SEL_MASK,
+ EHID20_FORCE_NONSPEC_TARGETED_TIMER_SEL(3));
+}
diff --git a/tools/src/clk.c b/tools/src/clk.c
new file mode 100644
index 0000000..ec0c77d
--- /dev/null
+++ b/tools/src/clk.c
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "clk.h"
+#include "adt.h"
+#include "types.h"
+#include "utils.h"
+
+#define CLK_MUX GENMASK(27, 24)
+
+#define NCO_BASE 5
+#define NUM_NCOS 5
+
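+/*
+ * Point each MCA (audio) clock mux at an NCO clock source: one 32-bit
+ * register per mux, with any muxes beyond NUM_NCOS sharing the last NCO.
+ */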
+void clk_init(void)
+{
+ int path[8];
+ int node = adt_path_offset_trace(adt, "/arm-io/mca-switch", path);
+
+ if (node < 0) {
+ printf("mca-switch node not found!\n");
+ return;
+ }
+
+ u64 mca_clk_base, mca_clk_size;
+ if (adt_get_reg(adt, path, "reg", 2, &mca_clk_base, &mca_clk_size)) {
+ printf("Failed to get mca-switch reg property!\n");
+ return;
+ }
+
+ printf("CLK: MCA clock registers @ 0x%lx (0x%lx)\n", mca_clk_base, mca_clk_size);
+
+ unsigned int i;
+ for (i = 0; i < (mca_clk_size / 4); i++)
+ mask32(mca_clk_base + 4 * i, CLK_MUX, FIELD_PREP(CLK_MUX, NCO_BASE + min(NUM_NCOS - 1, i)));
+
+ printf("CLK: Initialized %d MCA clock muxes\n", i);
+}
diff --git a/tools/src/clk.h b/tools/src/clk.h
new file mode 100644
index 0000000..bb79fa4
--- /dev/null
+++ b/tools/src/clk.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef __CLK_H__
+#define __CLK_H__
+
+void clk_init(void);
+
+#endif
diff --git a/tools/src/cpu_regs.h b/tools/src/cpu_regs.h
new file mode 100644
index 0000000..236b53e
--- /dev/null
+++ b/tools/src/cpu_regs.h
@@ -0,0 +1,290 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "arm_cpu_regs.h"
+#include "types.h"
+
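+/*
+ * Apple implementation-defined system registers, encoded as
+ * sys_reg(op0, op1, CRn, CRm, op2); nearly all of them live in the
+ * IMPDEF CRn=15 space.
+ */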
+/* ARM extensions */
+#define ESR_EC_IMPDEF 0b111111
+#define ESR_ISS_IMPDEF_MSR 0x20
+
+#define SYS_IMP_APL_ACTLR_EL12 sys_reg(3, 6, 15, 14, 6)
+
+#define SYS_IMP_APL_AMX_CTL_EL1 sys_reg(3, 4, 15, 1, 4)
+#define SYS_IMP_APL_AMX_CTL_EL2 sys_reg(3, 4, 15, 4, 7)
+#define SYS_IMP_APL_AMX_CTL_EL12 sys_reg(3, 4, 15, 4, 6)
+
+#define AMX_CTL_EN BIT(63)
+#define AMX_CTL_EN_EL1 BIT(62)
+
+#define SYS_IMP_APL_CNTVCT_ALIAS_EL0 sys_reg(3, 4, 15, 10, 6)
+
+/* HID registers */
+#define SYS_IMP_APL_HID0 sys_reg(3, 0, 15, 0, 0)
+#define HID0_FETCH_WIDTH_DISABLE BIT(28)
+#define HID0_CACHE_FUSION_DISABLE BIT(36)
+#define HID0_SAME_PG_POWER_OPTIMIZATION BIT(45)
+
+#define SYS_IMP_APL_EHID0 sys_reg(3, 0, 15, 0, 1)
+#define EHID0_BLI_UNK32 BIT(32)
+
+#define SYS_IMP_APL_HID1 sys_reg(3, 0, 15, 1, 0)
+#define HID1_TRAP_SMC BIT(54)
+#define HID1_ENABLE_MDSB_STALL_PIPELINE_ECO BIT(58)
+#define HID1_ENABLE_BR_KILL_LIMIT BIT(60)
+
+#define HID1_ZCL_RF_RESTART_THRESHOLD_MASK GENMASK(23, 22)
+#define HID1_ZCL_RF_RESTART_THRESHOLD(x) (((unsigned long)x) << 22)
+#define HID1_ZCL_RF_MISPREDICT_THRESHOLD_MASK GENMASK(43, 42)
+#define HID1_ZCL_RF_MISPREDICT_THRESHOLD(x) (((unsigned long)x) << 42)
+
+#define SYS_IMP_APL_HID3 sys_reg(3, 0, 15, 3, 0)
+#define HID3_DISABLE_ARBITER_FIX_BIF_CRD BIT(44)
+#define HID3_DEV_PCIE_THROTTLE_LIMIT_MASK GENMASK(62, 57)
+#define HID3_DEV_PCIE_THROTTLE_LIMIT(x) (((unsigned long)x) << 57)
+#define HID3_DEV_PCIE_THROTTLE_ENABLE BIT(63)
+
+#define SYS_IMP_APL_HID4 sys_reg(3, 0, 15, 4, 0)
+#define SYS_IMP_APL_EHID4 sys_reg(3, 0, 15, 4, 1)
+#define HID4_DISABLE_DC_MVA BIT(11)
+#define HID4_DISABLE_DC_SW_L2_OPS BIT(44)
+#define HID4_STNT_COUNTER_THRESHOLD(x) (((unsigned long)x) << 40)
+#define HID4_STNT_COUNTER_THRESHOLD_MASK (3UL << 40)
+#define HID4_ENABLE_LFSR_STALL_LOAD_PIPE_2_ISSUE BIT(49)
+#define HID4_ENABLE_LFSR_STALL_STQ_REPLAY BIT(53)
+
+#define SYS_IMP_APL_HID5 sys_reg(3, 0, 15, 5, 0)
+#define HID5_DISABLE_FILL_2C_MERGE BIT(61)
+
+#define SYS_IMP_APL_HID6 sys_reg(3, 0, 15, 6, 0)
+#define HID6_UP_CRD_TKN_INIT_C2(x) (((unsigned long)x) << 5)
+#define HID6_UP_CRD_TKN_INIT_C2_MASK (0x1FUL << 5)
+
+#define SYS_IMP_APL_HID7 sys_reg(3, 0, 15, 7, 0)
+#define HID7_FORCE_NONSPEC_IF_SPEC_FLUSH_POINTER_INVALID_AND_MP_VALID BIT(16)
+#define HID7_FORCE_NONSPEC_IF_STEPPING BIT(20)
+#define HID7_FORCE_NONSPEC_TARGET_TIMER_SEL(x) (((unsigned long)x) << 24)
+#define HID7_FORCE_NONSPEC_TARGET_TIMER_SEL_MASK (3UL << 24)
+
+#define SYS_IMP_APL_HID9 sys_reg(3, 0, 15, 9, 0)
+#define HID9_AVL_UNK17 BIT(17)
+#define HID9_TSO_ALLOW_DC_ZVA_WC BIT(26)
+#define HID9_TSO_SERIALIZE_VLD_MICROOPS BIT(29)
+#define HID9_FIX_BUG_51667805 BIT(48)
+#define HID9_FIX_BUG_55719865 BIT(55)
+
+#define SYS_IMP_APL_EHID9 sys_reg(3, 0, 15, 9, 1)
+#define EHID9_DEV_2_THROTTLE_ENABLE BIT(5)
+#define EHID9_DEV_2_THROTTLE_LIMIT_MASK GENMASK(11, 6)
+#define EHID9_DEV_2_THROTTLE_LIMIT(x) (((unsigned long)x) << 6)
+
+#define SYS_IMP_APL_HID10 sys_reg(3, 0, 15, 10, 0)
+#define SYS_IMP_APL_EHID10 sys_reg(3, 0, 15, 10, 1)
+#define HID10_FORCE_WAIT_STATE_DRAIN_UC BIT(32)
+#define HID10_DISABLE_ZVA_TEMPORAL_TSO BIT(49)
+
+#define SYS_IMP_APL_HID11 sys_reg(3, 0, 15, 11, 0)
+#define HID11_ENABLE_FIX_UC_55719865 BIT(15)
+#define HID11_DISABLE_LD_NT_WIDGET BIT(59)
+
+#define SYS_IMP_APL_HID13 sys_reg(3, 0, 15, 14, 0)
+#define HID13_POST_OFF_CYCLES(x) (((unsigned long)x))
+#define HID13_POST_OFF_CYCLES_MASK GENMASK(6, 0)
+#define HID13_POST_ON_CYCLES(x) (((unsigned long)x) << 7)
+#define HID13_POST_ON_CYCLES_MASK GENMASK(13, 7)
+#define HID13_PRE_CYCLES(x) (((unsigned long)x) << 14)
+#define HID13_PRE_CYCLES_MASK GENMASK(17, 14)
+#define HID13_GROUP0_FF1_DELAY(x) (((unsigned long)x) << 26)
+#define HID13_GROUP0_FF1_DELAY_MASK GENMASK(29, 26)
+#define HID13_GROUP0_FF2_DELAY(x) (((unsigned long)x) << 30)
+#define HID13_GROUP0_FF2_DELAY_MASK GENMASK(33, 30)
+#define HID13_GROUP0_FF3_DELAY(x) (((unsigned long)x) << 34)
+#define HID13_GROUP0_FF3_DELAY_MASK GENMASK(37, 34)
+#define HID13_GROUP0_FF4_DELAY(x) (((unsigned long)x) << 38)
+#define HID13_GROUP0_FF4_DELAY_MASK GENMASK(41, 38)
+#define HID13_GROUP0_FF5_DELAY(x) (((unsigned long)x) << 42)
+#define HID13_GROUP0_FF5_DELAY_MASK GENMASK(45, 42)
+#define HID13_GROUP0_FF6_DELAY(x) (((unsigned long)x) << 46)
+#define HID13_GROUP0_FF6_DELAY_MASK GENMASK(49, 46)
+#define HID13_GROUP0_FF7_DELAY(x) (((unsigned long)x) << 50)
+#define HID13_GROUP0_FF7_DELAY_MASK GENMASK(53, 50)
+#define HID13_RESET_CYCLES(x) (((unsigned long)x) << 60)
+#define HID13_RESET_CYCLES_MASK (0xFUL << 60)
+
+#define SYS_IMP_APL_HID16 sys_reg(3, 0, 15, 15, 2)
+#define HID16_AVL_UNK12 BIT(12)
+#define HID16_SPAREBIT0 BIT(56)
+#define HID16_SPAREBIT3 BIT(59)
+#define HID16_ENABLE_MPX_PICK_45 BIT(61)
+#define HID16_ENABLE_MP_CYCLONE_7 BIT(62)
+
+#define SYS_IMP_APL_HID18 sys_reg(3, 0, 15, 11, 2)
+#define HID18_HVC_SPECULATION_DISABLE BIT(14)
+#define HID18_AVL_UNK27 BIT(27)
+#define HID18_AVL_UNK29 BIT(29)
+#define HID18_SPAREBIT7 BIT(39)
+#define HID18_SPAREBIT17 BIT(49)
+
+#define SYS_IMP_APL_EHID18 sys_reg(3, 0, 15, 11, 3)
+#define EHID18_BLZ_UNK34 BIT(34)
+
+#define SYS_IMP_APL_EHID20 sys_reg(3, 0, 15, 1, 2)
+#define EHID20_TRAP_SMC BIT(8)
+#define EHID20_FORCE_NONSPEC_IF_OLDEST_REDIR_VALID_AND_OLDER BIT(15)
+#define EHID20_FORCE_NONSPEC_IF_SPEC_FLUSH_POINTER_NE_BLK_RTR_POINTER BIT(16)
+#define EHID20_FORCE_NONSPEC_TARGETED_TIMER_SEL(x) (((unsigned long)x) << 21)
+#define EHID20_FORCE_NONSPEC_TARGETED_TIMER_SEL_MASK (3UL << 21)
+
+#define SYS_IMP_APL_HID21 sys_reg(3, 0, 15, 1, 3)
+#define HID21_ENABLE_LDREX_FILL_REPLY BIT(19)
+#define HID21_LDQ_RTR_WAIT_FOR_OLD_ST_REL_COMPLETION BIT(33)
+#define HID21_DISABLE_CDP_REPLY_PURGED_TRANSACTION BIT(34)
+#define HID21_AVL_UNK52 BIT(52)
+
+#define SYS_IMP_APL_HID26 sys_reg(3, 0, 15, 0, 3)
+#define HID26_GROUP1_OFFSET(x) (((unsigned long)x) << 0)
+#define HID26_GROUP1_OFFSET_MASK (0xffUL << 0)
+#define HID26_GROUP2_OFFSET(x) (((unsigned long)x) << 36)
+#define HID26_GROUP2_OFFSET_MASK (0xffUL << 36)
+
+#define SYS_IMP_APL_HID27 sys_reg(3, 0, 15, 0, 4)
+#define HID27_GROUP3_OFFSET(x) (((unsigned long)x) << 8)
+#define HID27_GROUP3_OFFSET_MASK (0xffUL << 8)
+
+#define SYS_IMP_APL_PMCR0 sys_reg(3, 1, 15, 0, 0)
+#define PMCR0_CNT_EN_MASK (MASK(8) | GENMASK(33, 32))
+#define PMCR0_IMODE_OFF (0 << 8)
+#define PMCR0_IMODE_PMI (1 << 8)
+#define PMCR0_IMODE_AIC (2 << 8)
+#define PMCR0_IMODE_HALT (3 << 8)
+#define PMCR0_IMODE_FIQ (4 << 8)
+#define PMCR0_IMODE_MASK (7 << 8)
+#define PMCR0_IACT (BIT(11))
+#define PMCR0_PMI_SHIFT 12
+#define PMCR0_CNT_MASK (PMCR0_CNT_EN_MASK | (PMCR0_CNT_EN_MASK << PMCR0_PMI_SHIFT))
+
+#define SYS_IMP_APL_PMCR1 sys_reg(3, 1, 15, 1, 0)
+#define SYS_IMP_APL_PMCR2 sys_reg(3, 1, 15, 2, 0)
+#define SYS_IMP_APL_PMCR3 sys_reg(3, 1, 15, 3, 0)
+#define SYS_IMP_APL_PMCR4 sys_reg(3, 1, 15, 4, 0)
+
+#define SYS_IMP_APL_PMESR0 sys_reg(3, 1, 15, 5, 0)
+#define SYS_IMP_APL_PMESR1 sys_reg(3, 1, 15, 6, 0)
+
+#define SYS_IMP_APL_PMSR sys_reg(3, 1, 15, 13, 0)
+
+#define SYS_IMP_APL_PMC0 sys_reg(3, 2, 15, 0, 0)
+#define SYS_IMP_APL_PMC1 sys_reg(3, 2, 15, 1, 0)
+#define SYS_IMP_APL_PMC2 sys_reg(3, 2, 15, 2, 0)
+#define SYS_IMP_APL_PMC3 sys_reg(3, 2, 15, 3, 0)
+#define SYS_IMP_APL_PMC4 sys_reg(3, 2, 15, 4, 0)
+#define SYS_IMP_APL_PMC5 sys_reg(3, 2, 15, 5, 0)
+#define SYS_IMP_APL_PMC6 sys_reg(3, 2, 15, 6, 0)
+#define SYS_IMP_APL_PMC7 sys_reg(3, 2, 15, 7, 0)
+#define SYS_IMP_APL_PMC8 sys_reg(3, 2, 15, 9, 0)
+#define SYS_IMP_APL_PMC9 sys_reg(3, 2, 15, 10, 0)
+
+#define SYS_IMP_APL_LSU_ERR_STS sys_reg(3, 3, 15, 0, 0)
+#define SYS_IMP_APL_E_LSU_ERR_STS sys_reg(3, 3, 15, 2, 0)
+
+#define SYS_IMP_APL_L2C_ERR_STS sys_reg(3, 3, 15, 8, 0)
+
+#define L2C_ERR_STS_RECURSIVE_FAULT BIT(1)
+#define L2C_ERR_STS_ACCESS_FAULT BIT(7)
+#define L2C_ERR_STS_ENABLE_W1C BIT(56)
+
+#define SYS_IMP_APL_L2C_ERR_ADR sys_reg(3, 3, 15, 9, 0)
+#define SYS_IMP_APL_L2C_ERR_INF sys_reg(3, 3, 15, 10, 0)
+
+#define SYS_IMP_APL_FED_ERR_STS sys_reg(3, 4, 15, 0, 0)
+#define SYS_IMP_APL_E_FED_ERR_STS sys_reg(3, 4, 15, 0, 2)
+
+#define SYS_IMP_APL_MMU_ERR_STS sys_reg(3, 6, 15, 0, 0)
+#define SYS_IMP_APL_E_MMU_ERR_STS sys_reg(3, 6, 15, 2, 0)
+
+/* ACC/CYC Registers */
+#define SYS_IMP_APL_ACC_CFG sys_reg(3, 5, 15, 4, 0)
+#define ACC_CFG_BP_SLEEP(x) (((unsigned long)x) << 2)
+#define ACC_CFG_BP_SLEEP_MASK (3UL << 2)
+
+#define SYS_IMP_APL_CYC_OVRD sys_reg(3, 5, 15, 5, 0)
+#define CYC_OVRD_FIQ_MODE(x) (((unsigned long)x) << 20)
+#define CYC_OVRD_FIQ_MODE_MASK (3UL << 20)
+#define CYC_OVRD_IRQ_MODE(x) (((unsigned long)x) << 22)
+#define CYC_OVRD_IRQ_MODE_MASK (3UL << 22)
+#define CYC_OVRD_WFI_MODE(x) (((unsigned long)x) << 24)
+#define CYC_OVRD_WFI_MODE_MASK (3UL << 24)
+#define CYC_OVRD_DISABLE_WFI_RET BIT(0)
+
+#define SYS_IMP_APL_UPMCR0 sys_reg(3, 7, 15, 0, 4)
+#define UPMCR0_IMODE_OFF (0 << 16)
+#define UPMCR0_IMODE_AIC (2 << 16)
+#define UPMCR0_IMODE_HALT (3 << 16)
+#define UPMCR0_IMODE_FIQ (4 << 16)
+#define UPMCR0_IMODE_MASK (7 << 16)
+
+#define SYS_IMP_APL_UPMSR sys_reg(3, 7, 15, 6, 4)
+#define UPMSR_IACT (BIT(0))
+
+/* SPRR and GXF registers */
+#define SYS_IMP_APL_SPRR_CONFIG_EL1 sys_reg(3, 6, 15, 1, 0)
+#define SPRR_CONFIG_EN BIT(0)
+#define SPRR_CONFIG_LOCK_CONFIG BIT(1)
+#define SPRR_CONFIG_LOCK_PERM BIT(4)
+#define SPRR_CONFIG_LOCK_KERNEL_PERM BIT(5)
+
+#define SYS_IMP_APL_GXF_CONFIG_EL1 sys_reg(3, 6, 15, 1, 2)
+#define GXF_CONFIG_EN BIT(0)
+
+#define SYS_IMP_APL_GXF_STATUS_EL1 sys_reg(3, 6, 15, 8, 0)
+#define GXF_STATUS_GUARDED BIT(0)
+
+#define SYS_IMP_APL_GXF_ABORT_EL1 sys_reg(3, 6, 15, 8, 2)
+#define SYS_IMP_APL_GXF_ENTER_EL1 sys_reg(3, 6, 15, 8, 1)
+
+#define SYS_IMP_APL_GXF_ABORT_EL12 sys_reg(3, 6, 15, 15, 3)
+#define SYS_IMP_APL_GXF_ENTER_EL12 sys_reg(3, 6, 15, 15, 2)
+
+#define SYS_IMP_APL_SPRR_PERM_EL0 sys_reg(3, 6, 15, 1, 5)
+#define SYS_IMP_APL_SPRR_PERM_EL1 sys_reg(3, 6, 15, 1, 6)
+#define SYS_IMP_APL_SPRR_PERM_EL02 sys_reg(3, 4, 15, 5, 2)
+#define SYS_IMP_APL_SPRR_PERM_EL12 sys_reg(3, 6, 15, 15, 7)
+
+#define SYS_IMP_APL_TPIDR_GL1 sys_reg(3, 6, 15, 10, 1)
+#define SYS_IMP_APL_VBAR_GL1 sys_reg(3, 6, 15, 10, 2)
+#define SYS_IMP_APL_SPSR_GL1 sys_reg(3, 6, 15, 10, 3)
+#define SYS_IMP_APL_ASPSR_GL1 sys_reg(3, 6, 15, 10, 4)
+#define SYS_IMP_APL_ESR_GL1 sys_reg(3, 6, 15, 10, 5)
+#define SYS_IMP_APL_ELR_GL1 sys_reg(3, 6, 15, 10, 6)
+#define SYS_IMP_APL_FAR_GL1 sys_reg(3, 6, 15, 10, 7)
+
+#define SYS_IMP_APL_VBAR_GL12 sys_reg(3, 6, 15, 9, 2)
+#define SYS_IMP_APL_SPSR_GL12 sys_reg(3, 6, 15, 9, 3)
+#define SYS_IMP_APL_ASPSR_GL12 sys_reg(3, 6, 15, 9, 4)
+#define SYS_IMP_APL_ESR_GL12 sys_reg(3, 6, 15, 9, 5)
+#define SYS_IMP_APL_ELR_GL12 sys_reg(3, 6, 15, 9, 6)
+#define SYS_IMP_APL_SP_GL12 sys_reg(3, 6, 15, 10, 0)
+
+#define SYS_IMP_APL_AFSR1_GL1 sys_reg(3, 6, 15, 0, 1)
+
+/* PAuth registers */
+#define SYS_IMP_APL_APVMKEYLO_EL2 sys_reg(3, 6, 15, 14, 4)
+#define SYS_IMP_APL_APVMKEYHI_EL2 sys_reg(3, 6, 15, 14, 5)
+#define SYS_IMP_APL_APSTS_EL12 sys_reg(3, 6, 15, 14, 7)
+
+#define SYS_IMP_APL_APCTL_EL1 sys_reg(3, 4, 15, 0, 4)
+#define SYS_IMP_APL_APCTL_EL2 sys_reg(3, 6, 15, 12, 2)
+#define SYS_IMP_APL_APCTL_EL12 sys_reg(3, 6, 15, 15, 0)
+
+/* VM registers */
+#define SYS_IMP_APL_VM_TMR_FIQ_ENA_EL2 sys_reg(3, 5, 15, 1, 3)
+#define VM_TMR_FIQ_ENA_ENA_V BIT(0)
+#define VM_TMR_FIQ_ENA_ENA_P BIT(1)
+
+/* IPI registers */
+#define SYS_IMP_APL_IPI_RR_LOCAL_EL1 sys_reg(3, 5, 15, 0, 0)
+#define SYS_IMP_APL_IPI_RR_GLOBAL_EL1 sys_reg(3, 5, 15, 0, 1)
+
+#define SYS_IMP_APL_IPI_SR_EL1 sys_reg(3, 5, 15, 1, 1)
+#define IPI_SR_PENDING BIT(0)
+
+#define SYS_IMP_APL_IPI_CR_EL1 sys_reg(3, 5, 15, 3, 1)
diff --git a/tools/src/cpufreq.c b/tools/src/cpufreq.c
new file mode 100644
index 0000000..e7c4f41
--- /dev/null
+++ b/tools/src/cpufreq.c
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "cpufreq.h"
+#include "adt.h"
+#include "soc.h"
+#include "utils.h"
+
+#define CLUSTER_PSTATE 0x20
+#define CLUSTER_CONFIG 0x6b8
+
+#define CLUSTER_PSTATE_BUSY BIT(31)
+#define CLUSTER_PSTATE_SET BIT(25)
+#define CLUSTER_PSTATE_DESIRED2 GENMASK(15, 12)
+#define CLUSTER_PSTATE_DESIRED1 GENMASK(3, 0)
+
+#define CLUSTER_CONFIG_ENABLE BIT(63)
+#define CLUSTER_CONFIG_DVMR1 BIT(32)
+#define CLUSTER_CONFIG_DVMR2 BIT(31)
+
+#define CLUSTER_SWITCH_TIMEOUT 100
+
+struct cluster_t {
+ const char *name;
+ u64 base;
+ bool dvmr;
+ uint32_t boot_pstate;
+};
+
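+/*
+ * Bring one cluster to its boot P-state: ensure the cluster-level enable
+ * (and, where applicable, DVMR) bits are set, then request the target
+ * state through both DESIRED fields plus the SET bit and poll until the
+ * BUSY flag clears.
+ */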
+int cpufreq_init_cluster(const struct cluster_t *cluster)
+{
+ u64 enable = CLUSTER_CONFIG_ENABLE;
+ if (cluster->dvmr)
+ enable |= CLUSTER_CONFIG_DVMR1 | CLUSTER_CONFIG_DVMR2;
+
+ u64 val = read64(cluster->base + CLUSTER_CONFIG);
+ if ((val & enable) != enable) {
+ printf("cpufreq: Configuring cluster %s (dvmr: %d)\n", cluster->name, cluster->dvmr);
+ write64(cluster->base + CLUSTER_CONFIG, val | enable);
+ }
+
+ val = read64(cluster->base + CLUSTER_PSTATE);
+
+ if (FIELD_GET(CLUSTER_PSTATE_DESIRED1, val) != cluster->boot_pstate) {
+ val &= CLUSTER_PSTATE_DESIRED1 | CLUSTER_PSTATE_DESIRED2;
+ val |= CLUSTER_PSTATE_SET | FIELD_PREP(CLUSTER_PSTATE_DESIRED1, cluster->boot_pstate) |
+ FIELD_PREP(CLUSTER_PSTATE_DESIRED2, cluster->boot_pstate);
+ printf("cpufreq: Switching cluster %s to P-State %d\n", cluster->name,
+ cluster->boot_pstate);
+ write64(cluster->base + CLUSTER_PSTATE, val);
+ if (poll32(cluster->base + CLUSTER_PSTATE, CLUSTER_PSTATE_BUSY, 0, CLUSTER_SWITCH_TIMEOUT) <
+ 0) {
+ printf("cpufreq: Timed out waiting for cluster %s P-State switch\n", cluster->name);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static const struct cluster_t t8103_clusters[] = {
+ {"ECPU", 0x210e20000, false, 5},
+ {"PCPU", 0x211e20000, true, 7},
+ {},
+};
+
+static const struct cluster_t t6000_clusters[] = {
+ {"ECPU0", 0x210e20000, false, 5},
+ {"PCPU0", 0x211e20000, false, 7},
+ {"PCPU1", 0x212e20000, false, 7},
+ {},
+};
+
+static const struct cluster_t t6002_clusters[] = {
+ {"ECPU0", 0x0210e20000, false, 5},
+ {"PCPU0", 0x0211e20000, false, 7},
+ {"PCPU1", 0x0212e20000, false, 7},
+ {"ECPU1", 0x2210e20000, false, 5},
+ {"PCPU2", 0x2211e20000, false, 7},
+ {"PCPU3", 0x2212e20000, false, 7},
+ {},
+};
+
+static const struct cluster_t t8112_clusters[] = {
+ {"ECPU", 0x210e20000, false, 7},
+ {"PCPU", 0x211e20000, true, 6},
+ {},
+};
+
+int cpufreq_init(void)
+{
+ printf("cpufreq: Initializing clusters\n");
+
+ const struct cluster_t *cluster;
+
+ switch (chip_id) {
+ case T8103:
+ cluster = t8103_clusters;
+ break;
+ case T6000:
+ case T6001:
+ cluster = t6000_clusters;
+ break;
+ case T6002:
+ cluster = t6002_clusters;
+ break;
+ case T8112:
+ cluster = t8112_clusters;
+ break;
+ default:
+ printf("cpufreq: Chip 0x%x is unsupported\n", chip_id);
+ return -1;
+ }
+
+ bool err = false;
+ while (cluster->base) {
+ err |= cpufreq_init_cluster(cluster++);
+ }
+
+ return err ? -1 : 0;
+}
diff --git a/tools/src/cpufreq.h b/tools/src/cpufreq.h
new file mode 100644
index 0000000..7710f20
--- /dev/null
+++ b/tools/src/cpufreq.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef CPUFREQ_H
+#define CPUFREQ_H
+
+int cpufreq_init(void);
+
+#endif
diff --git a/tools/src/dapf.c b/tools/src/dapf.c
new file mode 100644
index 0000000..cbeb576
--- /dev/null
+++ b/tools/src/dapf.c
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "dapf.h"
+#include "adt.h"
+#include "assert.h"
+#include "malloc.h"
+#include "memory.h"
+#include "string.h"
+#include "utils.h"
+
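+/*
+ * DAPF (DMA address filter) setup: each config entry taken from the ADT
+ * programs one 0x40-byte filter instance with a start/end address window
+ * plus the r0/r4 (and, on t8110, r20) control words; the meaning of the
+ * unk* fields is unknown.
+ */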
+struct dapf_t8020_config {
+ u64 start;
+ u64 end;
+ u8 unk1;
+ u8 r0_hi;
+ u8 r0_lo;
+ u8 unk2;
+ u32 r4;
+} PACKED;
+
+static int dapf_init_t8020(const char *path, u64 base, int node)
+{
+ u32 length;
+ const char *prop = "filter-data-instance-0";
+ const struct dapf_t8020_config *config = adt_getprop(adt, node, prop, &length);
+
+ if (!config || !length || (length % sizeof(*config)) != 0) {
+ printf("dapf: Error getting ADT node %s property %s.\n", path, prop);
+ return -1;
+ }
+
+ int count = length / sizeof(*config);
+
+ for (int i = 0; i < count; i++) {
+ write32(base + 0x04, config[i].r4);
+ write64(base + 0x08, config[i].start);
+ write64(base + 0x10, config[i].end);
+ write32(base + 0x00, (config[i].r0_hi << 4) | config[i].r0_lo);
+ base += 0x40;
+ }
+ return 0;
+}
+
+struct dapf_t8110_config {
+ u64 start;
+ u64 end;
+ u32 r20;
+ u32 unk1;
+ u32 r4;
+ u32 unk2[5];
+ u8 unk3;
+ u8 r0_hi;
+ u8 r0_lo;
+ u8 unk4;
+} PACKED;
+
+static int dapf_init_t8110(const char *path, u64 base, int node)
+{
+ u32 length;
+ const char *prop = "dapf-instance-0";
+ const struct dapf_t8110_config *config = adt_getprop(adt, node, prop, &length);
+
+ if (!config || !length) {
+ printf("dapf: Error getting ADT node %s property %s.\n", path, prop);
+ return -1;
+ }
+
+ if (length % sizeof(*config) != 0) {
+ printf("dapf: Invalid length for %s property %s\n", path, prop);
+ return -1;
+ }
+
+ int count = length / sizeof(*config);
+
+ for (int i = 0; i < count; i++) {
+ write32(base + 0x04, config[i].r4);
+ write64(base + 0x08, config[i].start);
+ write64(base + 0x10, config[i].end);
+ write32(base + 0x00, (config[i].r0_hi << 4) | config[i].r0_lo);
+ write32(base + 0x20, config[i].r20);
+ base += 0x40;
+ }
+ return 0;
+}
+
+int dapf_init(const char *path)
+{
+ int ret;
+ int dart_path[8];
+ int node = adt_path_offset_trace(adt, path, dart_path);
+ if (node < 0) {
+ printf("dapf: Error getting DAPF %s node.\n", path);
+ return -1;
+ }
+
+ u64 base;
+ if (adt_get_reg(adt, dart_path, "reg", 1, &base, NULL) < 0) {
+ printf("dapf: Error getting DAPF %s base address.\n", path);
+ return -1;
+ }
+
+ if (adt_is_compatible(adt, node, "dart,t8020")) {
+ ret = dapf_init_t8020(path, base, node);
+ } else if (adt_is_compatible(adt, node, "dart,t6000")) {
+ ret = dapf_init_t8020(path, base, node);
+ } else if (adt_is_compatible(adt, node, "dart,t8110")) {
+ ret = dapf_init_t8110(path, base, node);
+ } else {
+ printf("dapf: DAPF %s at 0x%lx is of an unknown type\n", path, base);
+ return -1;
+ }
+
+ if (!ret)
+ printf("dapf: Initialized %s\n", path);
+
+ return ret;
+}
+
+const char *dapf_paths[] = {"/arm-io/dart-aop", "/arm-io/dart-mtp", "/arm-io/dart-pmp", NULL};
+
+int dapf_init_all(void)
+{
+ int ret = 0;
+ int count = 0;
+
+ for (const char **path = dapf_paths; *path; path++) {
+ if (adt_path_offset(adt, *path) < 0)
+ continue;
+
+ if (dapf_init(*path) < 0) {
+ ret = -1;
+ }
+ count += 1;
+ }
+
+ return ret ? ret : count;
+}
diff --git a/tools/src/dapf.h b/tools/src/dapf.h
new file mode 100644
index 0000000..2a7e1bf
--- /dev/null
+++ b/tools/src/dapf.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef DAPF_H
+#define DAPF_H
+
+int dapf_init_all(void);
+int dapf_init(const char *path);
+
+#endif
diff --git a/tools/src/dart.c b/tools/src/dart.c
new file mode 100644
index 0000000..96c4261
--- /dev/null
+++ b/tools/src/dart.c
@@ -0,0 +1,714 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "dart.h"
+#include "adt.h"
+#include "assert.h"
+#include "devicetree.h"
+#include "malloc.h"
+#include "memory.h"
+#include "string.h"
+#include "utils.h"
+
+#include "libfdt/libfdt.h"
+
+#define DART_T8020_CONFIG 0x60
+#define DART_T8020_CONFIG_LOCK BIT(15)
+
+#define DART_T8020_ERROR 0x40
+#define DART_T8020_ERROR_STREAM_SHIFT 24
+#define DART_T8020_ERROR_STREAM_MASK 0xf
+#define DART_T8020_ERROR_CODE_MASK 0xffffff
+#define DART_T8020_ERROR_FLAG BIT(31)
+#define DART_T8020_ERROR_READ_FAULT BIT(4)
+#define DART_T8020_ERROR_WRITE_FAULT BIT(3)
+#define DART_T8020_ERROR_NO_PTE BIT(2)
+#define DART_T8020_ERROR_NO_PMD BIT(1)
+#define DART_T8020_ERROR_NO_TTBR BIT(0)
+
+#define DART_T8020_STREAM_SELECT 0x34
+
+#define DART_T8020_STREAM_COMMAND 0x20
+#define DART_T8020_STREAM_COMMAND_BUSY BIT(2)
+#define DART_T8020_STREAM_COMMAND_INVALIDATE BIT(20)
+
+#define DART_T8020_STREAM_COMMAND_BUSY_TIMEOUT 100
+
+#define DART_T8020_STREAM_REMAP 0x80
+
+#define DART_T8020_ERROR_ADDR_HI 0x54
+#define DART_T8020_ERROR_ADDR_LO 0x50
+
+#define DART_T8020_ENABLED_STREAMS 0xfc
+
+#define DART_T8020_TCR_OFF 0x100
+#define DART_T8020_TCR_TRANSLATE_ENABLE BIT(7)
+#define DART_T8020_TCR_BYPASS_DART BIT(8)
+#define DART_T8020_TCR_BYPASS_DAPF BIT(12)
+
+#define DART_T8020_TTBR_OFF 0x200
+#define DART_T8020_TTBR_VALID BIT(31)
+#define DART_T8020_TTBR_ADDR GENMASK(30, 0)
+#define DART_T8020_TTBR_SHIFT 12
+
+#define DART_PTE_OFFSET_SHIFT 14
+#define DART_PTE_SP_START GENMASK(63, 52)
+#define DART_PTE_SP_END GENMASK(51, 40)
+#define DART_T8020_PTE_OFFSET GENMASK(39, 14)
+#define DART_T6000_PTE_OFFSET GENMASK(39, 10)
+#define DART_T8020_PTE_DISABLE_SP BIT(1)
+#define DART_T6000_PTE_REALTIME BIT(1)
+#define DART_PTE_VALID BIT(0)
+
+#define DART_T8110_TTBR_OFF 0x1400
+#define DART_T8110_TTBR_VALID BIT(0)
+#define DART_T8110_TTBR_ADDR GENMASK(29, 2)
+#define DART_T8110_TTBR_SHIFT 14
+
+#define DART_T8110_TCR_OFF 0x1000
+#define DART_T8110_TCR_REMAP GENMASK(11, 8)
+#define DART_T8110_TCR_REMAP_EN BIT(7)
+#define DART_T8110_TCR_BYPASS_DAPF BIT(2)
+#define DART_T8110_TCR_BYPASS_DART BIT(1)
+#define DART_T8110_TCR_TRANSLATE_ENABLE BIT(0)
+
+#define DART_T8110_TLB_CMD 0x80
+#define DART_T8110_TLB_CMD_BUSY BIT(31)
+#define DART_T8110_TLB_CMD_OP GENMASK(10, 8)
+#define DART_T8110_TLB_CMD_OP_FLUSH_ALL 0
+#define DART_T8110_TLB_CMD_OP_FLUSH_SID 1
+#define DART_T8110_TLB_CMD_STREAM GENMASK(7, 0)
+
+#define DART_T8110_PROTECT 0x200
+#define DART_T8110_PROTECT_TTBR_TCR BIT(0)
+
+#define DART_T8110_ENABLE_STREAMS 0xc00
+#define DART_T8110_DISABLE_STREAMS 0xc20
+
+#define DART_MAX_TTBR_COUNT 4
+
+#define DART_TCR(dart) (dart->regs + dart->params->tcr_off + 4 * dart->device)
+#define DART_TTBR(dart, idx) \
+ (dart->regs + dart->params->ttbr_off + 4 * dart->params->ttbr_count * dart->device + 4 * idx)
+
+struct dart_params {
+ int sid_count;
+
+ u64 pte_flags;
+ u64 offset_mask;
+
+ u64 tcr_enabled;
+ u64 tcr_disabled;
+ u64 tcr_off;
+
+ u64 ttbr_valid;
+ u64 ttbr_addr;
+ u64 ttbr_shift;
+ u64 ttbr_off;
+ int ttbr_count;
+
+ void (*tlb_invalidate)(dart_dev_t *dart);
+};
+
+struct dart_dev {
+ bool locked;
+ bool keep;
+ uintptr_t regs;
+ u8 device;
+ enum dart_type_t type;
+ const struct dart_params *params;
+ u64 vm_base;
+
+ u64 *l1[DART_MAX_TTBR_COUNT];
+};
+
+static void dart_t8020_tlb_invalidate(dart_dev_t *dart)
+{
+ write32(dart->regs + DART_T8020_STREAM_SELECT, BIT(dart->device));
+
+ /* ensure that the DART can see the updated pagetables before invalidating */
+ dma_wmb();
+ write32(dart->regs + DART_T8020_STREAM_COMMAND, DART_T8020_STREAM_COMMAND_INVALIDATE);
+
+ if (poll32(dart->regs + DART_T8020_STREAM_COMMAND, DART_T8020_STREAM_COMMAND_BUSY, 0, 100))
+ printf("dart: DART_T8020_STREAM_COMMAND_BUSY did not clear.\n");
+}
+
+static void dart_t8110_tlb_invalidate(dart_dev_t *dart)
+{
+ /* ensure that the DART can see the updated pagetables before invalidating */
+ dma_wmb();
+ write32(dart->regs + DART_T8110_TLB_CMD,
+ FIELD_PREP(DART_T8110_TLB_CMD_OP, DART_T8110_TLB_CMD_OP_FLUSH_SID) |
+ FIELD_PREP(DART_T8110_TLB_CMD_STREAM, dart->device));
+
+    if (poll32(dart->regs + DART_T8110_TLB_CMD, DART_T8110_TLB_CMD_BUSY, 0, 100))
+ printf("dart: DART_T8110_TLB_CMD_BUSY did not clear.\n");
+}
+
+const struct dart_params dart_t8020 = {
+ .sid_count = 32,
+ .pte_flags = FIELD_PREP(DART_PTE_SP_END, 0xfff) | FIELD_PREP(DART_PTE_SP_START, 0) |
+ DART_T8020_PTE_DISABLE_SP | DART_PTE_VALID,
+ .offset_mask = DART_T8020_PTE_OFFSET,
+ .tcr_enabled = DART_T8020_TCR_TRANSLATE_ENABLE,
+ .tcr_disabled = DART_T8020_TCR_BYPASS_DAPF | DART_T8020_TCR_BYPASS_DART,
+ .tcr_off = DART_T8020_TCR_OFF,
+ .ttbr_valid = DART_T8020_TTBR_VALID,
+ .ttbr_addr = DART_T8020_TTBR_ADDR,
+ .ttbr_shift = DART_T8020_TTBR_SHIFT,
+ .ttbr_off = DART_T8020_TTBR_OFF,
+ .ttbr_count = 4,
+ .tlb_invalidate = dart_t8020_tlb_invalidate,
+};
+
+const struct dart_params dart_t6000 = {
+ .sid_count = 32,
+ .pte_flags =
+ FIELD_PREP(DART_PTE_SP_END, 0xfff) | FIELD_PREP(DART_PTE_SP_START, 0) | DART_PTE_VALID,
+ .offset_mask = DART_T6000_PTE_OFFSET,
+ .tcr_enabled = DART_T8020_TCR_TRANSLATE_ENABLE,
+ .tcr_disabled = DART_T8020_TCR_BYPASS_DAPF | DART_T8020_TCR_BYPASS_DART,
+ .tcr_off = DART_T8020_TCR_OFF,
+ .ttbr_valid = DART_T8020_TTBR_VALID,
+ .ttbr_addr = DART_T8020_TTBR_ADDR,
+ .ttbr_shift = DART_T8020_TTBR_SHIFT,
+ .ttbr_off = DART_T8020_TTBR_OFF,
+ .ttbr_count = 4,
+ .tlb_invalidate = dart_t8020_tlb_invalidate,
+};
+
+const struct dart_params dart_t8110 = {
+ .sid_count = 256,
+ .pte_flags =
+ FIELD_PREP(DART_PTE_SP_END, 0xfff) | FIELD_PREP(DART_PTE_SP_START, 0) | DART_PTE_VALID,
+ .offset_mask = DART_T6000_PTE_OFFSET,
+ .tcr_enabled = DART_T8110_TCR_TRANSLATE_ENABLE,
+ .tcr_disabled = DART_T8110_TCR_BYPASS_DAPF | DART_T8110_TCR_BYPASS_DART,
+ .tcr_off = DART_T8110_TCR_OFF,
+ .ttbr_valid = DART_T8110_TTBR_VALID,
+ .ttbr_addr = DART_T8110_TTBR_ADDR,
+ .ttbr_shift = DART_T8110_TTBR_SHIFT,
+ .ttbr_off = DART_T8110_TTBR_OFF,
+ .ttbr_count = 1,
+ .tlb_invalidate = dart_t8110_tlb_invalidate,
+};
+
+dart_dev_t *dart_init(uintptr_t base, u8 device, bool keep_pts, enum dart_type_t type)
+{
+ dart_dev_t *dart = malloc(sizeof(*dart));
+ if (!dart)
+ return NULL;
+
+ memset(dart, 0, sizeof(*dart));
+
+ dart->regs = base;
+ dart->device = device;
+ dart->type = type;
+
+ switch (type) {
+ case DART_T8020:
+ dart->params = &dart_t8020;
+ break;
+ case DART_T8110:
+ dart->params = &dart_t8110;
+ break;
+ case DART_T6000:
+ dart->params = &dart_t6000;
+ break;
+ }
+
+ if (device >= dart->params->sid_count) {
+ printf("dart: device %d is too big for this DART type\n", device);
+ free(dart);
+ return NULL;
+ }
+
+ switch (type) {
+ case DART_T8020:
+ case DART_T6000:
+ if (read32(dart->regs + DART_T8020_CONFIG) & DART_T8020_CONFIG_LOCK)
+ dart->locked = true;
+ set32(dart->regs + DART_T8020_ENABLED_STREAMS, BIT(device & 0x1f));
+ break;
+ case DART_T8110:
+ // TODO locked dart
+ write32(dart->regs + DART_T8110_ENABLE_STREAMS + 4 * (device >> 5), BIT(device & 0x1f));
+ break;
+ }
+
+ dart->keep = keep_pts;
+
+ if (dart->locked || keep_pts) {
+ for (int i = 0; i < dart->params->ttbr_count; i++) {
+ u32 ttbr = read32(DART_TTBR(dart, i));
+ if (ttbr & dart->params->ttbr_valid)
+ dart->l1[i] =
+ (u64 *)(FIELD_GET(dart->params->ttbr_addr, ttbr) << dart->params->ttbr_shift);
+ }
+ }
+
+ for (int i = 0; i < dart->params->ttbr_count; i++) {
+ if (dart->l1[i])
+ continue;
+
+ dart->l1[i] = memalign(SZ_16K, SZ_16K);
+ if (!dart->l1[i])
+ goto error;
+ memset(dart->l1[i], 0, SZ_16K);
+
+ write32(DART_TTBR(dart, i),
+ dart->params->ttbr_valid |
+ FIELD_PREP(dart->params->ttbr_addr,
+ ((uintptr_t)dart->l1[i]) >> dart->params->ttbr_shift));
+ }
+
+ if (!dart->locked && !keep_pts)
+ write32(DART_TCR(dart), dart->params->tcr_enabled);
+
+ dart->params->tlb_invalidate(dart);
+ return dart;
+
+error:
+ if (!dart->locked)
+ free(dart->l1);
+ free(dart);
+ return NULL;
+}
+
+dart_dev_t *dart_init_adt(const char *path, int instance, int device, bool keep_pts)
+{
+ int dart_path[8];
+ int node = adt_path_offset_trace(adt, path, dart_path);
+ if (node < 0) {
+ printf("dart: Error getting DART node %s\n", path);
+ return NULL;
+ }
+
+ u64 base;
+ if (adt_get_reg(adt, dart_path, "reg", instance, &base, NULL) < 0) {
+ printf("dart: Error getting DART %s base address.\n", path);
+ return NULL;
+ }
+
+ enum dart_type_t type;
+ const char *type_s;
+
+ if (adt_is_compatible(adt, node, "dart,t8020")) {
+ type = DART_T8020;
+ type_s = "t8020";
+ } else if (adt_is_compatible(adt, node, "dart,t6000")) {
+ type = DART_T6000;
+ type_s = "t6000";
+ } else if (adt_is_compatible(adt, node, "dart,t8110")) {
+ type = DART_T8110;
+ type_s = "t8110";
+ } else {
+ printf("dart: dart %s at 0x%lx is of an unknown type\n", path, base);
+ return NULL;
+ }
+
+ dart_dev_t *dart = dart_init(base, device, keep_pts, type);
+
+ if (!dart)
+ return NULL;
+
+ printf("dart: dart %s at 0x%lx is a %s%s\n", path, base, type_s,
+ dart->locked ? " (locked)" : "");
+
+ if (adt_getprop(adt, node, "real-time", NULL)) {
+ for (int i = 0; i < dart->params->ttbr_count; i++) {
+ printf("dart: dart %s.%d.%d L1 %d is real-time at %p\n", path, instance, device, i,
+ dart->l1[i]);
+ }
+ }
+ if (ADT_GETPROP(adt, node, "vm-base", &dart->vm_base) < 0)
+ dart->vm_base = 0;
+
+ return dart;
+}
+
+void dart_lock_adt(const char *path, int instance)
+{
+ int dart_path[8];
+ int node = adt_path_offset_trace(adt, path, dart_path);
+ if (node < 0) {
+ printf("dart: Error getting DART node %s\n", path);
+ return;
+ }
+
+ u64 base;
+ if (adt_get_reg(adt, dart_path, "reg", instance, &base, NULL) < 0) {
+ printf("dart: Error getting DART %s base address.\n", path);
+ return;
+ }
+
+ if (adt_is_compatible(adt, node, "dart,t8020") || adt_is_compatible(adt, node, "dart,t6000")) {
+ if (!(read32(base + DART_T8020_CONFIG) & DART_T8020_CONFIG_LOCK))
+ set32(base + DART_T8020_CONFIG, DART_T8020_CONFIG_LOCK);
+ } else if (adt_is_compatible(adt, node, "dart,t8110")) {
+ if (!(read32(base + DART_T8110_PROTECT) & DART_T8110_PROTECT_TTBR_TCR))
+ set32(base + DART_T8110_PROTECT, DART_T8110_PROTECT_TTBR_TCR);
+ } else {
+ printf("dart: dart %s at 0x%lx is of an unknown type\n", path, base);
+ }
+}
+
+dart_dev_t *dart_init_fdt(void *dt, u32 phandle, int device, bool keep_pts)
+{
+ int node = fdt_node_offset_by_phandle(dt, phandle);
+ if (node < 0) {
+ printf("FDT: node for phandle %u not found\n", phandle);
+ return NULL;
+ }
+
+ u64 base = dt_get_address(dt, node);
+ if (!base)
+ return NULL;
+
+ enum dart_type_t type;
+ const char *type_s;
+ const char *name = fdt_get_name(dt, node, NULL);
+
+ if (fdt_node_check_compatible(dt, node, "apple,t8103-dart") == 0) {
+ type = DART_T8020;
+ type_s = "t8020";
+ } else if (fdt_node_check_compatible(dt, node, "apple,t6000-dart") == 0) {
+ type = DART_T6000;
+ type_s = "t6000";
+ } else if (fdt_node_check_compatible(dt, node, "apple,t8110-dart") == 0) {
+ type = DART_T8110;
+ type_s = "t8110";
+ } else {
+ printf("dart: dart %s at 0x%lx is of an unknown type\n", name, base);
+ return NULL;
+ }
+
+ dart_dev_t *dart = dart_init(base, device, keep_pts, type);
+
+ if (!dart)
+ return NULL;
+
+ printf("dart: dart %s at 0x%lx is a %s%s\n", name, base, type_s,
+ dart->locked ? " (locked)" : "");
+
+ return dart;
+}
+
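+/*
+ * For DARTs whose page tables iBoot preallocates, the ADT "pt-region-N"
+ * property names a physically contiguous block whose first 16K holds the
+ * L1 table. Link the following L2 tables into the L1 slots covering
+ * vm_base and publish the result as an "l2-tt-N" property, apparently for
+ * consumers further down the boot chain.
+ */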
+int dart_setup_pt_region(dart_dev_t *dart, const char *path, int device, u64 vm_base)
+{
+ int node = adt_path_offset(adt, path);
+ if (node < 0) {
+ printf("dart: Error getting DART node %s\n", path);
+ return -1;
+ }
+ char pt_region_str[24];
+ snprintf(pt_region_str, sizeof(pt_region_str), "pt-region-%d", device);
+ char l2_tt_str[24];
+ snprintf(l2_tt_str, sizeof(l2_tt_str), "l2-tt-%d", device);
+
+ const struct adt_property *pt_region = adt_get_property(adt, node, pt_region_str);
+ if (pt_region && pt_region->size == 16) {
+ u64 region[2];
+ memcpy(region, pt_region->value, sizeof(region));
+ u64 tbl_count = (region[1] - region[0]) / SZ_16K;
+ if (tbl_count > 64) {
+ printf("dart: dart %s ignoring large %s, %lu L2 tables\n", path, pt_region_str,
+ tbl_count);
+ return -1;
+ }
+ /* first index is the l1 table, cap at 2 or else macOS hates it */
+ tbl_count = min(2, tbl_count - 1);
+ u64 l2_start = region[0] + SZ_16K;
+ u64 vmstart = vm_base >> (14 + 11);
+ for (u64 index = 0; index < tbl_count; index++) {
+ int ttbr = (vmstart + index) >> 11;
+ int idx = (vmstart + index) & 0x7ff;
+ u64 l2tbl = l2_start + index * SZ_16K;
+
+ if (dart->l1[ttbr][idx] & DART_PTE_VALID) {
+ u64 off = FIELD_GET(dart->params->offset_mask, dart->l1[ttbr][idx])
+ << DART_PTE_OFFSET_SHIFT;
+ if (off != l2tbl)
+ printf("dart: unexpected L2 tbl at index:%lu. 0x%016lx != 0x%016lx\n", index,
+ off, l2tbl);
+ continue;
+ } else {
+ printf("dart: allocating L2 tbl at %d, %d to 0x%lx\n", ttbr, idx, l2tbl);
+ memset((void *)l2tbl, 0, SZ_16K);
+ }
+
+ u64 offset = FIELD_PREP(dart->params->offset_mask, l2tbl >> DART_PTE_OFFSET_SHIFT);
+ dart->l1[ttbr][idx] = offset | DART_PTE_VALID;
+ }
+
+ u64 l2_tt[2] = {region[0], tbl_count};
+ int ret = adt_setprop(adt, node, l2_tt_str, &l2_tt, sizeof(l2_tt));
+ if (ret < 0) {
+ printf("dart: failed to update '%s/%s'\n", path, l2_tt_str);
+ }
+
+ dart->params->tlb_invalidate(dart);
+ }
+
+ return 0;
+}
+
+static u64 *dart_get_l2(dart_dev_t *dart, u32 idx)
+{
+ int ttbr = idx >> 11;
+ idx &= 0x7ff;
+
+ if (dart->l1[ttbr][idx] & DART_PTE_VALID) {
+ u64 off = FIELD_GET(dart->params->offset_mask, dart->l1[ttbr][idx])
+ << DART_PTE_OFFSET_SHIFT;
+ return (u64 *)off;
+ }
+
+ u64 *tbl = memalign(SZ_16K, SZ_16K);
+ if (!tbl)
+ return NULL;
+
+ memset(tbl, 0, SZ_16K);
+
+ u64 offset = FIELD_PREP(dart->params->offset_mask, ((u64)tbl) >> DART_PTE_OFFSET_SHIFT);
+
+ dart->l1[ttbr][idx] = offset | DART_PTE_VALID;
+
+ return tbl;
+}
+
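+/*
+ * IOVA layout for the 16K-page, two-level tables used here (2048 8-byte
+ * entries per table):
+ *   bits [13:0]  page offset
+ *   bits [24:14] L2 table index
+ *   bits [35:25] L1 table index
+ *   bits [37:36] TTBR select (folded into the index that dart_get_l2()
+ *                above takes)
+ */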
+static int dart_map_page(dart_dev_t *dart, uintptr_t iova, uintptr_t paddr)
+{
+ u32 l1_index = (iova >> 25) & 0x1fff;
+ u32 l2_index = (iova >> 14) & 0x7ff;
+
+ u64 *l2 = dart_get_l2(dart, l1_index);
+ if (!l2) {
+ printf("dart: couldn't create l2 for iova %lx\n", iova);
+ return -1;
+ }
+
+ if (l2[l2_index] & DART_PTE_VALID) {
+ printf("dart: iova %lx already has a valid PTE: %lx\n", iova, l2[l2_index]);
+ return -1;
+ }
+
+ u64 offset = FIELD_PREP(dart->params->offset_mask, paddr >> DART_PTE_OFFSET_SHIFT);
+
+ l2[l2_index] = offset | dart->params->pte_flags;
+
+ return 0;
+}
+
+int dart_map(dart_dev_t *dart, uintptr_t iova, void *bfr, size_t len)
+{
+ uintptr_t paddr = (uintptr_t)bfr;
+ u64 offset = 0;
+
+ if (len % SZ_16K)
+ return -1;
+ if (paddr % SZ_16K)
+ return -1;
+ if (iova % SZ_16K)
+ return -1;
+
+ while (offset < len) {
+ int ret = dart_map_page(dart, iova + offset, paddr + offset);
+
+ if (ret) {
+ dart_unmap(dart, iova, offset);
+ return ret;
+ }
+
+ offset += SZ_16K;
+ }
+
+ dart->params->tlb_invalidate(dart);
+ return 0;
+}
+
+static void dart_unmap_page(dart_dev_t *dart, uintptr_t iova)
+{
+ u32 ttbr = (iova >> 36) & 0x3;
+ u32 l1_index = (iova >> 25) & 0x7ff;
+ u32 l2_index = (iova >> 14) & 0x7ff;
+
+ if (!(dart->l1[ttbr][l1_index] & DART_PTE_VALID))
+ return;
+
+ u64 *l2 = dart_get_l2(dart, l1_index);
+ l2[l2_index] = 0;
+}
+
+void dart_unmap(dart_dev_t *dart, uintptr_t iova, size_t len)
+{
+ if (len % SZ_16K)
+ return;
+ if (iova % SZ_16K)
+ return;
+
+ while (len) {
+ dart_unmap_page(dart, iova);
+
+ len -= SZ_16K;
+ iova += SZ_16K;
+ }
+
+ dart->params->tlb_invalidate(dart);
+}
+
+void dart_free_l2(dart_dev_t *dart, uintptr_t iova)
+{
+ if (iova & ((1 << 25) - 1)) {
+ printf("dart: %08lx is not at the start of L2 table\n", iova);
+ return;
+ }
+
+ u32 ttbr = (iova >> 36) & 0x3;
+ u32 l1_index = (iova >> 25) & 0x7ff;
+
+ if (!(dart->l1[ttbr][l1_index] & DART_PTE_VALID))
+ return;
+
+ u64 *l2 = dart_get_l2(dart, l1_index);
+
+ for (u32 idx = 0; idx < 2048; idx++) {
+ if (l2[idx] & DART_PTE_VALID) {
+ printf("dart: %08lx is still mapped\n", iova + (idx << 14));
+ return;
+ }
+ }
+ dart->l1[ttbr][l1_index] = 0;
+ free(l2);
+}
+
+static void *dart_translate_internal(dart_dev_t *dart, uintptr_t iova, int silent)
+{
+ u32 ttbr = (iova >> 36) & 0x3;
+ u32 l1_index = (iova >> 25) & 0x7ff;
+
+ if ((int)ttbr >= dart->params->ttbr_count) {
+ printf("dart[%lx %u]: ttbr out of range: %d\n", dart->regs, dart->device, ttbr);
+ return NULL;
+ }
+
+ if (!dart->l1[ttbr]) {
+ printf("dart[%lx %u]: l1[%u] is not set\n", dart->regs, dart->device, ttbr);
+ return NULL;
+ }
+
+ if (!(dart->l1[ttbr][l1_index] & DART_PTE_VALID) && !silent) {
+ printf("dart[%lx %u]: l1 translation failure %x %lx\n", dart->regs, dart->device, l1_index,
+ iova);
+ return NULL;
+ }
+
+ u32 l2_index = (iova >> 14) & 0x7ff;
+ u64 *l2 = (u64 *)(FIELD_GET(dart->params->offset_mask, dart->l1[ttbr][l1_index])
+ << DART_PTE_OFFSET_SHIFT);
+
+ if (!(l2[l2_index] & DART_PTE_VALID) && !silent) {
+ printf("dart[%lx %u]: l2 translation failure %x:%x %lx\n", dart->regs, dart->device,
+ l1_index, l2_index, iova);
+ return NULL;
+ }
+
+ u32 offset = iova & 0x3fff;
+ void *base =
+ (void *)(FIELD_GET(dart->params->offset_mask, l2[l2_index]) << DART_PTE_OFFSET_SHIFT);
+
+ return base + offset;
+}
+
+void *dart_translate(dart_dev_t *dart, uintptr_t iova)
+{
+ return dart_translate_internal(dart, iova, 0);
+}
+
+u64 dart_search(dart_dev_t *dart, void *paddr)
+{
+ for (int ttbr = 0; ttbr < dart->params->ttbr_count; ++ttbr) {
+ if (!dart->l1[ttbr])
+ continue;
+        for (u32 l1_index = 0; l1_index < 0x800; l1_index++) {
+ if (!(dart->l1[ttbr][l1_index] & DART_PTE_VALID))
+ continue;
+
+ u64 *l2 = (u64 *)(FIELD_GET(dart->params->offset_mask, dart->l1[ttbr][l1_index])
+ << DART_PTE_OFFSET_SHIFT);
+            for (u32 l2_index = 0; l2_index < 0x800; l2_index++) {
+ if (!(l2[l2_index] & DART_PTE_VALID))
+ continue;
+ u64 *dst = (u64 *)(FIELD_GET(dart->params->offset_mask, l2[l2_index])
+ << DART_PTE_OFFSET_SHIFT);
+ if (dst == paddr)
+ return ((u64)ttbr << 36) | ((u64)l1_index << 25) | (l2_index << 14);
+ }
+ }
+ }
+
+ return DART_PTR_ERR;
+}
+
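+/*
+ * First-fit search for len bytes of unmapped IOVA space at or above
+ * start, scanning the 36-bit address space in 16K steps.
+ */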
+u64 dart_find_iova(dart_dev_t *dart, s64 start, size_t len)
+{
+ if (len % SZ_16K)
+ return -1;
+ if (start < 0 || start % SZ_16K)
+ return -1;
+
+ uintptr_t end = 1LLU << 36;
+ uintptr_t iova = start;
+
+ while (iova + len <= end) {
+
+ if (dart_translate_internal(dart, iova, 1) == NULL) {
+ size_t size;
+ for (size = SZ_16K; size < len; size += SZ_16K) {
+ if (dart_translate_internal(dart, iova + size, 1) != NULL)
+ break;
+ }
+ if (size == len)
+ return iova;
+
+ iova += size + SZ_16K;
+ } else
+ iova += SZ_16K;
+ }
+
+ return DART_PTR_ERR;
+}
+
+void dart_shutdown(dart_dev_t *dart)
+{
+ if (!dart->locked && !dart->keep)
+ write32(DART_TCR(dart), dart->params->tcr_disabled);
+
+ for (int i = 0; i < dart->params->ttbr_count; ++i)
+ if (is_heap(dart->l1[i]))
+ write32(DART_TTBR(dart, i), 0);
+
+ for (int ttbr = 0; ttbr < dart->params->ttbr_count; ++ttbr) {
+ for (int i = 0; i < SZ_16K / 8; ++i) {
+ if (dart->l1[ttbr][i] & DART_PTE_VALID) {
+ void *l2 = dart_get_l2(dart, i);
+ if (is_heap(l2)) {
+ free(l2);
+ dart->l1[ttbr][i] = 0;
+ }
+ }
+ }
+ }
+
+ dart->params->tlb_invalidate(dart);
+
+ for (int i = 0; i < dart->params->ttbr_count; ++i)
+ if (is_heap(dart->l1[i]))
+ free(dart->l1[i]);
+ free(dart);
+}
+
+u64 dart_vm_base(dart_dev_t *dart)
+{
+ return dart->vm_base;
+}
diff --git a/tools/src/dart.h b/tools/src/dart.h
new file mode 100644
index 0000000..7d8474b
--- /dev/null
+++ b/tools/src/dart.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef DART_H
+#define DART_H
+
+#include "types.h"
+
+#define DART_PTR_ERR BIT(63)
+#define DART_IS_ERR(val) FIELD_GET(DART_PTR_ERR, val)
+
+typedef struct dart_dev dart_dev_t;
+
+enum dart_type_t {
+ DART_T8020,
+ DART_T8110,
+ DART_T6000,
+};
+
+dart_dev_t *dart_init(uintptr_t base, u8 device, bool keep_pts, enum dart_type_t type);
+dart_dev_t *dart_init_adt(const char *path, int instance, int device, bool keep_pts);
+void dart_lock_adt(const char *path, int instance);
+dart_dev_t *dart_init_fdt(void *dt, u32 phandle, int device, bool keep_pts);
+int dart_setup_pt_region(dart_dev_t *dart, const char *path, int device, u64 vm_base);
+int dart_map(dart_dev_t *dart, uintptr_t iova, void *bfr, size_t len);
+void dart_unmap(dart_dev_t *dart, uintptr_t iova, size_t len);
+void dart_free_l2(dart_dev_t *dart, uintptr_t iova);
+void *dart_translate(dart_dev_t *dart, uintptr_t iova);
+u64 dart_search(dart_dev_t *dart, void *paddr);
+u64 dart_find_iova(dart_dev_t *dart, s64 start, size_t len);
+void dart_shutdown(dart_dev_t *dart);
+u64 dart_vm_base(dart_dev_t *dart);
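+
+/*
+ * Usage sketch (the ADT path and stream ID below are illustrative, not
+ * taken from this header):
+ *
+ *   dart_dev_t *dart = dart_init_adt("/arm-io/dart-sio", 0, 2, false);
+ *   if (dart && dart_map(dart, iova, buf, size) == 0) {
+ *       // ... perform DMA through iova ...
+ *       dart_unmap(dart, iova, size);
+ *   }
+ *   if (dart)
+ *       dart_shutdown(dart);
+ */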
+
+#endif
diff --git a/tools/src/dcp.c b/tools/src/dcp.c
new file mode 100644
index 0000000..e9f0503
--- /dev/null
+++ b/tools/src/dcp.c
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "dcp.h"
+#include "adt.h"
+#include "malloc.h"
+#include "pmgr.h"
+#include "rtkit.h"
+#include "utils.h"
+
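+/*
+ * DCP bring-up: look up the DCP's DART stream ID from the ADT, init the
+ * DCP and display DARTs while keeping iBoot's page tables, carve out an
+ * IOVA range for shared buffers, then boot the coprocessor via ASC/RTKit.
+ */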
+dcp_dev_t *dcp_init(const char *dcp_path, const char *dcp_dart_path, const char *disp_dart_path)
+{
+ u32 sid;
+
+ int node = adt_path_offset(adt, "/arm-io/dart-dcp/mapper-dcp");
+ if (node < 0) {
+ printf("dcp: mapper-dcp not found!\n");
+ return NULL;
+ }
+ if (ADT_GETPROP(adt, node, "reg", &sid) < 0) {
+ printf("dcp: failed to read dart stream ID!\n");
+ return NULL;
+ }
+
+ dcp_dev_t *dcp = malloc(sizeof(dcp_dev_t));
+ if (!dcp)
+ return NULL;
+
+ dcp->dart_dcp = dart_init_adt(dcp_dart_path, 0, sid, true);
+ if (!dcp->dart_dcp) {
+ printf("dcp: failed to initialize DCP DART\n");
+ goto out_free;
+ }
+ u64 vm_base = dart_vm_base(dcp->dart_dcp);
+ dart_setup_pt_region(dcp->dart_dcp, dcp_dart_path, sid, vm_base);
+
+ dcp->dart_disp = dart_init_adt(disp_dart_path, 0, 0, true);
+ if (!dcp->dart_disp) {
+ printf("dcp: failed to initialize DISP DART\n");
+ goto out_dart_dcp;
+ }
+ // set disp0's page tables at dart-dcp's vm-base
+ dart_setup_pt_region(dcp->dart_disp, disp_dart_path, 0, vm_base);
+
+ dcp->iovad_dcp = iovad_init(vm_base + 0x10000000, vm_base + 0x20000000);
+
+ dcp->asc = asc_init(dcp_path);
+ if (!dcp->asc) {
+ printf("dcp: failed to initialize ASC\n");
+ goto out_iovad;
+ }
+
+ dcp->rtkit = rtkit_init("dcp", dcp->asc, dcp->dart_dcp, dcp->iovad_dcp, NULL);
+ if (!dcp->rtkit) {
+ printf("dcp: failed to initialize RTKit\n");
+ goto out_iovad;
+ }
+
+    if (!rtkit_boot(dcp->rtkit)) {
+        printf("dcp: failed to boot RTKit\n");
+        goto out_rtkit;
+    }
+
+    return dcp;
+
+out_rtkit:
+    rtkit_quiesce(dcp->rtkit);
+    rtkit_free(dcp->rtkit);
+out_iovad:
+ iovad_shutdown(dcp->iovad_dcp, dcp->dart_dcp);
+ dart_shutdown(dcp->dart_disp);
+out_dart_dcp:
+ dart_shutdown(dcp->dart_dcp);
+out_free:
+ free(dcp);
+ return NULL;
+}
+
+int dcp_shutdown(dcp_dev_t *dcp, bool sleep)
+{
+ if (sleep) {
+ rtkit_sleep(dcp->rtkit);
+ pmgr_reset(0, "DISP0_CPU0");
+ } else {
+ rtkit_quiesce(dcp->rtkit);
+ }
+ rtkit_free(dcp->rtkit);
+ dart_shutdown(dcp->dart_disp);
+ iovad_shutdown(dcp->iovad_dcp, dcp->dart_dcp);
+ dart_shutdown(dcp->dart_dcp);
+ free(dcp);
+
+ return 0;
+}
diff --git a/tools/src/dcp.h b/tools/src/dcp.h
new file mode 100644
index 0000000..c9de8f2
--- /dev/null
+++ b/tools/src/dcp.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef DCP_H
+#define DCP_H
+
+#include "asc.h"
+#include "dart.h"
+#include "rtkit.h"
+
+typedef struct {
+ dart_dev_t *dart_dcp;
+ dart_dev_t *dart_disp;
+ iova_domain_t *iovad_dcp;
+ asc_dev_t *asc;
+ rtkit_dev_t *rtkit;
+} dcp_dev_t;
+
+dcp_dev_t *dcp_init(const char *dcp_path, const char *dcp_dart_path, const char *disp_dart_path);
+
+int dcp_shutdown(dcp_dev_t *dcp, bool sleep);
+
+#endif
diff --git a/tools/src/dcp_iboot.c b/tools/src/dcp_iboot.c
new file mode 100644
index 0000000..8f8a374
--- /dev/null
+++ b/tools/src/dcp_iboot.c
@@ -0,0 +1,224 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "dcp_iboot.h"
+#include "afk.h"
+#include "assert.h"
+#include "malloc.h"
+#include "string.h"
+#include "utils.h"
+
+#define DCP_IBOOT_ENDPOINT 0x23
+
+#define TXBUF_LEN 0x4000
+#define RXBUF_LEN 0x4000
+
+struct txcmd {
+ u32 op;
+ u32 len;
+ u32 unk1;
+ u32 unk2;
+ u8 payload[];
+};
+
+struct rxcmd {
+ u32 op;
+ u32 len;
+ u8 payload[];
+};
+
+struct dcp_iboot_if {
+ dcp_dev_t *dcp;
+ afk_epic_ep_t *epic;
+ int channel;
+
+ union {
+ u8 txbuf[TXBUF_LEN];
+ struct txcmd txcmd;
+ };
+
+ union {
+ u8 rxbuf[RXBUF_LEN];
+ struct rxcmd rxcmd;
+ };
+};
+
+enum IBootCmd {
+ IBOOT_SET_POWER = 2,
+ IBOOT_GET_HPD = 3,
+ IBOOT_GET_TIMING_MODES = 4,
+ IBOOT_GET_COLOR_MODES = 5,
+ IBOOT_SET_MODE = 6,
+ IBOOT_SWAP_BEGIN = 15,
+ IBOOT_SWAP_SET_LAYER = 16,
+ IBOOT_SWAP_END = 18,
+};
+
+struct get_hpd_resp {
+ u8 hpd;
+ u8 pad[3];
+ u32 timing_cnt;
+ u32 color_cnt;
+};
+
+struct get_tmode_resp {
+ u32 count;
+ dcp_timing_mode_t modes[];
+};
+
+struct get_cmode_resp {
+ u32 count;
+ dcp_color_mode_t modes[];
+};
+
+struct swap_start_resp {
+ u32 unk1, unk2, unk3;
+ u32 swap_id;
+ u32 unk4;
+};
+
+struct swap_set_layer_cmd {
+ u32 unk;
+ u32 layer_id;
+ dcp_layer_t layer;
+ dcp_rect_t src;
+ dcp_rect_t dst;
+ u32 unk2;
+} PACKED;
+
+dcp_iboot_if_t *dcp_ib_init(dcp_dev_t *dcp)
+{
+ dcp_iboot_if_t *iboot = malloc(sizeof(dcp_iboot_if_t));
+ if (!iboot)
+ return NULL;
+
+ iboot->dcp = dcp;
+ iboot->epic = afk_epic_init(dcp->rtkit, DCP_IBOOT_ENDPOINT);
+ if (!iboot->epic) {
+ printf("dcp-iboot: failed to initialize EPIC\n");
+ goto err_free;
+ }
+
+ iboot->channel = afk_epic_start_interface(iboot->epic, "disp0-service", TXBUF_LEN, RXBUF_LEN);
+
+ if (iboot->channel < 0) {
+ printf("dcp-iboot: failed to initialize disp0 service\n");
+ goto err_shutdown;
+ }
+
+ return iboot;
+
+err_shutdown:
+ afk_epic_shutdown(iboot->epic);
+err_free:
+ free(iboot);
+ return NULL;
+}
+
+int dcp_ib_shutdown(dcp_iboot_if_t *iboot)
+{
+ afk_epic_shutdown(iboot->epic);
+
+ free(iboot);
+ return 0;
+}
+
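+/*
+ * Marshal an op/len header plus payload into txbuf and issue it as a
+ * synchronous EPIC command (type 0xc0 here); the reply is written back
+ * into rxbuf by afk_epic_command().
+ */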
+static int dcp_ib_cmd(dcp_iboot_if_t *iboot, int op, size_t in_size)
+{
+ size_t rxsize = RXBUF_LEN;
+ assert(in_size <= TXBUF_LEN - sizeof(struct txcmd));
+
+ iboot->txcmd.op = op;
+ iboot->txcmd.len = sizeof(struct txcmd) + in_size;
+
+ return afk_epic_command(iboot->epic, iboot->channel, 0xc0, iboot->txbuf,
+ sizeof(struct txcmd) + in_size, iboot->rxbuf, &rxsize);
+}
+
+int dcp_ib_set_power(dcp_iboot_if_t *iboot, bool power)
+{
+ u32 *pwr = (void *)iboot->txcmd.payload;
+ *pwr = power;
+
+ return dcp_ib_cmd(iboot, IBOOT_SET_POWER, 1);
+}
+
+int dcp_ib_get_hpd(dcp_iboot_if_t *iboot, int *timing_cnt, int *color_cnt)
+{
+ struct get_hpd_resp *resp = (void *)iboot->rxcmd.payload;
+ int ret = dcp_ib_cmd(iboot, IBOOT_GET_HPD, 0);
+
+ if (ret < 0)
+ return ret;
+
+ if (timing_cnt)
+ *timing_cnt = resp->timing_cnt;
+ if (color_cnt)
+ *color_cnt = resp->color_cnt;
+
+ return !!resp->hpd;
+}
+
+int dcp_ib_get_timing_modes(dcp_iboot_if_t *iboot, dcp_timing_mode_t **modes)
+{
+ struct get_tmode_resp *resp = (void *)iboot->rxcmd.payload;
+ int ret = dcp_ib_cmd(iboot, IBOOT_GET_TIMING_MODES, 0);
+
+ if (ret < 0)
+ return ret;
+
+ *modes = resp->modes;
+ return resp->count;
+}
+
+int dcp_ib_get_color_modes(dcp_iboot_if_t *iboot, dcp_color_mode_t **modes)
+{
+ struct get_cmode_resp *resp = (void *)iboot->rxcmd.payload;
+ int ret = dcp_ib_cmd(iboot, IBOOT_GET_COLOR_MODES, 0);
+
+ if (ret < 0)
+ return ret;
+
+ *modes = resp->modes;
+ return resp->count;
+}
+
+int dcp_ib_set_mode(dcp_iboot_if_t *iboot, dcp_timing_mode_t *tmode, dcp_color_mode_t *cmode)
+{
+ struct {
+ dcp_timing_mode_t tmode;
+ dcp_color_mode_t cmode;
+ } *cmd = (void *)iboot->txcmd.payload;
+
+ cmd->tmode = *tmode;
+ cmd->cmode = *cmode;
+ return dcp_ib_cmd(iboot, IBOOT_SET_MODE, sizeof(*cmd));
+}
+
+int dcp_ib_swap_begin(dcp_iboot_if_t *iboot)
+{
+ struct swap_start_resp *resp = (void *)iboot->rxcmd.payload;
+ int ret = dcp_ib_cmd(iboot, IBOOT_SWAP_BEGIN, 0);
+ if (ret < 0)
+ return ret;
+
+ return resp->swap_id;
+}
+
+int dcp_ib_swap_set_layer(dcp_iboot_if_t *iboot, int layer_id, dcp_layer_t *layer,
+ dcp_rect_t *src_rect, dcp_rect_t *dst_rect)
+{
+ struct swap_set_layer_cmd *cmd = (void *)iboot->txcmd.payload;
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->layer_id = layer_id;
+ cmd->layer = *layer;
+ cmd->src = *src_rect;
+ cmd->dst = *dst_rect;
+
+ return dcp_ib_cmd(iboot, IBOOT_SWAP_SET_LAYER, sizeof(*cmd));
+}
+
+int dcp_ib_swap_end(dcp_iboot_if_t *iboot)
+{
+ memset(iboot->txcmd.payload, 0, 12);
+ return dcp_ib_cmd(iboot, IBOOT_SWAP_END, 12);
+}
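+
+/*
+ * Illustrative usage sketch (compiled out; not part of the driver): the
+ * expected call sequence for bringing up a display with this interface.
+ * Error handling is abbreviated, and fb_dva/stride are assumed to be a
+ * mapped framebuffer DVA and its line stride; see display.c for the real
+ * client code.
+ */
+#if 0
+static int dcp_ib_example(dcp_dev_t *dcp, u64 fb_dva, u32 stride)
+{
+ dcp_iboot_if_t *ib = dcp_ib_init(dcp);
+ if (!ib)
+ return -1;
+
+ dcp_ib_set_power(ib, true);
+
+ int timing_cnt, color_cnt;
+ if (dcp_ib_get_hpd(ib, &timing_cnt, &color_cnt) <= 0)
+ goto done; // no display connected
+
+ // the returned pointers alias rxbuf, so copy a mode before the next call
+ dcp_timing_mode_t *tmodes, tmode;
+ dcp_ib_get_timing_modes(ib, &tmodes);
+ tmode = tmodes[0];
+
+ dcp_color_mode_t *cmodes, cmode;
+ dcp_ib_get_color_modes(ib, &cmodes);
+ cmode = cmodes[0];
+
+ dcp_ib_set_mode(ib, &tmode, &cmode);
+
+ // one frame: begin a swap, attach a single layer, commit
+ dcp_layer_t layer = {
+ .planes = {{.addr = fb_dva, .stride = stride, .addr_format = ADDR_PLANAR}},
+ .plane_cnt = 1,
+ .width = tmode.width,
+ .height = tmode.height,
+ .surface_fmt = FMT_w30r,
+ .eotf = EOTF_GAMMA_SDR,
+ .transform = XFRM_NONE,
+ };
+ dcp_rect_t rect = {tmode.width, tmode.height, 0, 0};
+
+ dcp_ib_swap_begin(ib);
+ dcp_ib_swap_set_layer(ib, 0, &layer, &rect, &rect);
+ dcp_ib_swap_end(ib);
+
+done:
+ dcp_ib_shutdown(ib);
+ return 0;
+}
+#endif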
diff --git a/tools/src/dcp_iboot.h b/tools/src/dcp_iboot.h
new file mode 100644
index 0000000..adb449e
--- /dev/null
+++ b/tools/src/dcp_iboot.h
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef DCP_IBOOT_H
+#define DCP_IBOOT_H
+
+#include "dcp.h"
+
+typedef struct dcp_iboot_if dcp_iboot_if_t;
+
+enum DCPEOTF {
+ EOTF_GAMMA_SDR = 1,
+ EOTF_GAMMA_HDR = 2,
+};
+
+enum DCPEncoding {
+ ENC_RGB = 1,
+ ENC_YCBCR_444 = 3,
+ ENC_YCBCR_422 = 4,
+ ENC_YCBCR_420 = 5,
+};
+
+enum DCPColorimetry {
+ CLR_BT601_709 = 1,
+ CLR_BT2020 = 2,
+ CLR_DCIP3 = 3,
+};
+
+enum DCPSurfaceFmt {
+ FMT_BGRA = 1,
+ FMT_RGBA = 3,
+ FMT_w18p = 4,
+ FMT_444v = 6,
+ FMT_422v = 7,
+ FMT_420v = 8,
+ FMT_w30r = 9,
+ FMT_w40a = 10,
+};
+
+enum DCPTransform {
+ XFRM_NONE = 0,
+ XFRM_XFLIP = 1,
+ XFRM_YFLIP = 2,
+ XFRM_ROT_90 = 3,
+ XFRM_ROT_180 = 4,
+ XFRM_ROT_270 = 5,
+};
+
+enum AddrFormat {
+ ADDR_PLANAR = 1,
+ ADDR_TILED = 2,
+ ADDR_AGX = 3,
+};
+
+typedef struct {
+ u32 valid;
+ u32 width;
+ u32 height;
+ u32 fps;
+ u8 pad[8];
+} PACKED dcp_timing_mode_t;
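+/* fps is 16.16 fixed point, e.g. 60 Hz == 60 << 16 */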
+
+typedef struct {
+ u32 valid;
+ u32 colorimetry;
+ u32 eotf;
+ u32 encoding;
+ u32 bpp;
+ u8 pad[4];
+} PACKED dcp_color_mode_t;
+
+typedef struct {
+ u32 unk1;
+ u64 addr;
+ u32 tile_size;
+ u32 stride;
+ u32 unk2[4];
+ u32 addr_format;
+ u32 unk3;
+} PACKED dcp_plane_t;
+
+typedef struct {
+ dcp_plane_t planes[3];
+ u32 unk;
+ u32 plane_cnt;
+ u32 width;
+ u32 height;
+ u32 surface_fmt;
+ u32 colorspace;
+ u32 eotf;
+ u8 transform;
+ u8 padding[3];
+} PACKED dcp_layer_t;
+
+typedef struct {
+ u32 w, h, x, y;
+} PACKED dcp_rect_t;
+
+dcp_iboot_if_t *dcp_ib_init(dcp_dev_t *dcp);
+int dcp_ib_shutdown(dcp_iboot_if_t *iboot);
+
+int dcp_ib_set_power(dcp_iboot_if_t *iboot, bool power);
+int dcp_ib_get_hpd(dcp_iboot_if_t *iboot, int *timing_cnt, int *color_cnt);
+int dcp_ib_get_timing_modes(dcp_iboot_if_t *iboot, dcp_timing_mode_t **modes);
+int dcp_ib_get_color_modes(dcp_iboot_if_t *iboot, dcp_color_mode_t **modes);
+int dcp_ib_set_mode(dcp_iboot_if_t *iboot, dcp_timing_mode_t *timing, dcp_color_mode_t *color);
+int dcp_ib_swap_begin(dcp_iboot_if_t *iboot);
+int dcp_ib_swap_set_layer(dcp_iboot_if_t *iboot, int layer_id, dcp_layer_t *layer,
+ dcp_rect_t *src_rect, dcp_rect_t *dst_rect);
+int dcp_ib_swap_end(dcp_iboot_if_t *iboot);
+
+#endif
diff --git a/tools/src/devicetree.c b/tools/src/devicetree.c
new file mode 100644
index 0000000..f0c9193
--- /dev/null
+++ b/tools/src/devicetree.c
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "devicetree.h"
+
+#include "libfdt/libfdt.h"
+
+void dt_parse_ranges(void *dt, int node, struct dt_ranges_tbl *ranges)
+{
+ int len;
+ const struct fdt_property *ranges_prop = fdt_get_property(dt, node, "ranges", &len);
+ if (ranges_prop && len > 0) {
+ int idx = 0;
+ // each ranges entry is three 64-bit cells: child start, parent start, size
+ int num_entries = len / (3 * sizeof(fdt64_t));
+ if (num_entries > DT_MAX_RANGES)
+ num_entries = DT_MAX_RANGES;
+
+ const fdt64_t *entry = (const fdt64_t *)ranges_prop->data;
+ for (int i = 0; i < num_entries; ++i) {
+ u64 start = fdt64_ld(entry++);
+ u64 parent = fdt64_ld(entry++);
+ u64 size = fdt64_ld(entry++);
+ if (size) {
+ ranges[idx].start = start;
+ ranges[idx].parent = parent;
+ ranges[idx].size = size;
+ idx++;
+ }
+ }
+ }
+}
+
+u64 dt_translate(struct dt_ranges_tbl *ranges, const fdt64_t *reg)
+{
+ u64 addr = fdt64_ld(reg);
+ for (int idx = 0; idx < DT_MAX_RANGES; ++idx) {
+ if (ranges[idx].size == 0)
+ break;
+ if (addr >= ranges[idx].start && addr < ranges[idx].start + ranges[idx].size)
+ return ranges[idx].parent - ranges[idx].start + addr;
+ }
+
+ return addr;
+}
+
+u64 dt_get_address(void *dt, int node)
+{
+ int parent = fdt_parent_offset(dt, node);
+
+ // find parent with "ranges" property
+ while (parent >= 0) {
+ if (fdt_getprop(dt, parent, "ranges", NULL))
+ break;
+
+ parent = fdt_parent_offset(dt, parent);
+ }
+
+ if (parent < 0)
+ return 0;
+
+ // parse ranges for address translation
+ struct dt_ranges_tbl ranges[DT_MAX_RANGES] = {0};
+ dt_parse_ranges(dt, parent, ranges);
+
+ const fdt64_t *reg = fdt_getprop(dt, node, "reg", NULL);
+ if (!reg)
+ return 0;
+
+ return dt_translate(ranges, reg);
+}
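+
+/*
+ * Worked example (illustrative, made-up values): a parent whose "ranges"
+ * maps child bus address 0x2_0000_0000 to parent address 0x3_8000_0000 for
+ * 0x100_0000 bytes translates a child "reg" of 0x2_0000_4000 to
+ * 0x3_8000_4000 (parent - start + addr).
+ */
+#if 0
+static void dt_translate_example(void)
+{
+ struct dt_ranges_tbl ranges[DT_MAX_RANGES] = {
+ {.start = 0x200000000, .parent = 0x380000000, .size = 0x1000000},
+ };
+ fdt64_t reg = cpu_to_fdt64(0x200004000); // child address as stored in "reg"
+ u64 pa = dt_translate(ranges, &reg); // == 0x380004000
+ (void)pa;
+}
+#endif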
diff --git a/tools/src/devicetree.h b/tools/src/devicetree.h
new file mode 100644
index 0000000..855f038
--- /dev/null
+++ b/tools/src/devicetree.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef DEVICETREE_H
+#define DEVICETREE_H
+
+#include "types.h"
+
+#include "libfdt/libfdt.h"
+
+#define DT_MAX_RANGES 8
+
+struct dt_ranges_tbl {
+ u64 start;
+ u64 parent;
+ u64 size;
+};
+
+void dt_parse_ranges(void *dt, int node, struct dt_ranges_tbl *ranges);
+u64 dt_translate(struct dt_ranges_tbl *ranges, const fdt64_t *reg);
+u64 dt_get_address(void *dt, int node);
+
+#endif
diff --git a/tools/src/display.c b/tools/src/display.c
new file mode 100644
index 0000000..3dbf49e
--- /dev/null
+++ b/tools/src/display.c
@@ -0,0 +1,514 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "display.h"
+#include "adt.h"
+#include "assert.h"
+#include "dcp.h"
+#include "dcp_iboot.h"
+#include "fb.h"
+#include "memory.h"
+#include "string.h"
+#include "utils.h"
+#include "xnuboot.h"
+
+#define DISPLAY_STATUS_DELAY 100
+#define DISPLAY_STATUS_RETRIES 20
+
+#define COMPARE(a, b) \
+ if ((a) > (b)) { \
+ *best = modes[i]; \
+ continue; \
+ } else if ((a) < (b)) { \
+ continue; \
+ }
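+/*
+ * COMPARE implements one step of a lexicographic comparison: a strictly
+ * better candidate replaces *best and moves on to the next mode, a strictly
+ * worse one is discarded, and only a tie falls through to the next,
+ * lower-priority criterion.
+ */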
+
+static dcp_dev_t *dcp;
+static dcp_iboot_if_t *iboot;
+static u64 fb_dva;
+static u64 fb_size;
+bool display_is_external;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+
+u64 display_mode_fb_size(dcp_timing_mode_t *mode)
+{
+ // assume 4 bytes per pixel (either BGRA or x2r10g10b10)
+ return mode->width * mode->height * 4;
+}
+
+static void display_choose_timing_mode(dcp_timing_mode_t *modes, int cnt, dcp_timing_mode_t *best,
+ dcp_timing_mode_t *want)
+{
+ *best = modes[0];
+
+ for (int i = 1; i < cnt; i++) {
+ COMPARE(modes[i].valid, best->valid);
+ if (want && want->valid) {
+ COMPARE(modes[i].width == want->width && modes[i].height == want->height,
+ best->width == want->width && best->height == want->height);
+ COMPARE(-abs((long)modes[i].fps - (long)want->fps),
+ -abs((long)best->fps - (long)want->fps));
+ } else {
+ COMPARE(display_mode_fb_size(&modes[i]) <= fb_size,
+ display_mode_fb_size(best) <= fb_size);
+ }
+
+ COMPARE(modes[i].width <= 1920, best->width <= 1920);
+ COMPARE(modes[i].height <= 1200, best->height <= 1200);
+ COMPARE(modes[i].fps <= 60 << 16, best->fps <= 60 << 16);
+ COMPARE(modes[i].width, best->width);
+ COMPARE(modes[i].height, best->height);
+ COMPARE(modes[i].fps, best->fps);
+ }
+
+ printf("display: timing mode: valid=%d %dx%d %d.%02d Hz\n", best->valid, best->width,
+ best->height, best->fps >> 16, ((best->fps & 0xffff) * 100 + 0x7fff) >> 16);
+}
+
+static void display_choose_color_mode(dcp_color_mode_t *modes, int cnt, dcp_color_mode_t *best)
+{
+ *best = modes[0];
+
+ for (int i = 1; i < cnt; i++) {
+ COMPARE(modes[i].valid, best->valid);
+ COMPARE(modes[i].bpp <= 32, best->bpp <= 32);
+ COMPARE(modes[i].bpp, best->bpp);
+ COMPARE(-modes[i].colorimetry, -best->colorimetry);
+ COMPARE(-modes[i].encoding, -best->encoding);
+ COMPARE(-modes[i].eotf, -best->eotf);
+ }
+
+ printf("display: color mode: valid=%d colorimetry=%d eotf=%d encoding=%d bpp=%d\n", best->valid,
+ best->colorimetry, best->eotf, best->encoding, best->bpp);
+}
+
+int display_get_vram(u64 *paddr, u64 *size)
+{
+ int ret = 0;
+ int adt_path[4];
+ int node = adt_path_offset_trace(adt, "/vram", adt_path);
+
+ if (node < 0) {
+ printf("display: '/vram' not found\n");
+ return -1;
+ }
+
+ int pp = 0;
+ while (adt_path[pp])
+ pp++;
+ adt_path[pp + 1] = 0;
+
+ ret = adt_get_reg(adt, adt_path, "reg", 0, paddr, size);
+ if (ret < 0) {
+ printf("display: failed to read /vram/reg\n");
+ return -1;
+ }
+
+ if (*paddr != cur_boot_args.video.base) {
+ printf("display: vram does not match boot_args.video.base\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static uintptr_t display_map_fb(uintptr_t iova, u64 paddr, u64 size)
+{
+ if (iova == 0) {
+ u64 iova_disp0 = 0;
+ u64 iova_dcp = 0;
+
+ // start scanning for free IOVA space at the DART VM base
+ iova_dcp = dart_find_iova(dcp->dart_dcp, dart_vm_base(dcp->dart_dcp), size);
+ if (DART_IS_ERR(iova_dcp)) {
+ printf("display: failed to find IOVA for fb of %06zx bytes (dcp)\n", size);
+ return iova_dcp;
+ }
+
+ // try to map the fb to the same IOVA on disp0
+ iova_disp0 = dart_find_iova(dcp->dart_disp, iova_dcp, size);
+ if (DART_IS_ERR(iova_disp0)) {
+ printf("display: failed to find IOVA for fb of %06zx bytes (disp0)\n", size);
+ return iova_disp0;
+ }
+
+ // assume both searches yield the same IOVA; it is unclear whether this is
+ // required, but it matches what iBoot does on other models.
+ if (iova_disp0 != iova_dcp) {
+ printf("display: IOVA mismatch for fb between dcp (%08lx) and disp0 (%08lx)\n",
+ (u64)iova_dcp, (u64)iova_disp0);
+ return DART_PTR_ERR;
+ }
+
+ iova = iova_dcp;
+ }
+
+ int ret = dart_map(dcp->dart_disp, iova, (void *)paddr, size);
+ if (ret < 0) {
+ printf("display: failed to map fb to dart-disp0\n");
+ return DART_PTR_ERR;
+ }
+
+ ret = dart_map(dcp->dart_dcp, iova, (void *)paddr, size);
+ if (ret < 0) {
+ printf("display: failed to map fb to dart-dcp\n");
+ dart_unmap(dcp->dart_disp, iova, size);
+ return DART_PTR_ERR;
+ }
+
+ return iova;
+}
+
+int display_start_dcp(void)
+{
+ if (iboot)
+ return 0;
+
+ dcp = dcp_init("/arm-io/dcp", "/arm-io/dart-dcp", "/arm-io/dart-disp0");
+ if (!dcp) {
+ printf("display: failed to initialize DCP\n");
+ return -1;
+ }
+
+ // determine frame buffer PA and size from "/vram"
+ u64 pa, size;
+ if (display_get_vram(&pa, &size)) {
+ // use a safe fb_size
+ fb_size = cur_boot_args.video.stride * cur_boot_args.video.height *
+ ((cur_boot_args.video.depth + 7) / 8);
+ } else {
+ fb_size = size;
+ }
+
+ // Find the framebuffer DVA
+ fb_dva = dart_search(dcp->dart_disp, (void *)cur_boot_args.video.base);
+ // framebuffer is not mapped on the M1 Ultra Mac Studio
+ if (DART_IS_ERR(fb_dva))
+ fb_dva = display_map_fb(0, pa, size);
+ if (DART_IS_ERR(fb_dva)) {
+ printf("display: failed to find display DVA\n");
+ fb_dva = 0;
+ dcp_shutdown(dcp, false);
+ return -1;
+ }
+
+ iboot = dcp_ib_init(dcp);
+ if (!iboot) {
+ printf("display: failed to initialize DCP iBoot interface\n");
+ dcp_shutdown(dcp, false);
+ return -1;
+ }
+
+ return 0;
+}
+
+struct display_options {
+ bool retina;
+};
+
+int display_parse_mode(const char *config, dcp_timing_mode_t *mode, struct display_options *opts)
+{
+ memset(mode, 0, sizeof(*mode));
+
+ if (!config || !strcmp(config, "auto"))
+ return 0;
+
+ const char *s_w = config;
+ const char *s_h = strchr(config, 'x');
+ const char *s_fps = strchr(config, '@');
+
+ if (s_w && s_h) {
+ mode->width = atol(s_w);
+ mode->height = atol(s_h + 1);
+ mode->valid = mode->width && mode->height;
+ }
+
+ if (s_fps) {
+ mode->fps = atol(s_fps + 1) << 16;
+
+ const char *s_fps_frac = strchr(s_fps + 1, '.');
+ if (s_fps_frac) {
+ // Assumes two decimals...
+ mode->fps += (atol(s_fps_frac + 1) << 16) / 100;
+ }
+ }
+
+ const char *option = config;
+ while (option && opts) {
+ if (!strncmp(option + 1, "retina", 6))
+ opts->retina = true;
+ option = strchr(option + 1, ',');
+ }
+
+ printf("display: want mode: valid=%d %dx%d %d.%02d Hz\n", mode->valid, mode->width,
+ mode->height, mode->fps >> 16, ((mode->fps & 0xffff) * 100 + 0x7fff) >> 16);
+
+ return mode->valid;
+}
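+
+/*
+ * Example (illustrative values): parsing "3840x2160@59.94,retina" yields
+ * width=3840, height=2160, fps = (59 << 16) + ((94 << 16) / 100) in 16.16
+ * fixed point, and opts->retina = true; "auto" or NULL leaves the wanted
+ * mode zeroed so the best available mode wins.
+ */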
+
+static int display_swap(u64 iova, u32 stride, u32 width, u32 height)
+{
+ int ret;
+ int swap_id = ret = dcp_ib_swap_begin(iboot);
+ if (swap_id < 0) {
+ printf("display: failed to start swap\n");
+ return -1;
+ }
+
+ dcp_layer_t layer = {
+ .planes = {{
+ .addr = iova,
+ .stride = stride,
+ .addr_format = ADDR_PLANAR,
+ }},
+ .plane_cnt = 1,
+ .width = width,
+ .height = height,
+ .surface_fmt = FMT_w30r,
+ .colorspace = 2,
+ .eotf = EOTF_GAMMA_SDR,
+ .transform = XFRM_NONE,
+ };
+
+ dcp_rect_t rect = {width, height, 0, 0};
+
+ if ((ret = dcp_ib_swap_set_layer(iboot, 0, &layer, &rect, &rect)) < 0) {
+ printf("display: failed to set layer\n");
+ return -1;
+ }
+
+ if ((ret = dcp_ib_swap_end(iboot)) < 0) {
+ printf("display: failed to complete swap\n");
+ return -1;
+ }
+
+ return swap_id;
+}
+
+int display_configure(const char *config)
+{
+ dcp_timing_mode_t want;
+ struct display_options opts = {0};
+
+ display_parse_mode(config, &want, &opts);
+
+ u64 start_time = get_ticks();
+
+ int ret = display_start_dcp();
+ if (ret < 0)
+ return ret;
+
+ // Power on
+ if ((ret = dcp_ib_set_power(iboot, true)) < 0) {
+ printf("display: failed to set power\n");
+ return ret;
+ }
+
+ // Detect if display is connected
+ int timing_cnt, color_cnt;
+ int hpd = 0, retries = 0;
+
+ /* After boot DCP does not immediately report a connected display. Retry getting display
+ * information for 2 seconds.
+ */
+ while (retries++ < DISPLAY_STATUS_RETRIES) {
+ hpd = dcp_ib_get_hpd(iboot, &timing_cnt, &color_cnt);
+ if (hpd < 0)
+ ret = hpd;
+ else if (hpd && timing_cnt && color_cnt)
+ break;
+ if (retries < DISPLAY_STATUS_RETRIES)
+ mdelay(DISPLAY_STATUS_DELAY);
+ }
+ printf("display: waited %d ms for display status\n", (retries - 1) * DISPLAY_STATUS_DELAY);
+ if (ret < 0) {
+ printf("display: failed to get display status\n");
+ return 0;
+ }
+
+ printf("display: connected:%d timing_cnt:%d color_cnt:%d\n", hpd, timing_cnt, color_cnt);
+
+ if (!hpd || !timing_cnt || !color_cnt)
+ return 0;
+
+ // Find best modes
+ dcp_timing_mode_t *tmodes, tbest;
+ if ((ret = dcp_ib_get_timing_modes(iboot, &tmodes)) < 0) {
+ printf("display: failed to get timing modes\n");
+ return -1;
+ }
+ assert(ret == timing_cnt);
+ display_choose_timing_mode(tmodes, timing_cnt, &tbest, &want);
+
+ dcp_color_mode_t *cmodes, cbest;
+ if ((ret = dcp_ib_get_color_modes(iboot, &cmodes)) < 0) {
+ printf("display: failed to get color modes\n");
+ return -1;
+ }
+ assert(ret == color_cnt);
+ display_choose_color_mode(cmodes, color_cnt, &cbest);
+
+ // Set mode
+ if ((ret = dcp_ib_set_mode(iboot, &tbest, &cbest)) < 0) {
+ printf("display: failed to set mode\n");
+ return -1;
+ }
+
+ u64 fb_pa = cur_boot_args.video.base;
+ u64 tmp_dva = 0;
+
+ size_t size =
+ ALIGN_UP(tbest.width * tbest.height * ((cbest.bpp + 7) / 8) + 24 * SZ_16K, SZ_16K);
+
+ if (fb_size < size) {
+ printf("display: current framebuffer is too small for new mode\n");
+
+ /* rtkit uses 0x10000000 as DVA offset, FB starts in the first page */
+ if ((s64)size > 7 * SZ_32M) {
+ printf("display: not enough reserved L2 DVA space for fb size 0x%zx\n", size);
+ return -1;
+ }
+
+ cur_boot_args.mem_size -= size;
+ fb_pa = cur_boot_args.phys_base + cur_boot_args.mem_size;
+ /* add guard page between RAM and framebuffer */
+ // TODO: update mapping?
+ cur_boot_args.mem_size -= SZ_16K;
+
+ memset((void *)fb_pa, 0, size);
+
+ tmp_dva = iova_alloc(dcp->iovad_dcp, size);
+
+ tmp_dva = display_map_fb(tmp_dva, fb_pa, size);
+ if (DART_IS_ERR(tmp_dva)) {
+ printf("display: failed to map new fb\n");
+ return -1;
+ }
+
+ // Swap!
+ u32 stride = tbest.width * 4;
+ ret = display_swap(tmp_dva, stride, tbest.width, tbest.height);
+ if (ret < 0)
+ return ret;
+
+ /* wait for one swap duration (a full frame period) + 1 ms */
+ u32 delay = (((1000 << 16) + tbest.fps - 1) / tbest.fps) + 1;
+ mdelay(delay);
+ dart_unmap(dcp->dart_disp, fb_dva, fb_size);
+ dart_unmap(dcp->dart_dcp, fb_dva, fb_size);
+
+ fb_dva = display_map_fb(fb_dva, fb_pa, size);
+ if (DART_IS_ERR(fb_dva)) {
+ printf("display: failed to map new fb\n");
+ fb_dva = 0;
+ return -1;
+ }
+
+ fb_size = size;
+ mmu_map_framebuffer(fb_pa, fb_size);
+
+ /* update ADT with the physical address of the new framebuffer */
+ u64 fb_reg[2] = {fb_pa, size};
+ int node = adt_path_offset(adt, "vram");
+ if (node >= 0) {
+ // TODO: adt_set_reg(adt, node, "vram", fb_pa, size);?
+ ret = adt_setprop(adt, node, "reg", &fb_reg, sizeof(fb_reg));
+ if (ret < 0)
+ printf("display: failed to update '/vram'\n");
+ }
+ node = adt_path_offset(adt, "/chosen/carveout-memory-map");
+ if (node >= 0) {
+ // TODO: adt_set_reg(adt, node, "vram", fb_pa, size);?
+ ret = adt_setprop(adt, node, "region-id-14", &fb_reg, sizeof(fb_reg));
+ if (ret < 0)
+ printf("display: failed to update '/chosen/carveout-memory-map/region-id-14'\n");
+ }
+ }
+
+ // Swap!
+ u32 stride = tbest.width * 4;
+ ret = display_swap(fb_dva, stride, tbest.width, tbest.height);
+ if (ret < 0)
+ return ret;
+
+ printf("display: swapped! (swap_id=%d)\n", ret);
+
+ if (fb_pa != cur_boot_args.video.base || cur_boot_args.video.stride != stride ||
+ cur_boot_args.video.width != tbest.width || cur_boot_args.video.height != tbest.height ||
+ cur_boot_args.video.depth != 30) {
+ cur_boot_args.video.base = fb_pa;
+ cur_boot_args.video.stride = stride;
+ cur_boot_args.video.width = tbest.width;
+ cur_boot_args.video.height = tbest.height;
+ cur_boot_args.video.depth = 30 | (opts.retina ? FB_DEPTH_FLAG_RETINA : 0);
+ fb_reinit();
+ }
+
+ /* Update for python / subsequent stages */
+ memcpy((void *)boot_args_addr, &cur_boot_args, sizeof(cur_boot_args));
+
+ if (tmp_dva) {
+ // unmap / free temporary dva
+ dart_unmap(dcp->dart_disp, tmp_dva, size);
+ dart_unmap(dcp->dart_dcp, tmp_dva, size);
+ iova_free(dcp->iovad_dcp, tmp_dva, size);
+ }
+
+ u64 msecs = ticks_to_msecs(get_ticks() - start_time);
+ printf("display: Modeset took %ld ms\n", msecs);
+
+ return 1;
+}
+
+int display_init(void)
+{
+ int node = adt_path_offset(adt, "/arm-io/disp0");
+
+ if (node < 0) {
+ printf("DISP0 node not found!\n");
+ return -1;
+ }
+
+ display_is_external = adt_getprop(adt, node, "external", NULL);
+ if (display_is_external)
+ printf("display: Display is external\n");
+ else
+ printf("display: Display is internal\n");
+
+ if (cur_boot_args.video.width == 640 && cur_boot_args.video.height == 1136) {
+ printf("display: Dummy framebuffer found, initializing display\n");
+ return display_configure(NULL);
+ } else if (display_is_external) {
+ printf("display: External display found, reconfiguring\n");
+ return display_configure(NULL);
+ } else {
+ printf("display: Display is already initialized (%ldx%ld)\n", cur_boot_args.video.width,
+ cur_boot_args.video.height);
+ return 0;
+ }
+}
+
+void display_shutdown(dcp_shutdown_mode mode)
+{
+ if (iboot) {
+ dcp_ib_shutdown(iboot);
+ switch (mode) {
+ case DCP_QUIESCED:
+ printf("display: Quiescing DCP (unconditional)\n");
+ dcp_shutdown(dcp, false);
+ break;
+ case DCP_SLEEP_IF_EXTERNAL:
+ if (!display_is_external)
+ printf("display: Quiescing DCP (internal)\n");
+ else
+ printf("display: Sleeping DCP (external)\n");
+ dcp_shutdown(dcp, display_is_external);
+ break;
+ case DCP_SLEEP:
+ printf("display: Sleeping DCP (unconditional)\n");
+ dcp_shutdown(dcp, true);
+ break;
+ }
+ iboot = NULL;
+ }
+}
diff --git a/tools/src/display.h b/tools/src/display.h
new file mode 100644
index 0000000..992088e
--- /dev/null
+++ b/tools/src/display.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef DISPLAY_H
+#define DISPLAY_H
+
+#include "types.h"
+
+typedef enum _dcp_shutdown_mode {
+ DCP_QUIESCED = 0,
+ DCP_SLEEP_IF_EXTERNAL = 1,
+ DCP_SLEEP = 2,
+} dcp_shutdown_mode;
+
+extern bool display_is_external;
+
+int display_init(void);
+int display_start_dcp(void);
+int display_configure(const char *config);
+void display_shutdown(dcp_shutdown_mode mode);
+
+#endif
diff --git a/tools/src/dlmalloc/malloc.c b/tools/src/dlmalloc/malloc.c
new file mode 100644
index 0000000..31c9d21
--- /dev/null
+++ b/tools/src/dlmalloc/malloc.c
@@ -0,0 +1,6286 @@
+#include "malloc_config.h"
+
+/*
+ This is a version (aka dlmalloc) of malloc/free/realloc written by
+ Doug Lea and released to the public domain, as explained at
+ http://creativecommons.org/publicdomain/zero/1.0/ Send questions,
+ comments, complaints, performance data, etc to dl@cs.oswego.edu
+
+* Version 2.8.6 Wed Aug 29 06:57:58 2012 Doug Lea
+ Note: There may be an updated version of this malloc obtainable at
+ ftp://gee.cs.oswego.edu/pub/misc/malloc.c
+ Check before installing!
+
+* Quickstart
+
+ This library is all in one file to simplify the most common usage:
+ ftp it, compile it (-O3), and link it into another program. All of
+ the compile-time options default to reasonable values for use on
+ most platforms. You might later want to step through various
+ compile-time and dynamic tuning options.
+
+ For convenience, an include file for code using this malloc is at:
+ ftp://gee.cs.oswego.edu/pub/misc/malloc-2.8.6.h
+ You don't really need this .h file unless you call functions not
+ defined in your system include files. The .h file contains only the
+ excerpts from this file needed for using this malloc on ANSI C/C++
+ systems, so long as you haven't changed compile-time options about
+ naming and tuning parameters. If you do, then you can create your
+ own malloc.h that does include all settings by cutting at the point
+ indicated below. Note that you may already by default be using a C
+ library containing a malloc that is based on some version of this
+ malloc (for example in linux). You might still want to use the one
+ in this file to customize settings or to avoid overheads associated
+ with library versions.
+
+* Vital statistics:
+
+ Supported pointer/size_t representation: 4 or 8 bytes
+ size_t MUST be an unsigned type of the same width as
+ pointers. (If you are using an ancient system that declares
+ size_t as a signed type, or need it to be a different width
+ than pointers, you can use a previous release of this malloc
+ (e.g. 2.7.2) supporting these.)
+
+ Alignment: 8 bytes (minimum)
+ This suffices for nearly all current machines and C compilers.
+ However, you can define MALLOC_ALIGNMENT to be wider than this
+ if necessary (up to 128bytes), at the expense of using more space.
+
+ Minimum overhead per allocated chunk: 4 or 8 bytes (if 4byte sizes)
+ 8 or 16 bytes (if 8byte sizes)
+ Each malloced chunk has a hidden word of overhead holding size
+ and status information, and additional cross-check word
+ if FOOTERS is defined.
+
+ Minimum allocated size: 4-byte ptrs: 16 bytes (including overhead)
+ 8-byte ptrs: 32 bytes (including overhead)
+
+ Even a request for zero bytes (i.e., malloc(0)) returns a
+ pointer to something of the minimum allocatable size.
+ The maximum overhead wastage (i.e., number of extra bytes
+ allocated than were requested in malloc) is less than or equal
+ to the minimum size, except for requests >= mmap_threshold that
+ are serviced via mmap(), where the worst case wastage is about
+ 32 bytes plus the remainder from a system page (the minimal
+ mmap unit); typically 4096 or 8192 bytes.
+
+ Security: static-safe; optionally more or less
+ The "security" of malloc refers to the ability of malicious
+ code to accentuate the effects of errors (for example, freeing
+ space that is not currently malloc'ed or overwriting past the
+ ends of chunks) in code that calls malloc. This malloc
+ guarantees not to modify any memory locations below the base of
+ heap, i.e., static variables, even in the presence of usage
+ errors. The routines additionally detect most improper frees
+ and reallocs. All this holds as long as the static bookkeeping
+ for malloc itself is not corrupted by some other means. This
+ is only one aspect of security -- these checks do not, and
+ cannot, detect all possible programming errors.
+
+ If FOOTERS is defined nonzero, then each allocated chunk
+ carries an additional check word to verify that it was malloced
+ from its space. These check words are the same within each
+ execution of a program using malloc, but differ across
+ executions, so externally crafted fake chunks cannot be
+ freed. This improves security by rejecting frees/reallocs that
+ could corrupt heap memory, in addition to the checks preventing
+ writes to statics that are always on. This may further improve
+ security at the expense of time and space overhead. (Note that
+ FOOTERS may also be worth using with MSPACES.)
+
+ By default detected errors cause the program to abort (calling
+ "abort()"). You can override this to instead proceed past
+ errors by defining PROCEED_ON_ERROR. In this case, a bad free
+ has no effect, and a malloc that encounters a bad address
+ caused by user overwrites will ignore the bad address by
+ dropping pointers and indices to all known memory. This may
+ be appropriate for programs that should continue if at all
+ possible in the face of programming errors, although they may
+ run out of memory because dropped memory is never reclaimed.
+
+ If you don't like either of these options, you can define
+ CORRUPTION_ERROR_ACTION and USAGE_ERROR_ACTION to do anything
+ else. And if you are sure that your program using malloc has
+ no errors or vulnerabilities, you can define INSECURE to 1,
+ which might (or might not) provide a small performance improvement.
+
+ It is also possible to limit the maximum total allocatable
+ space, using malloc_set_footprint_limit. This is not
+ designed as a security feature in itself (calls to set limits
+ are not screened or privileged), but may be useful as one
+ aspect of a secure implementation.
+
+ Thread-safety: NOT thread-safe unless USE_LOCKS defined non-zero
+ When USE_LOCKS is defined, each public call to malloc, free,
+ etc is surrounded with a lock. By default, this uses a plain
+ pthread mutex, win32 critical section, or a spin-lock if
+ available for the platform and not disabled by setting
+ USE_SPIN_LOCKS=0. However, if USE_RECURSIVE_LOCKS is defined,
+ recursive versions are used instead (which are not required for
+ base functionality but may be needed in layered extensions).
+ Using a global lock is not especially fast, and can be a major
+ bottleneck. It is designed only to provide minimal protection
+ in concurrent environments, and to provide a basis for
+ extensions. If you are using malloc in a concurrent program,
+ consider instead using nedmalloc
+ (http://www.nedprod.com/programs/portable/nedmalloc/) or
+ ptmalloc (See http://www.malloc.de), which are derived from
+ versions of this malloc.
+
+ System requirements: Any combination of MORECORE and/or MMAP/MUNMAP
+ This malloc can use unix sbrk or any emulation (invoked using
+ the CALL_MORECORE macro) and/or mmap/munmap or any emulation
+ (invoked using CALL_MMAP/CALL_MUNMAP) to get and release system
+ memory. On most unix systems, it tends to work best if both
+ MORECORE and MMAP are enabled. On Win32, it uses emulations
+ based on VirtualAlloc. It also uses common C library functions
+ like memset.
+
+ Compliance: I believe it is compliant with the Single Unix Specification
+ (See http://www.unix.org). Also SVID/XPG, ANSI C, and probably
+ others as well.
+
+* Overview of algorithms
+
+ This is not the fastest, most space-conserving, most portable, or
+ most tunable malloc ever written. However it is among the fastest
+ while also being among the most space-conserving, portable and
+ tunable. Consistent balance across these factors results in a good
+ general-purpose allocator for malloc-intensive programs.
+
+ In most ways, this malloc is a best-fit allocator. Generally, it
+ chooses the best-fitting existing chunk for a request, with ties
+ broken in approximately least-recently-used order. (This strategy
+ normally maintains low fragmentation.) However, for requests less
+ than 256bytes, it deviates from best-fit when there is not an
+ exactly fitting available chunk by preferring to use space adjacent
+ to that used for the previous small request, as well as by breaking
+ ties in approximately most-recently-used order. (These enhance
+ locality of series of small allocations.) And for very large requests
+ (>= 256Kb by default), it relies on system memory mapping
+ facilities, if supported. (This helps avoid carrying around and
+ possibly fragmenting memory used only for large chunks.)
+
+ All operations (except malloc_stats and mallinfo) have execution
+ times that are bounded by a constant factor of the number of bits in
+ a size_t, not counting any clearing in calloc or copying in realloc,
+ or actions surrounding MORECORE and MMAP that have times
+ proportional to the number of non-contiguous regions returned by
+ system allocation routines, which is often just 1. In real-time
+ applications, you can optionally suppress segment traversals using
+ NO_SEGMENT_TRAVERSAL, which assures bounded execution even when
+ system allocators return non-contiguous spaces, at the typical
+ expense of carrying around more memory and increased fragmentation.
+
+ The implementation is not very modular and seriously overuses
+ macros. Perhaps someday all C compilers will do as good a job
+ inlining modular code as can now be done by brute-force expansion,
+ but now, enough of them seem not to.
+
+ Some compilers issue a lot of warnings about code that is
+ dead/unreachable only on some platforms, and also about intentional
+ uses of negation on unsigned types. All known cases of each can be
+ ignored.
+
+ For a longer but out of date high-level description, see
+ http://gee.cs.oswego.edu/dl/html/malloc.html
+
+* MSPACES
+ If MSPACES is defined, then in addition to malloc, free, etc.,
+ this file also defines mspace_malloc, mspace_free, etc. These
+ are versions of malloc routines that take an "mspace" argument
+ obtained using create_mspace, to control all internal bookkeeping.
+ If ONLY_MSPACES is defined, only these versions are compiled.
+ So if you would like to use this allocator for only some allocations,
+ and your system malloc for others, you can compile with
+ ONLY_MSPACES and then do something like...
+ static mspace mymspace = create_mspace(0,0); // for example
+ #define mymalloc(bytes) mspace_malloc(mymspace, bytes)
+
+ (Note: If you only need one instance of an mspace, you can instead
+ use "USE_DL_PREFIX" to relabel the global malloc.)
+
+ You can similarly create thread-local allocators by storing
+ mspaces as thread-locals. For example:
+ static __thread mspace tlms = 0;
+ void* tlmalloc(size_t bytes) {
+ if (tlms == 0) tlms = create_mspace(0, 0);
+ return mspace_malloc(tlms, bytes);
+ }
+ void tlfree(void* mem) { mspace_free(tlms, mem); }
+
+ Unless FOOTERS is defined, each mspace is completely independent.
+ You cannot allocate from one and free to another (although
+ conformance is only weakly checked, so usage errors are not always
+ caught). If FOOTERS is defined, then each chunk carries around a tag
+ indicating its originating mspace, and frees are directed to their
+ originating spaces. Normally, this requires use of locks.
+
+ ------------------------- Compile-time options ---------------------------
+
+Be careful in setting #define values for numerical constants of type
+size_t. On some systems, literal values are not automatically extended
+to size_t precision unless they are explicitly casted. You can also
+use the symbolic values MAX_SIZE_T, SIZE_T_ONE, etc below.
+
+WIN32 default: defined if _WIN32 defined
+ Defining WIN32 sets up defaults for MS environment and compilers.
+ Otherwise defaults are for unix. Beware that there seem to be some
+ cases where this malloc might not be a pure drop-in replacement for
+ Win32 malloc: Random-looking failures from Win32 GDI APIs (e.g.
+ SetDIBits()) may be due to bugs in some video driver implementations
+ when pixel buffers are malloc()ed, and the region spans more than
+ one VirtualAlloc()ed region. Because dlmalloc uses a small (64Kb)
+ default granularity, pixel buffers may straddle virtual allocation
+ regions more often than when using the Microsoft allocator. You can
+ avoid this by using VirtualAlloc() and VirtualFree() for all pixel
+ buffers rather than using malloc(). If this is not possible,
+ recompile this malloc with a larger DEFAULT_GRANULARITY. Note:
+ in cases where MSC and gcc (cygwin) are known to differ on WIN32,
+ conditions use _MSC_VER to distinguish them.
+
+DLMALLOC_EXPORT default: extern
+ Defines how public APIs are declared. If you want to export via a
+ Windows DLL, you might define this as
+ #define DLMALLOC_EXPORT extern __declspec(dllexport)
+ If you want a POSIX ELF shared object, you might use
+ #define DLMALLOC_EXPORT extern __attribute__((visibility("default")))
+
+MALLOC_ALIGNMENT default: (size_t)(2 * sizeof(void *))
+ Controls the minimum alignment for malloc'ed chunks. It must be a
+ power of two and at least 8, even on machines for which smaller
+ alignments would suffice. It may be defined as larger than this
+ though. Note however that code and data structures are optimized for
+ the case of 8-byte alignment.
+
+MSPACES default: 0 (false)
+ If true, compile in support for independent allocation spaces.
+ This is only supported if HAVE_MMAP is true.
+
+ONLY_MSPACES default: 0 (false)
+ If true, only compile in mspace versions, not regular versions.
+
+USE_LOCKS default: 0 (false)
+ Causes each call to each public routine to be surrounded with
+ pthread or WIN32 mutex lock/unlock. (If set true, this can be
+ overridden on a per-mspace basis for mspace versions.) If set to a
+ non-zero value other than 1, locks are used, but their
+ implementation is left out, so lock functions must be supplied manually,
+ as described below.
+
+USE_SPIN_LOCKS default: 1 iff USE_LOCKS and spin locks available
+ If true, uses custom spin locks for locking. This is currently
+ supported only for gcc >= 4.1, older gccs on x86 platforms, and recent
+ MS compilers. Otherwise, posix locks or win32 critical sections are
+ used.
+
+USE_RECURSIVE_LOCKS default: not defined
+ If defined nonzero, uses recursive (aka reentrant) locks, otherwise
+ uses plain mutexes. This is not required for malloc proper, but may
+ be needed for layered allocators such as nedmalloc.
+
+LOCK_AT_FORK default: not defined
+ If defined nonzero, performs pthread_atfork upon initialization
+ to initialize child lock while holding parent lock. The implementation
+ assumes that pthread locks (not custom locks) are being used. In other
+ cases, you may need to customize the implementation.
+
+FOOTERS default: 0
+ If true, provide extra checking and dispatching by placing
+ information in the footers of allocated chunks. This adds
+ space and time overhead.
+
+INSECURE default: 0
+ If true, omit checks for usage errors and heap space overwrites.
+
+USE_DL_PREFIX default: NOT defined
+ Causes compiler to prefix all public routines with the string 'dl'.
+ This can be useful when you only want to use this malloc in one part
+ of a program, using your regular system malloc elsewhere.
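+ For example (illustrative), with USE_DL_PREFIX defined the entry points
+ become dlmalloc, dlfree, etc., so a program can mix allocators explicitly:
+ void* p = dlmalloc(64); // this allocator
+ free(malloc(64)); // still the system malloc
+ dlfree(p);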
+
+MALLOC_INSPECT_ALL default: NOT defined
+ If defined, compiles malloc_inspect_all and mspace_inspect_all, that
+ perform traversal of all heap space. Unless access to these
+ functions is otherwise restricted, you probably do not want to
+ include them in secure implementations.
+
+ABORT default: defined as abort()
+ Defines how to abort on failed checks. On most systems, a failed
+ check cannot die with an "assert" or even print an informative
+ message, because the underlying print routines in turn call malloc,
+ which will fail again. Generally, the best policy is to simply call
+ abort(). It's not very useful to do more than this because many
+ errors due to overwriting will show up as address faults (null, odd
+ addresses etc) rather than malloc-triggered checks, so will also
+ abort. Also, most compilers know that abort() does not return, so
+ can better optimize code conditionally calling it.
+
+PROCEED_ON_ERROR default: defined as 0 (false)
+ Controls whether detected bad addresses are bypassed
+ rather than aborting. If set, detected bad arguments to free and
+ realloc are ignored. And all bookkeeping information is zeroed out
+ upon a detected overwrite of freed heap space, thus losing the
+ ability to ever return it from malloc again, but enabling the
+ application to proceed. If PROCEED_ON_ERROR is defined, the
+ static variable malloc_corruption_error_count is compiled in
+ and can be examined to see if errors have occurred. This option
+ generates slower code than the default abort policy.
+
+DEBUG default: NOT defined
+ The DEBUG setting is mainly intended for people trying to modify
+ this code or diagnose problems when porting to new platforms.
+ However, it may also be able to better isolate user errors than just
+ using runtime checks. The assertions in the check routines spell
+ out in more detail the assumptions and invariants underlying the
+ algorithms. The checking is fairly extensive, and will slow down
+ execution noticeably. Calling malloc_stats or mallinfo with DEBUG
+ set will attempt to check every non-mmapped allocated and free chunk
+ in the course of computing the summaries.
+
+ABORT_ON_ASSERT_FAILURE default: defined as 1 (true)
+ Debugging assertion failures can be nearly impossible if your
+ version of the assert macro causes malloc to be called, which will
+ lead to a cascade of further failures, blowing the runtime stack.
+ ABORT_ON_ASSERT_FAILURE causes assertion failures to call abort(),
+ which will usually make debugging easier.
+
+MALLOC_FAILURE_ACTION default: sets errno to ENOMEM, or no-op on win32
+ The action to take before "return 0" when malloc fails to be able to
+ return memory because there is none available.
+
+HAVE_MORECORE default: 1 (true) unless win32 or ONLY_MSPACES
+ True if this system supports sbrk or an emulation of it.
+
+MORECORE default: sbrk
+ The name of the sbrk-style system routine to call to obtain more
+ memory. See below for guidance on writing custom MORECORE
+ functions. The type of the argument to sbrk/MORECORE varies across
+ systems. It cannot be size_t, because it supports negative
+ arguments, so it is normally the signed type of the same width as
+ size_t (sometimes declared as "intptr_t"). It doesn't much matter
+ though. Internally, we only call it with arguments less than half
+ the max value of a size_t, which should work across all reasonable
+ possibilities, although sometimes generating compiler warnings.
+
+MORECORE_CONTIGUOUS default: 1 (true) if HAVE_MORECORE
+ If true, take advantage of fact that consecutive calls to MORECORE
+ with positive arguments always return contiguous increasing
+ addresses. This is true of unix sbrk. It does not hurt too much to
+ set it true anyway, since malloc copes with non-contiguities.
+ Setting it false when definitely non-contiguous saves time
+ and possibly wasted space it would take to discover this though.
+
+MORECORE_CANNOT_TRIM default: NOT defined
+ True if MORECORE cannot release space back to the system when given
+ negative arguments. This is generally necessary only if you are
+ using a hand-crafted MORECORE function that cannot handle negative
+ arguments.
+
+NO_SEGMENT_TRAVERSAL default: 0
+ If non-zero, suppresses traversals of memory segments
+ returned by either MORECORE or CALL_MMAP. This disables
+ merging of segments that are contiguous, and selectively
+ releasing them to the OS if unused, but bounds execution times.
+
+HAVE_MMAP default: 1 (true)
+ True if this system supports mmap or an emulation of it. If so, and
+ HAVE_MORECORE is not true, MMAP is used for all system
+ allocation. If set and HAVE_MORECORE is true as well, MMAP is
+ primarily used to directly allocate very large blocks. It is also
+ used as a backup strategy in cases where MORECORE fails to provide
+ space from system. Note: A single call to MUNMAP is assumed to be
+ able to unmap memory that may have been allocated using multiple calls
+ to MMAP, so long as they are adjacent.
+
+HAVE_MREMAP default: 1 on linux, else 0
+ If true realloc() uses mremap() to re-allocate large blocks and
+ extend or shrink allocation spaces.
+
+MMAP_CLEARS default: 1 except on WINCE.
+ True if mmap clears memory so calloc doesn't need to. This is true
+ for standard unix mmap using /dev/zero and on WIN32 except for WINCE.
+
+USE_BUILTIN_FFS default: 0 (i.e., not used)
+ Causes malloc to use the builtin ffs() function to compute indices.
+ Some compilers may recognize and intrinsify ffs to be faster than the
+ supplied C version. Also, the case of x86 using gcc is special-cased
+ to an asm instruction, so is already as fast as it can be, and so
+ this setting has no effect. Similarly for Win32 under recent MS compilers.
+ (On most x86s, the asm version is only slightly faster than the C version.)
+
+malloc_getpagesize default: derive from system includes, or 4096.
+ The system page size. To the extent possible, this malloc manages
+ memory from the system in page-size units. This may be (and
+ usually is) a function rather than a constant. This is ignored
+ if WIN32, where page size is determined using getSystemInfo during
+ initialization.
+
+USE_DEV_RANDOM default: 0 (i.e., not used)
+ Causes malloc to use /dev/random to initialize secure magic seed for
+ stamping footers. Otherwise, the current time is used.
+
+NO_MALLINFO default: 0
+ If defined, don't compile "mallinfo". This can be a simple way
+ of dealing with mismatches between system declarations and
+ those in this file.
+
+MALLINFO_FIELD_TYPE default: size_t
+ The type of the fields in the mallinfo struct. This was originally
+ defined as "int" in SVID etc, but is more usefully defined as
+ size_t. The value is used only if HAVE_USR_INCLUDE_MALLOC_H is not set
+
+NO_MALLOC_STATS default: 0
+ If defined, don't compile "malloc_stats". This avoids calls to
+ fprintf and bringing in stdio dependencies you might not want.
+
+REALLOC_ZERO_BYTES_FREES default: not defined
+ This should be set if a call to realloc with zero bytes should
+ be the same as a call to free. Some people think it should. Otherwise,
+ since this malloc returns a unique pointer for malloc(0), so does
+ realloc(p, 0).
+
+LACKS_UNISTD_H, LACKS_FCNTL_H, LACKS_SYS_PARAM_H, LACKS_SYS_MMAN_H
+LACKS_STRINGS_H, LACKS_STRING_H, LACKS_SYS_TYPES_H, LACKS_ERRNO_H
+LACKS_STDLIB_H LACKS_SCHED_H LACKS_TIME_H default: NOT defined unless on WIN32
+ Define these if your system does not have these header files.
+ You might need to manually insert some of the declarations they provide.
+
+DEFAULT_GRANULARITY default: page size if MORECORE_CONTIGUOUS,
+ system_info.dwAllocationGranularity in WIN32,
+ otherwise 64K.
+ Also settable using mallopt(M_GRANULARITY, x)
+ The unit for allocating and deallocating memory from the system. On
+ most systems with contiguous MORECORE, there is no reason to
+ make this more than a page. However, systems with MMAP tend to
+ either require or encourage larger granularities. You can increase
+ this value to prevent system allocation functions from being called so
+ often, especially if they are slow. The value must be at least one
+ page and must be a power of two. Setting to 0 causes initialization
+ to either page size or win32 region size. (Note: In previous
+ versions of malloc, the equivalent of this option was called
+ "TOP_PAD")
+
+DEFAULT_TRIM_THRESHOLD default: 2MB
+ Also settable using mallopt(M_TRIM_THRESHOLD, x)
+ The maximum amount of unused top-most memory to keep before
+ releasing via malloc_trim in free(). Automatic trimming is mainly
+ useful in long-lived programs using contiguous MORECORE. Because
+ trimming via sbrk can be slow on some systems, and can sometimes be
+ wasteful (in cases where programs immediately afterward allocate
+ more large chunks) the value should be high enough so that your
+ overall system performance would improve by releasing this much
+ memory. As a rough guide, you might set to a value close to the
+ average size of a process (program) running on your system.
+ Releasing this much memory would allow such a process to run in
+ memory. Generally, it is worth tuning trim thresholds when a
+ program undergoes phases where several large chunks are allocated
+ and released in ways that can reuse each other's storage, perhaps
+ mixed with phases where there are no such chunks at all. The trim
+ value must be greater than page size to have any useful effect. To
+ disable trimming completely, you can set to MAX_SIZE_T. Note that the trick
+ some people use of mallocing a huge space and then freeing it at
+ program startup, in an attempt to reserve system memory, doesn't
+ have the intended effect under automatic trimming, since that memory
+ will immediately be returned to the system.
+
+DEFAULT_MMAP_THRESHOLD default: 256K
+ Also settable using mallopt(M_MMAP_THRESHOLD, x)
+ The request size threshold for using MMAP to directly service a
+ request. Requests of at least this size that cannot be allocated
+ using already-existing space will be serviced via mmap. (If enough
+ normal freed space already exists it is used instead.) Using mmap
+ segregates relatively large chunks of memory so that they can be
+ individually obtained and released from the host system. A request
+ serviced through mmap is never reused by any other request (at least
+ not directly; the system may just so happen to remap successive
+ requests to the same locations). Segregating space in this way has
+ the benefits that: Mmapped space can always be individually released
+ back to the system, which helps keep the system level memory demands
+ of a long-lived program low. Also, mapped memory doesn't become
+ `locked' between other chunks, as can happen with normally allocated
+ chunks, which means that even trimming via malloc_trim would not
+ release them. However, it has the disadvantage that the space
+ cannot be reclaimed, consolidated, and then used to service later
+ requests, as happens with normal chunks. The advantages of mmap
+ nearly always outweigh disadvantages for "large" chunks, but the
+ value of "large" may vary across systems. The default is an
+ empirically derived value that works well in most systems. You can
+ disable mmap by setting to MAX_SIZE_T.
+
+MAX_RELEASE_CHECK_RATE default: 4095 unless not HAVE_MMAP
+ The number of consolidated frees between checks to release
+ unused segments when freeing. When using non-contiguous segments,
+ especially with multiple mspaces, checking only for topmost space
+ doesn't always suffice to trigger trimming. To compensate for this,
+ free() will, with a period of MAX_RELEASE_CHECK_RATE (or the
+ current number of segments, if greater) try to release unused
+ segments to the OS when freeing chunks that result in
+ consolidation. The best value for this parameter is a compromise
+ between slowing down frees with relatively costly checks that
+ rarely trigger versus holding on to unused memory. To effectively
+ disable, set to MAX_SIZE_T. This may lead to a very slight speed
+ improvement at the expense of carrying around more memory.
+*/
+
+/* Version identifier to allow people to support multiple versions */
+#ifndef DLMALLOC_VERSION
+#define DLMALLOC_VERSION 20806
+#endif /* DLMALLOC_VERSION */
+
+#ifndef DLMALLOC_EXPORT
+#define DLMALLOC_EXPORT extern
+#endif
+
+#ifndef WIN32
+#ifdef _WIN32
+#define WIN32 1
+#endif /* _WIN32 */
+#ifdef _WIN32_WCE
+#define LACKS_FCNTL_H
+#define WIN32 1
+#endif /* _WIN32_WCE */
+#endif /* WIN32 */
+#ifdef WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <tchar.h>
+#define HAVE_MMAP 1
+#define HAVE_MORECORE 0
+#define LACKS_UNISTD_H
+#define LACKS_SYS_PARAM_H
+#define LACKS_SYS_MMAN_H
+#define LACKS_STRING_H
+#define LACKS_STRINGS_H
+#define LACKS_SYS_TYPES_H
+#define LACKS_ERRNO_H
+#define LACKS_SCHED_H
+#ifndef MALLOC_FAILURE_ACTION
+#define MALLOC_FAILURE_ACTION
+#endif /* MALLOC_FAILURE_ACTION */
+#ifndef MMAP_CLEARS
+#ifdef _WIN32_WCE /* WINCE reportedly does not clear */
+#define MMAP_CLEARS 0
+#else
+#define MMAP_CLEARS 1
+#endif /* _WIN32_WCE */
+#endif /*MMAP_CLEARS */
+#endif /* WIN32 */
+
+#if defined(DARWIN) || defined(_DARWIN)
+/* Mac OSX docs advise not to use sbrk; it seems better to use mmap */
+#ifndef HAVE_MORECORE
+#define HAVE_MORECORE 0
+#define HAVE_MMAP 1
+/* OSX allocators provide 16 byte alignment */
+#ifndef MALLOC_ALIGNMENT
+#define MALLOC_ALIGNMENT ((size_t)16U)
+#endif
+#endif /* HAVE_MORECORE */
+#endif /* DARWIN */
+
+#ifndef LACKS_SYS_TYPES_H
+#include <sys/types.h> /* For size_t */
+#endif /* LACKS_SYS_TYPES_H */
+
+/* The maximum possible size_t value has all bits set */
+#define MAX_SIZE_T (~(size_t)0)
+
+#ifndef USE_LOCKS /* ensure true if spin or recursive locks set */
+#if ((defined(USE_SPIN_LOCKS) && USE_SPIN_LOCKS != 0) || \
+ (defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0))
+#define USE_LOCKS 1
+#else
+#define USE_LOCKS 0
+#endif
+#endif /* USE_LOCKS */
+
+#if USE_LOCKS /* Spin locks for gcc >= 4.1, older gcc on x86, MSC >= 1310 */
+#if ((defined(__GNUC__) && \
+ ((__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) || \
+ defined(__i386__) || defined(__x86_64__))) || \
+ (defined(_MSC_VER) && _MSC_VER>=1310))
+#ifndef USE_SPIN_LOCKS
+#define USE_SPIN_LOCKS 1
+#endif /* USE_SPIN_LOCKS */
+#elif USE_SPIN_LOCKS
+#error "USE_SPIN_LOCKS defined without implementation"
+#endif /* ... locks available... */
+#elif !defined(USE_SPIN_LOCKS)
+#define USE_SPIN_LOCKS 0
+#endif /* USE_LOCKS */
+
+#ifndef ONLY_MSPACES
+#define ONLY_MSPACES 0
+#endif /* ONLY_MSPACES */
+#ifndef MSPACES
+#if ONLY_MSPACES
+#define MSPACES 1
+#else /* ONLY_MSPACES */
+#define MSPACES 0
+#endif /* ONLY_MSPACES */
+#endif /* MSPACES */
+#ifndef MALLOC_ALIGNMENT
+#define MALLOC_ALIGNMENT ((size_t)(2 * sizeof(void *)))
+#endif /* MALLOC_ALIGNMENT */
+#ifndef FOOTERS
+#define FOOTERS 0
+#endif /* FOOTERS */
+#ifndef ABORT
+#define ABORT abort()
+#endif /* ABORT */
+#ifndef ABORT_ON_ASSERT_FAILURE
+#define ABORT_ON_ASSERT_FAILURE 1
+#endif /* ABORT_ON_ASSERT_FAILURE */
+#ifndef PROCEED_ON_ERROR
+#define PROCEED_ON_ERROR 0
+#endif /* PROCEED_ON_ERROR */
+
+#ifndef INSECURE
+#define INSECURE 0
+#endif /* INSECURE */
+#ifndef MALLOC_INSPECT_ALL
+#define MALLOC_INSPECT_ALL 0
+#endif /* MALLOC_INSPECT_ALL */
+#ifndef HAVE_MMAP
+#define HAVE_MMAP 1
+#endif /* HAVE_MMAP */
+#ifndef MMAP_CLEARS
+#define MMAP_CLEARS 1
+#endif /* MMAP_CLEARS */
+#ifndef HAVE_MREMAP
+#ifdef linux
+#define HAVE_MREMAP 1
+#define _GNU_SOURCE /* Turns on mremap() definition */
+#else /* linux */
+#define HAVE_MREMAP 0
+#endif /* linux */
+#endif /* HAVE_MREMAP */
+#ifndef MALLOC_FAILURE_ACTION
+#define MALLOC_FAILURE_ACTION errno = ENOMEM;
+#endif /* MALLOC_FAILURE_ACTION */
+#ifndef HAVE_MORECORE
+#if ONLY_MSPACES
+#define HAVE_MORECORE 0
+#else /* ONLY_MSPACES */
+#define HAVE_MORECORE 1
+#endif /* ONLY_MSPACES */
+#endif /* HAVE_MORECORE */
+#if !HAVE_MORECORE
+#define MORECORE_CONTIGUOUS 0
+#else /* !HAVE_MORECORE */
+#define MORECORE_DEFAULT sbrk
+#ifndef MORECORE_CONTIGUOUS
+#define MORECORE_CONTIGUOUS 1
+#endif /* MORECORE_CONTIGUOUS */
+#endif /* HAVE_MORECORE */
+#ifndef DEFAULT_GRANULARITY
+#if (MORECORE_CONTIGUOUS || defined(WIN32))
+#define DEFAULT_GRANULARITY (0) /* 0 means to compute in init_mparams */
+#else /* MORECORE_CONTIGUOUS */
+#define DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U)
+#endif /* MORECORE_CONTIGUOUS */
+#endif /* DEFAULT_GRANULARITY */
+#ifndef DEFAULT_TRIM_THRESHOLD
+#ifndef MORECORE_CANNOT_TRIM
+#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U)
+#else /* MORECORE_CANNOT_TRIM */
+#define DEFAULT_TRIM_THRESHOLD MAX_SIZE_T
+#endif /* MORECORE_CANNOT_TRIM */
+#endif /* DEFAULT_TRIM_THRESHOLD */
+#ifndef DEFAULT_MMAP_THRESHOLD
+#if HAVE_MMAP
+#define DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U)
+#else /* HAVE_MMAP */
+#define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T
+#endif /* HAVE_MMAP */
+#endif /* DEFAULT_MMAP_THRESHOLD */
+#ifndef MAX_RELEASE_CHECK_RATE
+#if HAVE_MMAP
+#define MAX_RELEASE_CHECK_RATE 4095
+#else
+#define MAX_RELEASE_CHECK_RATE MAX_SIZE_T
+#endif /* HAVE_MMAP */
+#endif /* MAX_RELEASE_CHECK_RATE */
+#ifndef USE_BUILTIN_FFS
+#define USE_BUILTIN_FFS 0
+#endif /* USE_BUILTIN_FFS */
+#ifndef USE_DEV_RANDOM
+#define USE_DEV_RANDOM 0
+#endif /* USE_DEV_RANDOM */
+#ifndef NO_MALLINFO
+#define NO_MALLINFO 0
+#endif /* NO_MALLINFO */
+#ifndef MALLINFO_FIELD_TYPE
+#define MALLINFO_FIELD_TYPE size_t
+#endif /* MALLINFO_FIELD_TYPE */
+#ifndef NO_MALLOC_STATS
+#define NO_MALLOC_STATS 0
+#endif /* NO_MALLOC_STATS */
+#ifndef NO_SEGMENT_TRAVERSAL
+#define NO_SEGMENT_TRAVERSAL 0
+#endif /* NO_SEGMENT_TRAVERSAL */
+
+/*
+ mallopt tuning options. SVID/XPG defines four standard parameter
+ numbers for mallopt, normally defined in malloc.h. None of these
+ are used in this malloc, so setting them has no effect. But this
+ malloc does support the following options.
+*/
+
+#define M_TRIM_THRESHOLD (-1)
+#define M_GRANULARITY (-2)
+#define M_MMAP_THRESHOLD (-3)
+
+/* ------------------------ Mallinfo declarations ------------------------ */
+
+#if !NO_MALLINFO
+/*
+ This version of malloc supports the standard SVID/XPG mallinfo
+ routine that returns a struct containing usage properties and
+ statistics. It should work on any system that has a
+ /usr/include/malloc.h defining struct mallinfo. The main
+ declaration needed is the mallinfo struct that is returned (by-copy)
+ by mallinfo(). The mallinfo struct contains a bunch of fields that
+ are not even meaningful in this version of malloc. These fields are
+ instead filled by mallinfo() with other numbers that might be of
+ interest.
+
+ HAVE_USR_INCLUDE_MALLOC_H should be set if you have a
+ /usr/include/malloc.h file that includes a declaration of struct
+ mallinfo. If so, it is included; else a compliant version is
+ declared below. These must be precisely the same for mallinfo() to
+ work. The original SVID version of this struct, defined on most
+ systems with mallinfo, declares all fields as ints. But some others
+ define as unsigned long. If your system defines the fields using a
+ type of different width than listed here, you MUST #include your
+ system version and #define HAVE_USR_INCLUDE_MALLOC_H.
+*/
+
+/* #define HAVE_USR_INCLUDE_MALLOC_H */
+
+#ifdef HAVE_USR_INCLUDE_MALLOC_H
+#include "/usr/include/malloc.h"
+#else /* HAVE_USR_INCLUDE_MALLOC_H */
+#ifndef STRUCT_MALLINFO_DECLARED
+/* HP-UX (and others?) redefines mallinfo unless _STRUCT_MALLINFO is defined */
+#define _STRUCT_MALLINFO
+#define STRUCT_MALLINFO_DECLARED 1
+struct mallinfo {
+ MALLINFO_FIELD_TYPE arena; /* non-mmapped space allocated from system */
+ MALLINFO_FIELD_TYPE ordblks; /* number of free chunks */
+ MALLINFO_FIELD_TYPE smblks; /* always 0 */
+ MALLINFO_FIELD_TYPE hblks; /* always 0 */
+ MALLINFO_FIELD_TYPE hblkhd; /* space in mmapped regions */
+ MALLINFO_FIELD_TYPE usmblks; /* maximum total allocated space */
+ MALLINFO_FIELD_TYPE fsmblks; /* always 0 */
+ MALLINFO_FIELD_TYPE uordblks; /* total allocated space */
+ MALLINFO_FIELD_TYPE fordblks; /* total free space */
+ MALLINFO_FIELD_TYPE keepcost; /* releasable (via malloc_trim) space */
+};
+#endif /* STRUCT_MALLINFO_DECLARED */
+#endif /* HAVE_USR_INCLUDE_MALLOC_H */
+#endif /* NO_MALLINFO */
+
+/*
+ Try to persuade compilers to inline. The most critical functions for
+ inlining are defined as macros, so these aren't used for them.
+*/
+
+#ifndef FORCEINLINE
+ #if defined(__GNUC__)
+#define FORCEINLINE __inline __attribute__ ((always_inline))
+ #elif defined(_MSC_VER)
+ #define FORCEINLINE __forceinline
+ #endif
+#endif
+#ifndef NOINLINE
+ #if defined(__GNUC__)
+ #define NOINLINE __attribute__ ((noinline))
+ #elif defined(_MSC_VER)
+ #define NOINLINE __declspec(noinline)
+ #else
+ #define NOINLINE
+ #endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#ifndef FORCEINLINE
+ #define FORCEINLINE inline
+#endif
+#endif /* __cplusplus */
+#ifndef FORCEINLINE
+ #define FORCEINLINE
+#endif
+
+#if !ONLY_MSPACES
+
+/* ------------------- Declarations of public routines ------------------- */
+
+#ifndef USE_DL_PREFIX
+#define dlcalloc calloc
+#define dlfree free
+#define dlmalloc malloc
+#define dlmemalign memalign
+#define dlposix_memalign posix_memalign
+#define dlrealloc realloc
+#define dlrealloc_in_place realloc_in_place
+#define dlvalloc valloc
+#define dlpvalloc pvalloc
+#define dlmallinfo mallinfo
+#define dlmallopt mallopt
+#define dlmalloc_trim malloc_trim
+#define dlmalloc_stats malloc_stats
+#define dlmalloc_usable_size malloc_usable_size
+#define dlmalloc_footprint malloc_footprint
+#define dlmalloc_max_footprint malloc_max_footprint
+#define dlmalloc_footprint_limit malloc_footprint_limit
+#define dlmalloc_set_footprint_limit malloc_set_footprint_limit
+#define dlmalloc_inspect_all malloc_inspect_all
+#define dlindependent_calloc independent_calloc
+#define dlindependent_comalloc independent_comalloc
+#define dlbulk_free bulk_free
+#endif /* USE_DL_PREFIX */
+
+/*
+ malloc(size_t n)
+ Returns a pointer to a newly allocated chunk of at least n bytes, or
+ null if no space is available, in which case errno is set to ENOMEM
+ on ANSI C systems.
+
+ If n is zero, malloc returns a minimum-sized chunk. (The minimum
+ size is 16 bytes on most 32bit systems, and 32 bytes on 64bit
+ systems.) Note that size_t is an unsigned type, so calls with
+ arguments that would be negative if signed are interpreted as
+ requests for huge amounts of space, which will often fail. The
+ maximum supported value of n differs across systems, but is in all
+ cases less than the maximum representable value of a size_t.
+*/
+DLMALLOC_EXPORT void* dlmalloc(size_t);
+
+/*
+ free(void* p)
+ Releases the chunk of memory pointed to by p, that had been previously
+ allocated using malloc or a related routine such as realloc.
+ It has no effect if p is null. If p was not malloced or already
+ freed, free(p) will by default cause the current program to abort.
+*/
+DLMALLOC_EXPORT void dlfree(void*);
+
+/*
+ calloc(size_t n_elements, size_t element_size);
+ Returns a pointer to n_elements * element_size bytes, with all locations
+ set to zero.
+*/
+DLMALLOC_EXPORT void* dlcalloc(size_t, size_t);
+
+/*
+ realloc(void* p, size_t n)
+ Returns a pointer to a chunk of size n that contains the same data
+ as does chunk p up to the minimum of (n, p's size) bytes, or null
+ if no space is available.
+
+ The returned pointer may or may not be the same as p. The algorithm
+ prefers extending p in most cases when possible, otherwise it
+ employs the equivalent of a malloc-copy-free sequence.
+
+ If p is null, realloc is equivalent to malloc.
+
+ If space is not available, realloc returns null, errno is set (if on
+ ANSI) and p is NOT freed.
+
+  If n is for fewer bytes than already held by p, the newly unused
+ space is lopped off and freed if possible. realloc with a size
+ argument of zero (re)allocates a minimum-sized chunk.
+
+ The old unix realloc convention of allowing the last-free'd chunk
+ to be used as an argument to realloc is not supported.
+*/
+DLMALLOC_EXPORT void* dlrealloc(void*, size_t);
+
+/*
+ realloc_in_place(void* p, size_t n)
+ Resizes the space allocated for p to size n, only if this can be
+ done without moving p (i.e., only if there is adjacent space
+ available if n is greater than p's current allocated size, or n is
+ less than or equal to p's size). This may be used instead of plain
+ realloc if an alternative allocation strategy is needed upon failure
+ to expand space; for example, reallocation of a buffer that must be
+ memory-aligned or cleared. You can use realloc_in_place to trigger
+ these alternatives only when needed.
+
+ Returns p if successful; otherwise null.
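+
+  A minimal sketch of that fallback pattern (grow_exact, oldsz, and
+  newsz are hypothetical names, not part of this malloc):
+
+    void* grow_exact(void* p, size_t oldsz, size_t newsz) {
+      if (realloc_in_place(p, newsz) != 0)
+        return p;                /* resized without moving */
+      void* q = malloc(newsz);   /* else move by hand */
+      if (q != 0) {
+        memcpy(q, p, oldsz < newsz ? oldsz : newsz);
+        free(p);
+      }
+      return q;
+    }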
+*/
+DLMALLOC_EXPORT void* dlrealloc_in_place(void*, size_t);
+
+/*
+ memalign(size_t alignment, size_t n);
+ Returns a pointer to a newly allocated chunk of n bytes, aligned
+ in accord with the alignment argument.
+
+ The alignment argument should be a power of two. If the argument is
+ not a power of two, the nearest greater power is used.
+ 8-byte alignment is guaranteed by normal malloc calls, so don't
+ bother calling memalign with an argument of 8 or less.
+
+ Overreliance on memalign is a sure way to fragment space.
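+
+  For example, to place a buffer on its own cache line (64 is an
+  assumed line size here, not something this malloc knows about):
+
+    char* buf = (char*)memalign(64, 4096);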
+*/
+DLMALLOC_EXPORT void* dlmemalign(size_t, size_t);
+
+/*
+ int posix_memalign(void** pp, size_t alignment, size_t n);
+ Allocates a chunk of n bytes, aligned in accord with the alignment
+ argument. Differs from memalign only in that it (1) assigns the
+ allocated memory to *pp rather than returning it, (2) fails and
+  returns EINVAL if the alignment is not a power of two, and (3) fails and
+ returns ENOMEM if memory cannot be allocated.
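+
+  For example (die() is a placeholder error handler, as in the
+  examples further below):
+
+    void* p;
+    int rc = posix_memalign(&p, 64, 4096);
+    if (rc != 0) die();   /* rc is EINVAL or ENOMEM */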
+*/
+DLMALLOC_EXPORT int dlposix_memalign(void**, size_t, size_t);
+
+/*
+ valloc(size_t n);
+ Equivalent to memalign(pagesize, n), where pagesize is the page
+ size of the system. If the pagesize is unknown, 4096 is used.
+*/
+DLMALLOC_EXPORT void* dlvalloc(size_t);
+
+/*
+ mallopt(int parameter_number, int parameter_value)
+  Sets tunable parameters. The format is to provide a
+ (parameter-number, parameter-value) pair. mallopt then sets the
+ corresponding parameter to the argument value if it can (i.e., so
+ long as the value is meaningful), and returns 1 if successful else
+  0. To work around the fact that mallopt is specified to use int,
+ not size_t parameters, the value -1 is specially treated as the
+ maximum unsigned size_t value.
+
+ SVID/XPG/ANSI defines four standard param numbers for mallopt,
+  normally defined in malloc.h. None of these are used in this malloc,
+ so setting them has no effect. But this malloc also supports other
+ options in mallopt. See below for details. Briefly, supported
+ parameters are as follows (listed defaults are for "typical"
+ configurations).
+
+ Symbol param # default allowed param values
+ M_TRIM_THRESHOLD -1 2*1024*1024 any (-1 disables)
+ M_GRANULARITY -2 page size any power of 2 >= page size
+ M_MMAP_THRESHOLD -3 256*1024 any (or 0 if no MMAP support)
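+
+  For example, to disable trimming and raise the mmap threshold to
+  1MB (values here are purely illustrative):
+
+    mallopt(M_TRIM_THRESHOLD, -1);         /* never give memory back */
+    mallopt(M_MMAP_THRESHOLD, 1024*1024);  /* mmap requests >= 1MB */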
+*/
+DLMALLOC_EXPORT int dlmallopt(int, int);
+
+/*
+ malloc_footprint();
+ Returns the number of bytes obtained from the system. The total
+ number of bytes allocated by malloc, realloc etc., is less than this
+ value. Unlike mallinfo, this function returns only a precomputed
+ result, so can be called frequently to monitor memory consumption.
+ Even if locks are otherwise defined, this function does not use them,
+ so results might not be up to date.
+*/
+DLMALLOC_EXPORT size_t dlmalloc_footprint(void);
+
+/*
+ malloc_max_footprint();
+ Returns the maximum number of bytes obtained from the system. This
+ value will be greater than current footprint if deallocated space
+ has been reclaimed by the system. The peak number of bytes allocated
+ by malloc, realloc etc., is less than this value. Unlike mallinfo,
+ this function returns only a precomputed result, so can be called
+ frequently to monitor memory consumption. Even if locks are
+ otherwise defined, this function does not use them, so results might
+ not be up to date.
+*/
+DLMALLOC_EXPORT size_t dlmalloc_max_footprint(void);
+
+/*
+ malloc_footprint_limit();
+ Returns the number of bytes that the heap is allowed to obtain from
+ the system, returning the last value returned by
+ malloc_set_footprint_limit, or the maximum size_t value if
+ never set. The returned value reflects a permission. There is no
+ guarantee that this number of bytes can actually be obtained from
+ the system.
+*/
+DLMALLOC_EXPORT size_t dlmalloc_footprint_limit(void);
+
+/*
+ malloc_set_footprint_limit();
+ Sets the maximum number of bytes to obtain from the system, causing
+ failure returns from malloc and related functions upon attempts to
+ exceed this value. The argument value may be subject to page
+  rounding to an enforceable limit; the actual value applied is returned.
+ Using an argument of the maximum possible size_t effectively
+ disables checks. If the argument is less than or equal to the
+ current malloc_footprint, then all future allocations that require
+ additional system memory will fail. However, invocation cannot
+ retroactively deallocate existing used memory.
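+
+  For example, to cap the heap at roughly 64MB (the page-rounded
+  limit actually enforced is whatever this call returns):
+
+    size_t limit = malloc_set_footprint_limit((size_t)64 * 1024 * 1024);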
+*/
+DLMALLOC_EXPORT size_t dlmalloc_set_footprint_limit(size_t bytes);
+
+#if MALLOC_INSPECT_ALL
+/*
+ malloc_inspect_all(void(*handler)(void *start,
+ void *end,
+ size_t used_bytes,
+ void* callback_arg),
+ void* arg);
+ Traverses the heap and calls the given handler for each managed
+ region, skipping all bytes that are (or may be) used for bookkeeping
+  purposes. Traversal does not include chunks that have been
+ directly memory mapped. Each reported region begins at the start
+ address, and continues up to but not including the end address. The
+ first used_bytes of the region contain allocated data. If
+ used_bytes is zero, the region is unallocated. The handler is
+ invoked with the given callback argument. If locks are defined, they
+ are held during the entire traversal. It is a bad idea to invoke
+ other malloc functions from within the handler.
+
+  For example, to count the number of in-use regions holding at
+  least 1000 bytes, you could write:
+ static int count = 0;
+ void count_chunks(void* start, void* end, size_t used, void* arg) {
+ if (used >= 1000) ++count;
+ }
+ then:
+ malloc_inspect_all(count_chunks, NULL);
+
+ malloc_inspect_all is compiled only if MALLOC_INSPECT_ALL is defined.
+*/
+DLMALLOC_EXPORT void dlmalloc_inspect_all(void(*handler)(void*, void *, size_t, void*),
+ void* arg);
+
+#endif /* MALLOC_INSPECT_ALL */
+
+#if !NO_MALLINFO
+/*
+ mallinfo()
+ Returns (by copy) a struct containing various summary statistics:
+
+ arena: current total non-mmapped bytes allocated from system
+ ordblks: the number of free chunks
+ smblks: always zero.
+ hblks: current number of mmapped regions
+ hblkhd: total bytes held in mmapped regions
+ usmblks: the maximum total allocated space. This will be greater
+ than current total if trimming has occurred.
+ fsmblks: always zero
+ uordblks: current total allocated space (normal or mmapped)
+ fordblks: total free space
+ keepcost: the maximum number of bytes that could ideally be released
+ back to system via malloc_trim. ("ideally" means that
+ it ignores page restrictions etc.)
+
+ Because these fields are ints, but internal bookkeeping may
+ be kept as longs, the reported values may wrap around zero and
+ thus be inaccurate.
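+
+  For example, to log allocated versus free space (the %d format
+  assumes the default int MALLINFO_FIELD_TYPE):
+
+    struct mallinfo mi = mallinfo();
+    fprintf(stderr, "in use: %d, free: %d\n", mi.uordblks, mi.fordblks);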
+*/
+DLMALLOC_EXPORT struct mallinfo dlmallinfo(void);
+#endif /* NO_MALLINFO */
+
+/*
+ independent_calloc(size_t n_elements, size_t element_size, void* chunks[]);
+
+ independent_calloc is similar to calloc, but instead of returning a
+ single cleared space, it returns an array of pointers to n_elements
+ independent elements that can hold contents of size elem_size, each
+ of which starts out cleared, and can be independently freed,
+ realloc'ed etc. The elements are guaranteed to be adjacently
+ allocated (this is not guaranteed to occur with multiple callocs or
+ mallocs), which may also improve cache locality in some
+ applications.
+
+ The "chunks" argument is optional (i.e., may be null, which is
+ probably the most typical usage). If it is null, the returned array
+ is itself dynamically allocated and should also be freed when it is
+ no longer needed. Otherwise, the chunks array must be of at least
+ n_elements in length. It is filled in with the pointers to the
+ chunks.
+
+ In either case, independent_calloc returns this pointer array, or
+ null if the allocation failed. If n_elements is zero and "chunks"
+ is null, it returns a chunk representing an array with zero elements
+ (which should be freed if not wanted).
+
+ Each element must be freed when it is no longer needed. This can be
+ done all at once using bulk_free.
+
+ independent_calloc simplifies and speeds up implementations of many
+ kinds of pools. It may also be useful when constructing large data
+ structures that initially have a fixed number of fixed-sized nodes,
+ but the number is not known at compile time, and some of the nodes
+ may later need to be freed. For example:
+
+ struct Node { int item; struct Node* next; };
+
+ struct Node* build_list() {
+ struct Node** pool;
+      int i, n = read_number_of_nodes_needed();
+ if (n <= 0) return 0;
+      pool = (struct Node**)independent_calloc(n, sizeof(struct Node), 0);
+ if (pool == 0) die();
+ // organize into a linked list...
+ struct Node* first = pool[0];
+ for (i = 0; i < n-1; ++i)
+ pool[i]->next = pool[i+1];
+ free(pool); // Can now free the array (or not, if it is needed later)
+ return first;
+ }
+*/
+DLMALLOC_EXPORT void** dlindependent_calloc(size_t, size_t, void**);
+
+/*
+ independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
+
+ independent_comalloc allocates, all at once, a set of n_elements
+ chunks with sizes indicated in the "sizes" array. It returns
+ an array of pointers to these elements, each of which can be
+ independently freed, realloc'ed etc. The elements are guaranteed to
+ be adjacently allocated (this is not guaranteed to occur with
+ multiple callocs or mallocs), which may also improve cache locality
+ in some applications.
+
+ The "chunks" argument is optional (i.e., may be null). If it is null
+ the returned array is itself dynamically allocated and should also
+ be freed when it is no longer needed. Otherwise, the chunks array
+ must be of at least n_elements in length. It is filled in with the
+ pointers to the chunks.
+
+ In either case, independent_comalloc returns this pointer array, or
+ null if the allocation failed. If n_elements is zero and chunks is
+ null, it returns a chunk representing an array with zero elements
+ (which should be freed if not wanted).
+
+ Each element must be freed when it is no longer needed. This can be
+ done all at once using bulk_free.
+
+  independent_comalloc differs from independent_calloc in that each
+ element may have a different size, and also that it does not
+ automatically clear elements.
+
+ independent_comalloc can be used to speed up allocation in cases
+ where several structs or objects must always be allocated at the
+ same time. For example:
+
+    struct Head { ... };
+    struct Foot { ... };
+
+ void send_message(char* msg) {
+ int msglen = strlen(msg);
+ size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) };
+ void* chunks[3];
+ if (independent_comalloc(3, sizes, chunks) == 0)
+ die();
+ struct Head* head = (struct Head*)(chunks[0]);
+ char* body = (char*)(chunks[1]);
+ struct Foot* foot = (struct Foot*)(chunks[2]);
+ // ...
+ }
+
+ In general though, independent_comalloc is worth using only for
+ larger values of n_elements. For small values, you probably won't
+  detect enough difference from a series of malloc calls to bother.
+
+ Overuse of independent_comalloc can increase overall memory usage,
+ since it cannot reuse existing noncontiguous small chunks that
+ might be available for some of the elements.
+*/
+DLMALLOC_EXPORT void** dlindependent_comalloc(size_t, size_t*, void**);
+
+/*
+ bulk_free(void* array[], size_t n_elements)
+ Frees and clears (sets to null) each non-null pointer in the given
+ array. This is likely to be faster than freeing them one-by-one.
+ If footers are used, pointers that have been allocated in different
+ mspaces are not freed or cleared, and the count of all such pointers
+ is returned. For large arrays of pointers with poor locality, it
+ may be worthwhile to sort this array before calling bulk_free.
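+
+  A sketch of pairing it with independent_calloc (n and elem_size
+  are caller-chosen placeholders):
+
+    void** ptrs = independent_calloc(n, elem_size, 0);
+    /* ... use the elements ... */
+    bulk_free(ptrs, n);  /* frees and nulls each element */
+    free(ptrs);          /* the pointer array itself is a separate chunk */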
+*/
+DLMALLOC_EXPORT size_t dlbulk_free(void**, size_t n_elements);
+
+/*
+ pvalloc(size_t n);
+ Equivalent to valloc(minimum-page-that-holds(n)), that is,
+ round up n to nearest pagesize.
+ */
+DLMALLOC_EXPORT void* dlpvalloc(size_t);
+
+/*
+ malloc_trim(size_t pad);
+
+ If possible, gives memory back to the system (via negative arguments
+ to sbrk) if there is unused memory at the `high' end of the malloc
+ pool or in unused MMAP segments. You can call this after freeing
+ large blocks of memory to potentially reduce the system-level memory
+ requirements of a program. However, it cannot guarantee to reduce
+ memory. Under some allocation patterns, some large free blocks of
+ memory will be locked between two used chunks, so they cannot be
+ given back to the system.
+
+ The `pad' argument to malloc_trim represents the amount of free
+ trailing space to leave untrimmed. If this argument is zero, only
+ the minimum amount of memory to maintain internal data structures
+ will be left. Non-zero arguments can be supplied to maintain enough
+ trailing space to service future expected allocations without having
+ to re-obtain memory from the system.
+
+ Malloc_trim returns 1 if it actually released any memory, else 0.
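+
+  For example, after dropping a large working set (big_block is a
+  placeholder for some previously malloc'ed region):
+
+    free(big_block);
+    malloc_trim(0);   /* return any trailing free space to the system */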
+*/
+DLMALLOC_EXPORT int dlmalloc_trim(size_t);
+
+/*
+ malloc_stats();
+ Prints on stderr the amount of space obtained from the system (both
+ via sbrk and mmap), the maximum amount (which may be more than
+ current if malloc_trim and/or munmap got called), and the current
+ number of bytes allocated via malloc (or realloc, etc) but not yet
+ freed. Note that this is the number of bytes allocated, not the
+ number requested. It will be larger than the number requested
+ because of alignment and bookkeeping overhead. Because it includes
+ alignment wastage as being in use, this figure may be greater than
+ zero even when no user-level chunks are allocated.
+
+ The reported current and maximum system memory can be inaccurate if
+ a program makes other calls to system memory allocation functions
+ (normally sbrk) outside of malloc.
+
+ malloc_stats prints only the most commonly interesting statistics.
+ More information can be obtained by calling mallinfo.
+*/
+DLMALLOC_EXPORT void dlmalloc_stats(void);
+
+/*
+ malloc_usable_size(void* p);
+
+ Returns the number of bytes you can actually use in
+ an allocated chunk, which may be more than you requested (although
+ often not) due to alignment and minimum size constraints.
+ You can use this many bytes without worrying about
+ overwriting other allocated objects. This is not a particularly great
+ programming practice. malloc_usable_size can be more useful in
+ debugging and assertions, for example:
+
+ p = malloc(n);
+ assert(malloc_usable_size(p) >= 256);
+*/
+size_t dlmalloc_usable_size(void*);
+
+#endif /* ONLY_MSPACES */
+
+#if MSPACES
+
+/*
+ mspace is an opaque type representing an independent
+ region of space that supports mspace_malloc, etc.
+*/
+typedef void* mspace;
+
+/*
+ create_mspace creates and returns a new independent space with the
+ given initial capacity, or, if 0, the default granularity size. It
+ returns null if there is no system memory available to create the
+ space. If argument locked is non-zero, the space uses a separate
+ lock to control access. The capacity of the space will grow
+ dynamically as needed to service mspace_malloc requests. You can
+ control the sizes of incremental increases of this space by
+ compiling with a different DEFAULT_GRANULARITY or dynamically
+ setting with mallopt(M_GRANULARITY, value).
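+
+  A typical lifecycle, sketched with error handling elided:
+
+    mspace ms = create_mspace(0, 0);  /* default capacity, no locking */
+    void* p = mspace_malloc(ms, 128);
+    mspace_free(ms, p);
+    destroy_mspace(ms);               /* returns all memory at once */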
+*/
+DLMALLOC_EXPORT mspace create_mspace(size_t capacity, int locked);
+
+/*
+ destroy_mspace destroys the given space, and attempts to return all
+ of its memory back to the system, returning the total number of
+ bytes freed. After destruction, the results of access to all memory
+ used by the space become undefined.
+*/
+DLMALLOC_EXPORT size_t destroy_mspace(mspace msp);
+
+/*
+ create_mspace_with_base uses the memory supplied as the initial base
+ of a new mspace. Part (less than 128*sizeof(size_t) bytes) of this
+ space is used for bookkeeping, so the capacity must be at least this
+ large. (Otherwise 0 is returned.) When this initial space is
+ exhausted, additional memory will be obtained from the system.
+ Destroying this space will deallocate all additionally allocated
+ space (if possible) but not the initial base.
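+
+  For example, to carve an mspace out of a static arena (the 1MB
+  size is illustrative; it must exceed the bookkeeping overhead):
+
+    static char arena[1 << 20];
+    mspace ms = create_mspace_with_base(arena, sizeof(arena), 0);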
+*/
+DLMALLOC_EXPORT mspace create_mspace_with_base(void* base, size_t capacity, int locked);
+
+/*
+ mspace_track_large_chunks controls whether requests for large chunks
+ are allocated in their own untracked mmapped regions, separate from
+ others in this mspace. By default large chunks are not tracked,
+ which reduces fragmentation. However, such chunks are not
+ necessarily released to the system upon destroy_mspace. Enabling
+ tracking by setting to true may increase fragmentation, but avoids
+ leakage when relying on destroy_mspace to release all memory
+ allocated using this space. The function returns the previous
+ setting.
+*/
+DLMALLOC_EXPORT int mspace_track_large_chunks(mspace msp, int enable);
+
+
+/*
+ mspace_malloc behaves as malloc, but operates within
+ the given space.
+*/
+DLMALLOC_EXPORT void* mspace_malloc(mspace msp, size_t bytes);
+
+/*
+ mspace_free behaves as free, but operates within
+ the given space.
+
+ If compiled with FOOTERS==1, mspace_free is not actually needed.
+ free may be called instead of mspace_free because freed chunks from
+ any space are handled by their originating spaces.
+*/
+DLMALLOC_EXPORT void mspace_free(mspace msp, void* mem);
+
+/*
+ mspace_realloc behaves as realloc, but operates within
+ the given space.
+
+ If compiled with FOOTERS==1, mspace_realloc is not actually
+ needed. realloc may be called instead of mspace_realloc because
+ realloced chunks from any space are handled by their originating
+ spaces.
+*/
+DLMALLOC_EXPORT void* mspace_realloc(mspace msp, void* mem, size_t newsize);
+
+/*
+ mspace_calloc behaves as calloc, but operates within
+ the given space.
+*/
+DLMALLOC_EXPORT void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size);
+
+/*
+ mspace_memalign behaves as memalign, but operates within
+ the given space.
+*/
+DLMALLOC_EXPORT void* mspace_memalign(mspace msp, size_t alignment, size_t bytes);
+
+/*
+ mspace_independent_calloc behaves as independent_calloc, but
+ operates within the given space.
+*/
+DLMALLOC_EXPORT void** mspace_independent_calloc(mspace msp, size_t n_elements,
+ size_t elem_size, void* chunks[]);
+
+/*
+ mspace_independent_comalloc behaves as independent_comalloc, but
+ operates within the given space.
+*/
+DLMALLOC_EXPORT void** mspace_independent_comalloc(mspace msp, size_t n_elements,
+ size_t sizes[], void* chunks[]);
+
+/*
+ mspace_footprint() returns the number of bytes obtained from the
+ system for this space.
+*/
+DLMALLOC_EXPORT size_t mspace_footprint(mspace msp);
+
+/*
+ mspace_max_footprint() returns the peak number of bytes obtained from the
+ system for this space.
+*/
+DLMALLOC_EXPORT size_t mspace_max_footprint(mspace msp);
+
+
+#if !NO_MALLINFO
+/*
+ mspace_mallinfo behaves as mallinfo, but reports properties of
+ the given space.
+*/
+DLMALLOC_EXPORT struct mallinfo mspace_mallinfo(mspace msp);
+#endif /* NO_MALLINFO */
+
+/*
+  mspace_usable_size(const void* mem) behaves the same as malloc_usable_size.
+*/
+DLMALLOC_EXPORT size_t mspace_usable_size(const void* mem);
+
+/*
+ mspace_malloc_stats behaves as malloc_stats, but reports
+ properties of the given space.
+*/
+DLMALLOC_EXPORT void mspace_malloc_stats(mspace msp);
+
+/*
+ mspace_trim behaves as malloc_trim, but
+ operates within the given space.
+*/
+DLMALLOC_EXPORT int mspace_trim(mspace msp, size_t pad);
+
+/*
+ An alias for mallopt.
+*/
+DLMALLOC_EXPORT int mspace_mallopt(int, int);
+
+#endif /* MSPACES */
+
+#ifdef __cplusplus
+} /* end of extern "C" */
+#endif /* __cplusplus */
+
+/*
+ ========================================================================
+ To make a fully customizable malloc.h header file, cut everything
+ above this line, put into file malloc.h, edit to suit, and #include it
+ on the next line, as well as in programs that use this malloc.
+ ========================================================================
+*/
+
+/* #include "malloc.h" */
+
+/*------------------------------ internal #includes ---------------------- */
+
+#ifdef _MSC_VER
+#pragma warning( disable : 4146 ) /* no "unsigned" warnings */
+#endif /* _MSC_VER */
+#if !NO_MALLOC_STATS
+#include <stdio.h> /* for printing in malloc_stats */
+#endif /* NO_MALLOC_STATS */
+#ifndef LACKS_ERRNO_H
+#include <errno.h> /* for MALLOC_FAILURE_ACTION */
+#endif /* LACKS_ERRNO_H */
+#ifdef DEBUG
+#if ABORT_ON_ASSERT_FAILURE
+#undef assert
+#define assert(x) if(!(x)) ABORT
+#else /* ABORT_ON_ASSERT_FAILURE */
+#include <assert.h>
+#endif /* ABORT_ON_ASSERT_FAILURE */
+#else /* DEBUG */
+#ifndef assert
+#define assert(x)
+#endif
+#define DEBUG 0
+#endif /* DEBUG */
+#if !defined(WIN32) && !defined(LACKS_TIME_H)
+#include <time.h> /* for magic initialization */
+#endif /* WIN32 */
+#ifndef LACKS_STDLIB_H
+#include <stdlib.h> /* for abort() */
+#endif /* LACKS_STDLIB_H */
+#ifndef LACKS_STRING_H
+#include <string.h> /* for memset etc */
+#endif /* LACKS_STRING_H */
+#if USE_BUILTIN_FFS
+#ifndef LACKS_STRINGS_H
+#include <strings.h> /* for ffs */
+#endif /* LACKS_STRINGS_H */
+#endif /* USE_BUILTIN_FFS */
+#if HAVE_MMAP
+#ifndef LACKS_SYS_MMAN_H
+/* On some versions of linux, mremap decl in mman.h needs __USE_GNU set */
+#if (defined(linux) && !defined(__USE_GNU))
+#define __USE_GNU 1
+#include <sys/mman.h> /* for mmap */
+#undef __USE_GNU
+#else
+#include <sys/mman.h> /* for mmap */
+#endif /* linux */
+#endif /* LACKS_SYS_MMAN_H */
+#ifndef LACKS_FCNTL_H
+#include <fcntl.h>
+#endif /* LACKS_FCNTL_H */
+#endif /* HAVE_MMAP */
+#ifndef LACKS_UNISTD_H
+#include <unistd.h> /* for sbrk, sysconf */
+#else /* LACKS_UNISTD_H */
+#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__)
+extern void* sbrk(ptrdiff_t);
+#endif /* FreeBSD etc */
+#endif /* LACKS_UNISTD_H */
+
+/* Declarations for locking */
+#if USE_LOCKS
+#ifndef WIN32
+#if defined (__SVR4) && defined (__sun) /* solaris */
+#include <thread.h>
+#elif !defined(LACKS_SCHED_H)
+#include <sched.h>
+#endif /* solaris or LACKS_SCHED_H */
+#if (defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0) || !USE_SPIN_LOCKS
+#include <pthread.h>
+#endif /* USE_RECURSIVE_LOCKS ... */
+#elif defined(_MSC_VER)
+#ifndef _M_AMD64
+/* These are already defined on AMD64 builds */
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+LONG __cdecl _InterlockedCompareExchange(LONG volatile *Dest, LONG Exchange, LONG Comp);
+LONG __cdecl _InterlockedExchange(LONG volatile *Target, LONG Value);
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+#endif /* _M_AMD64 */
+#pragma intrinsic (_InterlockedCompareExchange)
+#pragma intrinsic (_InterlockedExchange)
+#define interlockedcompareexchange _InterlockedCompareExchange
+#define interlockedexchange _InterlockedExchange
+#elif defined(WIN32) && defined(__GNUC__)
+#define interlockedcompareexchange(a, b, c) __sync_val_compare_and_swap(a, c, b)
+#define interlockedexchange __sync_lock_test_and_set
+#endif /* Win32 */
+#else /* USE_LOCKS */
+#endif /* USE_LOCKS */
+
+#ifndef LOCK_AT_FORK
+#define LOCK_AT_FORK 0
+#endif
+
+/* Declarations for bit scanning on win32 */
+#if defined(_MSC_VER) && _MSC_VER>=1300
+#ifndef BitScanForward /* Try to avoid pulling in WinNT.h */
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+unsigned char _BitScanForward(unsigned long *index, unsigned long mask);
+unsigned char _BitScanReverse(unsigned long *index, unsigned long mask);
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#define BitScanForward _BitScanForward
+#define BitScanReverse _BitScanReverse
+#pragma intrinsic(_BitScanForward)
+#pragma intrinsic(_BitScanReverse)
+#endif /* BitScanForward */
+#endif /* defined(_MSC_VER) && _MSC_VER>=1300 */
+
+#ifndef WIN32
+#ifndef malloc_getpagesize
+# ifdef _SC_PAGESIZE /* some SVR4 systems omit an underscore */
+# ifndef _SC_PAGE_SIZE
+# define _SC_PAGE_SIZE _SC_PAGESIZE
+# endif
+# endif
+# ifdef _SC_PAGE_SIZE
+# define malloc_getpagesize sysconf(_SC_PAGE_SIZE)
+# else
+# if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE)
+ extern size_t getpagesize();
+# define malloc_getpagesize getpagesize()
+# else
+# ifdef WIN32 /* use supplied emulation of getpagesize */
+# define malloc_getpagesize getpagesize()
+# else
+# ifndef LACKS_SYS_PARAM_H
+# include <sys/param.h>
+# endif
+# ifdef EXEC_PAGESIZE
+# define malloc_getpagesize EXEC_PAGESIZE
+# else
+# ifdef NBPG
+# ifndef CLSIZE
+# define malloc_getpagesize NBPG
+# else
+# define malloc_getpagesize (NBPG * CLSIZE)
+# endif
+# else
+# ifdef NBPC
+# define malloc_getpagesize NBPC
+# else
+# ifdef PAGESIZE
+# define malloc_getpagesize PAGESIZE
+# else /* just guess */
+# define malloc_getpagesize ((size_t)4096U)
+# endif
+# endif
+# endif
+# endif
+# endif
+# endif
+# endif
+#endif
+#endif
+
+/* ------------------- size_t and alignment properties -------------------- */
+
+/* The byte and bit size of a size_t */
+#define SIZE_T_SIZE (sizeof(size_t))
+#define SIZE_T_BITSIZE (sizeof(size_t) << 3)
+
+/* Some constants coerced to size_t */
+/* Annoying but necessary to avoid errors on some platforms */
+#define SIZE_T_ZERO ((size_t)0)
+#define SIZE_T_ONE ((size_t)1)
+#define SIZE_T_TWO ((size_t)2)
+#define SIZE_T_FOUR ((size_t)4)
+#define TWO_SIZE_T_SIZES (SIZE_T_SIZE<<1)
+#define FOUR_SIZE_T_SIZES (SIZE_T_SIZE<<2)
+#define SIX_SIZE_T_SIZES (FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES)
+#define HALF_MAX_SIZE_T (MAX_SIZE_T / 2U)
+
+/* The bit mask value corresponding to MALLOC_ALIGNMENT */
+#define CHUNK_ALIGN_MASK (MALLOC_ALIGNMENT - SIZE_T_ONE)
+
+/* True if address a has acceptable alignment */
+#define is_aligned(A) (((size_t)((A)) & (CHUNK_ALIGN_MASK)) == 0)
+
+/* the number of bytes to offset an address to align it */
+#define align_offset(A)\
+ ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 0 :\
+ ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK))
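+
+/*
+  For example, assuming MALLOC_ALIGNMENT is 16: align_offset(0x1008)
+  is (16 - 8) & 15 = 8, which advances 0x1008 to the aligned address
+  0x1010, while align_offset(0x1010) is 0 since it is aligned already.
+*/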
+
+/* -------------------------- MMAP preliminaries ------------------------- */
+
+/*
+  If HAVE_MORECORE or HAVE_MMAP is false, we just define the calls and
+  checks to fail, so the compiler optimizer can delete code rather
+  than using so many "#if"s.
+*/
+
+
+/* MORECORE and MMAP must return MFAIL on failure */
+#define MFAIL ((void*)(MAX_SIZE_T))
+#define CMFAIL ((char*)(MFAIL)) /* defined for convenience */
+
+#if HAVE_MMAP
+
+#ifndef WIN32
+#define MUNMAP_DEFAULT(a, s) munmap((a), (s))
+#define MMAP_PROT (PROT_READ|PROT_WRITE)
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+#define MAP_ANONYMOUS MAP_ANON
+#endif /* MAP_ANON */
+#ifdef MAP_ANONYMOUS
+#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS)
+#define MMAP_DEFAULT(s) mmap(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0)
+#else /* MAP_ANONYMOUS */
+/*
+ Nearly all versions of mmap support MAP_ANONYMOUS, so the following
+ is unlikely to be needed, but is supplied just in case.
+*/
+#define MMAP_FLAGS (MAP_PRIVATE)
+static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */
+#define MMAP_DEFAULT(s) ((dev_zero_fd < 0) ? \
+ (dev_zero_fd = open("/dev/zero", O_RDWR), \
+ mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \
+ mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0))
+#endif /* MAP_ANONYMOUS */
+
+#define DIRECT_MMAP_DEFAULT(s) MMAP_DEFAULT(s)
+
+#else /* WIN32 */
+
+/* Win32 MMAP via VirtualAlloc */
+static FORCEINLINE void* win32mmap(size_t size) {
+ void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
+ return (ptr != 0)? ptr: MFAIL;
+}
+
+/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
+static FORCEINLINE void* win32direct_mmap(size_t size) {
+ void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
+ PAGE_READWRITE);
+ return (ptr != 0)? ptr: MFAIL;
+}
+
+/* This function supports releasing coalesced segments */
+static FORCEINLINE int win32munmap(void* ptr, size_t size) {
+ MEMORY_BASIC_INFORMATION minfo;
+ char* cptr = (char*)ptr;
+ while (size) {
+ if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0)
+ return -1;
+ if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr ||
+ minfo.State != MEM_COMMIT || minfo.RegionSize > size)
+ return -1;
+ if (VirtualFree(cptr, 0, MEM_RELEASE) == 0)
+ return -1;
+ cptr += minfo.RegionSize;
+ size -= minfo.RegionSize;
+ }
+ return 0;
+}
+
+#define MMAP_DEFAULT(s) win32mmap(s)
+#define MUNMAP_DEFAULT(a, s) win32munmap((a), (s))
+#define DIRECT_MMAP_DEFAULT(s) win32direct_mmap(s)
+#endif /* WIN32 */
+#endif /* HAVE_MMAP */
+
+#if HAVE_MREMAP
+#ifndef WIN32
+#define MREMAP_DEFAULT(addr, osz, nsz, mv) mremap((addr), (osz), (nsz), (mv))
+#endif /* WIN32 */
+#endif /* HAVE_MREMAP */
+
+/**
+ * Define CALL_MORECORE
+ */
+#if HAVE_MORECORE
+ #ifdef MORECORE
+ #define CALL_MORECORE(S) MORECORE(S)
+ #else /* MORECORE */
+ #define CALL_MORECORE(S) MORECORE_DEFAULT(S)
+ #endif /* MORECORE */
+#else /* HAVE_MORECORE */
+ #define CALL_MORECORE(S) MFAIL
+#endif /* HAVE_MORECORE */
+
+/**
+ * Define CALL_MMAP/CALL_MUNMAP/CALL_DIRECT_MMAP
+ */
+#if HAVE_MMAP
+ #define USE_MMAP_BIT (SIZE_T_ONE)
+
+ #ifdef MMAP
+ #define CALL_MMAP(s) MMAP(s)
+ #else /* MMAP */
+ #define CALL_MMAP(s) MMAP_DEFAULT(s)
+ #endif /* MMAP */
+ #ifdef MUNMAP
+ #define CALL_MUNMAP(a, s) MUNMAP((a), (s))
+ #else /* MUNMAP */
+ #define CALL_MUNMAP(a, s) MUNMAP_DEFAULT((a), (s))
+ #endif /* MUNMAP */
+ #ifdef DIRECT_MMAP
+ #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s)
+ #else /* DIRECT_MMAP */
+ #define CALL_DIRECT_MMAP(s) DIRECT_MMAP_DEFAULT(s)
+ #endif /* DIRECT_MMAP */
+#else /* HAVE_MMAP */
+ #define USE_MMAP_BIT (SIZE_T_ZERO)
+
+ #define MMAP(s) MFAIL
+ #define MUNMAP(a, s) (-1)
+ #define DIRECT_MMAP(s) MFAIL
+ #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s)
+ #define CALL_MMAP(s) MMAP(s)
+ #define CALL_MUNMAP(a, s) MUNMAP((a), (s))
+#endif /* HAVE_MMAP */
+
+/**
+ * Define CALL_MREMAP
+ */
+#if HAVE_MMAP && HAVE_MREMAP
+ #ifdef MREMAP
+ #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP((addr), (osz), (nsz), (mv))
+ #else /* MREMAP */
+ #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP_DEFAULT((addr), (osz), (nsz), (mv))
+ #endif /* MREMAP */
+#else /* HAVE_MMAP && HAVE_MREMAP */
+ #define CALL_MREMAP(addr, osz, nsz, mv) MFAIL
+#endif /* HAVE_MMAP && HAVE_MREMAP */
+
+/* mstate bit set if contiguous morecore disabled or failed */
+#define USE_NONCONTIGUOUS_BIT (4U)
+
+/* segment bit set in create_mspace_with_base */
+#define EXTERN_BIT (8U)
+
+
+/* --------------------------- Lock preliminaries ------------------------ */
+
+/*
+ When locks are defined, there is one global lock, plus
+ one per-mspace lock.
+
+  The global lock ensures that mparams.magic and other unique
+  mparams values are initialized only once. It also protects
+  sequences of calls to MORECORE. In many cases sys_alloc requires
+  two calls that should not be interleaved with calls by other
+  threads. This does not protect against direct calls to MORECORE
+  by other threads not using this lock, so there is still code to
+  cope as best we can with interference.
+
+ Per-mspace locks surround calls to malloc, free, etc.
+ By default, locks are simple non-reentrant mutexes.
+
+ Because lock-protected regions generally have bounded times, it is
+ OK to use the supplied simple spinlocks. Spinlocks are likely to
+ improve performance for lightly contended applications, but worsen
+ performance under heavy contention.
+
+ If USE_LOCKS is > 1, the definitions of lock routines here are
+ bypassed, in which case you will need to define the type MLOCK_T,
+ and at least INITIAL_LOCK, DESTROY_LOCK, ACQUIRE_LOCK, RELEASE_LOCK
+ and TRY_LOCK. You must also declare a
+ static MLOCK_T malloc_global_mutex = { initialization values };.
+
+*/
+
+#if !USE_LOCKS
+#define USE_LOCK_BIT (0U)
+#define INITIAL_LOCK(l) (0)
+#define DESTROY_LOCK(l) (0)
+#define ACQUIRE_MALLOC_GLOBAL_LOCK()
+#define RELEASE_MALLOC_GLOBAL_LOCK()
+
+#else
+#if USE_LOCKS > 1
+/* ----------------------- User-defined locks ------------------------ */
+/* Define your own lock implementation here */
+/* #define INITIAL_LOCK(lk) ... */
+/* #define DESTROY_LOCK(lk) ... */
+/* #define ACQUIRE_LOCK(lk) ... */
+/* #define RELEASE_LOCK(lk) ... */
+/* #define TRY_LOCK(lk) ... */
+/* static MLOCK_T malloc_global_mutex = ... */
+
+#elif USE_SPIN_LOCKS
+
+/* First, define CAS_LOCK and CLEAR_LOCK on ints */
+/* Note CAS_LOCK defined to return 0 on success */
+
+#if defined(__GNUC__)&& (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))
+#define CAS_LOCK(sl) __sync_lock_test_and_set(sl, 1)
+#define CLEAR_LOCK(sl) __sync_lock_release(sl)
+
+#elif (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)))
+/* Custom spin locks for older gcc on x86 */
+static FORCEINLINE int x86_cas_lock(int *sl) {
+ int ret;
+ int val = 1;
+ int cmp = 0;
+ __asm__ __volatile__ ("lock; cmpxchgl %1, %2"
+ : "=a" (ret)
+ : "r" (val), "m" (*(sl)), "0"(cmp)
+ : "memory", "cc");
+ return ret;
+}
+
+static FORCEINLINE void x86_clear_lock(int* sl) {
+ assert(*sl != 0);
+ int prev = 0;
+ int ret;
+ __asm__ __volatile__ ("lock; xchgl %0, %1"
+ : "=r" (ret)
+ : "m" (*(sl)), "0"(prev)
+ : "memory");
+}
+
+#define CAS_LOCK(sl) x86_cas_lock(sl)
+#define CLEAR_LOCK(sl) x86_clear_lock(sl)
+
+#else /* Win32 MSC */
+#define CAS_LOCK(sl) interlockedexchange(sl, (LONG)1)
+#define CLEAR_LOCK(sl) interlockedexchange (sl, (LONG)0)
+
+#endif /* ... gcc spin locks ... */
+
+/* How to yield for a spin lock */
+#define SPINS_PER_YIELD 63
+#if defined(_MSC_VER)
+#define SLEEP_EX_DURATION 50 /* delay for yield/sleep */
+#define SPIN_LOCK_YIELD SleepEx(SLEEP_EX_DURATION, FALSE)
+#elif defined (__SVR4) && defined (__sun) /* solaris */
+#define SPIN_LOCK_YIELD thr_yield();
+#elif !defined(LACKS_SCHED_H)
+#define SPIN_LOCK_YIELD sched_yield();
+#else
+#define SPIN_LOCK_YIELD
+#endif /* ... yield ... */
+
+#if !defined(USE_RECURSIVE_LOCKS) || USE_RECURSIVE_LOCKS == 0
+/* Plain spin locks use a single word (embedded in malloc_states) */
+static int spin_acquire_lock(int *sl) {
+ int spins = 0;
+ while (*(volatile int *)sl != 0 || CAS_LOCK(sl)) {
+ if ((++spins & SPINS_PER_YIELD) == 0) {
+ SPIN_LOCK_YIELD;
+ }
+ }
+ return 0;
+}
+
+#define MLOCK_T int
+#define TRY_LOCK(sl) !CAS_LOCK(sl)
+#define RELEASE_LOCK(sl) CLEAR_LOCK(sl)
+#define ACQUIRE_LOCK(sl) (CAS_LOCK(sl)? spin_acquire_lock(sl) : 0)
+#define INITIAL_LOCK(sl) (*sl = 0)
+#define DESTROY_LOCK(sl) (0)
+static MLOCK_T malloc_global_mutex = 0;
+
+#else /* USE_RECURSIVE_LOCKS */
+/* types for lock owners */
+#ifdef WIN32
+#define THREAD_ID_T DWORD
+#define CURRENT_THREAD GetCurrentThreadId()
+#define EQ_OWNER(X,Y) ((X) == (Y))
+#else
+/*
+ Note: the following assume that pthread_t is a type that can be
+  initialized to (cast) zero. If this is not the case, you will need to
+ somehow redefine these or not use spin locks.
+*/
+#define THREAD_ID_T pthread_t
+#define CURRENT_THREAD pthread_self()
+#define EQ_OWNER(X,Y) pthread_equal(X, Y)
+#endif
+
+struct malloc_recursive_lock {
+ int sl;
+ unsigned int c;
+ THREAD_ID_T threadid;
+};
+
+#define MLOCK_T struct malloc_recursive_lock
+static MLOCK_T malloc_global_mutex = { 0, 0, (THREAD_ID_T)0};
+
+static FORCEINLINE void recursive_release_lock(MLOCK_T *lk) {
+ assert(lk->sl != 0);
+ if (--lk->c == 0) {
+ CLEAR_LOCK(&lk->sl);
+ }
+}
+
+static FORCEINLINE int recursive_acquire_lock(MLOCK_T *lk) {
+ THREAD_ID_T mythreadid = CURRENT_THREAD;
+ int spins = 0;
+ for (;;) {
+ if (*((volatile int *)(&lk->sl)) == 0) {
+ if (!CAS_LOCK(&lk->sl)) {
+ lk->threadid = mythreadid;
+ lk->c = 1;
+ return 0;
+ }
+ }
+ else if (EQ_OWNER(lk->threadid, mythreadid)) {
+ ++lk->c;
+ return 0;
+ }
+ if ((++spins & SPINS_PER_YIELD) == 0) {
+ SPIN_LOCK_YIELD;
+ }
+ }
+}
+
+static FORCEINLINE int recursive_try_lock(MLOCK_T *lk) {
+ THREAD_ID_T mythreadid = CURRENT_THREAD;
+ if (*((volatile int *)(&lk->sl)) == 0) {
+ if (!CAS_LOCK(&lk->sl)) {
+ lk->threadid = mythreadid;
+ lk->c = 1;
+ return 1;
+ }
+ }
+ else if (EQ_OWNER(lk->threadid, mythreadid)) {
+ ++lk->c;
+ return 1;
+ }
+ return 0;
+}
+
+#define RELEASE_LOCK(lk) recursive_release_lock(lk)
+#define TRY_LOCK(lk) recursive_try_lock(lk)
+#define ACQUIRE_LOCK(lk) recursive_acquire_lock(lk)
+#define INITIAL_LOCK(lk) ((lk)->threadid = (THREAD_ID_T)0, (lk)->sl = 0, (lk)->c = 0)
+#define DESTROY_LOCK(lk) (0)
+#endif /* USE_RECURSIVE_LOCKS */
+
+#elif defined(WIN32) /* Win32 critical sections */
+#define MLOCK_T CRITICAL_SECTION
+#define ACQUIRE_LOCK(lk) (EnterCriticalSection(lk), 0)
+#define RELEASE_LOCK(lk) LeaveCriticalSection(lk)
+#define TRY_LOCK(lk) TryEnterCriticalSection(lk)
+#define INITIAL_LOCK(lk) (!InitializeCriticalSectionAndSpinCount((lk), 0x80000000|4000))
+#define DESTROY_LOCK(lk) (DeleteCriticalSection(lk), 0)
+#define NEED_GLOBAL_LOCK_INIT
+
+static MLOCK_T malloc_global_mutex;
+static volatile LONG malloc_global_mutex_status;
+
+/* Use spin loop to initialize global lock */
+static void init_malloc_global_mutex() {
+ for (;;) {
+ long stat = malloc_global_mutex_status;
+ if (stat > 0)
+ return;
+    /* transition to < 0 while initializing, then to > 0 */
+ if (stat == 0 &&
+ interlockedcompareexchange(&malloc_global_mutex_status, (LONG)-1, (LONG)0) == 0) {
+ InitializeCriticalSection(&malloc_global_mutex);
+ interlockedexchange(&malloc_global_mutex_status, (LONG)1);
+ return;
+ }
+ SleepEx(0, FALSE);
+ }
+}
+
+#else /* pthreads-based locks */
+#define MLOCK_T pthread_mutex_t
+#define ACQUIRE_LOCK(lk) pthread_mutex_lock(lk)
+#define RELEASE_LOCK(lk) pthread_mutex_unlock(lk)
+#define TRY_LOCK(lk) (!pthread_mutex_trylock(lk))
+#define INITIAL_LOCK(lk) pthread_init_lock(lk)
+#define DESTROY_LOCK(lk) pthread_mutex_destroy(lk)
+
+#if defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0 && defined(linux) && !defined(PTHREAD_MUTEX_RECURSIVE)
+/* Cope with old-style linux recursive lock initialization by adding */
+/* skipped internal declaration from pthread.h */
+extern int pthread_mutexattr_setkind_np __P ((pthread_mutexattr_t *__attr,
+ int __kind));
+#define PTHREAD_MUTEX_RECURSIVE PTHREAD_MUTEX_RECURSIVE_NP
+#define pthread_mutexattr_settype(x,y) pthread_mutexattr_setkind_np(x,y)
+#endif /* USE_RECURSIVE_LOCKS ... */
+
+static MLOCK_T malloc_global_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static int pthread_init_lock (MLOCK_T *lk) {
+ pthread_mutexattr_t attr;
+ if (pthread_mutexattr_init(&attr)) return 1;
+#if defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0
+ if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE)) return 1;
+#endif
+ if (pthread_mutex_init(lk, &attr)) return 1;
+ if (pthread_mutexattr_destroy(&attr)) return 1;
+ return 0;
+}
+
+#endif /* ... lock types ... */
+
+/* Common code for all lock types */
+#define USE_LOCK_BIT (2U)
+
+#ifndef ACQUIRE_MALLOC_GLOBAL_LOCK
+#define ACQUIRE_MALLOC_GLOBAL_LOCK() ACQUIRE_LOCK(&malloc_global_mutex);
+#endif
+
+#ifndef RELEASE_MALLOC_GLOBAL_LOCK
+#define RELEASE_MALLOC_GLOBAL_LOCK() RELEASE_LOCK(&malloc_global_mutex);
+#endif
+
+#endif /* USE_LOCKS */
+
+/* ----------------------- Chunk representations ------------------------ */
+
+/*
+ (The following includes lightly edited explanations by Colin Plumb.)
+
+ The malloc_chunk declaration below is misleading (but accurate and
+ necessary). It declares a "view" into memory allowing access to
+ necessary fields at known offsets from a given base.
+
+ Chunks of memory are maintained using a `boundary tag' method as
+ originally described by Knuth. (See the paper by Paul Wilson
+ ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a survey of such
+ techniques.) Sizes of free chunks are stored both in the front of
+ each chunk and at the end. This makes consolidating fragmented
+ chunks into bigger chunks fast. The head fields also hold bits
+ representing whether chunks are free or in use.
+
+ Here are some pictures to make it clearer. They are "exploded" to
+ show that the state of a chunk can be thought of as extending from
+ the high 31 bits of the head field of its header through the
+ prev_foot and PINUSE_BIT bit of the following chunk header.
+
+ A chunk that's in use looks like:
+
+ chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Size of previous chunk (if P = 0) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P|
+ | Size of this chunk 1| +-+
+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ +- -+
+ | |
+ +- -+
+ | :
+ +- size - sizeof(size_t) available payload bytes -+
+ : |
+ chunk-> +- -+
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |1|
+ | Size of next chunk (may or may not be in use) | +-+
+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ And if it's free, it looks like this:
+
+ chunk-> +- -+
+ | User payload (must be in use, or we would have merged!) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P|
+ | Size of this chunk 0| +-+
+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Next pointer |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Prev pointer |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | :
+ +- size - sizeof(struct chunk) unused bytes -+
+ : |
+ chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Size of this chunk |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |0|
+ | Size of next chunk (must be in use, or we would have merged)| +-+
+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | :
+ +- User payload -+
+ : |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |0|
+ +-+
+ Note that since we always merge adjacent free chunks, the chunks
+ adjacent to a free chunk must be in use.
+
+ Given a pointer to a chunk (which can be derived trivially from the
+ payload pointer) we can, in O(1) time, find out whether the adjacent
+ chunks are free, and if so, unlink them from the lists that they
+ are on and merge them with the current chunk.
+
+ Chunks always begin on even word boundaries, so the mem portion
+ (which is returned to the user) is also on an even word boundary, and
+ thus at least double-word aligned.
+
+ The P (PINUSE_BIT) bit, stored in the unused low-order bit of the
+ chunk size (which is always a multiple of two words), is an in-use
+ bit for the *previous* chunk. If that bit is *clear*, then the
+ word before the current chunk size contains the previous chunk
+ size, and can be used to find the front of the previous chunk.
+ The very first chunk allocated always has this bit set, preventing
+ access to non-existent (or non-owned) memory. If pinuse is set for
+ any given chunk, then you CANNOT determine the size of the
+ previous chunk, and might even get a memory addressing fault when
+ trying to do so.
+
+ The C (CINUSE_BIT) bit, stored in the unused second-lowest bit of
+ the chunk size redundantly records whether the current chunk is
+ inuse (unless the chunk is mmapped). This redundancy enables usage
+ checks within free and realloc, and reduces indirection when freeing
+ and consolidating chunks.
+
+ Each freshly allocated chunk must have both cinuse and pinuse set.
+ That is, each allocated chunk borders either a previously allocated
+ and still in-use chunk, or the base of its memory arena. This is
+ ensured by making all allocations from the `lowest' part of any
+ found chunk. Further, no free chunk physically borders another one,
+ so each free chunk is known to be preceded and followed by either
+ inuse chunks or the ends of memory.
+
+ Note that the `foot' of the current chunk is actually represented
+ as the prev_foot of the NEXT chunk. This makes it easier to
+ deal with alignments etc but can be very confusing when trying
+ to extend or adapt this code.
+
+ The exceptions to all this are
+
+ 1. The special chunk `top' is the top-most available chunk (i.e.,
+ the one bordering the end of available memory). It is treated
+ specially. Top is never included in any bin, is used only if
+ no other chunk is available, and is released back to the
+ system if it is very large (see M_TRIM_THRESHOLD). In effect,
+ the top chunk is treated as larger (and thus less well
+ fitting) than any other available chunk. The top chunk
+ doesn't update its trailing size field since there is no next
+ contiguous chunk that would have to index off it. However,
+ space is still allocated for it (TOP_FOOT_SIZE) to enable
+ separation or merging when space is extended.
+
+  2. Chunks allocated via mmap have both cinuse and pinuse bits
+ cleared in their head fields. Because they are allocated
+ one-by-one, each must carry its own prev_foot field, which is
+ also used to hold the offset this chunk has within its mmapped
+ region, which is needed to preserve alignment. Each mmapped
+ chunk is trailed by the first two fields of a fake next-chunk
+  for the sake of usage checks.
+
+*/
+
+struct malloc_chunk {
+ size_t prev_foot; /* Size of previous chunk (if free). */
+ size_t head; /* Size and inuse bits. */
+ struct malloc_chunk* fd; /* double links -- used only if free. */
+ struct malloc_chunk* bk;
+};
+
+typedef struct malloc_chunk mchunk;
+typedef struct malloc_chunk* mchunkptr;
+typedef struct malloc_chunk* sbinptr; /* The type of bins of chunks */
+typedef unsigned int bindex_t; /* Described below */
+typedef unsigned int binmap_t; /* Described below */
+typedef unsigned int flag_t; /* The type of various bit flag sets */
+
+/* ------------------- Chunks sizes and alignments ----------------------- */
+
+#define MCHUNK_SIZE (sizeof(mchunk))
+
+#if FOOTERS
+#define CHUNK_OVERHEAD (TWO_SIZE_T_SIZES)
+#else /* FOOTERS */
+#define CHUNK_OVERHEAD (SIZE_T_SIZE)
+#endif /* FOOTERS */
+
+/* MMapped chunks need a second word of overhead ... */
+#define MMAP_CHUNK_OVERHEAD (TWO_SIZE_T_SIZES)
+/* ... and additional padding for fake next-chunk at foot */
+#define MMAP_FOOT_PAD (FOUR_SIZE_T_SIZES)
+
+/* The smallest size we can malloc is an aligned minimal chunk */
+#define MIN_CHUNK_SIZE\
+ ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
+
+/* conversion from malloc headers to user pointers, and back */
+#define chunk2mem(p) ((void*)((char*)(p) + TWO_SIZE_T_SIZES))
+#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - TWO_SIZE_T_SIZES))
+/* chunk associated with aligned address A */
+#define align_as_chunk(A) (mchunkptr)((A) + align_offset(chunk2mem(A)))
+
+/* Bounds on request (not chunk) sizes. */
+#define MAX_REQUEST ((-MIN_CHUNK_SIZE) << 2)
+#define MIN_REQUEST (MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE)
+
+/* pad request bytes into a usable size */
+#define pad_request(req) \
+ (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
+
+/* pad request, checking for minimum (but not maximum) */
+#define request2size(req) \
+ (((req) < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(req))
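+
+/*
+  For example, on a typical 64-bit build (SIZE_T_SIZE 8,
+  MALLOC_ALIGNMENT 16, CHUNK_OVERHEAD 8 with FOOTERS disabled):
+  request2size(1) yields MIN_CHUNK_SIZE (32), since 1 < MIN_REQUEST,
+  and request2size(25) pads 25 + 8 up to the next 16-byte multiple, 48.
+*/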
+
+
+/* ------------------ Operations on head and foot fields ----------------- */
+
+/*
+ The head field of a chunk is or'ed with PINUSE_BIT when previous
+ adjacent chunk in use, and or'ed with CINUSE_BIT if this chunk is in
+ use, unless mmapped, in which case both bits are cleared.
+
+ FLAG4_BIT is not used by this malloc, but might be useful in extensions.
+*/
+
+#define PINUSE_BIT (SIZE_T_ONE)
+#define CINUSE_BIT (SIZE_T_TWO)
+#define FLAG4_BIT (SIZE_T_FOUR)
+#define INUSE_BITS (PINUSE_BIT|CINUSE_BIT)
+#define FLAG_BITS (PINUSE_BIT|CINUSE_BIT|FLAG4_BIT)
+
+/* Head value for fenceposts */
+#define FENCEPOST_HEAD (INUSE_BITS|SIZE_T_SIZE)
+
+/* extraction of fields from head words */
+#define cinuse(p) ((p)->head & CINUSE_BIT)
+#define pinuse(p) ((p)->head & PINUSE_BIT)
+#define flag4inuse(p) ((p)->head & FLAG4_BIT)
+#define is_inuse(p) (((p)->head & INUSE_BITS) != PINUSE_BIT)
+#define is_mmapped(p) (((p)->head & INUSE_BITS) == 0)
+
+#define chunksize(p) ((p)->head & ~(FLAG_BITS))
+
+#define clear_pinuse(p) ((p)->head &= ~PINUSE_BIT)
+#define set_flag4(p) ((p)->head |= FLAG4_BIT)
+#define clear_flag4(p) ((p)->head &= ~FLAG4_BIT)
+
+/* Treat space at ptr +/- offset as a chunk */
+#define chunk_plus_offset(p, s) ((mchunkptr)(((char*)(p)) + (s)))
+#define chunk_minus_offset(p, s) ((mchunkptr)(((char*)(p)) - (s)))
+
+/* Ptr to next or previous physical malloc_chunk. */
+#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->head & ~FLAG_BITS)))
+#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_foot) ))
+
+/* extract next chunk's pinuse bit */
+#define next_pinuse(p) ((next_chunk(p)->head) & PINUSE_BIT)
+
+/* Get/set size at footer */
+#define get_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot)
+#define set_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot = (s))
+
+/* Set size, pinuse bit, and foot */
+#define set_size_and_pinuse_of_free_chunk(p, s)\
+ ((p)->head = (s|PINUSE_BIT), set_foot(p, s))
+
+/* Set size, pinuse bit, foot, and clear next pinuse */
+#define set_free_with_pinuse(p, s, n)\
+ (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s))
+
+/* Get the internal overhead associated with chunk p */
+#define overhead_for(p)\
+ (is_mmapped(p)? MMAP_CHUNK_OVERHEAD : CHUNK_OVERHEAD)
+
+/* Return true if malloced space is not necessarily cleared */
+#if MMAP_CLEARS
+#define calloc_must_clear(p) (!is_mmapped(p))
+#else /* MMAP_CLEARS */
+#define calloc_must_clear(p) (1)
+#endif /* MMAP_CLEARS */
+
+/* ---------------------- Overlaid data structures ----------------------- */
+
+/*
+ When chunks are not in use, they are treated as nodes of either
+ lists or trees.
+
+ "Small" chunks are stored in circular doubly-linked lists, and look
+ like this:
+
+ chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Size of previous chunk |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ `head:' | Size of chunk, in bytes |P|
+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Forward pointer to next chunk in list |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Back pointer to previous chunk in list |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Unused space (may be 0 bytes long) .
+ . .
+ . |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ `foot:' | Size of chunk, in bytes |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Larger chunks are kept in a form of bitwise digital trees (aka
+ tries) keyed on chunksizes. Because malloc_tree_chunks are only for
+ free chunks greater than 256 bytes, their size doesn't impose any
+ constraints on user chunk sizes. Each node looks like:
+
+ chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Size of previous chunk |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ `head:' | Size of chunk, in bytes |P|
+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Forward pointer to next chunk of same size |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Back pointer to previous chunk of same size |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Pointer to left child (child[0]) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Pointer to right child (child[1]) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Pointer to parent |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | bin index of this chunk |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Unused space .
+ . |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ `foot:' | Size of chunk, in bytes |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Each tree holding treenodes is a tree of unique chunk sizes. Chunks
+ of the same size are arranged in a circularly-linked list, with only
+ the oldest chunk (the next to be used, in our FIFO ordering)
+ actually in the tree. (Tree members are distinguished by a non-null
+  parent pointer.) If a chunk with the same size as an existing node
+ is inserted, it is linked off the existing node using pointers that
+ work in the same way as fd/bk pointers of small chunks.
+
+ Each tree contains a power of 2 sized range of chunk sizes (the
+  smallest is 0x100 <= x < 0x180), which is divided in half at each
+  tree level, with the chunks in the smaller half of the range (0x100
+  <= x < 0x140 for the top node) in the left subtree and the larger
+ half (0x140 <= x < 0x180) in the right subtree. This is, of course,
+ done by inspecting individual bits.
+
+ Using these rules, each node's left subtree contains all smaller
+ sizes than its right subtree. However, the node at the root of each
+ subtree has no particular ordering relationship to either. (The
+ dividing line between the subtree sizes is based on trie relation.)
+ If we remove the last chunk of a given size from the interior of the
+ tree, we need to replace it with a leaf node. The tree ordering
+ rules permit a node to be replaced by any leaf below it.
+
+ The smallest chunk in a tree (a common operation in a best-fit
+ allocator) can be found by walking a path to the leftmost leaf in
+ the tree. Unlike a usual binary tree, where we follow left child
+ pointers until we reach a null, here we follow the right child
+ pointer any time the left one is null, until we reach a leaf with
+ both child pointers null. The smallest chunk in the tree will be
+ somewhere along that path.
+
+ The worst case number of steps to add, find, or remove a node is
+ bounded by the number of bits differentiating chunks within
+ bins. Under current bin calculations, this ranges from 6 up to 21
+ (for 32 bit sizes) or up to 53 (for 64 bit sizes). The typical case
+ is of course much better.
+*/
+
+struct malloc_tree_chunk {
+ /* The first four fields must be compatible with malloc_chunk */
+ size_t prev_foot;
+ size_t head;
+ struct malloc_tree_chunk* fd;
+ struct malloc_tree_chunk* bk;
+
+ struct malloc_tree_chunk* child[2];
+ struct malloc_tree_chunk* parent;
+ bindex_t index;
+};
+
+typedef struct malloc_tree_chunk tchunk;
+typedef struct malloc_tree_chunk* tchunkptr;
+typedef struct malloc_tree_chunk* tbinptr; /* The type of bins of trees */
+
+/* A little helper macro for trees */
+#define leftmost_child(t) ((t)->child[0] != 0? (t)->child[0] : (t)->child[1])
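+
+/*
+ A minimal sketch (illustrative only, not part of the allocator) of
+ the smallest-chunk walk described above: follow child[0] when it is
+ non-null, else child[1], tracking the smallest size seen on the way
+ down. The real best-fit search walks this same path.
+*/
+#if 0 /* example only */
+static tchunkptr smallest_in_tree(tchunkptr t) {
+  tchunkptr best = t;
+  while ((t = leftmost_child(t)) != 0) {
+    if (chunksize(t) < chunksize(best))
+      best = t;
+  }
+  return best;
+}
+#endif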
+
+/* ----------------------------- Segments -------------------------------- */
+
+/*
+ Each malloc space may include non-contiguous segments, held in a
+ list headed by an embedded malloc_segment record representing the
+ top-most space. Segments also include flags holding properties of
+ the space. Large chunks that are directly allocated by mmap are not
+ included in this list. They are instead independently created and
+ destroyed without otherwise keeping track of them.
+
+ Segment management mainly comes into play for spaces allocated by
+ MMAP. Any call to MMAP might or might not return memory that is
+ adjacent to an existing segment. MORECORE normally contiguously
+ extends the current space, so this space is almost always adjacent,
+ which is simpler and faster to deal with. (This is why MORECORE is
+ used preferentially to MMAP when both are available -- see
+ sys_alloc.) When allocating using MMAP, we don't use any of the
+ hinting mechanisms (inconsistently) supported in various
+ implementations of unix mmap, or distinguish reserving from
+ committing memory. Instead, we just ask for space, and exploit
+ contiguity when we get it. It is probably possible to do
+ better than this on some systems, but no general scheme seems
+ to be significantly better.
+
+ Management entails a simpler variant of the consolidation scheme
+ used for chunks to reduce fragmentation -- new adjacent memory is
+ normally prepended or appended to an existing segment. However,
+ there are limitations compared to chunk consolidation that mostly
+ reflect the fact that segment processing is relatively infrequent
+ (occurring only when getting memory from system) and that we
+ don't expect to have huge numbers of segments:
+
+ * Segments are not indexed, so traversal requires linear scans. (It
+ would be possible to index these, but is not worth the extra
+ overhead and complexity for most programs on most platforms.)
+ * New segments are only appended to old ones when holding top-most
+ memory; if they cannot be prepended to others, they are held in
+ different segments.
+
+ Except for the top-most segment of an mstate, each segment record
+ is kept at the tail of its segment. Segments are added by pushing
+ segment records onto the list headed by &mstate.seg for the
+ containing mstate.
+
+ Segment flags control allocation/merge/deallocation policies:
+ * If EXTERN_BIT set, then we did not allocate this segment,
+ and so should not try to deallocate or merge with others.
+ (This currently holds only for the initial segment passed
+ into create_mspace_with_base.)
+ * If USE_MMAP_BIT set, the segment may be merged with
+ other surrounding mmapped segments and trimmed/de-allocated
+ using munmap.
+ * If neither bit is set, then the segment was obtained using
+ MORECORE so can be merged with surrounding MORECORE'd segments
+ and deallocated/trimmed using MORECORE with negative arguments.
+*/
+
+struct malloc_segment {
+ char* base; /* base address */
+ size_t size; /* allocated size */
+ struct malloc_segment* next; /* ptr to next segment */
+ flag_t sflags; /* mmap and extern flag */
+};
+
+#define is_mmapped_segment(S) ((S)->sflags & USE_MMAP_BIT)
+#define is_extern_segment(S) ((S)->sflags & EXTERN_BIT)
+
+typedef struct malloc_segment msegment;
+typedef struct malloc_segment* msegmentptr;
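+
+/*
+ A minimal sketch (illustrative only): segments form a singly linked
+ list headed by the embedded record, so whole-space walks are linear
+ scans, e.g. summing the sizes of all segments:
+*/
+#if 0 /* example only; call as total_segment_bytes(&m->seg) */
+static size_t total_segment_bytes(msegmentptr sp) {
+  size_t sum = 0;
+  for (; sp != 0; sp = sp->next)
+    sum += sp->size;
+  return sum;
+}
+#endif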
+
+/* ---------------------------- malloc_state ----------------------------- */
+
+/*
+ A malloc_state holds all of the bookkeeping for a space.
+ The main fields are:
+
+ Top
+ The topmost chunk of the currently active segment. Its size is
+ cached in topsize. The actual size of topmost space is
+ topsize+TOP_FOOT_SIZE, which includes space reserved for adding
+ fenceposts and segment records if necessary when getting more
+ space from the system. The size at which to autotrim top is
+ cached from mparams in trim_check, except that it is disabled if
+ an autotrim fails.
+
+ Designated victim (dv)
+ This is the preferred chunk for servicing small requests that
+ don't have exact fits. It is normally the chunk split off most
+ recently to service another small request. Its size is cached in
+ dvsize. The link fields of this chunk are not maintained since it
+ is not kept in a bin.
+
+ SmallBins
+ An array of bin headers for free chunks. These bins hold chunks
+ with sizes less than MIN_LARGE_SIZE bytes. Each bin contains
+ chunks of all the same size, spaced 8 bytes apart. To simplify
+ use in double-linked lists, each bin header acts as a malloc_chunk
+ pointing to the real first node, if it exists (else pointing to
+ itself). This avoids special-casing for headers. But to avoid
+ waste, we allocate only the fd/bk pointers of bins, and then use
+ repositioning tricks to treat these as the fields of a chunk.
+
+ TreeBins
+ Treebins are pointers to the roots of trees holding a range of
+ sizes. There are 2 equally spaced treebins for each power of two
+ from TREEBIN_SHIFT to TREEBIN_SHIFT+16. The last bin holds anything
+ larger.
+
+ Bin maps
+ There is one bit map for small bins ("smallmap") and one for
+ treebins ("treemap). Each bin sets its bit when non-empty, and
+ clears the bit when empty. Bit operations are then used to avoid
+ bin-by-bin searching -- nearly all "search" is done without ever
+ looking at bins that won't be selected. The bit maps
+ conservatively use 32 bits per map word, even on a 64-bit system.
+ For a good description of some of the bit-based techniques used
+ here, see Henry S. Warren Jr's book "Hacker's Delight" (and
+ supplement at http://hackersdelight.org/). Many of these are
+ intended to reduce the branchiness of paths through malloc etc, as
+ well as to reduce the number of memory locations read or written.
+
+ Segments
+ A list of segments headed by an embedded malloc_segment record
+ representing the initial space.
+
+ Address check support
+ The least_addr field is the least address ever obtained from
+ MORECORE or MMAP. Attempted frees and reallocs of any address less
+ than this are trapped (unless INSECURE is defined).
+
+ Magic tag
+ A cross-check field that should always hold same value as mparams.magic.
+
+ Max allowed footprint
+ The maximum allowed bytes to allocate from system (zero means no limit)
+
+ Flags
+ Bits recording whether to use MMAP, locks, or contiguous MORECORE
+
+ Statistics
+ Each space keeps track of current and maximum system memory
+ obtained via MORECORE or MMAP.
+
+ Trim support
+ Fields holding the amount of unused topmost memory that should trigger
+ trimming, and a counter to force periodic scanning to release unused
+ non-topmost segments.
+
+ Locking
+ If USE_LOCKS is defined, the "mutex" lock is acquired and released
+ around every public call using this mspace.
+
+ Extension support
+ A void* pointer and a size_t field that can be used to help implement
+ extensions to this malloc.
+*/
+
+/* Bin types, widths and sizes */
+#define NSMALLBINS (32U)
+#define NTREEBINS (32U)
+#define SMALLBIN_SHIFT (3U)
+#define SMALLBIN_WIDTH (SIZE_T_ONE << SMALLBIN_SHIFT)
+#define TREEBIN_SHIFT (8U)
+#define MIN_LARGE_SIZE (SIZE_T_ONE << TREEBIN_SHIFT)
+#define MAX_SMALL_SIZE (MIN_LARGE_SIZE - SIZE_T_ONE)
+#define MAX_SMALL_REQUEST (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD)
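+
+/*
+ Worked example (illustrative): with SMALLBIN_SHIFT == 3 and
+ TREEBIN_SHIFT == 8, SMALLBIN_WIDTH is 8 bytes and MIN_LARGE_SIZE is
+ 256 bytes. Smallbins therefore serve chunk sizes below 256 in 8-byte
+ steps (a 160-byte chunk maps to smallbin index 160 >> 3 == 20),
+ while chunks of 256 bytes or more go to the treebins.
+*/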
+
+struct malloc_state {
+ binmap_t smallmap;
+ binmap_t treemap;
+ size_t dvsize;
+ size_t topsize;
+ char* least_addr;
+ mchunkptr dv;
+ mchunkptr top;
+ size_t trim_check;
+ size_t release_checks;
+ size_t magic;
+ mchunkptr smallbins[(NSMALLBINS+1)*2];
+ tbinptr treebins[NTREEBINS];
+ size_t footprint;
+ size_t max_footprint;
+ size_t footprint_limit; /* zero means no limit */
+ flag_t mflags;
+#if USE_LOCKS
+ MLOCK_T mutex; /* locate lock among fields that rarely change */
+#endif /* USE_LOCKS */
+ msegment seg;
+ void* extp; /* Unused but available for extensions */
+ size_t exts;
+};
+
+typedef struct malloc_state* mstate;
+
+/* ------------- Global malloc_state and malloc_params ------------------- */
+
+/*
+ malloc_params holds global properties, including those that can be
+ dynamically set using mallopt. There is a single instance, mparams,
+ initialized in init_mparams. Note that the non-zeroness of "magic"
+ also serves as an initialization flag.
+*/
+
+struct malloc_params {
+ size_t magic;
+ size_t page_size;
+ size_t granularity;
+ size_t mmap_threshold;
+ size_t trim_threshold;
+ flag_t default_mflags;
+};
+
+static struct malloc_params mparams;
+
+/* Ensure mparams initialized */
+#define ensure_initialization() (void)(mparams.magic != 0 || init_mparams())
+
+#if !ONLY_MSPACES
+
+/* The global malloc_state used for all non-"mspace" calls */
+static struct malloc_state _gm_;
+#define gm (&_gm_)
+#define is_global(M) ((M) == &_gm_)
+
+#endif /* !ONLY_MSPACES */
+
+#define is_initialized(M) ((M)->top != 0)
+
+/* -------------------------- system alloc setup ------------------------- */
+
+/* Operations on mflags */
+
+#define use_lock(M) ((M)->mflags & USE_LOCK_BIT)
+#define enable_lock(M) ((M)->mflags |= USE_LOCK_BIT)
+#if USE_LOCKS
+#define disable_lock(M) ((M)->mflags &= ~USE_LOCK_BIT)
+#else
+#define disable_lock(M)
+#endif
+
+#define use_mmap(M) ((M)->mflags & USE_MMAP_BIT)
+#define enable_mmap(M) ((M)->mflags |= USE_MMAP_BIT)
+#if HAVE_MMAP
+#define disable_mmap(M) ((M)->mflags &= ~USE_MMAP_BIT)
+#else
+#define disable_mmap(M)
+#endif
+
+#define use_noncontiguous(M) ((M)->mflags & USE_NONCONTIGUOUS_BIT)
+#define disable_contiguous(M) ((M)->mflags |= USE_NONCONTIGUOUS_BIT)
+
+#define set_lock(M,L)\
+ ((M)->mflags = (L)?\
+ ((M)->mflags | USE_LOCK_BIT) :\
+ ((M)->mflags & ~USE_LOCK_BIT))
+
+/* page-align a size */
+#define page_align(S)\
+ (((S) + (mparams.page_size - SIZE_T_ONE)) & ~(mparams.page_size - SIZE_T_ONE))
+
+/* granularity-align a size */
+#define granularity_align(S)\
+ (((S) + (mparams.granularity - SIZE_T_ONE))\
+ & ~(mparams.granularity - SIZE_T_ONE))
+
+
+/* For mmap, use granularity alignment on windows, else page-align */
+#ifdef WIN32
+#define mmap_align(S) granularity_align(S)
+#else
+#define mmap_align(S) page_align(S)
+#endif
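+
+/*
+ Worked example (illustrative): with a 4096-byte page, page_align(1)
+ == 4096 and page_align(8192) == 8192. The mask trick is valid
+ because init_mparams verifies that page_size and granularity are
+ powers of two.
+*/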
+
+/* For sys_alloc, enough padding to ensure the request can be malloc'd on success */
+#define SYS_ALLOC_PADDING (TOP_FOOT_SIZE + MALLOC_ALIGNMENT)
+
+#define is_page_aligned(S)\
+ (((size_t)(S) & (mparams.page_size - SIZE_T_ONE)) == 0)
+#define is_granularity_aligned(S)\
+ (((size_t)(S) & (mparams.granularity - SIZE_T_ONE)) == 0)
+
+/* True if segment S holds address A */
+#define segment_holds(S, A)\
+ ((char*)(A) >= S->base && (char*)(A) < S->base + S->size)
+
+/* Return segment holding given address */
+static msegmentptr segment_holding(mstate m, char* addr) {
+ msegmentptr sp = &m->seg;
+ for (;;) {
+ if (addr >= sp->base && addr < sp->base + sp->size)
+ return sp;
+ if ((sp = sp->next) == 0)
+ return 0;
+ }
+}
+
+/* Return true if segment contains a segment link */
+static int has_segment_link(mstate m, msegmentptr ss) {
+ msegmentptr sp = &m->seg;
+ for (;;) {
+ if ((char*)sp >= ss->base && (char*)sp < ss->base + ss->size)
+ return 1;
+ if ((sp = sp->next) == 0)
+ return 0;
+ }
+}
+
+#ifndef MORECORE_CANNOT_TRIM
+#define should_trim(M,s) ((s) > (M)->trim_check)
+#else /* MORECORE_CANNOT_TRIM */
+#define should_trim(M,s) (0)
+#endif /* MORECORE_CANNOT_TRIM */
+
+/*
+ TOP_FOOT_SIZE is padding at the end of a segment, including space
+ that may be needed to place segment records and fenceposts when new
+ noncontiguous segments are added.
+*/
+#define TOP_FOOT_SIZE\
+ (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE)
+
+
+/* ------------------------------- Hooks -------------------------------- */
+
+/*
+ PREACTION should be defined to return 0 on success, and nonzero on
+ failure. If you are not using locking, you can redefine these to do
+ anything you like.
+*/
+
+#if USE_LOCKS
+#define PREACTION(M) ((use_lock(M))? ACQUIRE_LOCK(&(M)->mutex) : 0)
+#define POSTACTION(M) { if (use_lock(M)) RELEASE_LOCK(&(M)->mutex); }
+#else /* USE_LOCKS */
+
+#ifndef PREACTION
+#define PREACTION(M) (0)
+#endif /* PREACTION */
+
+#ifndef POSTACTION
+#define POSTACTION(M)
+#endif /* POSTACTION */
+
+#endif /* USE_LOCKS */
+
+/*
+ CORRUPTION_ERROR_ACTION is triggered upon detected bad addresses.
+ USAGE_ERROR_ACTION is triggered on detected bad frees and
+ reallocs. The argument p is an address that might have triggered the
+ fault. It is ignored by the two predefined actions, but might be
+ useful in custom actions that try to help diagnose errors.
+*/
+
+#if PROCEED_ON_ERROR
+
+/* A count of the number of corruption errors causing resets */
+int malloc_corruption_error_count;
+
+/* default corruption action */
+static void reset_on_error(mstate m);
+
+#define CORRUPTION_ERROR_ACTION(m) reset_on_error(m)
+#define USAGE_ERROR_ACTION(m, p)
+
+#else /* PROCEED_ON_ERROR */
+
+#ifndef CORRUPTION_ERROR_ACTION
+#define CORRUPTION_ERROR_ACTION(m) ABORT
+#endif /* CORRUPTION_ERROR_ACTION */
+
+#ifndef USAGE_ERROR_ACTION
+#define USAGE_ERROR_ACTION(m,p) ABORT
+#endif /* USAGE_ERROR_ACTION */
+
+#endif /* PROCEED_ON_ERROR */
+
+
+/* -------------------------- Debugging setup ---------------------------- */
+
+#if ! DEBUG
+
+#define check_free_chunk(M,P)
+#define check_inuse_chunk(M,P)
+#define check_malloced_chunk(M,P,N)
+#define check_mmapped_chunk(M,P)
+#define check_malloc_state(M)
+#define check_top_chunk(M,P)
+
+#else /* DEBUG */
+#define check_free_chunk(M,P) do_check_free_chunk(M,P)
+#define check_inuse_chunk(M,P) do_check_inuse_chunk(M,P)
+#define check_top_chunk(M,P) do_check_top_chunk(M,P)
+#define check_malloced_chunk(M,P,N) do_check_malloced_chunk(M,P,N)
+#define check_mmapped_chunk(M,P) do_check_mmapped_chunk(M,P)
+#define check_malloc_state(M) do_check_malloc_state(M)
+
+static void do_check_any_chunk(mstate m, mchunkptr p);
+static void do_check_top_chunk(mstate m, mchunkptr p);
+static void do_check_mmapped_chunk(mstate m, mchunkptr p);
+static void do_check_inuse_chunk(mstate m, mchunkptr p);
+static void do_check_free_chunk(mstate m, mchunkptr p);
+static void do_check_malloced_chunk(mstate m, void* mem, size_t s);
+static void do_check_tree(mstate m, tchunkptr t);
+static void do_check_treebin(mstate m, bindex_t i);
+static void do_check_smallbin(mstate m, bindex_t i);
+static void do_check_malloc_state(mstate m);
+static int bin_find(mstate m, mchunkptr x);
+static size_t traverse_and_check(mstate m);
+#endif /* DEBUG */
+
+/* ---------------------------- Indexing Bins ---------------------------- */
+
+#define is_small(s) (((s) >> SMALLBIN_SHIFT) < NSMALLBINS)
+#define small_index(s) (bindex_t)((s) >> SMALLBIN_SHIFT)
+#define small_index2size(i) ((i) << SMALLBIN_SHIFT)
+#define MIN_SMALL_INDEX (small_index(MIN_CHUNK_SIZE))
+
+/* addressing by index. See above about smallbin repositioning */
+#define smallbin_at(M, i) ((sbinptr)((char*)&((M)->smallbins[(i)<<1])))
+#define treebin_at(M,i) (&((M)->treebins[i]))
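+
+/*
+ Note on the repositioning trick (illustrative): init_mparams checks
+ that sizeof(size_t) == sizeof(char*), so the fd field of a
+ malloc_chunk sits exactly two words into the struct. For bin i,
+ b = smallbin_at(M, i) thus makes b->fd and b->bk land on
+ smallbins[(i<<1)+2] and smallbins[(i<<1)+3]; the fake header's
+ prev_foot/head words overlap the link words of bin i-1 and are
+ never accessed as sizes, which is why two words per bin suffice and
+ the array holds (NSMALLBINS+1)*2 entries.
+*/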
+
+/* assign tree index for size S to variable I. Use x86 asm if possible */
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+#define compute_tree_index(S, I)\
+{\
+ unsigned int X = S >> TREEBIN_SHIFT;\
+ if (X == 0)\
+ I = 0;\
+ else if (X > 0xFFFF)\
+ I = NTREEBINS-1;\
+ else {\
+ unsigned int K = (unsigned) sizeof(X)*__CHAR_BIT__ - 1 - (unsigned) __builtin_clz(X); \
+ I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\
+ }\
+}
+
+#elif defined (__INTEL_COMPILER)
+#define compute_tree_index(S, I)\
+{\
+ size_t X = S >> TREEBIN_SHIFT;\
+ if (X == 0)\
+ I = 0;\
+ else if (X > 0xFFFF)\
+ I = NTREEBINS-1;\
+ else {\
+ unsigned int K = _bit_scan_reverse (X); \
+ I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\
+ }\
+}
+
+#elif defined(_MSC_VER) && _MSC_VER>=1300
+#define compute_tree_index(S, I)\
+{\
+ size_t X = S >> TREEBIN_SHIFT;\
+ if (X == 0)\
+ I = 0;\
+ else if (X > 0xFFFF)\
+ I = NTREEBINS-1;\
+ else {\
+ unsigned int K;\
+ _BitScanReverse((DWORD *) &K, (DWORD) X);\
+ I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\
+ }\
+}
+
+#else /* GNUC */
+#define compute_tree_index(S, I)\
+{\
+ size_t X = S >> TREEBIN_SHIFT;\
+ if (X == 0)\
+ I = 0;\
+ else if (X > 0xFFFF)\
+ I = NTREEBINS-1;\
+ else {\
+ unsigned int Y = (unsigned int)X;\
+ unsigned int N = ((Y - 0x100) >> 16) & 8;\
+ unsigned int K = (((Y <<= N) - 0x1000) >> 16) & 4;\
+ N += K;\
+ N += K = (((Y <<= K) - 0x4000) >> 16) & 2;\
+ K = 14 - N + ((Y <<= K) >> 15);\
+ I = (K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1));\
+ }\
+}
+#endif /* GNUC */
+
+/* Bit representing maximum resolved size in a treebin at i */
+#define bit_for_tree_index(i) \
+ (i == NTREEBINS-1)? (SIZE_T_BITSIZE-1) : (((i) >> 1) + TREEBIN_SHIFT - 2)
+
+/* Shift placing maximum resolved bit in a treebin at i as sign bit */
+#define leftshift_for_tree_index(i) \
+ ((i == NTREEBINS-1)? 0 : \
+ ((SIZE_T_BITSIZE-SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2)))
+
+/* The size of the smallest chunk held in bin with index i */
+#define minsize_for_tree_index(i) \
+ ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) | \
+ (((size_t)((i) & SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1)))
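+
+/*
+ Worked examples (illustrative), consistent with compute_tree_index
+ and minsize_for_tree_index: treebin 0 holds sizes [0x100, 0x180),
+ bin 1 holds [0x180, 0x200), bin 2 holds [0x200, 0x300), bin 3 holds
+ [0x300, 0x400), and so on -- two bins per power of two, split at the
+ midpoint by the bit just below the leading bit of the size.
+*/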
+
+
+/* ------------------------ Operations on bin maps ----------------------- */
+
+/* bit corresponding to given index */
+#define idx2bit(i) ((binmap_t)(1) << (i))
+
+/* Mark/Clear bits with given index */
+#define mark_smallmap(M,i) ((M)->smallmap |= idx2bit(i))
+#define clear_smallmap(M,i) ((M)->smallmap &= ~idx2bit(i))
+#define smallmap_is_marked(M,i) ((M)->smallmap & idx2bit(i))
+
+#define mark_treemap(M,i) ((M)->treemap |= idx2bit(i))
+#define clear_treemap(M,i) ((M)->treemap &= ~idx2bit(i))
+#define treemap_is_marked(M,i) ((M)->treemap & idx2bit(i))
+
+/* isolate the least set bit of a bitmap */
+#define least_bit(x) ((x) & -(x))
+
+/* mask with all bits to left of least bit of x on */
+#define left_bits(x) ((x<<1) | -(x<<1))
+
+/* mask with all bits to left of or equal to least bit of x on */
+#define same_or_left_bits(x) ((x) | -(x))
+
+/* index corresponding to given bit. Use x86 asm if possible */
+
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+#define compute_bit2idx(X, I)\
+{\
+ unsigned int J;\
+ J = __builtin_ctz(X); \
+ I = (bindex_t)J;\
+}
+
+#elif defined (__INTEL_COMPILER)
+#define compute_bit2idx(X, I)\
+{\
+ unsigned int J;\
+ J = _bit_scan_forward (X); \
+ I = (bindex_t)J;\
+}
+
+#elif defined(_MSC_VER) && _MSC_VER>=1300
+#define compute_bit2idx(X, I)\
+{\
+ unsigned int J;\
+ _BitScanForward((DWORD *) &J, X);\
+ I = (bindex_t)J;\
+}
+
+#elif USE_BUILTIN_FFS
+#define compute_bit2idx(X, I) I = ffs(X)-1
+
+#else
+#define compute_bit2idx(X, I)\
+{\
+ unsigned int Y = X - 1;\
+ unsigned int K = Y >> (16-4) & 16;\
+ unsigned int N = K; Y >>= K;\
+ N += K = Y >> (8-3) & 8; Y >>= K;\
+ N += K = Y >> (4-2) & 4; Y >>= K;\
+ N += K = Y >> (2-1) & 2; Y >>= K;\
+ N += K = Y >> (1-0) & 1; Y >>= K;\
+ I = (bindex_t)(N + Y);\
+}
+#endif /* GNUC */
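+
+/*
+ A minimal sketch (illustrative only) of how the maps avoid
+ bin-by-bin scanning: to find the first non-empty smallbin at or
+ above index i, mask the map, isolate the least set bit, and convert
+ it back to an index. The caller must already know some such bin
+ exists.
+*/
+#if 0 /* example only */
+static bindex_t first_nonempty_smallbin_from(mstate m, bindex_t i) {
+  binmap_t candidates = m->smallmap & same_or_left_bits(idx2bit(i));
+  bindex_t j;
+  compute_bit2idx(least_bit(candidates), j);
+  return j;
+}
+#endif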
+
+
+/* ----------------------- Runtime Check Support ------------------------- */
+
+/*
+ For security, the main invariant is that malloc/free/etc never
+ writes to a static address other than malloc_state, unless static
+ malloc_state itself has been corrupted, which cannot occur via
+ malloc (because of these checks). In essence this means that we
+ believe all pointers, sizes, maps etc held in malloc_state, but
+ check all of those linked or offsetted from other embedded data
+ structures. These checks are interspersed with main code in a way
+ that tends to minimize their run-time cost.
+
+ When FOOTERS is defined, in addition to range checking, we also
+ verify footer fields of inuse chunks, which can be used to guarantee
+ that the mstate controlling malloc/free is intact. This is a
+ streamlined version of the approach described by William Robertson
+ et al in "Run-time Detection of Heap-based Overflows" LISA'03
+ http://www.usenix.org/events/lisa03/tech/robertson.html. The footer
+ of an inuse chunk holds the xor of its mstate and a random seed,
+ that is checked upon calls to free() and realloc(). This is
+ (probabilistically) unguessable from outside the program, but can be
+ computed by any code successfully malloc'ing any chunk, so does not
+ itself provide protection against code that has already broken
+ security through some other means. Unlike Robertson et al, we
+ always dynamically check addresses of all offset chunks (previous,
+ next, etc). This turns out to be cheaper than relying on hashes.
+*/
+
+#if !INSECURE
+/* Check if address a is at least as high as any from MORECORE or MMAP */
+#define ok_address(M, a) ((char*)(a) >= (M)->least_addr)
+/* Check if address of next chunk n is higher than base chunk p */
+#define ok_next(p, n) ((char*)(p) < (char*)(n))
+/* Check if p has inuse status */
+#define ok_inuse(p) is_inuse(p)
+/* Check if p has its pinuse bit on */
+#define ok_pinuse(p) pinuse(p)
+
+#else /* !INSECURE */
+#define ok_address(M, a) (1)
+#define ok_next(b, n) (1)
+#define ok_inuse(p) (1)
+#define ok_pinuse(p) (1)
+#endif /* !INSECURE */
+
+#if (FOOTERS && !INSECURE)
+/* Check if (alleged) mstate m has expected magic field */
+#define ok_magic(M) ((M)->magic == mparams.magic)
+#else /* (FOOTERS && !INSECURE) */
+#define ok_magic(M) (1)
+#endif /* (FOOTERS && !INSECURE) */
+
+/* In gcc, use __builtin_expect to minimize impact of checks */
+#if !INSECURE
+#if defined(__GNUC__) && __GNUC__ >= 3
+#define RTCHECK(e) __builtin_expect(e, 1)
+#else /* GNUC */
+#define RTCHECK(e) (e)
+#endif /* GNUC */
+#else /* !INSECURE */
+#define RTCHECK(e) (1)
+#endif /* !INSECURE */
+
+/* macros to set up inuse chunks with or without footers */
+
+#if !FOOTERS
+
+#define mark_inuse_foot(M,p,s)
+
+/* Macros for setting head/foot of non-mmapped chunks */
+
+/* Set cinuse bit and pinuse bit of next chunk */
+#define set_inuse(M,p,s)\
+ ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\
+ ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT)
+
+/* Set cinuse and pinuse of this chunk and pinuse of next chunk */
+#define set_inuse_and_pinuse(M,p,s)\
+ ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\
+ ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT)
+
+/* Set size, cinuse and pinuse bit of this chunk */
+#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\
+ ((p)->head = (s|PINUSE_BIT|CINUSE_BIT))
+
+#else /* FOOTERS */
+
+/* Set foot of inuse chunk to be xor of mstate and seed */
+#define mark_inuse_foot(M,p,s)\
+ (((mchunkptr)((char*)(p) + (s)))->prev_foot = ((size_t)(M) ^ mparams.magic))
+
+#define get_mstate_for(p)\
+ ((mstate)(((mchunkptr)((char*)(p) +\
+ (chunksize(p))))->prev_foot ^ mparams.magic))
+
+#define set_inuse(M,p,s)\
+ ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\
+ (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT), \
+ mark_inuse_foot(M,p,s))
+
+#define set_inuse_and_pinuse(M,p,s)\
+ ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\
+ (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT),\
+ mark_inuse_foot(M,p,s))
+
+#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\
+ ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\
+ mark_inuse_foot(M, p, s))
+
+#endif /* !FOOTERS */
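+
+/*
+ Illustrative round trip (no additional code involved): with FOOTERS,
+ the footer of an inuse chunk stores ((size_t)m ^ mparams.magic), so
+ XOR-ing the footer with mparams.magic again yields m back; that is
+ all get_mstate_for does. free() and realloc() then vet the recovered
+ mstate with ok_magic before trusting it.
+*/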
+
+/* ---------------------------- setting mparams -------------------------- */
+
+#if LOCK_AT_FORK
+static void pre_fork(void) { ACQUIRE_LOCK(&(gm)->mutex); }
+static void post_fork_parent(void) { RELEASE_LOCK(&(gm)->mutex); }
+static void post_fork_child(void) { INITIAL_LOCK(&(gm)->mutex); }
+#endif /* LOCK_AT_FORK */
+
+/* Initialize mparams */
+static int init_mparams(void) {
+#ifdef NEED_GLOBAL_LOCK_INIT
+ if (malloc_global_mutex_status <= 0)
+ init_malloc_global_mutex();
+#endif
+
+ ACQUIRE_MALLOC_GLOBAL_LOCK();
+ if (mparams.magic == 0) {
+ size_t magic;
+ size_t psize;
+ size_t gsize;
+
+#ifndef WIN32
+ psize = malloc_getpagesize;
+ gsize = ((DEFAULT_GRANULARITY != 0)? DEFAULT_GRANULARITY : psize);
+#else /* WIN32 */
+ {
+ SYSTEM_INFO system_info;
+ GetSystemInfo(&system_info);
+ psize = system_info.dwPageSize;
+ gsize = ((DEFAULT_GRANULARITY != 0)?
+ DEFAULT_GRANULARITY : system_info.dwAllocationGranularity);
+ }
+#endif /* WIN32 */
+
+ /* Sanity-check configuration:
+ size_t must be unsigned and as wide as pointer type.
+ ints must be at least 4 bytes.
+ alignment must be at least 8.
+ Alignment, min chunk size, and page size must all be powers of 2.
+ */
+ if ((sizeof(size_t) != sizeof(char*)) ||
+ (MAX_SIZE_T < MIN_CHUNK_SIZE) ||
+ (sizeof(int) < 4) ||
+ (MALLOC_ALIGNMENT < (size_t)8U) ||
+ ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-SIZE_T_ONE)) != 0) ||
+ ((MCHUNK_SIZE & (MCHUNK_SIZE-SIZE_T_ONE)) != 0) ||
+ ((gsize & (gsize-SIZE_T_ONE)) != 0) ||
+ ((psize & (psize-SIZE_T_ONE)) != 0))
+ ABORT;
+ mparams.granularity = gsize;
+ mparams.page_size = psize;
+ mparams.mmap_threshold = DEFAULT_MMAP_THRESHOLD;
+ mparams.trim_threshold = DEFAULT_TRIM_THRESHOLD;
+#if MORECORE_CONTIGUOUS
+ mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT;
+#else /* MORECORE_CONTIGUOUS */
+ mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT|USE_NONCONTIGUOUS_BIT;
+#endif /* MORECORE_CONTIGUOUS */
+
+#if !ONLY_MSPACES
+ /* Set up lock for main malloc area */
+ gm->mflags = mparams.default_mflags;
+ (void)INITIAL_LOCK(&gm->mutex);
+#endif
+#if LOCK_AT_FORK
+ pthread_atfork(&pre_fork, &post_fork_parent, &post_fork_child);
+#endif
+
+ {
+#if USE_DEV_RANDOM
+ int fd;
+ unsigned char buf[sizeof(size_t)];
+ /* Try to use /dev/urandom, else fall back on using time */
+ if ((fd = open("/dev/urandom", O_RDONLY)) >= 0 &&
+ read(fd, buf, sizeof(buf)) == sizeof(buf)) {
+ magic = *((size_t *) buf);
+ close(fd);
+ }
+ else
+#endif /* USE_DEV_RANDOM */
+#ifdef WIN32
+ magic = (size_t)(GetTickCount() ^ (size_t)0x55555555U);
+#elif defined(LACKS_TIME_H)
+ magic = (size_t)&magic ^ (size_t)0x55555555U;
+#else
+ magic = (size_t)(time(0) ^ (size_t)0x55555555U);
+#endif
+ magic |= (size_t)8U; /* ensure nonzero */
+ magic &= ~(size_t)7U; /* improve chances of fault for bad values */
+ /* Until memory ordering primitives are commonly available, use a volatile write */
+ (*(volatile size_t *)(&(mparams.magic))) = magic;
+ }
+ }
+
+ RELEASE_MALLOC_GLOBAL_LOCK();
+ return 1;
+}
+
+/* support for mallopt */
+static int change_mparam(int param_number, int value) {
+ size_t val;
+ ensure_initialization();
+ val = (value == -1)? MAX_SIZE_T : (size_t)value;
+ switch(param_number) {
+ case M_TRIM_THRESHOLD:
+ mparams.trim_threshold = val;
+ return 1;
+ case M_GRANULARITY:
+ if (val >= mparams.page_size && ((val & (val-1)) == 0)) {
+ mparams.granularity = val;
+ return 1;
+ }
+ else
+ return 0;
+ case M_MMAP_THRESHOLD:
+ mparams.mmap_threshold = val;
+ return 1;
+ default:
+ return 0;
+ }
+}
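+
+/*
+ Illustrative usage (example only): the exported mallopt wrapper
+ routes here, so mallopt(M_GRANULARITY, 64*1024) returns 1 on systems
+ whose page size is at most 64KiB (the value is a power of two no
+ smaller than the page size), while mallopt(M_GRANULARITY, 3) returns
+ 0 because 3 is not a power of two.
+*/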
+
+#if DEBUG
+/* ------------------------- Debugging Support --------------------------- */
+
+/* Check properties of any chunk, whether free, inuse, mmapped etc */
+static void do_check_any_chunk(mstate m, mchunkptr p) {
+ assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
+ assert(ok_address(m, p));
+}
+
+/* Check properties of top chunk */
+static void do_check_top_chunk(mstate m, mchunkptr p) {
+ msegmentptr sp = segment_holding(m, (char*)p);
+ size_t sz = p->head & ~INUSE_BITS; /* third-lowest bit can be set! */
+ assert(sp != 0);
+ assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
+ assert(ok_address(m, p));
+ assert(sz == m->topsize);
+ assert(sz > 0);
+ assert(sz == ((sp->base + sp->size) - (char*)p) - TOP_FOOT_SIZE);
+ assert(pinuse(p));
+ assert(!pinuse(chunk_plus_offset(p, sz)));
+}
+
+/* Check properties of (inuse) mmapped chunks */
+static void do_check_mmapped_chunk(mstate m, mchunkptr p) {
+ size_t sz = chunksize(p);
+ size_t len = (sz + (p->prev_foot) + MMAP_FOOT_PAD);
+ assert(is_mmapped(p));
+ assert(use_mmap(m));
+ assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
+ assert(ok_address(m, p));
+ assert(!is_small(sz));
+ assert((len & (mparams.page_size-SIZE_T_ONE)) == 0);
+ assert(chunk_plus_offset(p, sz)->head == FENCEPOST_HEAD);
+ assert(chunk_plus_offset(p, sz+SIZE_T_SIZE)->head == 0);
+}
+
+/* Check properties of inuse chunks */
+static void do_check_inuse_chunk(mstate m, mchunkptr p) {
+ do_check_any_chunk(m, p);
+ assert(is_inuse(p));
+ assert(next_pinuse(p));
+ /* If not pinuse and not mmapped, previous chunk has OK offset */
+ assert(is_mmapped(p) || pinuse(p) || next_chunk(prev_chunk(p)) == p);
+ if (is_mmapped(p))
+ do_check_mmapped_chunk(m, p);
+}
+
+/* Check properties of free chunks */
+static void do_check_free_chunk(mstate m, mchunkptr p) {
+ size_t sz = chunksize(p);
+ mchunkptr next = chunk_plus_offset(p, sz);
+ do_check_any_chunk(m, p);
+ assert(!is_inuse(p));
+ assert(!next_pinuse(p));
+ assert (!is_mmapped(p));
+ if (p != m->dv && p != m->top) {
+ if (sz >= MIN_CHUNK_SIZE) {
+ assert((sz & CHUNK_ALIGN_MASK) == 0);
+ assert(is_aligned(chunk2mem(p)));
+ assert(next->prev_foot == sz);
+ assert(pinuse(p));
+ assert (next == m->top || is_inuse(next));
+ assert(p->fd->bk == p);
+ assert(p->bk->fd == p);
+ }
+ else /* markers are always of size SIZE_T_SIZE */
+ assert(sz == SIZE_T_SIZE);
+ }
+}
+
+/* Check properties of malloced chunks at the point they are malloced */
+static void do_check_malloced_chunk(mstate m, void* mem, size_t s) {
+ if (mem != 0) {
+ mchunkptr p = mem2chunk(mem);
+ size_t sz = p->head & ~INUSE_BITS;
+ do_check_inuse_chunk(m, p);
+ assert((sz & CHUNK_ALIGN_MASK) == 0);
+ assert(sz >= MIN_CHUNK_SIZE);
+ assert(sz >= s);
+ /* unless mmapped, size is less than MIN_CHUNK_SIZE more than request */
+ assert(is_mmapped(p) || sz < (s + MIN_CHUNK_SIZE));
+ }
+}
+
+/* Check a tree and its subtrees. */
+static void do_check_tree(mstate m, tchunkptr t) {
+ tchunkptr head = 0;
+ tchunkptr u = t;
+ bindex_t tindex = t->index;
+ size_t tsize = chunksize(t);
+ bindex_t idx;
+ compute_tree_index(tsize, idx);
+ assert(tindex == idx);
+ assert(tsize >= MIN_LARGE_SIZE);
+ assert(tsize >= minsize_for_tree_index(idx));
+ assert((idx == NTREEBINS-1) || (tsize < minsize_for_tree_index((idx+1))));
+
+ do { /* traverse through chain of same-sized nodes */
+ do_check_any_chunk(m, ((mchunkptr)u));
+ assert(u->index == tindex);
+ assert(chunksize(u) == tsize);
+ assert(!is_inuse(u));
+ assert(!next_pinuse(u));
+ assert(u->fd->bk == u);
+ assert(u->bk->fd == u);
+ if (u->parent == 0) {
+ assert(u->child[0] == 0);
+ assert(u->child[1] == 0);
+ }
+ else {
+ assert(head == 0); /* only one node on chain has parent */
+ head = u;
+ assert(u->parent != u);
+ assert (u->parent->child[0] == u ||
+ u->parent->child[1] == u ||
+ *((tbinptr*)(u->parent)) == u);
+ if (u->child[0] != 0) {
+ assert(u->child[0]->parent == u);
+ assert(u->child[0] != u);
+ do_check_tree(m, u->child[0]);
+ }
+ if (u->child[1] != 0) {
+ assert(u->child[1]->parent == u);
+ assert(u->child[1] != u);
+ do_check_tree(m, u->child[1]);
+ }
+ if (u->child[0] != 0 && u->child[1] != 0) {
+ assert(chunksize(u->child[0]) < chunksize(u->child[1]));
+ }
+ }
+ u = u->fd;
+ } while (u != t);
+ assert(head != 0);
+}
+
+/* Check all the chunks in a treebin. */
+static void do_check_treebin(mstate m, bindex_t i) {
+ tbinptr* tb = treebin_at(m, i);
+ tchunkptr t = *tb;
+ int empty = (m->treemap & (1U << i)) == 0;
+ if (t == 0)
+ assert(empty);
+ if (!empty)
+ do_check_tree(m, t);
+}
+
+/* Check all the chunks in a smallbin. */
+static void do_check_smallbin(mstate m, bindex_t i) {
+ sbinptr b = smallbin_at(m, i);
+ mchunkptr p = b->bk;
+ unsigned int empty = (m->smallmap & (1U << i)) == 0;
+ if (p == b)
+ assert(empty);
+ if (!empty) {
+ for (; p != b; p = p->bk) {
+ size_t size = chunksize(p);
+ mchunkptr q;
+ /* each chunk claims to be free */
+ do_check_free_chunk(m, p);
+ /* chunk belongs in bin */
+ assert(small_index(size) == i);
+ assert(p->bk == b || chunksize(p->bk) == chunksize(p));
+ /* chunk is followed by an inuse chunk */
+ q = next_chunk(p);
+ if (q->head != FENCEPOST_HEAD)
+ do_check_inuse_chunk(m, q);
+ }
+ }
+}
+
+/* Find x in a bin. Used in other check functions. */
+static int bin_find(mstate m, mchunkptr x) {
+ size_t size = chunksize(x);
+ if (is_small(size)) {
+ bindex_t sidx = small_index(size);
+ sbinptr b = smallbin_at(m, sidx);
+ if (smallmap_is_marked(m, sidx)) {
+ mchunkptr p = b;
+ do {
+ if (p == x)
+ return 1;
+ } while ((p = p->fd) != b);
+ }
+ }
+ else {
+ bindex_t tidx;
+ compute_tree_index(size, tidx);
+ if (treemap_is_marked(m, tidx)) {
+ tchunkptr t = *treebin_at(m, tidx);
+ size_t sizebits = size << leftshift_for_tree_index(tidx);
+ while (t != 0 && chunksize(t) != size) {
+ t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1];
+ sizebits <<= 1;
+ }
+ if (t != 0) {
+ tchunkptr u = t;
+ do {
+ if (u == (tchunkptr)x)
+ return 1;
+ } while ((u = u->fd) != t);
+ }
+ }
+ }
+ return 0;
+}
+
+/* Traverse each chunk and check it; return total */
+static size_t traverse_and_check(mstate m) {
+ size_t sum = 0;
+ if (is_initialized(m)) {
+ msegmentptr s = &m->seg;
+ sum += m->topsize + TOP_FOOT_SIZE;
+ while (s != 0) {
+ mchunkptr q = align_as_chunk(s->base);
+ mchunkptr lastq = 0;
+ assert(pinuse(q));
+ while (segment_holds(s, q) &&
+ q != m->top && q->head != FENCEPOST_HEAD) {
+ sum += chunksize(q);
+ if (is_inuse(q)) {
+ assert(!bin_find(m, q));
+ do_check_inuse_chunk(m, q);
+ }
+ else {
+ assert(q == m->dv || bin_find(m, q));
+ assert(lastq == 0 || is_inuse(lastq)); /* Not 2 consecutive free */
+ do_check_free_chunk(m, q);
+ }
+ lastq = q;
+ q = next_chunk(q);
+ }
+ s = s->next;
+ }
+ }
+ return sum;
+}
+
+
+/* Check all properties of malloc_state. */
+static void do_check_malloc_state(mstate m) {
+ bindex_t i;
+ size_t total;
+ /* check bins */
+ for (i = 0; i < NSMALLBINS; ++i)
+ do_check_smallbin(m, i);
+ for (i = 0; i < NTREEBINS; ++i)
+ do_check_treebin(m, i);
+
+ if (m->dvsize != 0) { /* check dv chunk */
+ do_check_any_chunk(m, m->dv);
+ assert(m->dvsize == chunksize(m->dv));
+ assert(m->dvsize >= MIN_CHUNK_SIZE);
+ assert(bin_find(m, m->dv) == 0);
+ }
+
+ if (m->top != 0) { /* check top chunk */
+ do_check_top_chunk(m, m->top);
+ /*assert(m->topsize == chunksize(m->top)); redundant */
+ assert(m->topsize > 0);
+ assert(bin_find(m, m->top) == 0);
+ }
+
+ total = traverse_and_check(m);
+ assert(total <= m->footprint);
+ assert(m->footprint <= m->max_footprint);
+}
+#endif /* DEBUG */
+
+/* ----------------------------- statistics ------------------------------ */
+
+#if !NO_MALLINFO
+static struct mallinfo internal_mallinfo(mstate m) {
+ struct mallinfo nm = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ ensure_initialization();
+ if (!PREACTION(m)) {
+ check_malloc_state(m);
+ if (is_initialized(m)) {
+ size_t nfree = SIZE_T_ONE; /* top always free */
+ size_t mfree = m->topsize + TOP_FOOT_SIZE;
+ size_t sum = mfree;
+ msegmentptr s = &m->seg;
+ while (s != 0) {
+ mchunkptr q = align_as_chunk(s->base);
+ while (segment_holds(s, q) &&
+ q != m->top && q->head != FENCEPOST_HEAD) {
+ size_t sz = chunksize(q);
+ sum += sz;
+ if (!is_inuse(q)) {
+ mfree += sz;
+ ++nfree;
+ }
+ q = next_chunk(q);
+ }
+ s = s->next;
+ }
+
+ nm.arena = sum;
+ nm.ordblks = nfree;
+ nm.hblkhd = m->footprint - sum;
+ nm.usmblks = m->max_footprint;
+ nm.uordblks = m->footprint - mfree;
+ nm.fordblks = mfree;
+ nm.keepcost = m->topsize;
+ }
+
+ POSTACTION(m);
+ }
+ return nm;
+}
+#endif /* !NO_MALLINFO */
+
+#if !NO_MALLOC_STATS
+static void internal_malloc_stats(mstate m) {
+ ensure_initialization();
+ if (!PREACTION(m)) {
+ size_t maxfp = 0;
+ size_t fp = 0;
+ size_t used = 0;
+ check_malloc_state(m);
+ if (is_initialized(m)) {
+ msegmentptr s = &m->seg;
+ maxfp = m->max_footprint;
+ fp = m->footprint;
+ used = fp - (m->topsize + TOP_FOOT_SIZE);
+
+ while (s != 0) {
+ mchunkptr q = align_as_chunk(s->base);
+ while (segment_holds(s, q) &&
+ q != m->top && q->head != FENCEPOST_HEAD) {
+ if (!is_inuse(q))
+ used -= chunksize(q);
+ q = next_chunk(q);
+ }
+ s = s->next;
+ }
+ }
+ POSTACTION(m); /* drop lock */
+ fprintf(stderr, "max system bytes = %10lu\n", (unsigned long)(maxfp));
+ fprintf(stderr, "system bytes = %10lu\n", (unsigned long)(fp));
+ fprintf(stderr, "in use bytes = %10lu\n", (unsigned long)(used));
+ }
+}
+#endif /* NO_MALLOC_STATS */
+
+/* ----------------------- Operations on smallbins ----------------------- */
+
+/*
+ Various forms of linking and unlinking are defined as macros. Even
+ the ones for trees, which are very long but have very short typical
+ paths. This is ugly but reduces reliance on inlining support of
+ compilers.
+*/
+
+/* Link a free chunk into a smallbin */
+#define insert_small_chunk(M, P, S) {\
+ bindex_t I = small_index(S);\
+ mchunkptr B = smallbin_at(M, I);\
+ mchunkptr F = B;\
+ assert(S >= MIN_CHUNK_SIZE);\
+ if (!smallmap_is_marked(M, I))\
+ mark_smallmap(M, I);\
+ else if (RTCHECK(ok_address(M, B->fd)))\
+ F = B->fd;\
+ else {\
+ CORRUPTION_ERROR_ACTION(M);\
+ }\
+ B->fd = P;\
+ F->bk = P;\
+ P->fd = F;\
+ P->bk = B;\
+}
+
+/* Unlink a chunk from a smallbin */
+#define unlink_small_chunk(M, P, S) {\
+ mchunkptr F = P->fd;\
+ mchunkptr B = P->bk;\
+ bindex_t I = small_index(S);\
+ assert(P != B);\
+ assert(P != F);\
+ assert(chunksize(P) == small_index2size(I));\
+ if (RTCHECK(F == smallbin_at(M,I) || (ok_address(M, F) && F->bk == P))) { \
+ if (B == F) {\
+ clear_smallmap(M, I);\
+ }\
+ else if (RTCHECK(B == smallbin_at(M,I) ||\
+ (ok_address(M, B) && B->fd == P))) {\
+ F->bk = B;\
+ B->fd = F;\
+ }\
+ else {\
+ CORRUPTION_ERROR_ACTION(M);\
+ }\
+ }\
+ else {\
+ CORRUPTION_ERROR_ACTION(M);\
+ }\
+}
+
+/* Unlink the first chunk from a smallbin */
+#define unlink_first_small_chunk(M, B, P, I) {\
+ mchunkptr F = P->fd;\
+ assert(P != B);\
+ assert(P != F);\
+ assert(chunksize(P) == small_index2size(I));\
+ if (B == F) {\
+ clear_smallmap(M, I);\
+ }\
+ else if (RTCHECK(ok_address(M, F) && F->bk == P)) {\
+ F->bk = B;\
+ B->fd = F;\
+ }\
+ else {\
+ CORRUPTION_ERROR_ACTION(M);\
+ }\
+}
+
+/* Replace dv node, binning the old one */
+/* Used only when dvsize known to be small */
+#define replace_dv(M, P, S) {\
+ size_t DVS = M->dvsize;\
+ assert(is_small(DVS));\
+ if (DVS != 0) {\
+ mchunkptr DV = M->dv;\
+ insert_small_chunk(M, DV, DVS);\
+ }\
+ M->dvsize = S;\
+ M->dv = P;\
+}
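+
+/*
+ A minimal sketch (illustrative only, simplified from the real malloc
+ fast path) of servicing a small request from an exact-fit bin:
+ consult the small map, pop the first chunk of the matching bin, and
+ mark it in use. The real code also tries the next-larger bin, the
+ dv chunk, and the trees before falling back to sys_alloc.
+*/
+#if 0 /* example only */
+static void* small_exact_fit(mstate m, size_t nb) {
+  bindex_t idx = small_index(nb);
+  if (smallmap_is_marked(m, idx)) {
+    sbinptr b = smallbin_at(m, idx);
+    mchunkptr p = b->fd;
+    assert(chunksize(p) == small_index2size(idx));
+    unlink_first_small_chunk(m, b, p, idx);
+    set_inuse_and_pinuse(m, p, small_index2size(idx));
+    return chunk2mem(p);
+  }
+  return 0;
+}
+#endif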
+
+/* ------------------------- Operations on trees ------------------------- */
+
+/* Insert chunk into tree */
+#define insert_large_chunk(M, X, S) {\
+ tbinptr* H;\
+ bindex_t I;\
+ compute_tree_index(S, I);\
+ H = treebin_at(M, I);\
+ X->index = I;\
+ X->child[0] = X->child[1] = 0;\
+ if (!treemap_is_marked(M, I)) {\
+ mark_treemap(M, I);\
+ *H = X;\
+ X->parent = (tchunkptr)H;\
+ X->fd = X->bk = X;\
+ }\
+ else {\
+ tchunkptr T = *H;\
+ size_t K = S << leftshift_for_tree_index(I);\
+ for (;;) {\
+ if (chunksize(T) != S) {\
+ tchunkptr* C = &(T->child[(K >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]);\
+ K <<= 1;\
+ if (*C != 0)\
+ T = *C;\
+ else if (RTCHECK(ok_address(M, C))) {\
+ *C = X;\
+ X->parent = T;\
+ X->fd = X->bk = X;\
+ break;\
+ }\
+ else {\
+ CORRUPTION_ERROR_ACTION(M);\
+ break;\
+ }\
+ }\
+ else {\
+ tchunkptr F = T->fd;\
+ if (RTCHECK(ok_address(M, T) && ok_address(M, F))) {\
+ T->fd = F->bk = X;\
+ X->fd = F;\
+ X->bk = T;\
+ X->parent = 0;\
+ break;\
+ }\
+ else {\
+ CORRUPTION_ERROR_ACTION(M);\
+ break;\
+ }\
+ }\
+ }\
+ }\
+}
+
+/*
+ Unlink steps:
+
+ 1. If x is a chained node, unlink it from its same-sized fd/bk links
+ and choose its bk node as its replacement.
+ 2. If x was the last node of its size, but not a leaf node, it must
+ be replaced with a leaf node (not merely one with an open left or
+ right), to make sure that lefts and rights of descendants
+ correspond properly to bit masks. We use the rightmost descendant
+ of x. We could use any other leaf, but this is easy to locate and
+ tends to counteract removal of leftmosts elsewhere, and so keeps
+ paths shorter than minimally guaranteed. This doesn't loop much
+ because on average a node in a tree is near the bottom.
+ 3. If x is the base of a chain (i.e., has parent links) relink
+ x's parent and children to x's replacement (or null if none).
+*/
+
+#define unlink_large_chunk(M, X) {\
+ tchunkptr XP = X->parent;\
+ tchunkptr R;\
+ if (X->bk != X) {\
+ tchunkptr F = X->fd;\
+ R = X->bk;\
+ if (RTCHECK(ok_address(M, F) && F->bk == X && R->fd == X)) {\
+ F->bk = R;\
+ R->fd = F;\
+ }\
+ else {\
+ CORRUPTION_ERROR_ACTION(M);\
+ }\
+ }\
+ else {\
+ tchunkptr* RP;\
+ if (((R = *(RP = &(X->child[1]))) != 0) ||\
+ ((R = *(RP = &(X->child[0]))) != 0)) {\
+ tchunkptr* CP;\
+ while ((*(CP = &(R->child[1])) != 0) ||\
+ (*(CP = &(R->child[0])) != 0)) {\
+ R = *(RP = CP);\
+ }\
+ if (RTCHECK(ok_address(M, RP)))\
+ *RP = 0;\
+ else {\
+ CORRUPTION_ERROR_ACTION(M);\
+ }\
+ }\
+ }\
+ if (XP != 0) {\
+ tbinptr* H = treebin_at(M, X->index);\
+ if (X == *H) {\
+ if ((*H = R) == 0) \
+ clear_treemap(M, X->index);\
+ }\
+ else if (RTCHECK(ok_address(M, XP))) {\
+ if (XP->child[0] == X) \
+ XP->child[0] = R;\
+ else \
+ XP->child[1] = R;\
+ }\
+ else\
+ CORRUPTION_ERROR_ACTION(M);\
+ if (R != 0) {\
+ if (RTCHECK(ok_address(M, R))) {\
+ tchunkptr C0, C1;\
+ R->parent = XP;\
+ if ((C0 = X->child[0]) != 0) {\
+ if (RTCHECK(ok_address(M, C0))) {\
+ R->child[0] = C0;\
+ C0->parent = R;\
+ }\
+ else\
+ CORRUPTION_ERROR_ACTION(M);\
+ }\
+ if ((C1 = X->child[1]) != 0) {\
+ if (RTCHECK(ok_address(M, C1))) {\
+ R->child[1] = C1;\
+ C1->parent = R;\
+ }\
+ else\
+ CORRUPTION_ERROR_ACTION(M);\
+ }\
+ }\
+ else\
+ CORRUPTION_ERROR_ACTION(M);\
+ }\
+ }\
+}
+
+/* Relays to large vs small bin operations */
+
+#define insert_chunk(M, P, S)\
+ if (is_small(S)) insert_small_chunk(M, P, S)\
+ else { tchunkptr TP = (tchunkptr)(P); insert_large_chunk(M, TP, S); }
+
+#define unlink_chunk(M, P, S)\
+ if (is_small(S)) unlink_small_chunk(M, P, S)\
+ else { tchunkptr TP = (tchunkptr)(P); unlink_large_chunk(M, TP); }
+
+
+/* Relays to internal calls to malloc/free from realloc, memalign etc */
+
+#if ONLY_MSPACES
+#define internal_malloc(m, b) mspace_malloc(m, b)
+#define internal_free(m, mem) mspace_free(m,mem);
+#else /* ONLY_MSPACES */
+#if MSPACES
+#define internal_malloc(m, b)\
+ ((m == gm)? dlmalloc(b) : mspace_malloc(m, b))
+#define internal_free(m, mem)\
+ if (m == gm) dlfree(mem); else mspace_free(m,mem);
+#else /* MSPACES */
+#define internal_malloc(m, b) dlmalloc(b)
+#define internal_free(m, mem) dlfree(mem)
+#endif /* MSPACES */
+#endif /* ONLY_MSPACES */
+
+/* ----------------------- Direct-mmapping chunks ----------------------- */
+
+/*
+ Directly mmapped chunks are set up with an offset to the start of
+ the mmapped region stored in the prev_foot field of the chunk. This
+ allows reconstruction of the required argument to MUNMAP when freed,
+ and also allows adjustment of the returned chunk to meet alignment
+ requirements (especially in memalign).
+*/
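+
+/*
+ Illustrative reconstruction (matching dispose_chunk and the free
+ paths elsewhere in this file): given a directly mmapped chunk p, the
+ original mapping is recovered as
+
+   base = (char*)p - p->prev_foot;
+   len  = chunksize(p) + p->prev_foot + MMAP_FOOT_PAD;
+
+ and released with CALL_MUNMAP(base, len).
+*/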
+
+/* Malloc using mmap */
+static void* mmap_alloc(mstate m, size_t nb) {
+ size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
+ if (m->footprint_limit != 0) {
+ size_t fp = m->footprint + mmsize;
+ if (fp <= m->footprint || fp > m->footprint_limit)
+ return 0;
+ }
+ if (mmsize > nb) { /* Check for wrap around 0 */
+ char* mm = (char*)(CALL_DIRECT_MMAP(mmsize));
+ if (mm != CMFAIL) {
+ size_t offset = align_offset(chunk2mem(mm));
+ size_t psize = mmsize - offset - MMAP_FOOT_PAD;
+ mchunkptr p = (mchunkptr)(mm + offset);
+ p->prev_foot = offset;
+ p->head = psize;
+ mark_inuse_foot(m, p, psize);
+ chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD;
+ chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0;
+
+ if (m->least_addr == 0 || mm < m->least_addr)
+ m->least_addr = mm;
+ if ((m->footprint += mmsize) > m->max_footprint)
+ m->max_footprint = m->footprint;
+ assert(is_aligned(chunk2mem(p)));
+ check_mmapped_chunk(m, p);
+ return chunk2mem(p);
+ }
+ }
+ return 0;
+}
+
+/* Realloc using mmap */
+static mchunkptr mmap_resize(mstate m, mchunkptr oldp, size_t nb, int flags) {
+ size_t oldsize = chunksize(oldp);
+ (void)flags; /* placate people compiling -Wunused */
+ if (is_small(nb)) /* Can't shrink mmap regions below small size */
+ return 0;
+ /* Keep old chunk if big enough but not too big */
+ if (oldsize >= nb + SIZE_T_SIZE &&
+ (oldsize - nb) <= (mparams.granularity << 1))
+ return oldp;
+ else {
+ size_t offset = oldp->prev_foot;
+ size_t oldmmsize = oldsize + offset + MMAP_FOOT_PAD;
+ size_t newmmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
+ char* cp = (char*)CALL_MREMAP((char*)oldp - offset,
+ oldmmsize, newmmsize, flags);
+ if (cp != CMFAIL) {
+ mchunkptr newp = (mchunkptr)(cp + offset);
+ size_t psize = newmmsize - offset - MMAP_FOOT_PAD;
+ newp->head = psize;
+ mark_inuse_foot(m, newp, psize);
+ chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD;
+ chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0;
+
+ if (cp < m->least_addr)
+ m->least_addr = cp;
+ if ((m->footprint += newmmsize - oldmmsize) > m->max_footprint)
+ m->max_footprint = m->footprint;
+ check_mmapped_chunk(m, newp);
+ return newp;
+ }
+ }
+ return 0;
+}
+
+
+/* -------------------------- mspace management -------------------------- */
+
+/* Initialize top chunk and its size */
+static void init_top(mstate m, mchunkptr p, size_t psize) {
+ /* Ensure alignment */
+ size_t offset = align_offset(chunk2mem(p));
+ p = (mchunkptr)((char*)p + offset);
+ psize -= offset;
+
+ m->top = p;
+ m->topsize = psize;
+ p->head = psize | PINUSE_BIT;
+ /* set size of fake trailing chunk holding overhead space only once */
+ chunk_plus_offset(p, psize)->head = TOP_FOOT_SIZE;
+ m->trim_check = mparams.trim_threshold; /* reset on each update */
+}
+
+/* Initialize bins for a new mstate that is otherwise zeroed out */
+static void init_bins(mstate m) {
+ /* Establish circular links for smallbins */
+ bindex_t i;
+ for (i = 0; i < NSMALLBINS; ++i) {
+ sbinptr bin = smallbin_at(m,i);
+ bin->fd = bin->bk = bin;
+ }
+}
+
+#if PROCEED_ON_ERROR
+
+/* default corruption action */
+static void reset_on_error(mstate m) {
+ int i;
+ ++malloc_corruption_error_count;
+ /* Reinitialize fields to forget about all memory */
+ m->smallmap = m->treemap = 0;
+ m->dvsize = m->topsize = 0;
+ m->seg.base = 0;
+ m->seg.size = 0;
+ m->seg.next = 0;
+ m->top = m->dv = 0;
+ for (i = 0; i < NTREEBINS; ++i)
+ *treebin_at(m, i) = 0;
+ init_bins(m);
+}
+#endif /* PROCEED_ON_ERROR */
+
+/* Allocate a chunk from newbase, prepending the remainder to the old base's first chunk (consolidating if it is free). */
+static void* prepend_alloc(mstate m, char* newbase, char* oldbase,
+ size_t nb) {
+ mchunkptr p = align_as_chunk(newbase);
+ mchunkptr oldfirst = align_as_chunk(oldbase);
+ size_t psize = (char*)oldfirst - (char*)p;
+ mchunkptr q = chunk_plus_offset(p, nb);
+ size_t qsize = psize - nb;
+ set_size_and_pinuse_of_inuse_chunk(m, p, nb);
+
+ assert((char*)oldfirst > (char*)q);
+ assert(pinuse(oldfirst));
+ assert(qsize >= MIN_CHUNK_SIZE);
+
+ /* consolidate remainder with first chunk of old base */
+ if (oldfirst == m->top) {
+ size_t tsize = m->topsize += qsize;
+ m->top = q;
+ q->head = tsize | PINUSE_BIT;
+ check_top_chunk(m, q);
+ }
+ else if (oldfirst == m->dv) {
+ size_t dsize = m->dvsize += qsize;
+ m->dv = q;
+ set_size_and_pinuse_of_free_chunk(q, dsize);
+ }
+ else {
+ if (!is_inuse(oldfirst)) {
+ size_t nsize = chunksize(oldfirst);
+ unlink_chunk(m, oldfirst, nsize);
+ oldfirst = chunk_plus_offset(oldfirst, nsize);
+ qsize += nsize;
+ }
+ set_free_with_pinuse(q, qsize, oldfirst);
+ insert_chunk(m, q, qsize);
+ check_free_chunk(m, q);
+ }
+
+ check_malloced_chunk(m, chunk2mem(p), nb);
+ return chunk2mem(p);
+}
+
+/* Add a segment to hold a new noncontiguous region */
+static void add_segment(mstate m, char* tbase, size_t tsize, flag_t mmapped) {
+ /* Determine locations and sizes of segment, fenceposts, old top */
+ char* old_top = (char*)m->top;
+ msegmentptr oldsp = segment_holding(m, old_top);
+ char* old_end = oldsp->base + oldsp->size;
+ size_t ssize = pad_request(sizeof(struct malloc_segment));
+ char* rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
+ size_t offset = align_offset(chunk2mem(rawsp));
+ char* asp = rawsp + offset;
+ char* csp = (asp < (old_top + MIN_CHUNK_SIZE))? old_top : asp;
+ mchunkptr sp = (mchunkptr)csp;
+ msegmentptr ss = (msegmentptr)(chunk2mem(sp));
+ mchunkptr tnext = chunk_plus_offset(sp, ssize);
+ mchunkptr p = tnext;
+ int nfences = 0;
+
+ /* reset top to new space */
+ init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
+
+ /* Set up segment record */
+ assert(is_aligned(ss));
+ set_size_and_pinuse_of_inuse_chunk(m, sp, ssize);
+ *ss = m->seg; /* Push current record */
+ m->seg.base = tbase;
+ m->seg.size = tsize;
+ m->seg.sflags = mmapped;
+ m->seg.next = ss;
+
+ /* Insert trailing fenceposts */
+ for (;;) {
+ mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE);
+ p->head = FENCEPOST_HEAD;
+ ++nfences;
+ if ((char*)(&(nextp->head)) < old_end)
+ p = nextp;
+ else
+ break;
+ }
+ assert(nfences >= 2);
+
+ /* Insert the rest of old top into a bin as an ordinary free chunk */
+ if (csp != old_top) {
+ mchunkptr q = (mchunkptr)old_top;
+ size_t psize = csp - old_top;
+ mchunkptr tn = chunk_plus_offset(q, psize);
+ set_free_with_pinuse(q, psize, tn);
+ insert_chunk(m, q, psize);
+ }
+
+ check_top_chunk(m, m->top);
+}
+
+/* -------------------------- System allocation -------------------------- */
+
+/* Get memory from system using MORECORE or MMAP */
+static void* sys_alloc(mstate m, size_t nb) {
+ char* tbase = CMFAIL;
+ size_t tsize = 0;
+ flag_t mmap_flag = 0;
+ size_t asize; /* allocation size */
+
+ ensure_initialization();
+
+ /* Directly map large chunks, but only if already initialized */
+ if (use_mmap(m) && nb >= mparams.mmap_threshold && m->topsize != 0) {
+ void* mem = mmap_alloc(m, nb);
+ if (mem != 0)
+ return mem;
+ }
+
+ asize = granularity_align(nb + SYS_ALLOC_PADDING);
+ if (asize <= nb)
+ return 0; /* wraparound */
+ if (m->footprint_limit != 0) {
+ size_t fp = m->footprint + asize;
+ if (fp <= m->footprint || fp > m->footprint_limit)
+ return 0;
+ }
+
+ /*
+ Try getting memory in any of three ways (in most-preferred to
+ least-preferred order):
+ 1. A call to MORECORE that can normally contiguously extend memory.
+ (disabled if not MORECORE_CONTIGUOUS or not HAVE_MORECORE or
+ or main space is mmapped or a previous contiguous call failed)
+ 2. A call to MMAP new space (disabled if not HAVE_MMAP).
+ Note that under the default settings, if MORECORE is unable to
+ fulfill a request, and HAVE_MMAP is true, then mmap is
+ used as a noncontiguous system allocator. This is a useful backup
+ strategy for systems with holes in address spaces -- in this case
+ sbrk cannot contiguously expand the heap, but mmap may be able to
+ find space.
+ 3. A call to MORECORE that cannot usually contiguously extend memory.
+ (disabled if not HAVE_MORECORE)
+
+ In all cases, we need to request enough bytes from system to ensure
+ we can malloc nb bytes upon success, so pad with enough space for
+ top_foot, plus alignment-pad to make sure we don't lose bytes if
+ not on boundary, and round this up to a granularity unit.
+ */
+
+ if (MORECORE_CONTIGUOUS && !use_noncontiguous(m)) {
+ char* br = CMFAIL;
+ size_t ssize = asize; /* sbrk call size */
+ msegmentptr ss = (m->top == 0)? 0 : segment_holding(m, (char*)m->top);
+ ACQUIRE_MALLOC_GLOBAL_LOCK();
+
+ if (ss == 0) { /* First time through or recovery */
+ char* base = (char*)CALL_MORECORE(0);
+ if (base != CMFAIL) {
+ size_t fp;
+ /* Adjust to end on a page boundary */
+ if (!is_page_aligned(base))
+ ssize += (page_align((size_t)base) - (size_t)base);
+ fp = m->footprint + ssize; /* recheck limits */
+ if (ssize > nb && ssize < HALF_MAX_SIZE_T &&
+ (m->footprint_limit == 0 ||
+ (fp > m->footprint && fp <= m->footprint_limit)) &&
+ (br = (char*)(CALL_MORECORE(ssize))) == base) {
+ tbase = base;
+ tsize = ssize;
+ }
+ }
+ }
+ else {
+ /* Subtract out existing available top space from MORECORE request. */
+ ssize = granularity_align(nb - m->topsize + SYS_ALLOC_PADDING);
+ /* Use mem here only if it did continuously extend old space */
+ if (ssize < HALF_MAX_SIZE_T &&
+ (br = (char*)(CALL_MORECORE(ssize))) == ss->base+ss->size) {
+ tbase = br;
+ tsize = ssize;
+ }
+ }
+
+ if (tbase == CMFAIL) { /* Cope with partial failure */
+ if (br != CMFAIL) { /* Try to use/extend the space we did get */
+ if (ssize < HALF_MAX_SIZE_T &&
+ ssize < nb + SYS_ALLOC_PADDING) {
+ size_t esize = granularity_align(nb + SYS_ALLOC_PADDING - ssize);
+ if (esize < HALF_MAX_SIZE_T) {
+ char* end = (char*)CALL_MORECORE(esize);
+ if (end != CMFAIL)
+ ssize += esize;
+ else { /* Can't use; try to release */
+ (void) CALL_MORECORE(-ssize);
+ br = CMFAIL;
+ }
+ }
+ }
+ }
+ if (br != CMFAIL) { /* Use the space we did get */
+ tbase = br;
+ tsize = ssize;
+ }
+ else
+ disable_contiguous(m); /* Don't try contiguous path in the future */
+ }
+
+ RELEASE_MALLOC_GLOBAL_LOCK();
+ }
+
+ if (HAVE_MMAP && tbase == CMFAIL) { /* Try MMAP */
+ char* mp = (char*)(CALL_MMAP(asize));
+ if (mp != CMFAIL) {
+ tbase = mp;
+ tsize = asize;
+ mmap_flag = USE_MMAP_BIT;
+ }
+ }
+
+ if (HAVE_MORECORE && tbase == CMFAIL) { /* Try noncontiguous MORECORE */
+ if (asize < HALF_MAX_SIZE_T) {
+ char* br = CMFAIL;
+ char* end = CMFAIL;
+ ACQUIRE_MALLOC_GLOBAL_LOCK();
+ br = (char*)(CALL_MORECORE(asize));
+ end = (char*)(CALL_MORECORE(0));
+ RELEASE_MALLOC_GLOBAL_LOCK();
+ if (br != CMFAIL && end != CMFAIL && br < end) {
+ size_t ssize = end - br;
+ if (ssize > nb + TOP_FOOT_SIZE) {
+ tbase = br;
+ tsize = ssize;
+ }
+ }
+ }
+ }
+
+ if (tbase != CMFAIL) {
+
+ if ((m->footprint += tsize) > m->max_footprint)
+ m->max_footprint = m->footprint;
+
+ if (!is_initialized(m)) { /* first-time initialization */
+ if (m->least_addr == 0 || tbase < m->least_addr)
+ m->least_addr = tbase;
+ m->seg.base = tbase;
+ m->seg.size = tsize;
+ m->seg.sflags = mmap_flag;
+ m->magic = mparams.magic;
+ m->release_checks = MAX_RELEASE_CHECK_RATE;
+ init_bins(m);
+#if !ONLY_MSPACES
+ if (is_global(m))
+ init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
+ else
+#endif
+ {
+ /* Offset top by embedded malloc_state */
+ mchunkptr mn = next_chunk(mem2chunk(m));
+ init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) -TOP_FOOT_SIZE);
+ }
+ }
+
+ else {
+ /* Try to merge with an existing segment */
+ msegmentptr sp = &m->seg;
+ /* Only consider most recent segment if traversal suppressed */
+ while (sp != 0 && tbase != sp->base + sp->size)
+ sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next;
+ if (sp != 0 &&
+ !is_extern_segment(sp) &&
+ (sp->sflags & USE_MMAP_BIT) == mmap_flag &&
+ segment_holds(sp, m->top)) { /* append */
+ sp->size += tsize;
+ init_top(m, m->top, m->topsize + tsize);
+ }
+ else {
+ if (tbase < m->least_addr)
+ m->least_addr = tbase;
+ sp = &m->seg;
+ while (sp != 0 && sp->base != tbase + tsize)
+ sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next;
+ if (sp != 0 &&
+ !is_extern_segment(sp) &&
+ (sp->sflags & USE_MMAP_BIT) == mmap_flag) {
+ char* oldbase = sp->base;
+ sp->base = tbase;
+ sp->size += tsize;
+ return prepend_alloc(m, tbase, oldbase, nb);
+ }
+ else
+ add_segment(m, tbase, tsize, mmap_flag);
+ }
+ }
+
+ if (nb < m->topsize) { /* Allocate from new or extended top space */
+ size_t rsize = m->topsize -= nb;
+ mchunkptr p = m->top;
+ mchunkptr r = m->top = chunk_plus_offset(p, nb);
+ r->head = rsize | PINUSE_BIT;
+ set_size_and_pinuse_of_inuse_chunk(m, p, nb);
+ check_top_chunk(m, m->top);
+ check_malloced_chunk(m, chunk2mem(p), nb);
+ return chunk2mem(p);
+ }
+ }
+
+ MALLOC_FAILURE_ACTION;
+ return 0;
+}
+
+/* ----------------------- system deallocation -------------------------- */
+
+/* Unmap and unlink any mmapped segments that don't contain used chunks */
+static size_t release_unused_segments(mstate m) {
+ size_t released = 0;
+ int nsegs = 0;
+ msegmentptr pred = &m->seg;
+ msegmentptr sp = pred->next;
+ while (sp != 0) {
+ char* base = sp->base;
+ size_t size = sp->size;
+ msegmentptr next = sp->next;
+ ++nsegs;
+ if (is_mmapped_segment(sp) && !is_extern_segment(sp)) {
+ mchunkptr p = align_as_chunk(base);
+ size_t psize = chunksize(p);
+ /* Can unmap if first chunk holds entire segment and not pinned */
+ if (!is_inuse(p) && (char*)p + psize >= base + size - TOP_FOOT_SIZE) {
+ tchunkptr tp = (tchunkptr)p;
+ assert(segment_holds(sp, (char*)sp));
+ if (p == m->dv) {
+ m->dv = 0;
+ m->dvsize = 0;
+ }
+ else {
+ unlink_large_chunk(m, tp);
+ }
+ if (CALL_MUNMAP(base, size) == 0) {
+ released += size;
+ m->footprint -= size;
+ /* unlink obsoleted record */
+ sp = pred;
+ sp->next = next;
+ }
+ else { /* back out if cannot unmap */
+ insert_large_chunk(m, tp, psize);
+ }
+ }
+ }
+ if (NO_SEGMENT_TRAVERSAL) /* scan only first segment */
+ break;
+ pred = sp;
+ sp = next;
+ }
+ /* Reset check counter */
+ m->release_checks = (((size_t) nsegs > (size_t) MAX_RELEASE_CHECK_RATE)?
+ (size_t) nsegs : (size_t) MAX_RELEASE_CHECK_RATE);
+ return released;
+}
+
+static int sys_trim(mstate m, size_t pad) {
+ size_t released = 0;
+ ensure_initialization();
+ if (pad < MAX_REQUEST && is_initialized(m)) {
+ pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */
+
+ if (m->topsize > pad) {
+ /* Shrink top space in granularity-size units, keeping at least one */
+ size_t unit = mparams.granularity;
+ size_t extra = ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit -
+ SIZE_T_ONE) * unit;
+ msegmentptr sp = segment_holding(m, (char*)m->top);
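+      /* Illustrative: with unit = 64KiB and (m->topsize - pad) = 200KiB,
+         extra = (ceil(200/64) - 1) * 64KiB = 192KiB, so trimming leaves
+         the final partial unit (8KiB plus the pad) in top. */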
+
+ if (!is_extern_segment(sp)) {
+ if (is_mmapped_segment(sp)) {
+ if (HAVE_MMAP &&
+ sp->size >= extra &&
+ !has_segment_link(m, sp)) { /* can't shrink if pinned */
+ size_t newsize = sp->size - extra;
+ (void)newsize; /* placate people compiling -Wunused-variable */
+ /* Prefer mremap, fall back to munmap */
+ if ((CALL_MREMAP(sp->base, sp->size, newsize, 0) != MFAIL) ||
+ (CALL_MUNMAP(sp->base + newsize, extra) == 0)) {
+ released = extra;
+ }
+ }
+ }
+ else if (HAVE_MORECORE) {
+ if (extra >= HALF_MAX_SIZE_T) /* Avoid wrapping negative */
+ extra = (HALF_MAX_SIZE_T) + SIZE_T_ONE - unit;
+ ACQUIRE_MALLOC_GLOBAL_LOCK();
+ {
+ /* Make sure end of memory is where we last set it. */
+ char* old_br = (char*)(CALL_MORECORE(0));
+ if (old_br == sp->base + sp->size) {
+ char* rel_br = (char*)(CALL_MORECORE(-extra));
+ char* new_br = (char*)(CALL_MORECORE(0));
+ if (rel_br != CMFAIL && new_br < old_br)
+ released = old_br - new_br;
+ }
+ }
+ RELEASE_MALLOC_GLOBAL_LOCK();
+ }
+ }
+
+ if (released != 0) {
+ sp->size -= released;
+ m->footprint -= released;
+ init_top(m, m->top, m->topsize - released);
+ check_top_chunk(m, m->top);
+ }
+ }
+
+ /* Unmap any unused mmapped segments */
+ if (HAVE_MMAP)
+ released += release_unused_segments(m);
+
+ /* On failure, disable autotrim to avoid repeated failed future calls */
+ if (released == 0 && m->topsize > m->trim_check)
+ m->trim_check = MAX_SIZE_T;
+ }
+
+ return (released != 0)? 1 : 0;
+}
+
+/* Consolidate and bin a chunk. Differs from exported versions
+ of free mainly in that the chunk need not be marked as inuse.
+*/
+static void dispose_chunk(mstate m, mchunkptr p, size_t psize) {
+ mchunkptr next = chunk_plus_offset(p, psize);
+ if (!pinuse(p)) {
+ mchunkptr prev;
+ size_t prevsize = p->prev_foot;
+ if (is_mmapped(p)) {
+ psize += prevsize + MMAP_FOOT_PAD;
+ if (CALL_MUNMAP((char*)p - prevsize, psize) == 0)
+ m->footprint -= psize;
+ return;
+ }
+ prev = chunk_minus_offset(p, prevsize);
+ psize += prevsize;
+ p = prev;
+ if (RTCHECK(ok_address(m, prev))) { /* consolidate backward */
+ if (p != m->dv) {
+ unlink_chunk(m, p, prevsize);
+ }
+ else if ((next->head & INUSE_BITS) == INUSE_BITS) {
+ m->dvsize = psize;
+ set_free_with_pinuse(p, psize, next);
+ return;
+ }
+ }
+ else {
+ CORRUPTION_ERROR_ACTION(m);
+ return;
+ }
+ }
+ if (RTCHECK(ok_address(m, next))) {
+ if (!cinuse(next)) { /* consolidate forward */
+ if (next == m->top) {
+ size_t tsize = m->topsize += psize;
+ m->top = p;
+ p->head = tsize | PINUSE_BIT;
+ if (p == m->dv) {
+ m->dv = 0;
+ m->dvsize = 0;
+ }
+ return;
+ }
+ else if (next == m->dv) {
+ size_t dsize = m->dvsize += psize;
+ m->dv = p;
+ set_size_and_pinuse_of_free_chunk(p, dsize);
+ return;
+ }
+ else {
+ size_t nsize = chunksize(next);
+ psize += nsize;
+ unlink_chunk(m, next, nsize);
+ set_size_and_pinuse_of_free_chunk(p, psize);
+ if (p == m->dv) {
+ m->dvsize = psize;
+ return;
+ }
+ }
+ }
+ else {
+ set_free_with_pinuse(p, psize, next);
+ }
+ insert_chunk(m, p, psize);
+ }
+ else {
+ CORRUPTION_ERROR_ACTION(m);
+ }
+}
+
+/* ---------------------------- malloc --------------------------- */
+
+/* allocate a large request from the best fitting chunk in a treebin */
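+/*
+   Informal note: each treebin is a bitwise trie keyed on chunk size.
+   Successive high-order bits of nb (tracked in 'sizebits' below) select
+   child[0] or child[1] at each level, so the depth of the descent is
+   bounded by the number of size bits. 'rst' records the deepest right
+   subtree passed over; if no exact fit turns up, the smallest chunk
+   under 'rst' is the tightest fit among the larger sizes in this bin.
+*/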
+static void* tmalloc_large(mstate m, size_t nb) {
+ tchunkptr v = 0;
+ size_t rsize = -nb; /* Unsigned negation */
+ tchunkptr t;
+ bindex_t idx;
+ compute_tree_index(nb, idx);
+ if ((t = *treebin_at(m, idx)) != 0) {
+ /* Traverse tree for this bin looking for node with size == nb */
+ size_t sizebits = nb << leftshift_for_tree_index(idx);
+ tchunkptr rst = 0; /* The deepest untaken right subtree */
+ for (;;) {
+ tchunkptr rt;
+ size_t trem = chunksize(t) - nb;
+ if (trem < rsize) {
+ v = t;
+ if ((rsize = trem) == 0)
+ break;
+ }
+ rt = t->child[1];
+ t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1];
+ if (rt != 0 && rt != t)
+ rst = rt;
+ if (t == 0) {
+ t = rst; /* set t to least subtree holding sizes > nb */
+ break;
+ }
+ sizebits <<= 1;
+ }
+ }
+ if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */
+ binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap;
+ if (leftbits != 0) {
+ bindex_t i;
+ binmap_t leastbit = least_bit(leftbits);
+ compute_bit2idx(leastbit, i);
+ t = *treebin_at(m, i);
+ }
+ }
+
+ while (t != 0) { /* find smallest of tree or subtree */
+ size_t trem = chunksize(t) - nb;
+ if (trem < rsize) {
+ rsize = trem;
+ v = t;
+ }
+ t = leftmost_child(t);
+ }
+
+ /* If dv is a better fit, return 0 so malloc will use it */
+ if (v != 0 && rsize < (size_t)(m->dvsize - nb)) {
+ if (RTCHECK(ok_address(m, v))) { /* split */
+ mchunkptr r = chunk_plus_offset(v, nb);
+ assert(chunksize(v) == rsize + nb);
+ if (RTCHECK(ok_next(v, r))) {
+ unlink_large_chunk(m, v);
+ if (rsize < MIN_CHUNK_SIZE)
+ set_inuse_and_pinuse(m, v, (rsize + nb));
+ else {
+ set_size_and_pinuse_of_inuse_chunk(m, v, nb);
+ set_size_and_pinuse_of_free_chunk(r, rsize);
+ insert_chunk(m, r, rsize);
+ }
+ return chunk2mem(v);
+ }
+ }
+ CORRUPTION_ERROR_ACTION(m);
+ }
+ return 0;
+}
+
+/* allocate a small request from the best fitting chunk in a treebin */
+static void* tmalloc_small(mstate m, size_t nb) {
+ tchunkptr t, v;
+ size_t rsize;
+ bindex_t i;
+ binmap_t leastbit = least_bit(m->treemap);
+ compute_bit2idx(leastbit, i);
+ v = t = *treebin_at(m, i);
+ rsize = chunksize(t) - nb;
+
+ while ((t = leftmost_child(t)) != 0) {
+ size_t trem = chunksize(t) - nb;
+ if (trem < rsize) {
+ rsize = trem;
+ v = t;
+ }
+ }
+
+ if (RTCHECK(ok_address(m, v))) {
+ mchunkptr r = chunk_plus_offset(v, nb);
+ assert(chunksize(v) == rsize + nb);
+ if (RTCHECK(ok_next(v, r))) {
+ unlink_large_chunk(m, v);
+ if (rsize < MIN_CHUNK_SIZE)
+ set_inuse_and_pinuse(m, v, (rsize + nb));
+ else {
+ set_size_and_pinuse_of_inuse_chunk(m, v, nb);
+ set_size_and_pinuse_of_free_chunk(r, rsize);
+ replace_dv(m, r, rsize);
+ }
+ return chunk2mem(v);
+ }
+ }
+
+ CORRUPTION_ERROR_ACTION(m);
+ return 0;
+}
+
+#if !ONLY_MSPACES
+
+void* dlmalloc(size_t bytes) {
+ /*
+ Basic algorithm:
+ If a small request (< 256 bytes minus per-chunk overhead):
+ 1. If one exists, use a remainderless chunk in associated smallbin.
+ (Remainderless means that there are too few excess bytes to
+ represent as a chunk.)
+ 2. If it is big enough, use the dv chunk, which is normally the
+ chunk adjacent to the one used for the most recent small request.
+ 3. If one exists, split the smallest available chunk in a bin,
+ saving remainder in dv.
+ 4. If it is big enough, use the top chunk.
+ 5. If available, get memory from system and use it
+ Otherwise, for a large request:
+ 1. Find the smallest available binned chunk that fits, and use it
+ if it is better fitting than dv chunk, splitting if necessary.
+ 2. If better fitting than any binned chunk, use the dv chunk.
+ 3. If it is big enough, use the top chunk.
+ 4. If request size >= mmap threshold, try to directly mmap this chunk.
+ 5. If available, get memory from system and use it
+
+ The ugly goto's here ensure that postaction occurs along all paths.
+ */
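+  /*
+    Worked example (informal; FOOTERS off, so CHUNK_OVERHEAD == SIZE_T_SIZE):
+    on LP64, dlmalloc(100) computes nb = pad_request(100) = 112 (100 bytes
+    plus 8 of overhead, rounded up to the chunk alignment) and idx =
+    small_index(112) = 112 >> 3 = 14. (gm->smallmap >> 14) & 3 then tests
+    smallbins 14 and 15 in one step: a chunk from the next bin is only 8
+    bytes larger, too little to split off as a separate free chunk, so
+    either bin yields a "remainderless" fit.
+  */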
+
+#if USE_LOCKS
+ ensure_initialization(); /* initialize in sys_alloc if not using locks */
+#endif
+
+ if (!PREACTION(gm)) {
+ void* mem;
+ size_t nb;
+ if (bytes <= MAX_SMALL_REQUEST) {
+ bindex_t idx;
+ binmap_t smallbits;
+ nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes);
+ idx = small_index(nb);
+ smallbits = gm->smallmap >> idx;
+
+ if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */
+ mchunkptr b, p;
+ idx += ~smallbits & 1; /* Uses next bin if idx empty */
+ b = smallbin_at(gm, idx);
+ p = b->fd;
+ assert(chunksize(p) == small_index2size(idx));
+ unlink_first_small_chunk(gm, b, p, idx);
+ set_inuse_and_pinuse(gm, p, small_index2size(idx));
+ mem = chunk2mem(p);
+ check_malloced_chunk(gm, mem, nb);
+ goto postaction;
+ }
+
+ else if (nb > gm->dvsize) {
+ if (smallbits != 0) { /* Use chunk in next nonempty smallbin */
+ mchunkptr b, p, r;
+ size_t rsize;
+ bindex_t i;
+ binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx));
+ binmap_t leastbit = least_bit(leftbits);
+ compute_bit2idx(leastbit, i);
+ b = smallbin_at(gm, i);
+ p = b->fd;
+ assert(chunksize(p) == small_index2size(i));
+ unlink_first_small_chunk(gm, b, p, i);
+ rsize = small_index2size(i) - nb;
+ /* Fit here cannot be remainderless if 4byte sizes */
+ if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE)
+ set_inuse_and_pinuse(gm, p, small_index2size(i));
+ else {
+ set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
+ r = chunk_plus_offset(p, nb);
+ set_size_and_pinuse_of_free_chunk(r, rsize);
+ replace_dv(gm, r, rsize);
+ }
+ mem = chunk2mem(p);
+ check_malloced_chunk(gm, mem, nb);
+ goto postaction;
+ }
+
+ else if (gm->treemap != 0 && (mem = tmalloc_small(gm, nb)) != 0) {
+ check_malloced_chunk(gm, mem, nb);
+ goto postaction;
+ }
+ }
+ }
+ else if (bytes >= MAX_REQUEST)
+ nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */
+ else {
+ nb = pad_request(bytes);
+ if (gm->treemap != 0 && (mem = tmalloc_large(gm, nb)) != 0) {
+ check_malloced_chunk(gm, mem, nb);
+ goto postaction;
+ }
+ }
+
+ if (nb <= gm->dvsize) {
+ size_t rsize = gm->dvsize - nb;
+ mchunkptr p = gm->dv;
+ if (rsize >= MIN_CHUNK_SIZE) { /* split dv */
+ mchunkptr r = gm->dv = chunk_plus_offset(p, nb);
+ gm->dvsize = rsize;
+ set_size_and_pinuse_of_free_chunk(r, rsize);
+ set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
+ }
+ else { /* exhaust dv */
+ size_t dvs = gm->dvsize;
+ gm->dvsize = 0;
+ gm->dv = 0;
+ set_inuse_and_pinuse(gm, p, dvs);
+ }
+ mem = chunk2mem(p);
+ check_malloced_chunk(gm, mem, nb);
+ goto postaction;
+ }
+
+ else if (nb < gm->topsize) { /* Split top */
+ size_t rsize = gm->topsize -= nb;
+ mchunkptr p = gm->top;
+ mchunkptr r = gm->top = chunk_plus_offset(p, nb);
+ r->head = rsize | PINUSE_BIT;
+ set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
+ mem = chunk2mem(p);
+ check_top_chunk(gm, gm->top);
+ check_malloced_chunk(gm, mem, nb);
+ goto postaction;
+ }
+
+ mem = sys_alloc(gm, nb);
+
+ postaction:
+ POSTACTION(gm);
+ return mem;
+ }
+
+ return 0;
+}
+
+/* ---------------------------- free --------------------------- */
+
+void dlfree(void* mem) {
+ /*
+    Consolidate freed chunks with preceding or succeeding bordering
+ free chunks, if they exist, and then place in a bin. Intermixed
+ with special cases for top, dv, mmapped chunks, and usage errors.
+ */
+
+ if (mem != 0) {
+ mchunkptr p = mem2chunk(mem);
+#if FOOTERS
+ mstate fm = get_mstate_for(p);
+ if (!ok_magic(fm)) {
+ USAGE_ERROR_ACTION(fm, p);
+ return;
+ }
+#else /* FOOTERS */
+#define fm gm
+#endif /* FOOTERS */
+ if (!PREACTION(fm)) {
+ check_inuse_chunk(fm, p);
+ if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) {
+ size_t psize = chunksize(p);
+ mchunkptr next = chunk_plus_offset(p, psize);
+ if (!pinuse(p)) {
+ size_t prevsize = p->prev_foot;
+ if (is_mmapped(p)) {
+ psize += prevsize + MMAP_FOOT_PAD;
+ if (CALL_MUNMAP((char*)p - prevsize, psize) == 0)
+ fm->footprint -= psize;
+ goto postaction;
+ }
+ else {
+ mchunkptr prev = chunk_minus_offset(p, prevsize);
+ psize += prevsize;
+ p = prev;
+ if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */
+ if (p != fm->dv) {
+ unlink_chunk(fm, p, prevsize);
+ }
+ else if ((next->head & INUSE_BITS) == INUSE_BITS) {
+ fm->dvsize = psize;
+ set_free_with_pinuse(p, psize, next);
+ goto postaction;
+ }
+ }
+ else
+ goto erroraction;
+ }
+ }
+
+ if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) {
+ if (!cinuse(next)) { /* consolidate forward */
+ if (next == fm->top) {
+ size_t tsize = fm->topsize += psize;
+ fm->top = p;
+ p->head = tsize | PINUSE_BIT;
+ if (p == fm->dv) {
+ fm->dv = 0;
+ fm->dvsize = 0;
+ }
+ if (should_trim(fm, tsize))
+ sys_trim(fm, 0);
+ goto postaction;
+ }
+ else if (next == fm->dv) {
+ size_t dsize = fm->dvsize += psize;
+ fm->dv = p;
+ set_size_and_pinuse_of_free_chunk(p, dsize);
+ goto postaction;
+ }
+ else {
+ size_t nsize = chunksize(next);
+ psize += nsize;
+ unlink_chunk(fm, next, nsize);
+ set_size_and_pinuse_of_free_chunk(p, psize);
+ if (p == fm->dv) {
+ fm->dvsize = psize;
+ goto postaction;
+ }
+ }
+ }
+ else
+ set_free_with_pinuse(p, psize, next);
+
+ if (is_small(psize)) {
+ insert_small_chunk(fm, p, psize);
+ check_free_chunk(fm, p);
+ }
+ else {
+ tchunkptr tp = (tchunkptr)p;
+ insert_large_chunk(fm, tp, psize);
+ check_free_chunk(fm, p);
+ if (--fm->release_checks == 0)
+ release_unused_segments(fm);
+ }
+ goto postaction;
+ }
+ }
+ erroraction:
+ USAGE_ERROR_ACTION(fm, p);
+ postaction:
+ POSTACTION(fm);
+ }
+ }
+#if !FOOTERS
+#undef fm
+#endif /* FOOTERS */
+}
+
+void* dlcalloc(size_t n_elements, size_t elem_size) {
+ void* mem;
+ size_t req = 0;
+ if (n_elements != 0) {
+ req = n_elements * elem_size;
+ if (((n_elements | elem_size) & ~(size_t)0xffff) &&
+ (req / n_elements != elem_size))
+ req = MAX_SIZE_T; /* force downstream failure on overflow */
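+    /* Informal note: the '& ~(size_t)0xffff' test skips the division when
+       both operands fit in 16 bits, since such a product cannot overflow
+       size_t; otherwise 'req / n_elements != elem_size' detects wrap. */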
+ }
+ mem = dlmalloc(req);
+ if (mem != 0 && calloc_must_clear(mem2chunk(mem)))
+ memset(mem, 0, req);
+ return mem;
+}
+
+#endif /* !ONLY_MSPACES */
+
+/* ------------ Internal support for realloc, memalign, etc -------------- */
+
+/* Try to realloc; only in-place unless can_move true */
+static mchunkptr try_realloc_chunk(mstate m, mchunkptr p, size_t nb,
+ int can_move) {
+ mchunkptr newp = 0;
+ size_t oldsize = chunksize(p);
+ mchunkptr next = chunk_plus_offset(p, oldsize);
+ if (RTCHECK(ok_address(m, p) && ok_inuse(p) &&
+ ok_next(p, next) && ok_pinuse(next))) {
+ if (is_mmapped(p)) {
+ newp = mmap_resize(m, p, nb, can_move);
+ }
+ else if (oldsize >= nb) { /* already big enough */
+ size_t rsize = oldsize - nb;
+ if (rsize >= MIN_CHUNK_SIZE) { /* split off remainder */
+ mchunkptr r = chunk_plus_offset(p, nb);
+ set_inuse(m, p, nb);
+ set_inuse(m, r, rsize);
+ dispose_chunk(m, r, rsize);
+ }
+ newp = p;
+ }
+ else if (next == m->top) { /* extend into top */
+ if (oldsize + m->topsize > nb) {
+ size_t newsize = oldsize + m->topsize;
+ size_t newtopsize = newsize - nb;
+ mchunkptr newtop = chunk_plus_offset(p, nb);
+ set_inuse(m, p, nb);
+      newtop->head = newtopsize | PINUSE_BIT;
+ m->top = newtop;
+ m->topsize = newtopsize;
+ newp = p;
+ }
+ }
+ else if (next == m->dv) { /* extend into dv */
+ size_t dvs = m->dvsize;
+ if (oldsize + dvs >= nb) {
+ size_t dsize = oldsize + dvs - nb;
+ if (dsize >= MIN_CHUNK_SIZE) {
+ mchunkptr r = chunk_plus_offset(p, nb);
+ mchunkptr n = chunk_plus_offset(r, dsize);
+ set_inuse(m, p, nb);
+ set_size_and_pinuse_of_free_chunk(r, dsize);
+ clear_pinuse(n);
+ m->dvsize = dsize;
+ m->dv = r;
+ }
+ else { /* exhaust dv */
+ size_t newsize = oldsize + dvs;
+ set_inuse(m, p, newsize);
+ m->dvsize = 0;
+ m->dv = 0;
+ }
+ newp = p;
+ }
+ }
+ else if (!cinuse(next)) { /* extend into next free chunk */
+ size_t nextsize = chunksize(next);
+ if (oldsize + nextsize >= nb) {
+ size_t rsize = oldsize + nextsize - nb;
+ unlink_chunk(m, next, nextsize);
+ if (rsize < MIN_CHUNK_SIZE) {
+ size_t newsize = oldsize + nextsize;
+ set_inuse(m, p, newsize);
+ }
+ else {
+ mchunkptr r = chunk_plus_offset(p, nb);
+ set_inuse(m, p, nb);
+ set_inuse(m, r, rsize);
+ dispose_chunk(m, r, rsize);
+ }
+ newp = p;
+ }
+ }
+ }
+ else {
+ USAGE_ERROR_ACTION(m, chunk2mem(p));
+ }
+ return newp;
+}
+
+static void* internal_memalign(mstate m, size_t alignment, size_t bytes) {
+ void* mem = 0;
+ if (alignment < MIN_CHUNK_SIZE) /* must be at least a minimum chunk size */
+ alignment = MIN_CHUNK_SIZE;
+ if ((alignment & (alignment-SIZE_T_ONE)) != 0) {/* Ensure a power of 2 */
+ size_t a = MALLOC_ALIGNMENT << 1;
+ while (a < alignment) a <<= 1;
+ alignment = a;
+ }
+ if (bytes >= MAX_REQUEST - alignment) {
+ if (m != 0) { /* Test isn't needed but avoids compiler warning */
+ MALLOC_FAILURE_ACTION;
+ }
+ }
+ else {
+ size_t nb = request2size(bytes);
+ size_t req = nb + alignment + MIN_CHUNK_SIZE - CHUNK_OVERHEAD;
+ mem = internal_malloc(m, req);
+ if (mem != 0) {
+ mchunkptr p = mem2chunk(mem);
+ if (PREACTION(m))
+ return 0;
+ if ((((size_t)(mem)) & (alignment - 1)) != 0) { /* misaligned */
+ /*
+ Find an aligned spot inside chunk. Since we need to give
+ back leading space in a chunk of at least MIN_CHUNK_SIZE, if
+ the first calculation places us at a spot with less than
+ MIN_CHUNK_SIZE leader, we can move to the next aligned spot.
+ We've allocated enough total room so that this is always
+ possible.
+ */
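+          /* Illustrative: with alignment = 64, br is mem rounded up to
+             the next 64-byte boundary, converted back to a chunk address.
+             If that leaves fewer than MIN_CHUNK_SIZE leading bytes to give
+             back, pos advances one more alignment unit; the extra
+             'alignment + MIN_CHUNK_SIZE' requested above guarantees the
+             result still fits inside the chunk. */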
+ char* br = (char*)mem2chunk((size_t)(((size_t)((char*)mem + alignment -
+ SIZE_T_ONE)) &
+ -alignment));
+ char* pos = ((size_t)(br - (char*)(p)) >= MIN_CHUNK_SIZE)?
+ br : br+alignment;
+ mchunkptr newp = (mchunkptr)pos;
+ size_t leadsize = pos - (char*)(p);
+ size_t newsize = chunksize(p) - leadsize;
+
+ if (is_mmapped(p)) { /* For mmapped chunks, just adjust offset */
+ newp->prev_foot = p->prev_foot + leadsize;
+ newp->head = newsize;
+ }
+ else { /* Otherwise, give back leader, use the rest */
+ set_inuse(m, newp, newsize);
+ set_inuse(m, p, leadsize);
+ dispose_chunk(m, p, leadsize);
+ }
+ p = newp;
+ }
+
+ /* Give back spare room at the end */
+ if (!is_mmapped(p)) {
+ size_t size = chunksize(p);
+ if (size > nb + MIN_CHUNK_SIZE) {
+ size_t remainder_size = size - nb;
+ mchunkptr remainder = chunk_plus_offset(p, nb);
+ set_inuse(m, p, nb);
+ set_inuse(m, remainder, remainder_size);
+ dispose_chunk(m, remainder, remainder_size);
+ }
+ }
+
+ mem = chunk2mem(p);
+ assert (chunksize(p) >= nb);
+ assert(((size_t)mem & (alignment - 1)) == 0);
+ check_inuse_chunk(m, p);
+ POSTACTION(m);
+ }
+ }
+ return mem;
+}
+
+/*
+ Common support for independent_X routines, handling
+ all of the combinations that can result.
+ The opts arg has:
+ bit 0 set if all elements are same size (using sizes[0])
+ bit 1 set if elements should be zeroed
+*/
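+/*
+   As used below: dlindependent_calloc passes opts == 3 (same-size elements,
+   zeroed) and dlindependent_comalloc passes opts == 0 (per-element sizes,
+   not zeroed).
+*/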
+static void** ialloc(mstate m,
+ size_t n_elements,
+ size_t* sizes,
+ int opts,
+ void* chunks[]) {
+
+ size_t element_size; /* chunksize of each element, if all same */
+ size_t contents_size; /* total size of elements */
+ size_t array_size; /* request size of pointer array */
+ void* mem; /* malloced aggregate space */
+ mchunkptr p; /* corresponding chunk */
+ size_t remainder_size; /* remaining bytes while splitting */
+ void** marray; /* either "chunks" or malloced ptr array */
+ mchunkptr array_chunk; /* chunk for malloced ptr array */
+ flag_t was_enabled; /* to disable mmap */
+ size_t size;
+ size_t i;
+
+ ensure_initialization();
+ /* compute array length, if needed */
+ if (chunks != 0) {
+ if (n_elements == 0)
+ return chunks; /* nothing to do */
+ marray = chunks;
+ array_size = 0;
+ }
+ else {
+ /* if empty req, must still return chunk representing empty array */
+ if (n_elements == 0)
+ return (void**)internal_malloc(m, 0);
+ marray = 0;
+ array_size = request2size(n_elements * (sizeof(void*)));
+ }
+
+ /* compute total element size */
+ if (opts & 0x1) { /* all-same-size */
+ element_size = request2size(*sizes);
+ contents_size = n_elements * element_size;
+ }
+ else { /* add up all the sizes */
+ element_size = 0;
+ contents_size = 0;
+ for (i = 0; i != n_elements; ++i)
+ contents_size += request2size(sizes[i]);
+ }
+
+ size = contents_size + array_size;
+
+ /*
+ Allocate the aggregate chunk. First disable direct-mmapping so
+ malloc won't use it, since we would not be able to later
+ free/realloc space internal to a segregated mmap region.
+ */
+ was_enabled = use_mmap(m);
+ disable_mmap(m);
+ mem = internal_malloc(m, size - CHUNK_OVERHEAD);
+ if (was_enabled)
+ enable_mmap(m);
+ if (mem == 0)
+ return 0;
+
+ if (PREACTION(m)) return 0;
+ p = mem2chunk(mem);
+ remainder_size = chunksize(p);
+
+ assert(!is_mmapped(p));
+
+ if (opts & 0x2) { /* optionally clear the elements */
+ memset((size_t*)mem, 0, remainder_size - SIZE_T_SIZE - array_size);
+ }
+
+ /* If not provided, allocate the pointer array as final part of chunk */
+ if (marray == 0) {
+ size_t array_chunk_size;
+ array_chunk = chunk_plus_offset(p, contents_size);
+ array_chunk_size = remainder_size - contents_size;
+ marray = (void**) (chunk2mem(array_chunk));
+ set_size_and_pinuse_of_inuse_chunk(m, array_chunk, array_chunk_size);
+ remainder_size = contents_size;
+ }
+
+ /* split out elements */
+ for (i = 0; ; ++i) {
+ marray[i] = chunk2mem(p);
+ if (i != n_elements-1) {
+ if (element_size != 0)
+ size = element_size;
+ else
+ size = request2size(sizes[i]);
+ remainder_size -= size;
+ set_size_and_pinuse_of_inuse_chunk(m, p, size);
+ p = chunk_plus_offset(p, size);
+ }
+ else { /* the final element absorbs any overallocation slop */
+ set_size_and_pinuse_of_inuse_chunk(m, p, remainder_size);
+ break;
+ }
+ }
+
+#if DEBUG
+ if (marray != chunks) {
+ /* final element must have exactly exhausted chunk */
+ if (element_size != 0) {
+ assert(remainder_size == element_size);
+ }
+ else {
+ assert(remainder_size == request2size(sizes[i]));
+ }
+ check_inuse_chunk(m, mem2chunk(marray));
+ }
+ for (i = 0; i != n_elements; ++i)
+ check_inuse_chunk(m, mem2chunk(marray[i]));
+
+#endif /* DEBUG */
+
+ POSTACTION(m);
+ return marray;
+}
+
+/* Try to free all pointers in the given array.
+   Note: this could be made faster by delaying consolidation,
+   at the price of disabling some user integrity checks. We
+ still optimize some consolidations by combining adjacent
+ chunks before freeing, which will occur often if allocated
+ with ialloc or the array is sorted.
+*/
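+/*
+   Illustrative use via the exported wrapper (sizes hypothetical):
+     void* p[3] = { dlmalloc(32), dlmalloc(64), dlmalloc(32) };
+     size_t unfreed = dlbulk_free(p, 3); // frees and NULLs each slot it
+                                         // handles; returns 0 on success
+*/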
+static size_t internal_bulk_free(mstate m, void* array[], size_t nelem) {
+ size_t unfreed = 0;
+ if (!PREACTION(m)) {
+ void** a;
+ void** fence = &(array[nelem]);
+ for (a = array; a != fence; ++a) {
+ void* mem = *a;
+ if (mem != 0) {
+ mchunkptr p = mem2chunk(mem);
+ size_t psize = chunksize(p);
+#if FOOTERS
+ if (get_mstate_for(p) != m) {
+ ++unfreed;
+ continue;
+ }
+#endif
+ check_inuse_chunk(m, p);
+ *a = 0;
+ if (RTCHECK(ok_address(m, p) && ok_inuse(p))) {
+ void ** b = a + 1; /* try to merge with next chunk */
+ mchunkptr next = next_chunk(p);
+ if (b != fence && *b == chunk2mem(next)) {
+ size_t newsize = chunksize(next) + psize;
+ set_inuse(m, p, newsize);
+ *b = chunk2mem(p);
+ }
+ else
+ dispose_chunk(m, p, psize);
+ }
+ else {
+ CORRUPTION_ERROR_ACTION(m);
+ break;
+ }
+ }
+ }
+ if (should_trim(m, m->topsize))
+ sys_trim(m, 0);
+ POSTACTION(m);
+ }
+ return unfreed;
+}
+
+/* Traversal */
+#if MALLOC_INSPECT_ALL
+static void internal_inspect_all(mstate m,
+ void(*handler)(void *start,
+ void *end,
+ size_t used_bytes,
+ void* callback_arg),
+ void* arg) {
+ if (is_initialized(m)) {
+ mchunkptr top = m->top;
+ msegmentptr s;
+ for (s = &m->seg; s != 0; s = s->next) {
+ mchunkptr q = align_as_chunk(s->base);
+ while (segment_holds(s, q) && q->head != FENCEPOST_HEAD) {
+ mchunkptr next = next_chunk(q);
+ size_t sz = chunksize(q);
+ size_t used;
+ void* start;
+ if (is_inuse(q)) {
+ used = sz - CHUNK_OVERHEAD; /* must not be mmapped */
+ start = chunk2mem(q);
+ }
+ else {
+ used = 0;
+ if (is_small(sz)) { /* offset by possible bookkeeping */
+ start = (void*)((char*)q + sizeof(struct malloc_chunk));
+ }
+ else {
+ start = (void*)((char*)q + sizeof(struct malloc_tree_chunk));
+ }
+ }
+ if (start < (void*)next) /* skip if all space is bookkeeping */
+ handler(start, next, used, arg);
+ if (q == top)
+ break;
+ q = next;
+ }
+ }
+ }
+}
+#endif /* MALLOC_INSPECT_ALL */
+
+/* ------------------ Exported realloc, memalign, etc -------------------- */
+
+#if !ONLY_MSPACES
+
+void* dlrealloc(void* oldmem, size_t bytes) {
+ void* mem = 0;
+ if (oldmem == 0) {
+ mem = dlmalloc(bytes);
+ }
+ else if (bytes >= MAX_REQUEST) {
+ MALLOC_FAILURE_ACTION;
+ }
+#ifdef REALLOC_ZERO_BYTES_FREES
+ else if (bytes == 0) {
+ dlfree(oldmem);
+ }
+#endif /* REALLOC_ZERO_BYTES_FREES */
+ else {
+ size_t nb = request2size(bytes);
+ mchunkptr oldp = mem2chunk(oldmem);
+#if ! FOOTERS
+ mstate m = gm;
+#else /* FOOTERS */
+ mstate m = get_mstate_for(oldp);
+ if (!ok_magic(m)) {
+ USAGE_ERROR_ACTION(m, oldmem);
+ return 0;
+ }
+#endif /* FOOTERS */
+ if (!PREACTION(m)) {
+ mchunkptr newp = try_realloc_chunk(m, oldp, nb, 1);
+ POSTACTION(m);
+ if (newp != 0) {
+ check_inuse_chunk(m, newp);
+ mem = chunk2mem(newp);
+ }
+ else {
+ mem = internal_malloc(m, bytes);
+ if (mem != 0) {
+ size_t oc = chunksize(oldp) - overhead_for(oldp);
+ memcpy(mem, oldmem, (oc < bytes)? oc : bytes);
+ internal_free(m, oldmem);
+ }
+ }
+ }
+ }
+ return mem;
+}
+
+void* dlrealloc_in_place(void* oldmem, size_t bytes) {
+ void* mem = 0;
+ if (oldmem != 0) {
+ if (bytes >= MAX_REQUEST) {
+ MALLOC_FAILURE_ACTION;
+ }
+ else {
+ size_t nb = request2size(bytes);
+ mchunkptr oldp = mem2chunk(oldmem);
+#if ! FOOTERS
+ mstate m = gm;
+#else /* FOOTERS */
+ mstate m = get_mstate_for(oldp);
+ if (!ok_magic(m)) {
+ USAGE_ERROR_ACTION(m, oldmem);
+ return 0;
+ }
+#endif /* FOOTERS */
+ if (!PREACTION(m)) {
+ mchunkptr newp = try_realloc_chunk(m, oldp, nb, 0);
+ POSTACTION(m);
+ if (newp == oldp) {
+ check_inuse_chunk(m, newp);
+ mem = oldmem;
+ }
+ }
+ }
+ }
+ return mem;
+}
+
+void* dlmemalign(size_t alignment, size_t bytes) {
+ if (alignment <= MALLOC_ALIGNMENT) {
+ return dlmalloc(bytes);
+ }
+ return internal_memalign(gm, alignment, bytes);
+}
+
+int dlposix_memalign(void** pp, size_t alignment, size_t bytes) {
+ void* mem = 0;
+ if (alignment == MALLOC_ALIGNMENT)
+ mem = dlmalloc(bytes);
+ else {
+ size_t d = alignment / sizeof(void*);
+ size_t r = alignment % sizeof(void*);
+ if (r != 0 || d == 0 || (d & (d-SIZE_T_ONE)) != 0)
+ return EINVAL;
+ else if (bytes <= MAX_REQUEST - alignment) {
+ if (alignment < MIN_CHUNK_SIZE)
+ alignment = MIN_CHUNK_SIZE;
+ mem = internal_memalign(gm, alignment, bytes);
+ }
+ }
+ if (mem == 0)
+ return ENOMEM;
+ else {
+ *pp = mem;
+ return 0;
+ }
+}
+
+void* dlvalloc(size_t bytes) {
+ size_t pagesz;
+ ensure_initialization();
+ pagesz = mparams.page_size;
+ return dlmemalign(pagesz, bytes);
+}
+
+void* dlpvalloc(size_t bytes) {
+ size_t pagesz;
+ ensure_initialization();
+ pagesz = mparams.page_size;
+ return dlmemalign(pagesz, (bytes + pagesz - SIZE_T_ONE) & ~(pagesz - SIZE_T_ONE));
+}
+
+void** dlindependent_calloc(size_t n_elements, size_t elem_size,
+ void* chunks[]) {
+ size_t sz = elem_size; /* serves as 1-element array */
+ return ialloc(gm, n_elements, &sz, 3, chunks);
+}
+
+void** dlindependent_comalloc(size_t n_elements, size_t sizes[],
+ void* chunks[]) {
+ return ialloc(gm, n_elements, sizes, 0, chunks);
+}
+
+size_t dlbulk_free(void* array[], size_t nelem) {
+ return internal_bulk_free(gm, array, nelem);
+}
+
+#if MALLOC_INSPECT_ALL
+void dlmalloc_inspect_all(void(*handler)(void *start,
+ void *end,
+ size_t used_bytes,
+ void* callback_arg),
+ void* arg) {
+ ensure_initialization();
+ if (!PREACTION(gm)) {
+ internal_inspect_all(gm, handler, arg);
+ POSTACTION(gm);
+ }
+}
+#endif /* MALLOC_INSPECT_ALL */
+
+int dlmalloc_trim(size_t pad) {
+ int result = 0;
+ ensure_initialization();
+ if (!PREACTION(gm)) {
+ result = sys_trim(gm, pad);
+ POSTACTION(gm);
+ }
+ return result;
+}
+
+size_t dlmalloc_footprint(void) {
+ return gm->footprint;
+}
+
+size_t dlmalloc_max_footprint(void) {
+ return gm->max_footprint;
+}
+
+size_t dlmalloc_footprint_limit(void) {
+ size_t maf = gm->footprint_limit;
+ return maf == 0 ? MAX_SIZE_T : maf;
+}
+
+size_t dlmalloc_set_footprint_limit(size_t bytes) {
+ size_t result; /* invert sense of 0 */
+ if (bytes == 0)
+ result = granularity_align(1); /* Use minimal size */
+  else if (bytes == MAX_SIZE_T)
+ result = 0; /* disable */
+ else
+ result = granularity_align(bytes);
+ return gm->footprint_limit = result;
+}
+
+#if !NO_MALLINFO
+struct mallinfo dlmallinfo(void) {
+ return internal_mallinfo(gm);
+}
+#endif /* NO_MALLINFO */
+
+#if !NO_MALLOC_STATS
+void dlmalloc_stats() {
+ internal_malloc_stats(gm);
+}
+#endif /* NO_MALLOC_STATS */
+
+int dlmallopt(int param_number, int value) {
+ return change_mparam(param_number, value);
+}
+
+size_t dlmalloc_usable_size(void* mem) {
+ if (mem != 0) {
+ mchunkptr p = mem2chunk(mem);
+ if (is_inuse(p))
+ return chunksize(p) - overhead_for(p);
+ }
+ return 0;
+}
+
+#endif /* !ONLY_MSPACES */
+
+/* ----------------------------- user mspaces ---------------------------- */
+
+#if MSPACES
+
+static mstate init_user_mstate(char* tbase, size_t tsize) {
+ size_t msize = pad_request(sizeof(struct malloc_state));
+ mchunkptr mn;
+ mchunkptr msp = align_as_chunk(tbase);
+ mstate m = (mstate)(chunk2mem(msp));
+ memset(m, 0, msize);
+ (void)INITIAL_LOCK(&m->mutex);
+ msp->head = (msize|INUSE_BITS);
+ m->seg.base = m->least_addr = tbase;
+ m->seg.size = m->footprint = m->max_footprint = tsize;
+ m->magic = mparams.magic;
+ m->release_checks = MAX_RELEASE_CHECK_RATE;
+ m->mflags = mparams.default_mflags;
+ m->extp = 0;
+ m->exts = 0;
+ disable_contiguous(m);
+ init_bins(m);
+ mn = next_chunk(mem2chunk(m));
+ init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) - TOP_FOOT_SIZE);
+ check_top_chunk(m, m->top);
+ return m;
+}
+
+mspace create_mspace(size_t capacity, int locked) {
+ mstate m = 0;
+ size_t msize;
+ ensure_initialization();
+ msize = pad_request(sizeof(struct malloc_state));
+ if (capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) {
+ size_t rs = ((capacity == 0)? mparams.granularity :
+ (capacity + TOP_FOOT_SIZE + msize));
+ size_t tsize = granularity_align(rs);
+ char* tbase = (char*)(CALL_MMAP(tsize));
+ if (tbase != CMFAIL) {
+ m = init_user_mstate(tbase, tsize);
+ m->seg.sflags = USE_MMAP_BIT;
+ set_lock(m, locked);
+ }
+ }
+ return (mspace)m;
+}
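+/*
+   Illustrative lifecycle (sketch): a private, throwaway heap.
+     mspace ms = create_mspace(0, 0); // default capacity, no locking
+     void* p = mspace_malloc(ms, 128);
+     mspace_free(ms, p);
+     destroy_mspace(ms);              // unmaps all its segments at once
+*/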
+
+mspace create_mspace_with_base(void* base, size_t capacity, int locked) {
+ mstate m = 0;
+ size_t msize;
+ ensure_initialization();
+ msize = pad_request(sizeof(struct malloc_state));
+ if (capacity > msize + TOP_FOOT_SIZE &&
+ capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) {
+ m = init_user_mstate((char*)base, capacity);
+ m->seg.sflags = EXTERN_BIT;
+ set_lock(m, locked);
+ }
+ return (mspace)m;
+}
+
+int mspace_track_large_chunks(mspace msp, int enable) {
+ int ret = 0;
+ mstate ms = (mstate)msp;
+ if (!PREACTION(ms)) {
+ if (!use_mmap(ms)) {
+ ret = 1;
+ }
+ if (!enable) {
+ enable_mmap(ms);
+ } else {
+ disable_mmap(ms);
+ }
+ POSTACTION(ms);
+ }
+ return ret;
+}
+
+size_t destroy_mspace(mspace msp) {
+ size_t freed = 0;
+ mstate ms = (mstate)msp;
+ if (ok_magic(ms)) {
+ msegmentptr sp = &ms->seg;
+ (void)DESTROY_LOCK(&ms->mutex); /* destroy before unmapped */
+ while (sp != 0) {
+ char* base = sp->base;
+ size_t size = sp->size;
+ flag_t flag = sp->sflags;
+ (void)base; /* placate people compiling -Wunused-variable */
+ sp = sp->next;
+ if ((flag & USE_MMAP_BIT) && !(flag & EXTERN_BIT) &&
+ CALL_MUNMAP(base, size) == 0)
+ freed += size;
+ }
+ }
+ else {
+ USAGE_ERROR_ACTION(ms,ms);
+ }
+ return freed;
+}
+
+/*
+ mspace versions of routines are near-clones of the global
+ versions. This is not so nice but better than the alternatives.
+*/
+
+void* mspace_malloc(mspace msp, size_t bytes) {
+ mstate ms = (mstate)msp;
+ if (!ok_magic(ms)) {
+ USAGE_ERROR_ACTION(ms,ms);
+ return 0;
+ }
+ if (!PREACTION(ms)) {
+ void* mem;
+ size_t nb;
+ if (bytes <= MAX_SMALL_REQUEST) {
+ bindex_t idx;
+ binmap_t smallbits;
+ nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes);
+ idx = small_index(nb);
+ smallbits = ms->smallmap >> idx;
+
+ if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */
+ mchunkptr b, p;
+ idx += ~smallbits & 1; /* Uses next bin if idx empty */
+ b = smallbin_at(ms, idx);
+ p = b->fd;
+ assert(chunksize(p) == small_index2size(idx));
+ unlink_first_small_chunk(ms, b, p, idx);
+ set_inuse_and_pinuse(ms, p, small_index2size(idx));
+ mem = chunk2mem(p);
+ check_malloced_chunk(ms, mem, nb);
+ goto postaction;
+ }
+
+ else if (nb > ms->dvsize) {
+ if (smallbits != 0) { /* Use chunk in next nonempty smallbin */
+ mchunkptr b, p, r;
+ size_t rsize;
+ bindex_t i;
+ binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx));
+ binmap_t leastbit = least_bit(leftbits);
+ compute_bit2idx(leastbit, i);
+ b = smallbin_at(ms, i);
+ p = b->fd;
+ assert(chunksize(p) == small_index2size(i));
+ unlink_first_small_chunk(ms, b, p, i);
+ rsize = small_index2size(i) - nb;
+ /* Fit here cannot be remainderless if 4byte sizes */
+ if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE)
+ set_inuse_and_pinuse(ms, p, small_index2size(i));
+ else {
+ set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
+ r = chunk_plus_offset(p, nb);
+ set_size_and_pinuse_of_free_chunk(r, rsize);
+ replace_dv(ms, r, rsize);
+ }
+ mem = chunk2mem(p);
+ check_malloced_chunk(ms, mem, nb);
+ goto postaction;
+ }
+
+ else if (ms->treemap != 0 && (mem = tmalloc_small(ms, nb)) != 0) {
+ check_malloced_chunk(ms, mem, nb);
+ goto postaction;
+ }
+ }
+ }
+ else if (bytes >= MAX_REQUEST)
+ nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */
+ else {
+ nb = pad_request(bytes);
+ if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) {
+ check_malloced_chunk(ms, mem, nb);
+ goto postaction;
+ }
+ }
+
+ if (nb <= ms->dvsize) {
+ size_t rsize = ms->dvsize - nb;
+ mchunkptr p = ms->dv;
+ if (rsize >= MIN_CHUNK_SIZE) { /* split dv */
+ mchunkptr r = ms->dv = chunk_plus_offset(p, nb);
+ ms->dvsize = rsize;
+ set_size_and_pinuse_of_free_chunk(r, rsize);
+ set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
+ }
+ else { /* exhaust dv */
+ size_t dvs = ms->dvsize;
+ ms->dvsize = 0;
+ ms->dv = 0;
+ set_inuse_and_pinuse(ms, p, dvs);
+ }
+ mem = chunk2mem(p);
+ check_malloced_chunk(ms, mem, nb);
+ goto postaction;
+ }
+
+ else if (nb < ms->topsize) { /* Split top */
+ size_t rsize = ms->topsize -= nb;
+ mchunkptr p = ms->top;
+ mchunkptr r = ms->top = chunk_plus_offset(p, nb);
+ r->head = rsize | PINUSE_BIT;
+ set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
+ mem = chunk2mem(p);
+ check_top_chunk(ms, ms->top);
+ check_malloced_chunk(ms, mem, nb);
+ goto postaction;
+ }
+
+ mem = sys_alloc(ms, nb);
+
+ postaction:
+ POSTACTION(ms);
+ return mem;
+ }
+
+ return 0;
+}
+
+void mspace_free(mspace msp, void* mem) {
+ if (mem != 0) {
+ mchunkptr p = mem2chunk(mem);
+#if FOOTERS
+ mstate fm = get_mstate_for(p);
+ (void)msp; /* placate people compiling -Wunused */
+#else /* FOOTERS */
+ mstate fm = (mstate)msp;
+#endif /* FOOTERS */
+ if (!ok_magic(fm)) {
+ USAGE_ERROR_ACTION(fm, p);
+ return;
+ }
+ if (!PREACTION(fm)) {
+ check_inuse_chunk(fm, p);
+ if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) {
+ size_t psize = chunksize(p);
+ mchunkptr next = chunk_plus_offset(p, psize);
+ if (!pinuse(p)) {
+ size_t prevsize = p->prev_foot;
+ if (is_mmapped(p)) {
+ psize += prevsize + MMAP_FOOT_PAD;
+ if (CALL_MUNMAP((char*)p - prevsize, psize) == 0)
+ fm->footprint -= psize;
+ goto postaction;
+ }
+ else {
+ mchunkptr prev = chunk_minus_offset(p, prevsize);
+ psize += prevsize;
+ p = prev;
+ if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */
+ if (p != fm->dv) {
+ unlink_chunk(fm, p, prevsize);
+ }
+ else if ((next->head & INUSE_BITS) == INUSE_BITS) {
+ fm->dvsize = psize;
+ set_free_with_pinuse(p, psize, next);
+ goto postaction;
+ }
+ }
+ else
+ goto erroraction;
+ }
+ }
+
+ if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) {
+ if (!cinuse(next)) { /* consolidate forward */
+ if (next == fm->top) {
+ size_t tsize = fm->topsize += psize;
+ fm->top = p;
+ p->head = tsize | PINUSE_BIT;
+ if (p == fm->dv) {
+ fm->dv = 0;
+ fm->dvsize = 0;
+ }
+ if (should_trim(fm, tsize))
+ sys_trim(fm, 0);
+ goto postaction;
+ }
+ else if (next == fm->dv) {
+ size_t dsize = fm->dvsize += psize;
+ fm->dv = p;
+ set_size_and_pinuse_of_free_chunk(p, dsize);
+ goto postaction;
+ }
+ else {
+ size_t nsize = chunksize(next);
+ psize += nsize;
+ unlink_chunk(fm, next, nsize);
+ set_size_and_pinuse_of_free_chunk(p, psize);
+ if (p == fm->dv) {
+ fm->dvsize = psize;
+ goto postaction;
+ }
+ }
+ }
+ else
+ set_free_with_pinuse(p, psize, next);
+
+ if (is_small(psize)) {
+ insert_small_chunk(fm, p, psize);
+ check_free_chunk(fm, p);
+ }
+ else {
+ tchunkptr tp = (tchunkptr)p;
+ insert_large_chunk(fm, tp, psize);
+ check_free_chunk(fm, p);
+ if (--fm->release_checks == 0)
+ release_unused_segments(fm);
+ }
+ goto postaction;
+ }
+ }
+ erroraction:
+ USAGE_ERROR_ACTION(fm, p);
+ postaction:
+ POSTACTION(fm);
+ }
+ }
+}
+
+void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size) {
+ void* mem;
+ size_t req = 0;
+ mstate ms = (mstate)msp;
+ if (!ok_magic(ms)) {
+ USAGE_ERROR_ACTION(ms,ms);
+ return 0;
+ }
+ if (n_elements != 0) {
+ req = n_elements * elem_size;
+ if (((n_elements | elem_size) & ~(size_t)0xffff) &&
+ (req / n_elements != elem_size))
+ req = MAX_SIZE_T; /* force downstream failure on overflow */
+ }
+ mem = internal_malloc(ms, req);
+ if (mem != 0 && calloc_must_clear(mem2chunk(mem)))
+ memset(mem, 0, req);
+ return mem;
+}
+
+void* mspace_realloc(mspace msp, void* oldmem, size_t bytes) {
+ void* mem = 0;
+ if (oldmem == 0) {
+ mem = mspace_malloc(msp, bytes);
+ }
+ else if (bytes >= MAX_REQUEST) {
+ MALLOC_FAILURE_ACTION;
+ }
+#ifdef REALLOC_ZERO_BYTES_FREES
+ else if (bytes == 0) {
+ mspace_free(msp, oldmem);
+ }
+#endif /* REALLOC_ZERO_BYTES_FREES */
+ else {
+ size_t nb = request2size(bytes);
+ mchunkptr oldp = mem2chunk(oldmem);
+#if ! FOOTERS
+ mstate m = (mstate)msp;
+#else /* FOOTERS */
+ mstate m = get_mstate_for(oldp);
+ if (!ok_magic(m)) {
+ USAGE_ERROR_ACTION(m, oldmem);
+ return 0;
+ }
+#endif /* FOOTERS */
+ if (!PREACTION(m)) {
+ mchunkptr newp = try_realloc_chunk(m, oldp, nb, 1);
+ POSTACTION(m);
+ if (newp != 0) {
+ check_inuse_chunk(m, newp);
+ mem = chunk2mem(newp);
+ }
+ else {
+ mem = mspace_malloc(m, bytes);
+ if (mem != 0) {
+ size_t oc = chunksize(oldp) - overhead_for(oldp);
+ memcpy(mem, oldmem, (oc < bytes)? oc : bytes);
+ mspace_free(m, oldmem);
+ }
+ }
+ }
+ }
+ return mem;
+}
+
+void* mspace_realloc_in_place(mspace msp, void* oldmem, size_t bytes) {
+ void* mem = 0;
+ if (oldmem != 0) {
+ if (bytes >= MAX_REQUEST) {
+ MALLOC_FAILURE_ACTION;
+ }
+ else {
+ size_t nb = request2size(bytes);
+ mchunkptr oldp = mem2chunk(oldmem);
+#if ! FOOTERS
+ mstate m = (mstate)msp;
+#else /* FOOTERS */
+ mstate m = get_mstate_for(oldp);
+ (void)msp; /* placate people compiling -Wunused */
+ if (!ok_magic(m)) {
+ USAGE_ERROR_ACTION(m, oldmem);
+ return 0;
+ }
+#endif /* FOOTERS */
+ if (!PREACTION(m)) {
+ mchunkptr newp = try_realloc_chunk(m, oldp, nb, 0);
+ POSTACTION(m);
+ if (newp == oldp) {
+ check_inuse_chunk(m, newp);
+ mem = oldmem;
+ }
+ }
+ }
+ }
+ return mem;
+}
+
+void* mspace_memalign(mspace msp, size_t alignment, size_t bytes) {
+ mstate ms = (mstate)msp;
+ if (!ok_magic(ms)) {
+ USAGE_ERROR_ACTION(ms,ms);
+ return 0;
+ }
+ if (alignment <= MALLOC_ALIGNMENT)
+ return mspace_malloc(msp, bytes);
+ return internal_memalign(ms, alignment, bytes);
+}
+
+void** mspace_independent_calloc(mspace msp, size_t n_elements,
+ size_t elem_size, void* chunks[]) {
+ size_t sz = elem_size; /* serves as 1-element array */
+ mstate ms = (mstate)msp;
+ if (!ok_magic(ms)) {
+ USAGE_ERROR_ACTION(ms,ms);
+ return 0;
+ }
+ return ialloc(ms, n_elements, &sz, 3, chunks);
+}
+
+void** mspace_independent_comalloc(mspace msp, size_t n_elements,
+ size_t sizes[], void* chunks[]) {
+ mstate ms = (mstate)msp;
+ if (!ok_magic(ms)) {
+ USAGE_ERROR_ACTION(ms,ms);
+ return 0;
+ }
+ return ialloc(ms, n_elements, sizes, 0, chunks);
+}
+
+size_t mspace_bulk_free(mspace msp, void* array[], size_t nelem) {
+ return internal_bulk_free((mstate)msp, array, nelem);
+}
+
+#if MALLOC_INSPECT_ALL
+void mspace_inspect_all(mspace msp,
+ void(*handler)(void *start,
+ void *end,
+ size_t used_bytes,
+ void* callback_arg),
+ void* arg) {
+ mstate ms = (mstate)msp;
+ if (ok_magic(ms)) {
+ if (!PREACTION(ms)) {
+ internal_inspect_all(ms, handler, arg);
+ POSTACTION(ms);
+ }
+ }
+ else {
+ USAGE_ERROR_ACTION(ms,ms);
+ }
+}
+#endif /* MALLOC_INSPECT_ALL */
+
+int mspace_trim(mspace msp, size_t pad) {
+ int result = 0;
+ mstate ms = (mstate)msp;
+ if (ok_magic(ms)) {
+ if (!PREACTION(ms)) {
+ result = sys_trim(ms, pad);
+ POSTACTION(ms);
+ }
+ }
+ else {
+ USAGE_ERROR_ACTION(ms,ms);
+ }
+ return result;
+}
+
+#if !NO_MALLOC_STATS
+void mspace_malloc_stats(mspace msp) {
+ mstate ms = (mstate)msp;
+ if (ok_magic(ms)) {
+ internal_malloc_stats(ms);
+ }
+ else {
+ USAGE_ERROR_ACTION(ms,ms);
+ }
+}
+#endif /* NO_MALLOC_STATS */
+
+size_t mspace_footprint(mspace msp) {
+ size_t result = 0;
+ mstate ms = (mstate)msp;
+ if (ok_magic(ms)) {
+ result = ms->footprint;
+ }
+ else {
+ USAGE_ERROR_ACTION(ms,ms);
+ }
+ return result;
+}
+
+size_t mspace_max_footprint(mspace msp) {
+ size_t result = 0;
+ mstate ms = (mstate)msp;
+ if (ok_magic(ms)) {
+ result = ms->max_footprint;
+ }
+ else {
+ USAGE_ERROR_ACTION(ms,ms);
+ }
+ return result;
+}
+
+size_t mspace_footprint_limit(mspace msp) {
+ size_t result = 0;
+ mstate ms = (mstate)msp;
+ if (ok_magic(ms)) {
+ size_t maf = ms->footprint_limit;
+ result = (maf == 0) ? MAX_SIZE_T : maf;
+ }
+ else {
+ USAGE_ERROR_ACTION(ms,ms);
+ }
+ return result;
+}
+
+size_t mspace_set_footprint_limit(mspace msp, size_t bytes) {
+ size_t result = 0;
+ mstate ms = (mstate)msp;
+ if (ok_magic(ms)) {
+ if (bytes == 0)
+ result = granularity_align(1); /* Use minimal size */
+    else if (bytes == MAX_SIZE_T)
+ result = 0; /* disable */
+ else
+ result = granularity_align(bytes);
+ ms->footprint_limit = result;
+ }
+ else {
+ USAGE_ERROR_ACTION(ms,ms);
+ }
+ return result;
+}
+
+#if !NO_MALLINFO
+struct mallinfo mspace_mallinfo(mspace msp) {
+ mstate ms = (mstate)msp;
+ if (!ok_magic(ms)) {
+ USAGE_ERROR_ACTION(ms,ms);
+ }
+ return internal_mallinfo(ms);
+}
+#endif /* NO_MALLINFO */
+
+size_t mspace_usable_size(const void* mem) {
+ if (mem != 0) {
+ mchunkptr p = mem2chunk(mem);
+ if (is_inuse(p))
+ return chunksize(p) - overhead_for(p);
+ }
+ return 0;
+}
+
+int mspace_mallopt(int param_number, int value) {
+ return change_mparam(param_number, value);
+}
+
+#endif /* MSPACES */
+
+
+/* -------------------- Alternative MORECORE functions ------------------- */
+
+/*
+ Guidelines for creating a custom version of MORECORE:
+
+ * For best performance, MORECORE should allocate in multiples of pagesize.
+ * MORECORE may allocate more memory than requested. (Or even less,
+ but this will usually result in a malloc failure.)
+ * MORECORE must not allocate memory when given argument zero, but
+ instead return one past the end address of memory from previous
+ nonzero call.
+ * For best performance, consecutive calls to MORECORE with positive
+ arguments should return increasing addresses, indicating that
+ space has been contiguously extended.
+ * Even though consecutive calls to MORECORE need not return contiguous
+ addresses, it must be OK for malloc'ed chunks to span multiple
+ regions in those cases where they do happen to be contiguous.
+ * MORECORE need not handle negative arguments -- it may instead
+ just return MFAIL when given negative arguments.
+ Negative arguments are always multiples of pagesize. MORECORE
+ must not misinterpret negative args as large positive unsigned
+ args. You can suppress all such calls from even occurring by defining
+  MORECORE_CANNOT_TRIM.
+
+ As an example alternative MORECORE, here is a custom allocator
+  kindly contributed for pre-OS X Mac OS. It uses virtually but not
+ necessarily physically contiguous non-paged memory (locked in,
+ present and won't get swapped out). You can use it by uncommenting
+ this section, adding some #includes, and setting up the appropriate
+ defines above:
+
+ #define MORECORE osMoreCore
+
+ There is also a shutdown routine that should somehow be called for
+ cleanup upon program exit.
+
+ #define MAX_POOL_ENTRIES 100
+ #define MINIMUM_MORECORE_SIZE (64 * 1024U)
+ static int next_os_pool;
+ void *our_os_pools[MAX_POOL_ENTRIES];
+
+ void *osMoreCore(int size)
+ {
+ void *ptr = 0;
+ static void *sbrk_top = 0;
+
+ if (size > 0)
+ {
+ if (size < MINIMUM_MORECORE_SIZE)
+ size = MINIMUM_MORECORE_SIZE;
+ if (CurrentExecutionLevel() == kTaskLevel)
+ ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
+ if (ptr == 0)
+ {
+ return (void *) MFAIL;
+ }
+ // save ptrs so they can be freed during cleanup
+ our_os_pools[next_os_pool] = ptr;
+ next_os_pool++;
+ ptr = (void *) ((((size_t) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
+ sbrk_top = (char *) ptr + size;
+ return ptr;
+ }
+ else if (size < 0)
+ {
+ // we don't currently support shrink behavior
+ return (void *) MFAIL;
+ }
+ else
+ {
+ return sbrk_top;
+ }
+ }
+
+ // cleanup any allocated memory pools
+ // called as last thing before shutting down driver
+
+ void osCleanupMem(void)
+ {
+ void **ptr;
+
+ for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
+ if (*ptr)
+ {
+ PoolDeallocate(*ptr);
+ *ptr = 0;
+ }
+ }
+
+*/
+
+
+/* -----------------------------------------------------------------------
+History:
+ v2.8.6 Wed Aug 29 06:57:58 2012 Doug Lea
+ * fix bad comparison in dlposix_memalign
+ * don't reuse adjusted asize in sys_alloc
+ * add LOCK_AT_FORK -- thanks to Kirill Artamonov for the suggestion
+ * reduce compiler warnings -- thanks to all who reported/suggested these
+
+ v2.8.5 Sun May 22 10:26:02 2011 Doug Lea (dl at gee)
+ * Always perform unlink checks unless INSECURE
+ * Add posix_memalign.
+ * Improve realloc to expand in more cases; expose realloc_in_place.
+ Thanks to Peter Buhr for the suggestion.
+ * Add footprint_limit, inspect_all, bulk_free. Thanks
+ to Barry Hayes and others for the suggestions.
+ * Internal refactorings to avoid calls while holding locks
+ * Use non-reentrant locks by default. Thanks to Roland McGrath
+ for the suggestion.
+ * Small fixes to mspace_destroy, reset_on_error.
+ * Various configuration extensions/changes. Thanks
+ to all who contributed these.
+
+ V2.8.4a Thu Apr 28 14:39:43 2011 (dl at gee.cs.oswego.edu)
+ * Update Creative Commons URL
+
+ V2.8.4 Wed May 27 09:56:23 2009 Doug Lea (dl at gee)
+ * Use zeros instead of prev foot for is_mmapped
+ * Add mspace_track_large_chunks; thanks to Jean Brouwers
+ * Fix set_inuse in internal_realloc; thanks to Jean Brouwers
+ * Fix insufficient sys_alloc padding when using 16byte alignment
+ * Fix bad error check in mspace_footprint
+ * Adaptations for ptmalloc; thanks to Wolfram Gloger.
+ * Reentrant spin locks; thanks to Earl Chew and others
+ * Win32 improvements; thanks to Niall Douglas and Earl Chew
+ * Add NO_SEGMENT_TRAVERSAL and MAX_RELEASE_CHECK_RATE options
+ * Extension hook in malloc_state
+ * Various small adjustments to reduce warnings on some compilers
+ * Various configuration extensions/changes for more platforms. Thanks
+ to all who contributed these.
+
+ V2.8.3 Thu Sep 22 11:16:32 2005 Doug Lea (dl at gee)
+ * Add max_footprint functions
+ * Ensure all appropriate literals are size_t
+ * Fix conditional compilation problem for some #define settings
+ * Avoid concatenating segments with the one provided
+ in create_mspace_with_base
+ * Rename some variables to avoid compiler shadowing warnings
+ * Use explicit lock initialization.
+ * Better handling of sbrk interference.
+ * Simplify and fix segment insertion, trimming and mspace_destroy
+ * Reinstate REALLOC_ZERO_BYTES_FREES option from 2.7.x
+ * Thanks especially to Dennis Flanagan for help on these.
+
+ V2.8.2 Sun Jun 12 16:01:10 2005 Doug Lea (dl at gee)
+ * Fix memalign brace error.
+
+ V2.8.1 Wed Jun 8 16:11:46 2005 Doug Lea (dl at gee)
+ * Fix improper #endif nesting in C++
+ * Add explicit casts needed for C++
+
+ V2.8.0 Mon May 30 14:09:02 2005 Doug Lea (dl at gee)
+ * Use trees for large bins
+ * Support mspaces
+ * Use segments to unify sbrk-based and mmap-based system allocation,
+ removing need for emulation on most platforms without sbrk.
+ * Default safety checks
+ * Optional footer checks. Thanks to William Robertson for the idea.
+ * Internal code refactoring
+ * Incorporate suggestions and platform-specific changes.
+ Thanks to Dennis Flanagan, Colin Plumb, Niall Douglas,
+ Aaron Bachmann, Emery Berger, and others.
+ * Speed up non-fastbin processing enough to remove fastbins.
+ * Remove useless cfree() to avoid conflicts with other apps.
+ * Remove internal memcpy, memset. Compilers handle builtins better.
+ * Remove some options that no one ever used and rename others.
+
+ V2.7.2 Sat Aug 17 09:07:30 2002 Doug Lea (dl at gee)
+ * Fix malloc_state bitmap array misdeclaration
+
+ V2.7.1 Thu Jul 25 10:58:03 2002 Doug Lea (dl at gee)
+ * Allow tuning of FIRST_SORTED_BIN_SIZE
+ * Use PTR_UINT as type for all ptr->int casts. Thanks to John Belmonte.
+ * Better detection and support for non-contiguousness of MORECORE.
+ Thanks to Andreas Mueller, Conal Walsh, and Wolfram Gloger
+ * Bypass most of malloc if no frees. Thanks To Emery Berger.
+      * Fix freeing of old top non-contiguous chunk in sysmalloc.
+ * Raised default trim and map thresholds to 256K.
+ * Fix mmap-related #defines. Thanks to Lubos Lunak.
+ * Fix copy macros; added LACKS_FCNTL_H. Thanks to Neal Walfield.
+ * Branch-free bin calculation
+ * Default trim and mmap thresholds now 256K.
+
+ V2.7.0 Sun Mar 11 14:14:06 2001 Doug Lea (dl at gee)
+ * Introduce independent_comalloc and independent_calloc.
+ Thanks to Michael Pachos for motivation and help.
+ * Make optional .h file available
+ * Allow > 2GB requests on 32bit systems.
+ * new WIN32 sbrk, mmap, munmap, lock code from <Walter@GeNeSys-e.de>.
+ Thanks also to Andreas Mueller <a.mueller at paradatec.de>,
+ and Anonymous.
+ * Allow override of MALLOC_ALIGNMENT (Thanks to Ruud Waij for
+ helping test this.)
+ * memalign: check alignment arg
+ * realloc: don't try to shift chunks backwards, since this
+ leads to more fragmentation in some programs and doesn't
+ seem to help in any others.
+ * Collect all cases in malloc requiring system memory into sysmalloc
+ * Use mmap as backup to sbrk
+ * Place all internal state in malloc_state
+ * Introduce fastbins (although similar to 2.5.1)
+ * Many minor tunings and cosmetic improvements
+ * Introduce USE_PUBLIC_MALLOC_WRAPPERS, USE_MALLOC_LOCK
+ * Introduce MALLOC_FAILURE_ACTION, MORECORE_CONTIGUOUS
+ Thanks to Tony E. Bennett <tbennett@nvidia.com> and others.
+ * Include errno.h to support default failure action.
+
+ V2.6.6 Sun Dec 5 07:42:19 1999 Doug Lea (dl at gee)
+ * return null for negative arguments
+ * Added Several WIN32 cleanups from Martin C. Fong <mcfong at yahoo.com>
+ * Add 'LACKS_SYS_PARAM_H' for those systems without 'sys/param.h'
+ (e.g. WIN32 platforms)
+ * Cleanup header file inclusion for WIN32 platforms
+ * Cleanup code to avoid Microsoft Visual C++ compiler complaints
+ * Add 'USE_DL_PREFIX' to quickly allow co-existence with existing
+ memory allocation routines
+ * Set 'malloc_getpagesize' for WIN32 platforms (needs more work)
+ * Use 'assert' rather than 'ASSERT' in WIN32 code to conform to
+ usage of 'assert' in non-WIN32 code
+ * Improve WIN32 'sbrk()' emulation's 'findRegion()' routine to
+ avoid infinite loop
+ * Always call 'fREe()' rather than 'free()'
+
+ V2.6.5 Wed Jun 17 15:57:31 1998 Doug Lea (dl at gee)
+ * Fixed ordering problem with boundary-stamping
+
+ V2.6.3 Sun May 19 08:17:58 1996 Doug Lea (dl at gee)
+ * Added pvalloc, as recommended by H.J. Liu
+ * Added 64bit pointer support mainly from Wolfram Gloger
+ * Added anonymously donated WIN32 sbrk emulation
+ * Malloc, calloc, getpagesize: add optimizations from Raymond Nijssen
+ * malloc_extend_top: fix mask error that caused wastage after
+ foreign sbrks
+ * Add linux mremap support code from HJ Liu
+
+ V2.6.2 Tue Dec 5 06:52:55 1995 Doug Lea (dl at gee)
+ * Integrated most documentation with the code.
+ * Add support for mmap, with help from
+ Wolfram Gloger (Gloger@lrz.uni-muenchen.de).
+ * Use last_remainder in more cases.
+ * Pack bins using idea from colin@nyx10.cs.du.edu
+      * Use ordered bins instead of best-fit threshold
+ * Eliminate block-local decls to simplify tracing and debugging.
+ * Support another case of realloc via move into top
+      * Fix error occurring when initial sbrk_base not word-aligned.
+ * Rely on page size for units instead of SBRK_UNIT to
+ avoid surprises about sbrk alignment conventions.
+ * Add mallinfo, mallopt. Thanks to Raymond Nijssen
+ (raymond@es.ele.tue.nl) for the suggestion.
+ * Add `pad' argument to malloc_trim and top_pad mallopt parameter.
+ * More precautions for cases where other routines call sbrk,
+ courtesy of Wolfram Gloger (Gloger@lrz.uni-muenchen.de).
+ * Added macros etc., allowing use in linux libc from
+ H.J. Lu (hjl@gnu.ai.mit.edu)
+ * Inverted this history list
+
+ V2.6.1 Sat Dec 2 14:10:57 1995 Doug Lea (dl at gee)
+ * Re-tuned and fixed to behave more nicely with V2.6.0 changes.
+ * Removed all preallocation code since under current scheme
+ the work required to undo bad preallocations exceeds
+ the work saved in good cases for most test programs.
+ * No longer use return list or unconsolidated bins since
+ no scheme using them consistently outperforms those that don't
+ given above changes.
+ * Use best fit for very large chunks to prevent some worst-cases.
+ * Added some support for debugging
+
+ V2.6.0 Sat Nov 4 07:05:23 1995 Doug Lea (dl at gee)
+ * Removed footers when chunks are in use. Thanks to
+ Paul Wilson (wilson@cs.texas.edu) for the suggestion.
+
+ V2.5.4 Wed Nov 1 07:54:51 1995 Doug Lea (dl at gee)
+ * Added malloc_trim, with help from Wolfram Gloger
+ (wmglo@Dent.MED.Uni-Muenchen.DE).
+
+ V2.5.3 Tue Apr 26 10:16:01 1994 Doug Lea (dl at g)
+
+ V2.5.2 Tue Apr 5 16:20:40 1994 Doug Lea (dl at g)
+ * realloc: try to expand in both directions
+ * malloc: swap order of clean-bin strategy;
+ * realloc: only conditionally expand backwards
+ * Try not to scavenge used bins
+ * Use bin counts as a guide to preallocation
+ * Occasionally bin return list chunks in first scan
+ * Add a few optimizations from colin@nyx10.cs.du.edu
+
+ V2.5.1 Sat Aug 14 15:40:43 1993 Doug Lea (dl at g)
+ * faster bin computation & slightly different binning
+ * merged all consolidations to one part of malloc proper
+ (eliminating old malloc_find_space & malloc_clean_bin)
+ * Scan 2 returns chunks (not just 1)
+ * Propagate failure in realloc if malloc returns 0
+ * Add stuff to allow compilation on non-ANSI compilers
+ from kpv@research.att.com
+
+ V2.5 Sat Aug 7 07:41:59 1993 Doug Lea (dl at g.oswego.edu)
+ * removed potential for odd address access in prev_chunk
+ * removed dependency on getpagesize.h
+ * misc cosmetics and a bit more internal documentation
+ * anticosmetics: mangled names in macros to evade debugger strangeness
+ * tested on sparc, hp-700, dec-mips, rs6000
+ with gcc & native cc (hp, dec only) allowing
+ Detlefs & Zorn comparison study (in SIGPLAN Notices.)
+
+ Trial version Fri Aug 28 13:14:29 1992 Doug Lea (dl at g.oswego.edu)
+ * Based loosely on libg++-1.2X malloc. (It retains some of the overall
+ structure of old version, but most details differ.)
+
+*/
diff --git a/tools/src/dlmalloc/malloc_config.h b/tools/src/dlmalloc/malloc_config.h
new file mode 100644
index 0000000..7616baf
--- /dev/null
+++ b/tools/src/dlmalloc/malloc_config.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: MIT */
+
+#include <string.h>
+
+#include "../heapblock.h"
+#include "../utils.h"
+
+#define HAVE_MORECORE 1
+#define HAVE_MMAP 0
+#define MORECORE sbrk
+// This is optimal; dlmalloc copes with other users of sbrk/MORECORE gracefully, and heapblock
+// guarantees contiguous returns if called consecutively.
+#define MORECORE_CONTIGUOUS 1
+#define MALLOC_ALIGNMENT 16
+#define ABORT panic("dlmalloc: internal error\n")
+#define NO_MALLINFO 1
+#define NO_MALLOC_STATS 1
+#define malloc_getpagesize 16384
+#define LACKS_FCNTL_H 1
+#define LACKS_SYS_MMAN_H 1
+#define LACKS_SYS_PARAM_H 1
+#define LACKS_SYS_TYPES_H 1
+#define LACKS_STRINGS_H 1
+#define LACKS_STRING_H 1
+#define LACKS_STDLIB_H 1
+#define LACKS_SCHED_H 1
+#define LACKS_TIME_H 1
+#define LACKS_UNISTD_H 1
+#define MALLOC_FAILURE_ACTION panic("dlmalloc: out of memory\n");
+
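+// Minimal MORECORE shim over heapblock (informal note): a positive increment
+// bump-allocates, a negative one returns (void *)-1 (MFAIL) since trimming is
+// unsupported, and, assuming heapblock_alloc(0) returns the current heap top
+// without advancing it, an increment of zero satisfies the MORECORE rule of
+// returning the current end of memory.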
+static void *sbrk(intptr_t inc)
+{
+ if (inc < 0)
+ return (void *)-1;
+
+ return heapblock_alloc(inc);
+}
diff --git a/tools/src/exception.c b/tools/src/exception.c
new file mode 100644
index 0000000..e849456
--- /dev/null
+++ b/tools/src/exception.c
@@ -0,0 +1,388 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "exception.h"
+#include "aic.h"
+#include "aic_regs.h"
+#include "cpu_regs.h"
+#include "gxf.h"
+#include "iodev.h"
+#include "memory.h"
+#include "uart.h"
+#include "utils.h"
+
+#define EL0_STACK_SIZE 0x4000
+
+u8 el0_stack[EL0_STACK_SIZE] ALIGNED(64);
+void *el0_stack_base = (void *)(u64)(&el0_stack[EL0_STACK_SIZE]);
+
+extern char _vectors_start[0];
+extern char _el1_vectors_start[0];
+
+volatile enum exc_guard_t exc_guard = GUARD_OFF;
+volatile int exc_count = 0;
+
+void el0_ret(void);
+void el1_ret(void);
+
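+// SPSR_ELx.M[3:0] decoded to a human-readable exception source; the GL table is
+// used when ASPSR indicates the exception came from Guarded Execution mode.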
+static char *m_table[0x10] = {
+ [0x00] = "EL0t", //
+ [0x04] = "EL1t", //
+ [0x05] = "EL1h", //
+ [0x08] = "EL2t", //
+ [0x09] = "EL2h", //
+};
+
+static char *gl_m_table[0x10] = {
+ [0x00] = "GL0t", //
+ [0x04] = "GL1t", //
+ [0x05] = "GL1h", //
+ [0x08] = "GL2t", //
+ [0x09] = "GL2h", //
+};
+
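+// ESR_ELx.EC (bits [31:26]) decoded to a human-readable exception class.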
+static char *ec_table[0x40] = {
+ [0x00] = "unknown",
+ [0x01] = "wf*",
+ [0x03] = "c15 mcr/mrc",
+ [0x04] = "c15 mcrr/mrrc",
+ [0x05] = "c14 mcr/mrc",
+ [0x06] = "ldc/stc",
+ [0x07] = "FP off",
+ [0x08] = "VMRS access",
+ [0x09] = "PAC off",
+ [0x0a] = "ld/st64b",
+ [0x0c] = "c14 mrrc",
+ [0x0d] = "branch target",
+ [0x0e] = "illegal state",
+ [0x11] = "svc in a32",
+ [0x12] = "hvc in a32",
+ [0x13] = "smc in a32",
+ [0x15] = "svc in a64",
+ [0x16] = "hvc in a64",
+ [0x17] = "smc in a64",
+ [0x18] = "other mcr/mrc/sys",
+ [0x19] = "SVE off",
+ [0x1a] = "eret",
+ [0x1c] = "PAC failure",
+ [0x20] = "instruction abort (lower)",
+ [0x21] = "instruction abort (current)",
+ [0x22] = "pc misaligned",
+ [0x24] = "data abort (lower)",
+ [0x25] = "data abort (current)",
+ [0x26] = "sp misaligned",
+ [0x28] = "FP exception (a32)",
+ [0x2c] = "FP exception (a64)",
+ [0x2f] = "SError",
+ [0x30] = "BP (lower)",
+ [0x31] = "BP (current)",
+ [0x32] = "step (lower)",
+ [0x33] = "step (current)",
+ [0x34] = "watchpoint (lower)",
+ [0x35] = "watchpoint (current)",
+ [0x38] = "bkpt (a32)",
+ [0x3a] = "vector catch (a32)",
+ [0x3c] = "brk (a64)",
+};
+
+static const char *get_exception_source(u64 spsr)
+{
+ u64 aspsr = in_gl12() ? mrs(SYS_IMP_APL_ASPSR_GL1) : 0;
+ const char *m_desc = NULL;
+
+ if (aspsr & 1)
+ m_desc = gl_m_table[spsr & 0xf];
+ else
+ m_desc = m_table[spsr & 0xf];
+
+ if (!m_desc)
+ m_desc = "?";
+
+ return m_desc;
+}
+
+static const char *get_exception_level(void)
+{
+ u64 lvl = mrs(CurrentEL);
+
+ if (in_gl12()) {
+ if (lvl == 0x04)
+ return "GL1";
+ else if (lvl == 0x08)
+ return "GL2";
+ } else {
+ if (lvl == 0x04)
+ return "EL1";
+ else if (lvl == 0x08)
+ return "EL2";
+ }
+
+ return "?";
+}
+
+void exception_initialize(void)
+{
+ msr(VBAR_EL1, _vectors_start);
+
+ // Clear FIQ sources
+ msr(CNTP_CTL_EL0, 7L);
+ msr(CNTV_CTL_EL0, 7L);
+ if (in_el2()) {
+ msr(CNTP_CTL_EL02, 7L);
+ msr(CNTV_CTL_EL02, 7L);
+ }
+ reg_clr(SYS_IMP_APL_PMCR0, PMCR0_IACT | PMCR0_IMODE_MASK);
+ reg_clr(SYS_IMP_APL_UPMCR0, UPMCR0_IMODE_MASK);
+ msr(SYS_IMP_APL_IPI_SR_EL1, IPI_SR_PENDING);
+
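+ // DAIF mask bits: D[9] A[8] I[7] F[6]; 3 << 6 masks IRQ+FIQ, 7 << 6 also masks SError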
+ if (is_primary_core())
+ msr(DAIF, 0 << 6); // Enable SError, IRQ and FIQ
+ else
+ msr(DAIF, 3 << 6); // Disable IRQ and FIQ
+
+ if (in_el2()) {
+ // Set up a sane HCR_EL2
+ msr(HCR_EL2, (BIT(41) | // API
+ BIT(40) | // APK
+ BIT(37) | // TEA
+ BIT(34) | // E2H
+ BIT(31) | // RW
+ BIT(27) | // TGE
+ BIT(5) | // AMO
+ BIT(4) | // IMO
+ BIT(3))); // FMO
+ // Set up exception forwarding from EL1
+ msr(VBAR_EL12, _el1_vectors_start);
+ sysop("isb");
+ }
+}
+
+void exception_shutdown(void)
+{
+ msr(DAIF, 7 << 6); // Disable SError, IRQ and FIQ
+}
+
+void print_regs(u64 *regs, int el12)
+{
+ bool in_gl;
+ u64 sp = ((u64)(regs)) + 256;
+
+ in_gl = in_gl12();
+
+ u64 spsr = in_gl ? mrs(SYS_IMP_APL_SPSR_GL1) : (el12 ? mrs(SPSR_EL12) : mrs(SPSR_EL1));
+
+ printf("Exception taken from %s\n", get_exception_source(spsr));
+ printf("Running in %s\n", get_exception_level());
+ printf("MPIDR: 0x%lx\n", mrs(MPIDR_EL1));
+ printf("Registers: (@%p)\n", regs);
+ printf(" x0-x3: %016lx %016lx %016lx %016lx\n", regs[0], regs[1], regs[2], regs[3]);
+ printf(" x4-x7: %016lx %016lx %016lx %016lx\n", regs[4], regs[5], regs[6], regs[7]);
+ printf(" x8-x11: %016lx %016lx %016lx %016lx\n", regs[8], regs[9], regs[10], regs[11]);
+ printf("x12-x15: %016lx %016lx %016lx %016lx\n", regs[12], regs[13], regs[14], regs[15]);
+ printf("x16-x19: %016lx %016lx %016lx %016lx\n", regs[16], regs[17], regs[18], regs[19]);
+ printf("x20-x23: %016lx %016lx %016lx %016lx\n", regs[20], regs[21], regs[22], regs[23]);
+ printf("x24-x27: %016lx %016lx %016lx %016lx\n", regs[24], regs[25], regs[26], regs[27]);
+ printf("x28-x30: %016lx %016lx %016lx\n", regs[28], regs[29], regs[30]);
+
+ u64 elr = in_gl ? mrs(SYS_IMP_APL_ELR_GL1) : (el12 ? mrs(ELR_EL12) : mrs(ELR_EL1));
+ u64 esr = in_gl ? mrs(SYS_IMP_APL_ESR_GL1) : (el12 ? mrs(ESR_EL12) : mrs(ESR_EL1));
+ u64 far = in_gl ? mrs(SYS_IMP_APL_FAR_GL1) : (el12 ? mrs(FAR_EL12) : mrs(FAR_EL1));
+
+ printf("PC: 0x%lx (rel: 0x%lx)\n", elr, elr - (u64)_base);
+ printf("SP: 0x%lx\n", sp);
+ printf("SPSR: 0x%lx\n", spsr);
+ if (in_gl12()) {
+ printf("ASPSR: 0x%lx\n", mrs(SYS_IMP_APL_ASPSR_GL1));
+ }
+ printf("FAR: 0x%lx\n", far);
+
+ const char *ec_desc = ec_table[(esr >> 26) & 0x3f];
+ printf("ESR: 0x%lx (%s)\n", esr, ec_desc ? ec_desc : "?");
+
+ u64 sts = mrs(SYS_IMP_APL_L2C_ERR_STS);
+ printf("L2C_ERR_STS: 0x%lx\n", sts);
+ printf("L2C_ERR_ADR: 0x%lx\n", mrs(SYS_IMP_APL_L2C_ERR_ADR));
+ printf("L2C_ERR_INF: 0x%lx\n", mrs(SYS_IMP_APL_L2C_ERR_INF));
+ msr(SYS_IMP_APL_L2C_ERR_STS, sts);
+
+ if (is_ecore()) {
+ printf("E_LSU_ERR_STS: 0x%lx\n", mrs(SYS_IMP_APL_E_LSU_ERR_STS));
+ printf("E_FED_ERR_STS: 0x%lx\n", mrs(SYS_IMP_APL_E_FED_ERR_STS));
+ printf("E_MMU_ERR_STS: 0x%lx\n", mrs(SYS_IMP_APL_E_MMU_ERR_STS));
+ } else {
+ printf("LSU_ERR_STS: 0x%lx\n", mrs(SYS_IMP_APL_LSU_ERR_STS));
+ printf("FED_ERR_STS: 0x%lx\n", mrs(SYS_IMP_APL_FED_ERR_STS));
+ printf("MMU_ERR_STS: 0x%lx\n", mrs(SYS_IMP_APL_MMU_ERR_STS));
+ }
+}
+
+void exc_sync(u64 *regs)
+{
+ u32 insn;
+ int el12 = 0;
+ bool in_gl = in_gl12();
+
+ u64 spsr = in_gl ? mrs(SYS_IMP_APL_SPSR_GL1) : mrs(SPSR_EL1);
+ u64 esr = in_gl ? mrs(SYS_IMP_APL_ESR_GL1) : mrs(ESR_EL1);
+ u64 elr = in_gl ? mrs(SYS_IMP_APL_ELR_GL1) : mrs(ELR_EL1);
+
+ if ((spsr & 0xf) == 0 && ((esr >> 26) & 0x3f) == 0x3c) {
+ // On clean EL0 return, let the normal exception return
+ // path take us back to the return thunk.
+ msr(SPSR_EL1, 0x09); // EL2h
+ msr(ELR_EL1, el0_ret);
+ return;
+ }
+
+ if (in_el2() && !in_gl12() && (spsr & 0xf) == 5 && ((esr >> 26) & 0x3f) == 0x16) {
+ // Hypercall
+ u32 imm = mrs(ESR_EL2) & 0xffff;
+ switch (imm) {
+ case 0:
+ // On clean EL1 return, let the normal exception return
+ // path take us back to the return thunk.
+ msr(SPSR_EL2, 0x09); // EL2h
+ msr(ELR_EL2, el1_ret);
+ return;
+ case 0x10 ... 0x1f:
+ if (!(exc_guard & GUARD_SILENT))
+ printf("EL1 Exception: 0x%x\n", imm);
+ // Short-circuit the hypercall and handle the EL1 exception
+ el12 = 1;
+ msr(SPSR_EL2, mrs(SPSR_EL12));
+ msr(ELR_EL2, mrs(ELR_EL12));
+ break;
+ default:
+ printf("Unknown HVC: 0x%x\n", imm);
+ break;
+ }
+ } else {
+ if (!(exc_guard & GUARD_SILENT))
+ printf("Exception: SYNC\n");
+ }
+
+ sysop("isb");
+ sysop("dsb sy");
+
+ if (!(exc_guard & GUARD_SILENT))
+ print_regs(regs, el12);
+
+ u64 l2c_err_sts = mrs(SYS_IMP_APL_L2C_ERR_STS);
+ msr(SYS_IMP_APL_L2C_ERR_STS, l2c_err_sts); // Clear the L2C_ERR flag bits
+
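+ // exc_guard selects the recovery policy: skip the faulting instruction, fake the
+ // load result with a poison value, or abort back to the caller.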
+ switch (exc_guard & GUARD_TYPE_MASK) {
+ case GUARD_SKIP:
+ elr += 4;
+ break;
+ case GUARD_MARK:
+ // Assuming this is a load or store, dest reg is in low bits
+ insn = read32(elr);
+ regs[insn & 0x1f] = 0xacce5515abad1dea;
+ elr += 4;
+ break;
+ case GUARD_RETURN:
+ regs[0] = 0xacce5515abad1dea;
+ elr = regs[30];
+ exc_guard = GUARD_OFF;
+ break;
+ case GUARD_OFF:
+ default:
+ printf("Unhandled exception, rebooting...\n");
+ flush_and_reboot();
+ }
+
+ exc_count++;
+
+ if (!(exc_guard & GUARD_SILENT))
+ printf("Recovering from exception (ELR=0x%lx)\n", elr);
+ if (in_gl)
+ msr(SYS_IMP_APL_ELR_GL1, elr);
+ else
+ msr(ELR_EL1, elr);
+
+ sysop("isb");
+ sysop("dsb sy");
+}
+
+void exc_irq(u64 *regs)
+{
+ u32 reason = aic_ack();
+
+ printf("Exception: IRQ (from %s) die: %lu type: %lu num: %lu mpidr: %lx\n",
+ get_exception_source(0), FIELD_GET(AIC_EVENT_DIE, reason),
+ FIELD_GET(AIC_EVENT_TYPE, reason), FIELD_GET(AIC_EVENT_NUM, reason), mrs(MPIDR_EL1));
+
+ UNUSED(regs);
+ // print_regs(regs);
+}
+
+void exc_fiq(u64 *regs)
+{
+ printf("Exception: FIQ (from %s)\n", get_exception_source(0));
+
+ u64 reg = mrs(CNTP_CTL_EL0);
+ if (reg == 0x5) {
+ printf(" PHYS timer IRQ, masking\n");
+ msr(CNTP_CTL_EL0, 7L);
+ }
+
+ reg = mrs(CNTV_CTL_EL0);
+ if (reg == 0x5) {
+ printf(" VIRT timer IRQ, masking\n");
+ msr(CNTV_CTL_EL0, 7L);
+ }
+
+ if (in_el2()) {
+ reg = mrs(CNTP_CTL_EL02);
+ if (reg == 0x5) {
+ printf(" PHYS EL02 timer IRQ, masking\n");
+ msr(CNTP_CTL_EL02, 7L);
+ }
+ reg = mrs(CNTV_CTL_EL02);
+ if (reg == 0x5) {
+ printf(" VIRT EL02 timer IRQ, masking\n");
+ msr(CNTV_CTL_EL02, 7L);
+ }
+ }
+
+ reg = mrs(SYS_IMP_APL_PMCR0);
+ if ((reg & (PMCR0_IMODE_MASK | PMCR0_IACT)) == (PMCR0_IMODE_FIQ | PMCR0_IACT)) {
+ printf(" PMC IRQ, masking\n");
+ reg_clr(SYS_IMP_APL_PMCR0, PMCR0_IACT | PMCR0_IMODE_MASK);
+ }
+ reg = mrs(SYS_IMP_APL_UPMCR0);
+ if ((reg & UPMCR0_IMODE_MASK) == UPMCR0_IMODE_FIQ && (mrs(SYS_IMP_APL_UPMSR) & UPMSR_IACT)) {
+ printf(" UPMC IRQ, masking\n");
+ reg_clr(SYS_IMP_APL_UPMCR0, UPMCR0_IMODE_MASK);
+ }
+
+ if (mrs(SYS_IMP_APL_IPI_SR_EL1) & IPI_SR_PENDING) {
+ printf(" Fast IPI IRQ, clearing\n");
+ msr(SYS_IMP_APL_IPI_SR_EL1, IPI_SR_PENDING);
+ }
+
+ UNUSED(regs);
+ // print_regs(regs);
+}
+
+void exc_serr(u64 *regs)
+{
+ if (!(exc_guard & GUARD_SILENT))
+ printf("Exception: SError\n");
+
+ sysop("dsb sy");
+ sysop("isb");
+
+ if (!(exc_guard & GUARD_SILENT))
+ print_regs(regs, 0);
+
+ if ((exc_guard & GUARD_TYPE_MASK) == GUARD_OFF) {
+ printf("Unhandled exception, rebooting...\n");
+ flush_and_reboot();
+ }
+
+ exc_count++;
+
+ sysop("dsb sy");
+ sysop("isb");
+}
diff --git a/tools/src/exception.h b/tools/src/exception.h
new file mode 100644
index 0000000..786f38f
--- /dev/null
+++ b/tools/src/exception.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef __EXCEPTION_H__
+#define __EXCEPTION_H__
+
+#define SIZEOF_EXC_INFO (64 * 8)
+
+#ifndef __ASSEMBLER__
+
+#include <assert.h>
+#include <stdint.h>
+
+#include "types.h"
+
+enum exc_guard_t {
+ GUARD_OFF = 0,
+ GUARD_SKIP,
+ GUARD_MARK,
+ GUARD_RETURN,
+ GUARD_TYPE_MASK = 0xff,
+ GUARD_SILENT = 0x100,
+};
+
+struct exc_info {
+ u64 regs[32];
+ u64 spsr;
+ u64 elr;
+ u64 esr;
+ u64 far;
+ u64 afsr1;
+ u64 sp[3];
+ u64 cpu_id;
+ u64 mpidr;
+ u64 elr_phys;
+ u64 far_phys;
+ u64 sp_phys;
+ void *extra;
+};
+static_assert(sizeof(struct exc_info) <= SIZEOF_EXC_INFO, "Please increase SIZEOF_EXC_INFO");
+static_assert((sizeof(struct exc_info) & 15) == 0, "SIZEOF_EXC_INFO must be a multiple of 16");
+
+extern volatile enum exc_guard_t exc_guard;
+extern volatile int exc_count;
+
+void exception_initialize(void);
+void exception_shutdown(void);
+
+void print_regs(u64 *regs, int el12);
+
+uint64_t el0_call(void *func, uint64_t a, uint64_t b, uint64_t c, uint64_t d);
+uint64_t el1_call(void *func, uint64_t a, uint64_t b, uint64_t c, uint64_t d);
+
+#endif
+
+#endif
diff --git a/tools/src/exception_asm.S b/tools/src/exception_asm.S
new file mode 100644
index 0000000..8c8d01f
--- /dev/null
+++ b/tools/src/exception_asm.S
@@ -0,0 +1,231 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "exception.h"
+#include "memory.h"
+
+.globl exc_sync
+.globl exc_irq
+.globl exc_fiq
+.globl exc_serr
+.globl _vectors_start
+.globl el0_stack
+
+.globl _v_sp0_sync
+.type _v_sp0_sync, @function
+_v_sp0_sync:
+ msr pan, #0
+ sub sp, sp, #(SIZEOF_EXC_INFO - 32 * 8)
+ str x30, [sp, #-16]!
+ bl _exc_entry
+ bl exc_sync
+
+ b _exc_return
+
+.globl _v_sp0_irq
+.type _v_sp0_irq, @function
+_v_sp0_irq:
+ msr pan, #0
+ sub sp, sp, #(SIZEOF_EXC_INFO - 32 * 8)
+ str x30, [sp, #-16]!
+ bl _exc_entry
+ bl exc_irq
+
+ b _exc_return
+
+.globl _v_sp0_fiq
+.type _v_sp0_fiq, @function
+_v_sp0_fiq:
+ msr pan, #0
+ sub sp, sp, #(SIZEOF_EXC_INFO - 32 * 8)
+ str x30, [sp, #-16]!
+ bl _exc_entry
+ bl exc_fiq
+
+ b _exc_return
+
+.globl _v_sp0_serr
+.type _v_sp0_serr, @function
+_v_sp0_serr:
+ msr pan, #0
+ sub sp, sp, #(SIZEOF_EXC_INFO - 32 * 8)
+ str x30, [sp, #-16]!
+ bl _exc_entry
+ bl exc_serr
+
+ b _exc_return
+
+.globl _exc_entry
+.type _exc_entry, @function
+_exc_entry:
+ stp x28, x29, [sp, #-16]!
+ stp x26, x27, [sp, #-16]!
+ stp x24, x25, [sp, #-16]!
+ stp x22, x23, [sp, #-16]!
+ stp x20, x21, [sp, #-16]!
+ stp x18, x19, [sp, #-16]!
+ stp x16, x17, [sp, #-16]!
+ stp x14, x15, [sp, #-16]!
+ stp x12, x13, [sp, #-16]!
+ stp x10, x11, [sp, #-16]!
+ stp x8, x9, [sp, #-16]!
+ stp x6, x7, [sp, #-16]!
+ stp x4, x5, [sp, #-16]!
+ stp x2, x3, [sp, #-16]!
+ stp x0, x1, [sp, #-16]!
+
+ mov x0, sp
+ ret
+
+.globl _exc_return
+.type _exc_return, @function
+_exc_return:
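+ // Only x0..x18 need restoring here: x19..x29 are callee-saved and therefore
+ // preserved by the C handlers, so their 11 slots (88 bytes) are skipped below.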
+ ldp x0, x1, [sp], #16
+ ldp x2, x3, [sp], #16
+ ldp x4, x5, [sp], #16
+ ldp x6, x7, [sp], #16
+ ldp x8, x9, [sp], #16
+ ldp x10, x11, [sp], #16
+ ldp x12, x13, [sp], #16
+ ldp x14, x15, [sp], #16
+ ldp x16, x17, [sp], #16
+ ldr x18, [sp], #8
+ add sp, sp, #88
+ ldr x30, [sp], #16
+
+ add sp, sp, #(SIZEOF_EXC_INFO - 32 * 8)
+
+ eret
+
+.globl el0_call
+.type el0_call, @function
+el0_call:
+ str x30, [sp, #-16]!
+
+ // Disable EL1
+ mrs x5, hcr_el2
+ orr x5, x5, #(1 << 27)
+ msr hcr_el2, x5
+ isb
+
+ mrs x5, daif
+ msr daifclr, 3
+ msr spsr_el1, x5
+
+ ldr x5, =_el0_thunk
+ msr elr_el1, x5
+
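+ // REGION_RWX_EL0/REGION_RW_EL0 (memory.h) offset the code and stack pointers
+ // into memory aliases that EL0 is allowed to execute and write, respectively.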
+ mov x5, #REGION_RWX_EL0
+ orr x0, x0, x5
+
+ ldr x5, =el0_stack_base
+ ldr x5, [x5]
+ mov x6, #REGION_RW_EL0
+ orr x5, x5, x6
+ msr spsel, #0
+ mov sp, x5
+
+ eret
+
+_el0_thunk:
+ mov x5, x0
+ mov x0, x1
+ mov x1, x2
+ mov x2, x3
+ mov x3, x4
+
+ blr x5
+
+ brk 0
+ .long 0
+
+.globl el0_ret
+.type el0_ret, @function
+el0_ret:
+ ldr x30, [sp], #16
+ ret
+
+.globl el1_call
+.type el1_call, @function
+el1_call:
+ str x30, [sp, #-16]!
+
+ // Enable EL1, but only if not already done.
+ // this check is here because writes to hcr_el2 are only possible from GL2
+ // if that mode has been enabled
+ mrs x5, hcr_el2
+ bic x6, x5, #(1 << 27)
+ cmp x5, x6
+ beq 1f
+ msr hcr_el2, x6
+ isb
+
+ 1: mrs x5, daif
+ msr daifclr, 3
+ mov x6, #5
+ orr x5, x5, x6 // EL1h
+ msr spsr_el2, x5
+
+ ldr x5, =_el1_thunk
+ msr elr_el2, x5
+
+ ldr x5, =el0_stack_base
+ ldr x5, [x5]
+ msr sp_el1, x5
+
+ eret
+
+_el1_thunk:
+ mov x5, x0
+ mov x0, x1
+ mov x1, x2
+ mov x2, x3
+ mov x3, x4
+
+ blr x5
+
+ hvc 0
+ .long 0
+
+.globl el1_ret
+.type el1_ret, @function
+el1_ret:
+ ldr x30, [sp], #16
+ ret
+
+.align 11
+.globl _el1_vectors_start
+_el1_vectors_start:
+ hvc 0x10
+ .align 7
+ hvc 0x11
+ .align 7
+ hvc 0x12
+ .align 7
+ hvc 0x13
+ .align 7
+
+ hvc 0x14
+ .align 7
+ hvc 0x15
+ .align 7
+ hvc 0x16
+ .align 7
+ hvc 0x17
+ .align 7
+
+ hvc 0x18
+ .align 7
+ hvc 0x19
+ .align 7
+ hvc 0x1a
+ .align 7
+ hvc 0x1b
+ .align 7
+
+ hvc 0x1c
+ .align 7
+ hvc 0x1d
+ .align 7
+ hvc 0x1e
+ .align 7
+ hvc 0x1f
diff --git a/tools/src/fb.c b/tools/src/fb.c
new file mode 100644
index 0000000..4c3e8b5
--- /dev/null
+++ b/tools/src/fb.c
@@ -0,0 +1,415 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "fb.h"
+#include "assert.h"
+#include "iodev.h"
+#include "malloc.h"
+#include "memory.h"
+#include "string.h"
+#include "types.h"
+#include "utils.h"
+#include "xnuboot.h"
+
+#define FB_DEPTH_MASK 0xff
+
+fb_t fb;
+
+struct image {
+ u32 *ptr;
+ u32 width;
+ u32 height;
+};
+
+static struct {
+ struct {
+ u8 *ptr;
+ u32 width;
+ u32 height;
+ } font;
+
+ struct {
+ u32 row;
+ u32 col;
+
+ u32 max_row;
+ u32 max_col;
+ } cursor;
+
+ struct {
+ u32 rows;
+ u32 cols;
+ } margin;
+
+ bool initialized;
+ bool active;
+} console;
+
+extern u8 _binary_build_bootlogo_128_bin_start[];
+extern u8 _binary_build_bootlogo_256_bin_start[];
+
+extern u8 _binary_build_font_bin_start[];
+extern u8 _binary_build_font_retina_bin_start[];
+
+const struct image logo_128 = {
+ .ptr = (void *)_binary_build_bootlogo_128_bin_start,
+ .width = 128,
+ .height = 128,
+};
+
+const struct image logo_256 = {
+ .ptr = (void *)_binary_build_bootlogo_256_bin_start,
+ .width = 256,
+ .height = 256,
+};
+
+const struct image *logo;
+struct image orig_logo;
+
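+// All drawing goes to a shadow buffer in normal RAM (fb.ptr); fb_update() copies it
+// to the real framebuffer (fb.hwptr), which is mapped non-cacheable in fb_init().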
+void fb_update(void)
+{
+ memcpy128(fb.hwptr, fb.ptr, fb.size);
+}
+
+static void fb_clear_font_row(u32 row)
+{
+ const u32 row_size = (console.margin.cols + console.cursor.max_col) * console.font.width * 4;
+ const u32 ystart = (console.margin.rows + row) * console.font.height * fb.stride;
+
+ for (u32 y = 0; y < console.font.height; ++y)
+ memset32(fb.ptr + ystart + y * fb.stride, 0, row_size);
+}
+
+static void fb_move_font_row(u32 dst, u32 src)
+{
+ const u32 row_size = (console.margin.cols + console.cursor.max_col) * console.font.width * 4;
+ u32 ysrc = (console.margin.rows + src) * console.font.height;
+ u32 ydst = (console.margin.rows + dst) * console.font.height;
+
+ ysrc *= fb.stride;
+ ydst *= fb.stride;
+
+ for (u32 y = 0; y < console.font.height; ++y)
+ memcpy32(fb.ptr + ydst + y * fb.stride, fb.ptr + ysrc + y * fb.stride, row_size);
+
+ fb_clear_font_row(src);
+}
+
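+// 30-bit pixel format (x2r10g10b10): the 8-bit channel values are placed in the
+// top 8 bits of each 10-bit field.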
+static inline u32 rgb2pixel_30(rgb_t c)
+{
+ return (c.b << 2) | (c.g << 12) | (c.r << 22);
+}
+
+static inline rgb_t pixel2rgb_30(u32 c)
+{
+ return (rgb_t){(c >> 22) & 0xff, (c >> 12) & 0xff, c >> 2};
+}
+
+static inline void fb_set_pixel(u32 x, u32 y, rgb_t c)
+{
+ fb.ptr[x + y * fb.stride] = rgb2pixel_30(c);
+}
+
+static inline rgb_t fb_get_pixel(u32 x, u32 y)
+{
+ return pixel2rgb_30(fb.ptr[x + y * fb.stride]);
+}
+
+void fb_blit(u32 x, u32 y, u32 w, u32 h, void *data, u32 stride, pix_fmt_t pix_fmt)
+{
+ u8 *p = data;
+
+ for (u32 i = 0; i < h; i++) {
+ for (u32 j = 0; j < w; j++) {
+ rgb_t color;
+ switch (pix_fmt) {
+ default:
+ case PIX_FMT_XRGB:
+ color.r = p[(j + i * stride) * 4];
+ color.g = p[(j + i * stride) * 4 + 1];
+ color.b = p[(j + i * stride) * 4 + 2];
+ break;
+ case PIX_FMT_XBGR:
+ color.r = p[(j + i * stride) * 4 + 2];
+ color.g = p[(j + i * stride) * 4 + 1];
+ color.b = p[(j + i * stride) * 4];
+ break;
+ }
+ fb_set_pixel(x + j, y + i, color);
+ }
+ }
+ fb_update();
+}
+
+void fb_unblit(u32 x, u32 y, u32 w, u32 h, void *data, u32 stride)
+{
+ u8 *p = data;
+
+ for (u32 i = 0; i < h; i++) {
+ for (u32 j = 0; j < w; j++) {
+ rgb_t color = fb_get_pixel(x + j, y + i);
+ p[(j + i * stride) * 4] = color.r;
+ p[(j + i * stride) * 4 + 1] = color.g;
+ p[(j + i * stride) * 4 + 2] = color.b;
+ p[(j + i * stride) * 4 + 3] = 0xff;
+ }
+ }
+}
+
+void fb_fill(u32 x, u32 y, u32 w, u32 h, rgb_t color)
+{
+ u32 c = rgb2pixel_30(color);
+ for (u32 i = 0; i < h; i++)
+ memset32(&fb.ptr[x + (y + i) * fb.stride], c, w * 4);
+ fb_update();
+}
+
+void fb_clear(rgb_t color)
+{
+ u32 c = rgb2pixel_30(color);
+ memset32(fb.ptr, c, fb.stride * fb.height * 4);
+ fb_update();
+}
+
+void fb_blit_image(u32 x, u32 y, const struct image *img)
+{
+ fb_blit(x, y, img->width, img->height, img->ptr, img->width, PIX_FMT_XRGB);
+}
+
+void fb_unblit_image(u32 x, u32 y, struct image *img)
+{
+ fb_unblit(x, y, img->width, img->height, img->ptr, img->width);
+}
+
+void fb_blit_logo(const struct image *logo)
+{
+ fb_blit_image((fb.width - logo->width) / 2, (fb.height - logo->height) / 2, logo);
+}
+
+void fb_display_logo(void)
+{
+ printf("fb: display logo\n");
+ fb_blit_logo(logo);
+}
+
+void fb_restore_logo(void)
+{
+ if (!orig_logo.ptr)
+ return;
+ fb_blit_logo(&orig_logo);
+}
+
+void fb_improve_logo(void)
+{
+ const u8 magic[] = "BY;iX2gK0b89P9P*Qa";
+ u8 *p = (void *)orig_logo.ptr;
+
+ if (!p || p[orig_logo.width * (orig_logo.height + 1) * 2] <= 250)
+ return;
+
+ for (u32 i = 0; i < orig_logo.height; i++) {
+ const u8 *c = &magic[min((max(i * 128 / orig_logo.height, 41) - 41) / 11, 5) * 3];
+ for (u32 j = 0; j < (orig_logo.width * 4); j++, p++)
+ *p = (*p * (c[(j - (j >> 2)) % 3] - 42)) / 63;
+ }
+}
+
+static inline rgb_t font_get_pixel(u8 c, u32 x, u32 y)
+{
+ c -= 0x20;
+ u8 v =
+ console.font.ptr[c * console.font.width * console.font.height + y * console.font.width + x];
+
+ rgb_t col = {.r = v, .g = v, .b = v};
+ return col;
+}
+
+static void fb_putbyte(u8 c)
+{
+ u32 x = (console.margin.cols + console.cursor.col) * console.font.width;
+ u32 y = (console.margin.rows + console.cursor.row) * console.font.height;
+
+ for (u32 i = 0; i < console.font.height; i++)
+ for (u32 j = 0; j < console.font.width; j++)
+ fb_set_pixel(x + j, y + i, font_get_pixel(c, j, i));
+}
+
+static void fb_putchar(u8 c)
+{
+ if (c == '\r') {
+ console.cursor.col = 0;
+ } else if (c == '\n') {
+ console.cursor.row++;
+ console.cursor.col = 0;
+ } else if (c >= 0x20 && c < 0x7f) {
+ fb_putbyte(c);
+ console.cursor.col++;
+ } else {
+ fb_putbyte('?');
+ console.cursor.col++;
+ }
+
+ if (console.cursor.col == console.cursor.max_col) {
+ console.cursor.row++;
+ console.cursor.col = 0;
+ }
+
+ if (console.cursor.row == console.cursor.max_row)
+ fb_console_scroll(1);
+}
+
+void fb_console_scroll(u32 n)
+{
+ u32 row = 0;
+ n = min(n, console.cursor.row);
+ for (; row < console.cursor.max_row - n; ++row)
+ fb_move_font_row(row, row + n);
+ for (; row < console.cursor.max_row; ++row)
+ fb_clear_font_row(row);
+ console.cursor.row -= n;
+}
+
+void fb_console_reserve_lines(u32 n)
+{
+ if ((console.cursor.max_row - console.cursor.row) <= n)
+ fb_console_scroll(1 + n - (console.cursor.max_row - console.cursor.row));
+ fb_update();
+}
+
+ssize_t fb_console_write(const char *bfr, size_t len)
+{
+ ssize_t wrote = 0;
+
+ if (!console.initialized || !console.active)
+ return 0;
+
+ while (len--) {
+ fb_putchar(*bfr++);
+ wrote++;
+ }
+
+ fb_update();
+
+ return wrote;
+}
+
+static bool fb_console_iodev_can_write(void *opaque)
+{
+ UNUSED(opaque);
+ return console.initialized && console.active;
+}
+
+static ssize_t fb_console_iodev_write(void *opaque, const void *buf, size_t len)
+{
+ UNUSED(opaque);
+ return fb_console_write(buf, len);
+}
+
+const struct iodev_ops iodev_fb_ops = {
+ .can_write = fb_console_iodev_can_write,
+ .write = fb_console_iodev_write,
+};
+
+struct iodev iodev_fb = {
+ .ops = &iodev_fb_ops,
+ .usage = USAGE_CONSOLE,
+ .lock = SPINLOCK_INIT,
+};
+
+static void fb_clear_console(void)
+{
+ for (u32 row = 0; row < console.cursor.max_row; ++row)
+ fb_clear_font_row(row);
+
+ console.cursor.col = 0;
+ console.cursor.row = 0;
+ fb_update();
+}
+
+void fb_init(bool clear)
+{
+ fb.hwptr = (void *)cur_boot_args.video.base;
+ fb.stride = cur_boot_args.video.stride / 4;
+ fb.width = cur_boot_args.video.width;
+ fb.height = cur_boot_args.video.height;
+ fb.depth = cur_boot_args.video.depth & FB_DEPTH_MASK;
+ fb.size = cur_boot_args.video.stride * cur_boot_args.video.height;
+ printf("fb init: %dx%d (%d) [s=%d] @%p\n", fb.width, fb.height, fb.depth, fb.stride, fb.hwptr);
+
+ mmu_add_mapping(cur_boot_args.video.base, cur_boot_args.video.base, ALIGN_UP(fb.size, 0x4000),
+ MAIR_IDX_NORMAL_NC, PERM_RW);
+
+ fb.ptr = malloc(fb.size);
+ memcpy(fb.ptr, fb.hwptr, fb.size);
+
+ if (cur_boot_args.video.depth & FB_DEPTH_FLAG_RETINA) {
+ logo = &logo_256;
+ console.font.ptr = _binary_build_font_retina_bin_start;
+ console.font.width = 16;
+ console.font.height = 32;
+ } else {
+ logo = &logo_128;
+ console.font.ptr = _binary_build_font_bin_start;
+ console.font.width = 8;
+ console.font.height = 16;
+ }
+
+ if (!orig_logo.ptr) {
+ orig_logo = *logo;
+ orig_logo.ptr = malloc(orig_logo.width * orig_logo.height * 4);
+ fb_unblit_image((fb.width - orig_logo.width) / 2, (fb.height - orig_logo.height) / 2,
+ &orig_logo);
+ }
+
+ if (clear)
+ memset32(fb.ptr, 0, fb.size);
+
+ console.margin.rows = 2;
+ console.margin.cols = 4;
+ console.cursor.col = 0;
+ console.cursor.row = 0;
+
+ console.cursor.max_row = (fb.height / console.font.height) - 2 * console.margin.rows;
+ console.cursor.max_col =
+ ((fb.width - logo->width) / 2) / console.font.width - 2 * console.margin.cols;
+
+ console.initialized = true;
+ console.active = false;
+
+ fb_clear_console();
+
+ printf("fb console: max rows %d, max cols %d\n", console.cursor.max_row,
+ console.cursor.max_col);
+}
+
+void fb_set_active(bool active)
+{
+ console.active = active;
+ if (active)
+ iodev_console_kick();
+}
+
+void fb_shutdown(bool restore_logo)
+{
+ if (!console.initialized)
+ return;
+
+ console.active = false;
+ console.initialized = false;
+ fb_clear_console();
+ if (restore_logo) {
+ fb_restore_logo();
+ free(orig_logo.ptr);
+ orig_logo.ptr = NULL;
+ }
+ free(fb.ptr);
+}
+
+void fb_reinit(void)
+{
+ if (!console.initialized)
+ return;
+
+ fb_shutdown(false);
+ fb_init(true);
+ fb_display_logo();
+}
diff --git a/tools/src/fb.h b/tools/src/fb.h
new file mode 100644
index 0000000..2bfd406
--- /dev/null
+++ b/tools/src/fb.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef FB_H
+#define FB_H
+
+#include "types.h"
+
+#define FB_DEPTH_FLAG_RETINA 0x10000
+
+typedef struct {
+ u32 *ptr; /* pointer to the start of the framebuffer */
+ u32 *hwptr; /* pointer to the start of the real framebuffer */
+ u32 stride; /* framebuffer stride divided by four (i.e. stride in pixels) */
+ u32 depth; /* framebuffer depth (i.e. bits per pixel) */
+ u32 width; /* width of the framebuffer in pixels */
+ u32 height; /* height of the framebuffer in pixels */
+ u32 size; /* size of the framebuffer in bytes */
+} fb_t;
+
+typedef struct {
+ u8 r;
+ u8 g;
+ u8 b;
+} rgb_t;
+
+typedef enum {
+ PIX_FMT_XRGB,
+ PIX_FMT_XBGR,
+} pix_fmt_t;
+
+extern fb_t fb;
+
+static inline rgb_t int2rgb(u32 c)
+{
+ return (rgb_t){c >> 16, c >> 8, c};
+}
+
+void fb_init(bool clear);
+void fb_shutdown(bool restore_logo);
+void fb_reinit(void);
+void fb_update(void);
+void fb_set_active(bool active);
+
+void fb_blit(u32 x, u32 y, u32 w, u32 h, void *data, u32 stride, pix_fmt_t format);
+void fb_unblit(u32 x, u32 y, u32 w, u32 h, void *data, u32 stride);
+void fb_fill(u32 x, u32 y, u32 w, u32 h, rgb_t color);
+void fb_clear(rgb_t color);
+
+void fb_display_logo(void);
+void fb_restore_logo(void);
+void fb_improve_logo(void);
+
+void fb_console_scroll(u32 n);
+void fb_console_reserve_lines(u32 n);
+ssize_t fb_console_write(const char *bfr, size_t len);
+
+#endif
diff --git a/tools/src/firmware.c b/tools/src/firmware.c
new file mode 100644
index 0000000..ca5f108
--- /dev/null
+++ b/tools/src/firmware.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "firmware.h"
+#include "adt.h"
+#include "string.h"
+#include "types.h"
+#include "utils.h"
+
+#include "libfdt/libfdt.h"
+#include "libfdt/libfdt_env.h"
+
+struct fw_version_info os_firmware;
+struct fw_version_info system_firmware;
+
+const struct fw_version_info fw_versions[NUM_FW_VERSIONS] = {
+ [V_UNKNOWN] = {V_UNKNOWN, "unknown", {0}, 1, "unknown"},
+ [V12_1] = {V12_1, "12.1", {12, 1, 0}, 3, "iBoot-7429.61.2"},
+ [V12_2] = {V12_2, "12.2", {12, 2, 0}, 3, "iBoot-7429.81.3"},
+ [V12_3] = {V12_3, "12.3", {12, 3, 0}, 3, "iBoot-7459.101.2"},
+ [V12_3_1] = {V12_3_1, "12.3.1", {12, 3, 1}, 3, "iBoot-7459.101.3"},
+ [V12_4] = {V12_4, "12.4", {12, 4, 0}, 3, "iBoot-7459.121.3"},
+ [V12_5] = {V12_5, "12.5", {12, 5, 0}, 3, "iBoot-7459.141.1"},
+ // Same as 12.5
+ // {V12_6, "12.6", {12, 6, 0}, 3, "iBoot-7459.141.1"},
+ [V13_0B4] = {V13_0B4, "13.0 beta4", {12, 99, 4}, 3, "iBoot-8419.0.151.0.1"},
+ [V13_0] = {V13_0, "13.0", {13, 0, 0}, 3, "iBoot-8419.41.10"},
+};
+
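+// Store a firmware version as an array of big-endian 32-bit cells in a device
+// tree property.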
+int firmware_set_fdt(void *fdt, int node, const char *prop, const struct fw_version_info *ver)
+{
+ fdt32_t data[ARRAY_SIZE(ver->num)];
+
+ for (size_t i = 0; i < ver->num_length; i++) {
+ data[i] = cpu_to_fdt32(ver->num[i]);
+ }
+
+ return fdt_setprop(fdt, node, prop, data, ver->num_length * sizeof(u32));
+}
+
+static void detect_firmware(struct fw_version_info *info, const char *ver)
+{
+ for (size_t i = 0; i < ARRAY_SIZE(fw_versions); i++) {
+ if (!strcmp(fw_versions[i].iboot, ver)) {
+ *info = fw_versions[i];
+ return;
+ }
+ }
+
+ *info = fw_versions[V_UNKNOWN];
+ info->iboot = ver;
+}
+
+int firmware_init(void)
+{
+ int node = adt_path_offset(adt, "/chosen");
+
+ if (node < 0) {
+ printf("ADT: no /chosen found\n");
+ return -1;
+ }
+
+ u32 len;
+ const char *p = adt_getprop(adt, node, "firmware-version", &len);
+ if (p && len && p[len - 1] == 0) {
+ detect_firmware(&os_firmware, p);
+ printf("OS FW version: %s (%s)\n", os_firmware.string, os_firmware.iboot);
+ } else {
+ printf("ADT: failed to find firmware-version\n");
+ return -1;
+ }
+
+ p = adt_getprop(adt, node, "system-firmware-version", &len);
+ if (p && len && p[len - 1] == 0) {
+ detect_firmware(&system_firmware, p);
+ printf("System FW version: %s (%s)\n", system_firmware.string, system_firmware.iboot);
+ } else {
+ printf("ADT: failed to find system-firmware-version\n");
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/tools/src/firmware.h b/tools/src/firmware.h
new file mode 100644
index 0000000..1a3375b
--- /dev/null
+++ b/tools/src/firmware.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef __FIRMWARE_H__
+#define __FIRMWARE_H__
+
+#include "types.h"
+
+enum fw_version {
+ V_UNKNOWN,
+ V12_1,
+ V12_2,
+ V12_3,
+ V12_3_1,
+ V12_4,
+ V12_5,
+ // V12_6,
+ V13_0B4,
+ V13_0,
+ NUM_FW_VERSIONS,
+};
+
+struct fw_version_info {
+ enum fw_version version;
+ const char *string;
+ u32 num[4];
+ size_t num_length;
+ const char *iboot;
+};
+
+extern struct fw_version_info os_firmware;
+extern struct fw_version_info system_firmware;
+extern const struct fw_version_info fw_versions[NUM_FW_VERSIONS];
+
+int firmware_init(void);
+int firmware_set_fdt(void *fdt, int node, const char *prop, const struct fw_version_info *ver);
+
+#endif
diff --git a/tools/src/gxf.c b/tools/src/gxf.c
new file mode 100644
index 0000000..7b751c5
--- /dev/null
+++ b/tools/src/gxf.c
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "cpu_regs.h"
+#include "exception.h"
+#include "gxf.h"
+#include "malloc.h"
+#include "memory.h"
+#include "smp.h"
+#include "uart.h"
+#include "utils.h"
+
+uint64_t gxf_enter(void *func, uint64_t a, uint64_t b, uint64_t c, uint64_t d);
+
+void _gxf_init(void *gl2_stack, void *gl1_stack);
+
+u8 *gl1_stack[MAX_CPUS];
+u8 *gl2_stack[MAX_CPUS];
+
+void gxf_init(void)
+{
+ int cpu = smp_id();
+
+ if (!gl2_stack[cpu])
+ gl2_stack[cpu] = memalign(0x4000, GL_STACK_SIZE);
+ if (in_el2() && !gl1_stack[cpu])
+ gl1_stack[cpu] = memalign(0x4000, GL_STACK_SIZE);
+
+ _gxf_init(gl2_stack[cpu], gl1_stack[cpu]);
+}
+
+bool gxf_enabled(void)
+{
+ if (!(mrs(SYS_IMP_APL_SPRR_CONFIG_EL1) & SPRR_CONFIG_EN))
+ return false;
+
+ return (mrs(SYS_IMP_APL_GXF_CONFIG_EL1) & GXF_CONFIG_EN);
+}
+
+bool in_gl12(void)
+{
+ if (!gxf_enabled())
+ return false;
+
+ return (mrs(SYS_IMP_APL_GXF_STATUS_EL1) & GXF_STATUS_GUARDED);
+}
+
+static uint64_t gl_call(void *func, uint64_t a, uint64_t b, uint64_t c, uint64_t d)
+{
+ // disable the MMU first since enabling SPRR will change the meaning of all
+ // pagetable permission bits and also prevent us from having rwx pages
+ u64 sprr_state = mrs(SYS_IMP_APL_SPRR_CONFIG_EL1);
+ if (!(sprr_state & SPRR_CONFIG_EN))
+ reg_set_sync(SYS_IMP_APL_SPRR_CONFIG_EL1, sprr_state | SPRR_CONFIG_EN);
+
+ u64 gxf_state = mrs(SYS_IMP_APL_GXF_CONFIG_EL1);
+ if (!(gxf_state & GXF_CONFIG_EN))
+ reg_set_sync(SYS_IMP_APL_GXF_CONFIG_EL1, gxf_state | GXF_CONFIG_EN);
+
+ uint64_t ret = gxf_enter(func, a, b, c, d);
+
+ if (!(gxf_state & GXF_CONFIG_EN))
+ msr_sync(SYS_IMP_APL_GXF_CONFIG_EL1, gxf_state);
+ if (!(sprr_state & SPRR_CONFIG_EN))
+ msr_sync(SYS_IMP_APL_SPRR_CONFIG_EL1, sprr_state);
+
+ return ret;
+}
+
+uint64_t gl2_call(void *func, uint64_t a, uint64_t b, uint64_t c, uint64_t d)
+{
+ if (mrs(CurrentEL) != 0x8)
+ return -1;
+ return gl_call(func, a, b, c, d);
+}
+
+struct gl_call_argv {
+ void *func;
+ uint64_t a, b, c, d;
+};
+
+static uint64_t gl_call_wrapper(struct gl_call_argv *args)
+{
+ return gl_call(args->func, args->a, args->b, args->c, args->d);
+}
+
+uint64_t gl1_call(void *func, uint64_t a, uint64_t b, uint64_t c, uint64_t d)
+{
+ if (mrs(CurrentEL) == 0x4)
+ return gl_call(func, a, b, c, d);
+
+ struct gl_call_argv args;
+ args.func = func;
+ args.a = a;
+ args.b = b;
+ args.c = c;
+ args.d = d;
+
+ // enable EL1 here since once GXF has been enabled HCR_EL2 writes are only possible from GL2
+ if (mrs(HCR_EL2) & HCR_TGE)
+ reg_clr(HCR_EL2, HCR_TGE);
+
+ u64 sprr_state = mrs(SYS_IMP_APL_SPRR_CONFIG_EL1) & SPRR_CONFIG_EN;
+ reg_set_sync(SYS_IMP_APL_SPRR_CONFIG_EL1, SPRR_CONFIG_EN);
+
+ u64 gxf_state = mrs(SYS_IMP_APL_GXF_CONFIG_EL1) & GXF_CONFIG_EN;
+ reg_set_sync(SYS_IMP_APL_GXF_CONFIG_EL1, GXF_CONFIG_EN);
+
+ uint64_t ret = el1_call(gl_call_wrapper, (uint64_t)&args, 0, 0, 0);
+
+ msr_sync(SYS_IMP_APL_GXF_CONFIG_EL1, gxf_state);
+ msr_sync(SYS_IMP_APL_SPRR_CONFIG_EL1, sprr_state);
+
+ return ret;
+}
diff --git a/tools/src/gxf.h b/tools/src/gxf.h
new file mode 100644
index 0000000..9d1f22b
--- /dev/null
+++ b/tools/src/gxf.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef __GXF_H__
+#define __GXF_H__
+
+#include "types.h"
+
+#define GL_STACK_SIZE 0x10000
+
+#ifndef __ASSEMBLER__
+
+bool gxf_enabled(void);
+bool in_gl12(void);
+
+void gxf_init(void);
+
+uint64_t gl1_call(void *func, uint64_t a, uint64_t b, uint64_t c, uint64_t d);
+uint64_t gl2_call(void *func, uint64_t a, uint64_t b, uint64_t c, uint64_t d);
+
+#endif
+
+#endif
diff --git a/tools/src/gxf_asm.S b/tools/src/gxf_asm.S
new file mode 100644
index 0000000..6b6405f
--- /dev/null
+++ b/tools/src/gxf_asm.S
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "gxf.h"
+#include "cpu_regs.h"
+#include "exception.h"
+
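+// Apple's proprietary GENTER/GEXIT instructions (enter/leave Guarded Execution
+// mode), emitted as raw opcodes since standard assemblers do not know them.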
+#define genter .long 0x00201420
+#define gexit .long 0x00201400
+
+.global _gxf_init
+.type _gxf_init, @function
+_gxf_init:
+ str x30, [sp, #-16]!
+ mov x5, x0
+ mov x6, x1
+ mov x0, 1
+ msr SYS_IMP_APL_SPRR_CONFIG_EL1, x0
+ isb
+ msr SYS_IMP_APL_GXF_CONFIG_EL1, x0
+ isb
+ ldr x0, =_gxf_setup
+ msr SYS_IMP_APL_GXF_ENTER_EL1, x0
+ isb
+ genter
+ msr SYS_IMP_APL_GXF_CONFIG_EL1, xzr
+ isb
+ msr SYS_IMP_APL_SPRR_CONFIG_EL1, xzr
+ isb
+ ldr x30, [sp], #16
+ ret
+
+.globl gxf_enter
+.type gxf_enter, @function
+gxf_enter:
+ genter
+ ret
+
+_gxf_setup:
+ mov sp, x5
+ ldr x1, =_gxf_vectors
+ ldr x2, =_gxf_exc_sync
+ ldr x3, =_gxf_entry
+ msr SYS_IMP_APL_VBAR_GL1, x1
+ msr SYS_IMP_APL_GXF_ABORT_EL1, x2
+ msr SYS_IMP_APL_GXF_ENTER_EL1, x3
+
+ mrs x4, CurrentEL
+ cmp x4, #8
+ bne 1f
+
+ msr SYS_IMP_APL_SP_GL12, x6
+ msr SYS_IMP_APL_VBAR_GL12, x1
+ msr SYS_IMP_APL_GXF_ABORT_EL12, x2
+ msr SYS_IMP_APL_GXF_ENTER_EL12, x3
+
+1:
+ isb
+ gexit
+
+_gxf_entry:
+ stp x29, x30, [sp, #-16]!
+ stp x23, x24, [sp, #-16]!
+ stp x21, x22, [sp, #-16]!
+ stp x19, x20, [sp, #-16]!
+
+ // These registers are clobbered by any exception taken in GL1/2, but we need
+ // their original values in order to gexit correctly again.
+ mrs x20, SYS_IMP_APL_SPSR_GL1
+ mrs x21, SYS_IMP_APL_ASPSR_GL1
+ mrs x22, SYS_IMP_APL_ESR_GL1
+ mrs x23, SYS_IMP_APL_ELR_GL1
+ mrs x24, SYS_IMP_APL_FAR_GL1
+
+ mov x5, x0
+ mov x0, x1
+ mov x1, x2
+ mov x2, x3
+ mov x3, x4
+
+ blr x5
+
+ msr SYS_IMP_APL_SPSR_GL1, x20
+ msr SYS_IMP_APL_ASPSR_GL1, x21
+ msr SYS_IMP_APL_ESR_GL1, x22
+ msr SYS_IMP_APL_ELR_GL1, x23
+ msr SYS_IMP_APL_FAR_GL1, x24
+
+ ldp x19, x20, [sp], #16
+ ldp x21, x22, [sp], #16
+ ldp x23, x24, [sp], #16
+ ldp x29, x30, [sp], #16
+
+ isb
+ gexit
+
+.align 11
+_gxf_vectors:
+ mov x9, '0'
+ b _gxf_exc_unk
+ .align 7
+ mov x9, '1'
+ b _gxf_exc_unk
+ .align 7
+ mov x9, '2'
+ b _gxf_exc_unk
+ .align 7
+ mov x9, '3'
+ b _gxf_exc_unk
+ .align 7
+ b _gxf_exc_sync
+ .align 7
+ mov x9, '5'
+ b _gxf_exc_unk
+ .align 7
+ mov x9, '6'
+ b _gxf_exc_unk
+ .align 7
+ b _gxf_serr
+ .align 7
+ b _gxf_exc_sync
+ .align 7
+ mov x9, '9'
+ b _gxf_exc_unk
+ .align 7
+ mov x9, 'a'
+ b _gxf_exc_unk
+ .align 7
+ b _gxf_serr
+ .align 7
+ mov x9, 'c'
+ b _gxf_exc_unk
+ .align 7
+ mov x9, 'd'
+ b _gxf_exc_unk
+ .align 7
+ mov x9, 'e'
+ b _gxf_exc_unk
+ .align 7
+ mov x9, 'f'
+ b _gxf_exc_unk
+ .align 7
+
+_gxf_exc_sync:
+ msr pan, #0
+ sub sp, sp, #(SIZEOF_EXC_INFO - 32 * 8)
+ str x30, [sp, #-16]!
+ bl _gxf_exc_entry
+ bl exc_sync
+ b _gxf_exc_return
+
+_gxf_serr:
+ msr pan, #0
+ sub sp, sp, #(SIZEOF_EXC_INFO - 32 * 8)
+ str x30, [sp, #-16]!
+ bl _gxf_exc_entry
+ bl exc_serr
+ b _gxf_exc_return
+
+_gxf_exc_entry:
+ stp x28, x29, [sp, #-16]!
+ stp x26, x27, [sp, #-16]!
+ stp x24, x25, [sp, #-16]!
+ stp x22, x23, [sp, #-16]!
+ stp x20, x21, [sp, #-16]!
+ stp x18, x19, [sp, #-16]!
+ stp x16, x17, [sp, #-16]!
+ stp x14, x15, [sp, #-16]!
+ stp x12, x13, [sp, #-16]!
+ stp x10, x11, [sp, #-16]!
+ stp x8, x9, [sp, #-16]!
+ stp x6, x7, [sp, #-16]!
+ stp x4, x5, [sp, #-16]!
+ stp x2, x3, [sp, #-16]!
+ stp x0, x1, [sp, #-16]!
+
+ mov x0, sp
+
+ mrs x1, SYS_IMP_APL_SPSR_GL1
+ msr SPSR_EL1, x1
+ mrs x1, SYS_IMP_APL_ELR_GL1
+ msr ELR_EL1, x1
+ mrs x1, SYS_IMP_APL_ESR_GL1
+ msr ESR_EL1, x1
+ mrs x1, SYS_IMP_APL_FAR_GL1
+ msr FAR_EL1, x1
+
+ ret
+
+_gxf_exc_return:
+ mrs x0, SPSR_EL1
+ msr SYS_IMP_APL_SPSR_GL1, x0
+ mrs x0, ELR_EL1
+ msr SYS_IMP_APL_ELR_GL1, x0
+
+ ldp x0, x1, [sp], #16
+ ldp x2, x3, [sp], #16
+ ldp x4, x5, [sp], #16
+ ldp x6, x7, [sp], #16
+ ldp x8, x9, [sp], #16
+ ldp x10, x11, [sp], #16
+ ldp x12, x13, [sp], #16
+ ldp x14, x15, [sp], #16
+ ldp x16, x17, [sp], #16
+ ldp x18, x19, [sp], #16
+ ldp x20, x21, [sp], #16
+ ldp x22, x23, [sp], #16
+ ldp x24, x25, [sp], #16
+ ldp x26, x27, [sp], #16
+ ldp x28, x29, [sp], #16
+ ldr x30, [sp], #16
+
+ add sp, sp, #(SIZEOF_EXC_INFO - 32 * 8)
+
+ isb
+
+ gexit
+
+_gxf_exc_unk:
+ msr pan, #0
+ mov w0, 0xd /* '\r', clang compat */
+ bl debug_putc
+ mov w0, '\n'
+ bl debug_putc
+ mov w0, '!'
+ bl debug_putc
+ mov w0, 'G'
+ bl debug_putc
+ mov w0, 'L'
+ bl debug_putc
+ mov w0, 'E'
+ bl debug_putc
+ mov w0, 'X'
+ bl debug_putc
+ mov w0, 'C'
+ bl debug_putc
+ mov w0, ':'
+ bl debug_putc
+ mov w0, w9
+ bl debug_putc
+ mov w0, '!'
+ bl debug_putc
+ mov w0, 0xd /* '\r', clang compat */
+ bl debug_putc
+ mov w0, '\n'
+ bl debug_putc
+ b reboot
diff --git a/tools/src/heapblock.c b/tools/src/heapblock.c
new file mode 100644
index 0000000..5f07f44
--- /dev/null
+++ b/tools/src/heapblock.c
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "heapblock.h"
+#include "assert.h"
+#include "types.h"
+#include "utils.h"
+#include "xnuboot.h"
+
+/*
+ * This is a non-freeing allocator, used as a backend for malloc and for uncompressing data.
+ *
+ * Allocating 0 bytes is allowed, and guarantees "infinite" (until the end of RAM) space is
+ * available at the returned pointer as long as no other malloc/heapblock calls occur, which is
+ * useful as a buffer for unknown-length uncompressed data. A subsequent call with a size will then
+ * actually reserve the block.
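+ *
+ * A sketch of that probe-then-commit pattern (decompress() is a hypothetical
+ * producer of output with unknown length, not an API in this codebase):
+ *
+ *   void *buf = heapblock_alloc(0);  // probe: all remaining RAM is usable at buf
+ *   size_t n = decompress(src, buf); // write output of unknown length
+ *   heapblock_alloc(n);              // commit: reserve exactly the n bytes written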
+ */
+
+static void *heap_base;
+
+void heapblock_init(void)
+{
+ void *top_of_kernel_data = (void *)cur_boot_args.top_of_kernel_data;
+
+ heap_base = top_of_kernel_data;
+ heapblock_alloc(0); // align base
+
+ printf("Heap base: %p\n", heap_base);
+}
+
+void *heapblock_alloc(size_t size)
+{
+ return heapblock_alloc_aligned(size, 64);
+}
+
+void *heapblock_alloc_aligned(size_t size, size_t align)
+{
+ assert((align & (align - 1)) == 0);
+ assert(heap_base);
+
+ uintptr_t block = (((uintptr_t)heap_base) + align - 1) & ~(align - 1);
+ heap_base = (void *)(block + size);
+
+ return (void *)block;
+}
diff --git a/tools/src/heapblock.h b/tools/src/heapblock.h
new file mode 100644
index 0000000..d67411d
--- /dev/null
+++ b/tools/src/heapblock.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef HEAPBLOCK_H
+#define HEAPBLOCK_H
+
+#include "types.h"
+
+void heapblock_init(void);
+
+void *heapblock_alloc(size_t size);
+void *heapblock_alloc_aligned(size_t size, size_t align);
+
+#endif
diff --git a/tools/src/hv.c b/tools/src/hv.c
new file mode 100644
index 0000000..be01692
--- /dev/null
+++ b/tools/src/hv.c
@@ -0,0 +1,329 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "hv.h"
+#include "assert.h"
+#include "cpu_regs.h"
+#include "display.h"
+#include "gxf.h"
+#include "memory.h"
+#include "pcie.h"
+#include "smp.h"
+#include "string.h"
+#include "usb.h"
+#include "utils.h"
+
+#define HV_TICK_RATE 1000
+
+DECLARE_SPINLOCK(bhl);
+
+void hv_enter_guest(u64 x0, u64 x1, u64 x2, u64 x3, void *entry);
+void hv_exit_guest(void) __attribute__((noreturn));
+
+extern char _hv_vectors_start[0];
+
+u64 hv_tick_interval;
+
+int hv_pinned_cpu;
+int hv_want_cpu;
+
+static bool hv_should_exit;
+bool hv_started_cpus[MAX_CPUS];
+u32 hv_cpus_in_guest;
+u64 hv_saved_sp[MAX_CPUS];
+
+struct hv_secondary_info_t {
+ uint64_t hcr;
+ uint64_t hacr;
+ uint64_t vtcr, vttbr;
+ uint64_t mdcr;
+ uint64_t mdscr;
+ uint64_t amx_ctl;
+ uint64_t apvmkeylo, apvmkeyhi, apsts;
+ uint64_t actlr_el2;
+ uint64_t actlr_el1;
+ uint64_t cnthctl;
+ uint64_t sprr_config;
+ uint64_t gxf_config;
+};
+
+static struct hv_secondary_info_t hv_secondary_info;
+
+void hv_init(void)
+{
+ pcie_shutdown();
+ // Make sure DCP is awake if we put it to sleep earlier; just quiesce it to match the ADT
+ if (display_is_external && display_start_dcp() >= 0)
+ display_shutdown(DCP_QUIESCED);
+ // Re-enable HPM interrupts on unused iodevs for the guest
+ usb_hpm_restore_irqs(0);
+ smp_start_secondaries();
+ smp_set_wfe_mode(true);
+ hv_wdt_init();
+
+ // Enable physical timer for EL1
+ msr(CNTHCTL_EL2, CNTHCTL_EL1PTEN | CNTHCTL_EL1PCTEN);
+
+ hv_pt_init();
+
+ // Configure hypervisor defaults
+ hv_write_hcr(HCR_API | // Allow PAuth instructions
+ HCR_APK | // Allow PAuth key registers
+ HCR_TEA | // Trap external aborts
+ HCR_E2H | // VHE mode (forced)
+ HCR_RW | // AArch64 guest
+ HCR_AMO | // Trap SError exceptions
+ HCR_VM); // Enable stage 2 translation
+
+ // No guest vectors initially
+ msr(VBAR_EL12, 0);
+
+ // Compute tick interval
+ hv_tick_interval = mrs(CNTFRQ_EL0) / HV_TICK_RATE;
+
+ sysop("dsb ishst");
+ sysop("tlbi alle1is");
+ sysop("dsb ish");
+ sysop("isb");
+}
+
+static void hv_set_gxf_vbar(void)
+{
+ msr(SYS_IMP_APL_VBAR_GL1, _hv_vectors_start);
+}
+
+void hv_start(void *entry, u64 regs[4])
+{
+ hv_should_exit = false;
+ memset(hv_started_cpus, 0, sizeof(hv_started_cpus));
+ hv_started_cpus[0] = 1;
+
+ msr(VBAR_EL1, _hv_vectors_start);
+
+ if (gxf_enabled())
+ gl2_call(hv_set_gxf_vbar, 0, 0, 0, 0);
+
+ hv_secondary_info.hcr = mrs(HCR_EL2);
+ hv_secondary_info.hacr = mrs(HACR_EL2);
+ hv_secondary_info.vtcr = mrs(VTCR_EL2);
+ hv_secondary_info.vttbr = mrs(VTTBR_EL2);
+ hv_secondary_info.mdcr = mrs(MDCR_EL2);
+ hv_secondary_info.mdscr = mrs(MDSCR_EL1);
+ hv_secondary_info.amx_ctl = mrs(SYS_IMP_APL_AMX_CTL_EL2);
+ hv_secondary_info.apvmkeylo = mrs(SYS_IMP_APL_APVMKEYLO_EL2);
+ hv_secondary_info.apvmkeyhi = mrs(SYS_IMP_APL_APVMKEYHI_EL2);
+ hv_secondary_info.apsts = mrs(SYS_IMP_APL_APSTS_EL12);
+ hv_secondary_info.actlr_el2 = mrs(ACTLR_EL2);
+ hv_secondary_info.actlr_el1 = mrs(SYS_IMP_APL_ACTLR_EL12);
+ hv_secondary_info.cnthctl = mrs(CNTHCTL_EL2);
+ hv_secondary_info.sprr_config = mrs(SYS_IMP_APL_SPRR_CONFIG_EL1);
+ hv_secondary_info.gxf_config = mrs(SYS_IMP_APL_GXF_CONFIG_EL1);
+
+ hv_arm_tick();
+ hv_pinned_cpu = -1;
+ hv_want_cpu = -1;
+ hv_cpus_in_guest = 1;
+
+ hv_enter_guest(regs[0], regs[1], regs[2], regs[3], entry);
+
+ __atomic_sub_fetch(&hv_cpus_in_guest, 1, __ATOMIC_ACQUIRE);
+ spin_lock(&bhl);
+
+ hv_wdt_stop();
+
+ hv_should_exit = true;
+ printf("HV: Exiting hypervisor (main CPU)\n");
+
+ for (int i = 0; i < MAX_CPUS; i++) {
+ if (hv_started_cpus[i]) {
+ printf("HV: Waiting for CPU %d to exit\n", i);
+ spin_unlock(&bhl);
+ smp_wait(i);
+ spin_lock(&bhl);
+ hv_started_cpus[i] = false;
+ }
+ }
+
+ printf("HV: All CPUs exited\n");
+ spin_unlock(&bhl);
+}
+
+static void hv_init_secondary(struct hv_secondary_info_t *info)
+{
+ gxf_init();
+
+ msr(VBAR_EL1, _hv_vectors_start);
+
+ msr(HCR_EL2, info->hcr);
+ msr(HACR_EL2, info->hacr);
+ msr(VTCR_EL2, info->vtcr);
+ msr(VTTBR_EL2, info->vttbr);
+ msr(MDCR_EL2, info->mdcr);
+ msr(MDSCR_EL1, info->mdscr);
+ msr(SYS_IMP_APL_AMX_CTL_EL2, info->amx_ctl);
+ msr(SYS_IMP_APL_APVMKEYLO_EL2, info->apvmkeylo);
+ msr(SYS_IMP_APL_APVMKEYHI_EL2, info->apvmkeyhi);
+ msr(SYS_IMP_APL_APSTS_EL12, info->apsts);
+ msr(ACTLR_EL2, info->actlr_el2);
+ msr(SYS_IMP_APL_ACTLR_EL12, info->actlr_el1);
+ msr(CNTHCTL_EL2, info->cnthctl);
+ msr(SYS_IMP_APL_SPRR_CONFIG_EL1, info->sprr_config);
+ msr(SYS_IMP_APL_GXF_CONFIG_EL1, info->gxf_config);
+
+ if (gxf_enabled())
+ gl2_call(hv_set_gxf_vbar, 0, 0, 0, 0);
+
+ hv_arm_tick();
+}
+
+static void hv_enter_secondary(void *entry, u64 regs[4])
+{
+ hv_enter_guest(regs[0], regs[1], regs[2], regs[3], entry);
+
+ spin_lock(&bhl);
+
+ hv_should_exit = true;
+ printf("HV: Exiting from CPU %d\n", smp_id());
+
+ __atomic_sub_fetch(&hv_cpus_in_guest, 1, __ATOMIC_ACQUIRE);
+
+ spin_unlock(&bhl);
+}
+
+void hv_start_secondary(int cpu, void *entry, u64 regs[4])
+{
+ printf("HV: Initializing secondary %d\n", cpu);
+ iodev_console_flush();
+
+ mmu_init_secondary(cpu);
+ iodev_console_flush();
+ smp_call4(cpu, hv_init_secondary, (u64)&hv_secondary_info, 0, 0, 0);
+ smp_wait(cpu);
+ iodev_console_flush();
+
+ printf("HV: Entering guest secondary %d at %p\n", cpu, entry);
+ hv_started_cpus[cpu] = true;
+ __atomic_add_fetch(&hv_cpus_in_guest, 1, __ATOMIC_ACQUIRE);
+
+ iodev_console_flush();
+ smp_call4(cpu, hv_enter_secondary, (u64)entry, (u64)regs, 0, 0);
+}
+
+void hv_rendezvous(void)
+{
+ if (!__atomic_load_n(&hv_cpus_in_guest, __ATOMIC_ACQUIRE))
+ return;
+
+ /* IPI all CPUs. This might result in spurious IPIs to the guest... */
+ for (int i = 0; i < MAX_CPUS; i++) {
+ if (i != smp_id() && hv_started_cpus[i]) {
+ smp_send_ipi(i);
+ }
+ }
+ while (__atomic_load_n(&hv_cpus_in_guest, __ATOMIC_ACQUIRE))
+ ;
+}
+
+bool hv_switch_cpu(int cpu)
+{
+ if (cpu >= MAX_CPUS || cpu < 0 || !hv_started_cpus[cpu]) {
+ printf("HV: CPU #%d is inactive or invalid\n", cpu);
+ return false;
+ }
+ printf("HV: switching to CPU #%d\n", cpu);
+ hv_want_cpu = cpu;
+ hv_rendezvous();
+ return true;
+}
+
+void hv_pin_cpu(int cpu)
+{
+ hv_pinned_cpu = cpu;
+}
+
+void hv_write_hcr(u64 val)
+{
+ if (gxf_enabled() && !in_gl12())
+ gl2_call(hv_write_hcr, val, 0, 0, 0);
+ else
+ msr(HCR_EL2, val);
+}
+
+u64 hv_get_spsr(void)
+{
+ if (in_gl12())
+ return mrs(SYS_IMP_APL_SPSR_GL1);
+ else
+ return mrs(SPSR_EL2);
+}
+
+void hv_set_spsr(u64 val)
+{
+ if (in_gl12())
+ return msr(SYS_IMP_APL_SPSR_GL1, val);
+ else
+ return msr(SPSR_EL2, val);
+}
+
+u64 hv_get_esr(void)
+{
+ if (in_gl12())
+ return mrs(SYS_IMP_APL_ESR_GL1);
+ else
+ return mrs(ESR_EL2);
+}
+
+u64 hv_get_far(void)
+{
+ if (in_gl12())
+ return mrs(SYS_IMP_APL_FAR_GL1);
+ else
+ return mrs(FAR_EL2);
+}
+
+u64 hv_get_afsr1(void)
+{
+ if (in_gl12())
+ return mrs(SYS_IMP_APL_AFSR1_GL1);
+ else
+ return mrs(AFSR1_EL2);
+}
+
+u64 hv_get_elr(void)
+{
+ if (in_gl12())
+ return mrs(SYS_IMP_APL_ELR_GL1);
+ else
+ return mrs(ELR_EL2);
+}
+
+void hv_set_elr(u64 val)
+{
+ if (in_gl12())
+ return msr(SYS_IMP_APL_ELR_GL1, val);
+ else
+ return msr(ELR_EL2, val);
+}
+
+void hv_arm_tick(void)
+{
+ msr(CNTP_TVAL_EL0, hv_tick_interval);
+ msr(CNTP_CTL_EL0, CNTx_CTL_ENABLE);
+}
+
+void hv_maybe_exit(void)
+{
+ if (hv_should_exit) {
+ hv_exit_guest();
+ }
+}
+
+void hv_tick(struct exc_info *ctx)
+{
+ hv_wdt_pet();
+ iodev_handle_events(uartproxy_iodev);
+ if (iodev_can_read(uartproxy_iodev)) {
+ if (hv_pinned_cpu == -1 || hv_pinned_cpu == smp_id())
+ hv_exc_proxy(ctx, START_HV, HV_USER_INTERRUPT, NULL);
+ }
+ hv_vuart_poll();
+}
diff --git a/tools/src/hv.h b/tools/src/hv.h
new file mode 100644
index 0000000..c91a444
--- /dev/null
+++ b/tools/src/hv.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef HV_H
+#define HV_H
+
+#include "exception.h"
+#include "iodev.h"
+#include "types.h"
+#include "uartproxy.h"
+
+typedef bool(hv_hook_t)(struct exc_info *ctx, u64 addr, u64 *val, bool write, int width);
+
+#define MMIO_EVT_ATTR GENMASK(31, 24)
+#define MMIO_EVT_CPU GENMASK(23, 16)
+#define MMIO_EVT_SH GENMASK(15, 14)
+#define MMIO_EVT_MULTI BIT(6)
+#define MMIO_EVT_WRITE BIT(5)
+#define MMIO_EVT_WIDTH GENMASK(4, 0)
+
+struct hv_evt_mmiotrace {
+ u32 flags;
+ u32 reserved;
+ u64 pc;
+ u64 addr;
+ u64 data;
+};
+
+struct hv_evt_irqtrace {
+ u32 flags;
+ u16 type;
+ u16 num;
+};
+
+#define HV_MAX_RW_SIZE 64
+#define HV_MAX_RW_WORDS (HV_MAX_RW_SIZE >> 3)
+
+struct hv_vm_proxy_hook_data {
+ u32 flags;
+ u32 id;
+ u64 addr;
+ u64 data[HV_MAX_RW_WORDS];
+};
+
+typedef enum _hv_entry_type {
+ HV_HOOK_VM = 1,
+ HV_VTIMER,
+ HV_USER_INTERRUPT,
+ HV_WDT_BARK,
+ HV_CPU_SWITCH,
+ HV_VIRTIO,
+} hv_entry_type;
+
+/* VM */
+void hv_pt_init(void);
+int hv_map(u64 from, u64 to, u64 size, u64 incr);
+int hv_unmap(u64 from, u64 size);
+int hv_map_hw(u64 from, u64 to, u64 size);
+int hv_map_sw(u64 from, u64 to, u64 size);
+int hv_map_hook(u64 from, hv_hook_t *hook, u64 size);
+u64 hv_translate(u64 addr, bool s1only, bool w, u64 *par_out);
+u64 hv_pt_walk(u64 addr);
+bool hv_handle_dabort(struct exc_info *ctx);
+bool hv_pa_write(struct exc_info *ctx, u64 addr, u64 *val, int width);
+bool hv_pa_read(struct exc_info *ctx, u64 addr, u64 *val, int width);
+bool hv_pa_rw(struct exc_info *ctx, u64 addr, u64 *val, bool write, int width);
+
+/* AIC events through tracing the MMIO event address */
+bool hv_trace_irq(u32 type, u32 num, u32 count, u32 flags);
+
+/* Virtual peripherals */
+void hv_vuart_poll(void);
+void hv_map_vuart(u64 base, int irq, iodev_id_t iodev);
+struct virtio_conf;
+void hv_map_virtio(u64 base, struct virtio_conf *conf);
+void virtio_put_buffer(u64 base, int qu, u32 id, u32 len);
+
+/* Exceptions */
+void hv_exc_proxy(struct exc_info *ctx, uartproxy_boot_reason_t reason, u32 type, void *extra);
+void hv_set_time_stealing(bool enabled, bool reset);
+
+/* WDT */
+void hv_wdt_pet(void);
+void hv_wdt_suspend(void);
+void hv_wdt_resume(void);
+void hv_wdt_init(void);
+void hv_wdt_start(int cpu);
+void hv_wdt_stop(void);
+void hv_wdt_breadcrumb(char c);
+
+/* Utilities */
+void hv_write_hcr(u64 val);
+u64 hv_get_spsr(void);
+void hv_set_spsr(u64 val);
+u64 hv_get_esr(void);
+u64 hv_get_far(void);
+u64 hv_get_elr(void);
+u64 hv_get_afsr1(void);
+void hv_set_elr(u64 val);
+
+/* HV main */
+void hv_init(void);
+void hv_start(void *entry, u64 regs[4]);
+void hv_start_secondary(int cpu, void *entry, u64 regs[4]);
+void hv_rendezvous(void);
+bool hv_switch_cpu(int cpu);
+void hv_pin_cpu(int cpu);
+void hv_arm_tick(void);
+void hv_rearm(void);
+void hv_maybe_exit(void);
+void hv_tick(struct exc_info *ctx);
+
+#endif
diff --git a/tools/src/hv_aic.c b/tools/src/hv_aic.c
new file mode 100644
index 0000000..cc5406a
--- /dev/null
+++ b/tools/src/hv_aic.c
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "adt.h"
+#include "aic.h"
+#include "aic_regs.h"
+#include "hv.h"
+#include "uartproxy.h"
+#include "utils.h"
+
+#define IRQTRACE_IRQ BIT(0)
+
+static u32 trace_hw_num[AIC_MAX_DIES][AIC_MAX_HW_NUM / 32];
+
+static void emit_irqtrace(u16 die, u16 type, u16 num)
+{
+ struct hv_evt_irqtrace evt = {
+ .flags = IRQTRACE_IRQ,
+ .type = type,
+ .num = die * aic->max_irq + num,
+ };
+
+ hv_wdt_suspend();
+ uartproxy_send_event(EVT_IRQTRACE, &evt, sizeof(evt));
+ hv_wdt_resume();
+}
+
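+// MMIO hook on the AIC register range: every access is forwarded to hardware first,
+// then 32-bit reads of the event register are snooped to emit IRQ trace events.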
+static bool trace_aic_event(struct exc_info *ctx, u64 addr, u64 *val, bool write, int width)
+{
+ if (!hv_pa_rw(ctx, addr, val, write, width))
+ return false;
+
+ if (addr != (aic->base + aic->regs.event) || write || width != 2) {
+ return true;
+ }
+
+ u16 die = FIELD_GET(AIC_EVENT_DIE, *val);
+ u16 type = FIELD_GET(AIC_EVENT_TYPE, *val);
+ u16 num = FIELD_GET(AIC_EVENT_NUM, *val);
+
+ if (die >= AIC_MAX_DIES)
+ return true;
+
+ switch (type) {
+ case AIC_EVENT_TYPE_HW:
+ if (trace_hw_num[die][num / 32] & BIT(num & 31)) {
+ emit_irqtrace(die, type, num);
+ }
+ break;
+ default:
+ // ignore
+ break;
+ }
+
+ return true;
+}
+
+bool hv_trace_irq(u32 type, u32 num, u32 count, u32 flags)
+{
+ dprintf("HV: hv_trace_irq type: %u start: %u count: %u flags: 0x%x\n", type, num, count, flags);
+ if (type == AIC_EVENT_TYPE_HW) {
+ u32 die = num / aic->max_irq;
+ num %= aic->max_irq;
+ if (die >= AIC_MAX_DIES || num >= AIC_MAX_HW_NUM || count > AIC_MAX_HW_NUM - num) {
+ printf("HV: invalid IRQ range: (%u, %u) for die %u\n", num, num + count, die);
+ return false;
+ }
+ for (u32 n = num; n < num + count; n++) {
+ switch (flags) {
+ case IRQTRACE_IRQ:
+ trace_hw_num[die][n / 32] |= BIT(n & 31);
+ break;
+ default:
+ trace_hw_num[die][n / 32] &= ~(BIT(n & 31));
+ break;
+ }
+ }
+ } else {
+ printf("HV: not handling AIC event type: 0x%02x num: %u\n", type, num);
+ return false;
+ }
+
+ if (!aic) {
+ printf("HV: AIC not initialized\n");
+ return false;
+ }
+
+ static bool hooked = false;
+
+ if (aic && !hooked) {
+ hv_map_hook(aic->base, trace_aic_event, aic->regs.reg_size);
+ hooked = true;
+ }
+
+ return true;
+}
diff --git a/tools/src/hv_asm.S b/tools/src/hv_asm.S
new file mode 100644
index 0000000..634eb09
--- /dev/null
+++ b/tools/src/hv_asm.S
@@ -0,0 +1,196 @@
+/* spDx-License-Identifier: MIT */
+
+#include "exception.h"
+
+.align 11
+.globl _hv_vectors_start
+_hv_vectors_start:
+
+ /* EL2 with SP_EL0 */
+ mov x9, '0'
+ b cpu_reset
+ .align 7
+ mov x9, '1'
+ b exc_unk
+ .align 7
+ mov x9, '2'
+ b exc_unk
+ .align 7
+ mov x9, '3'
+ b exc_unk
+ .align 7
+
+ /* EL2 with SP_EL2 */
+ b _v_sp0_sync
+ .align 7
+ b _v_sp0_irq
+ .align 7
+ b _v_sp0_fiq
+ .align 7
+ b _v_sp0_serr
+ .align 7
+
+ /* EL1/0 64-bit */
+ b _v_hv_sync
+ .align 7
+ b _v_hv_irq
+ .align 7
+ b _v_hv_fiq
+ .align 7
+ b _v_hv_serr
+ .align 7
+
+ /* EL1/0 32-bit */
+ mov x9, 'p'
+ b exc_unk
+ .align 7
+ mov x9, 'q'
+ b exc_unk
+ .align 7
+ mov x9, 'r'
+ b exc_unk
+ .align 7
+ mov x9, 's'
+ b exc_unk
+ .align 7
+
+.globl _hv_entry
+.type _hv_entry, @function
+_hv_entry:
+ stp x28, x29, [sp, #-16]!
+ stp x26, x27, [sp, #-16]!
+ stp x24, x25, [sp, #-16]!
+ stp x22, x23, [sp, #-16]!
+ stp x20, x21, [sp, #-16]!
+ stp x18, x19, [sp, #-16]!
+ stp x16, x17, [sp, #-16]!
+ stp x14, x15, [sp, #-16]!
+ stp x12, x13, [sp, #-16]!
+ stp x10, x11, [sp, #-16]!
+ stp x8, x9, [sp, #-16]!
+ stp x6, x7, [sp, #-16]!
+ stp x4, x5, [sp, #-16]!
+ stp x2, x3, [sp, #-16]!
+ stp x0, x1, [sp, #-16]!
+
+ dsb sy
+ isb
+
+ mov x0, sp
+ ret
+
+.globl _hv_return
+.type _hv_return, @function
+_hv_return:
+ ldp x0, x1, [sp], #16
+ ldp x2, x3, [sp], #16
+ ldp x4, x5, [sp], #16
+ ldp x6, x7, [sp], #16
+ ldp x8, x9, [sp], #16
+ ldp x10, x11, [sp], #16
+ ldp x12, x13, [sp], #16
+ ldp x14, x15, [sp], #16
+ ldp x16, x17, [sp], #16
+ ldp x18, x19, [sp], #16
+ ldp x20, x21, [sp], #16
+ ldp x22, x23, [sp], #16
+ ldp x24, x25, [sp], #16
+ ldp x26, x27, [sp], #16
+ ldp x28, x29, [sp], #16
+ ldr x30, [sp], #16
+
+ add sp, sp, #(SIZEOF_EXC_INFO - 32 * 8)
+
+ eret
+
+.globl _v_hv_sync
+.type _v_hv_sync, @function
+_v_hv_sync:
+ msr pan, #0
+ sub sp, sp, #(SIZEOF_EXC_INFO - 32 * 8)
+ str x30, [sp, #-16]!
+ bl _hv_entry
+ bl hv_exc_sync
+
+ b _hv_return
+
+.globl _v_hv_irq
+.type _v_hv_irq, @function
+_v_hv_irq:
+ msr pan, #0
+ sub sp, sp, #(SIZEOF_EXC_INFO - 32 * 8)
+ str x30, [sp, #-16]!
+ bl _hv_entry
+ bl hv_exc_irq
+
+ b _hv_return
+
+.globl _v_hv_fiq
+.type _v_hv_fiq, @function
+_v_hv_fiq:
+ msr pan, #0
+ sub sp, sp, #(SIZEOF_EXC_INFO - 32 * 8)
+ str x30, [sp, #-16]!
+ bl _hv_entry
+ bl hv_exc_fiq
+
+ b _hv_return
+
+.globl _v_hv_serr
+.type _v_hv_serr, @function
+_v_hv_serr:
+ msr pan, #0
+ sub sp, sp, #(SIZEOF_EXC_INFO - 32 * 8)
+ str x30, [sp, #-16]!
+ bl _hv_entry
+ bl hv_exc_serr
+
+ b _hv_return
+
+.extern hv_saved_sp
+
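+// hv_saved_sp[cpu] (indexed via tpidr_el2) holds the host stack pointer so that
+// hv_exit_guest can unwind back onto the host stack.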
+.globl hv_enter_guest
+.type hv_enter_guest, @function
+hv_enter_guest:
+ stp x29, x30, [sp, #-16]!
+ stp x27, x28, [sp, #-16]!
+ stp x25, x26, [sp, #-16]!
+ stp x23, x24, [sp, #-16]!
+ stp x21, x22, [sp, #-16]!
+ stp x19, x20, [sp, #-16]!
+ str x18, [sp, #-16]!
+
+ mrs x7, tpidr_el2
+ ldr x6, =hv_saved_sp
+ mov x5, sp
+ str x5, [x6, x7, LSL #3]
+
+ mrs x5, daif
+ mov x6, #5
+ orr x5, x5, x6 // EL1h
+ msr spsr_el2, x5
+
+ msr elr_el2, x4
+ mov x5, #0
+ msr sp_el0, x5
+ msr sp_el1, x5
+
+ eret
+
+.globl hv_exit_guest
+.type hv_exit_guest, @function
+hv_exit_guest:
+ mrs x7, tpidr_el2
+ ldr x6, =hv_saved_sp
+ ldr x5, [x6, x7, LSL #3]
+ mov sp, x5
+
+ ldr x18, [sp], #16
+ ldp x19, x20, [sp], #16
+ ldp x21, x22, [sp], #16
+ ldp x23, x24, [sp], #16
+ ldp x25, x26, [sp], #16
+ ldp x27, x28, [sp], #16
+ ldp x29, x30, [sp], #16
+
+ ret
diff --git a/tools/src/hv_exc.c b/tools/src/hv_exc.c
new file mode 100644
index 0000000..fc750c1
--- /dev/null
+++ b/tools/src/hv_exc.c
@@ -0,0 +1,515 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "hv.h"
+#include "assert.h"
+#include "cpu_regs.h"
+#include "exception.h"
+#include "smp.h"
+#include "string.h"
+#include "uart.h"
+#include "uartproxy.h"
+
+#define TIME_ACCOUNTING
+
+extern spinlock_t bhl;
+
+#define _SYSREG_ISS(_1, _2, op0, op1, CRn, CRm, op2) \
+ (((op0) << ESR_ISS_MSR_OP0_SHIFT) | ((op1) << ESR_ISS_MSR_OP1_SHIFT) | \
+ ((CRn) << ESR_ISS_MSR_CRn_SHIFT) | ((CRm) << ESR_ISS_MSR_CRm_SHIFT) | \
+ ((op2) << ESR_ISS_MSR_OP2_SHIFT))
+#define SYSREG_ISS(...) _SYSREG_ISS(__VA_ARGS__)
+
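+// TPIDR_EL2 holds the CPU index, so it can be used directly for per-CPU storage.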
+#define PERCPU(x) pcpu[mrs(TPIDR_EL2)].x
+
+struct hv_pcpu_data {
+ u32 ipi_queued;
+ u32 ipi_pending;
+ u32 pmc_pending;
+ u64 pmc_irq_mode;
+ u64 exc_entry_pmcr0_cnt;
+} ALIGNED(64);
+
+struct hv_pcpu_data pcpu[MAX_CPUS];
+
+void hv_exit_guest(void) __attribute__((noreturn));
+
+static u64 stolen_time = 0;
+static u64 exc_entry_time;
+
+extern u32 hv_cpus_in_guest;
+extern int hv_pinned_cpu;
+extern int hv_want_cpu;
+
+static bool time_stealing = true;
+
+static void _hv_exc_proxy(struct exc_info *ctx, uartproxy_boot_reason_t reason, u32 type,
+ void *extra)
+{
+ int from_el = FIELD_GET(SPSR_M, ctx->spsr) >> 2;
+
+ hv_wdt_breadcrumb('P');
+
+ /*
+ * Get all the CPUs into the HV before running the proxy, to make sure they all exit to
+ * the guest with a consistent time offset.
+ */
+ if (time_stealing)
+ hv_rendezvous();
+
+ u64 entry_time = mrs(CNTPCT_EL0);
+
+ ctx->elr_phys = hv_translate(ctx->elr, false, false, NULL);
+ ctx->far_phys = hv_translate(ctx->far, false, false, NULL);
+ ctx->sp_phys = hv_translate(from_el == 0 ? ctx->sp[0] : ctx->sp[1], false, false, NULL);
+ ctx->extra = extra;
+
+ struct uartproxy_msg_start start = {
+ .reason = reason,
+ .code = type,
+ .info = ctx,
+ };
+
+ hv_wdt_suspend();
+ int ret = uartproxy_run(&start);
+ hv_wdt_resume();
+
+ switch (ret) {
+ case EXC_RET_HANDLED:
+ hv_wdt_breadcrumb('p');
+ if (time_stealing) {
+ u64 lost = mrs(CNTPCT_EL0) - entry_time;
+ stolen_time += lost;
+ }
+ break;
+ case EXC_EXIT_GUEST:
+ hv_rendezvous();
+ spin_unlock(&bhl);
+ hv_exit_guest(); // does not return
+ default:
+ printf("Guest exception not handled, rebooting.\n");
+ print_regs(ctx->regs, 0);
+ flush_and_reboot(); // does not return
+ }
+}
+
+static void hv_maybe_switch_cpu(struct exc_info *ctx, uartproxy_boot_reason_t reason, u32 type,
+ void *extra)
+{
+ while (hv_want_cpu != -1) {
+ if (hv_want_cpu == smp_id()) {
+ hv_want_cpu = -1;
+ _hv_exc_proxy(ctx, reason, type, extra);
+ } else {
+ // Unlock the HV so the target CPU can get into the proxy
+ spin_unlock(&bhl);
+ while (hv_want_cpu != -1)
+ sysop("dmb sy");
+ spin_lock(&bhl);
+ }
+ }
+}
+
+void hv_exc_proxy(struct exc_info *ctx, uartproxy_boot_reason_t reason, u32 type, void *extra)
+{
+ /*
+ * Wait while another CPU is pinned or being switched to.
+ * If a CPU switch is requested, handle it before actually handling the
+ * exception. We still tell the host the real reason code, though.
+ */
+ while ((hv_pinned_cpu != -1 && hv_pinned_cpu != smp_id()) || hv_want_cpu != -1) {
+ if (hv_want_cpu == smp_id()) {
+ hv_want_cpu = -1;
+ _hv_exc_proxy(ctx, reason, type, extra);
+ } else {
+ // Unlock the HV so the target CPU can get into the proxy
+ spin_unlock(&bhl);
+ while ((hv_pinned_cpu != -1 && hv_pinned_cpu != smp_id()) || hv_want_cpu != -1)
+ sysop("dmb sy");
+ spin_lock(&bhl);
+ }
+ }
+
+ /* Handle the actual exception */
+ _hv_exc_proxy(ctx, reason, type, extra);
+
+ /*
+ * If as part of handling this exception we want to switch CPUs, handle it without returning
+ * to the guest.
+ */
+ hv_maybe_switch_cpu(ctx, reason, type, extra);
+}
+
+void hv_set_time_stealing(bool enabled, bool reset)
+{
+ time_stealing = enabled;
+ if (reset)
+ stolen_time = 0;
+}
+
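+/*
+ * Recompute the guest's virtual FIQ line: assert vFIQ (HCR_VF) whenever a guest
+ * timer has fired or a shadowed IPI/PMC interrupt is pending, and gate the
+ * hardware timer FIQs off while they are pending so they don't fire again
+ * before the guest gets to handle them.
+ */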
+static void hv_update_fiq(void)
+{
+ u64 hcr = mrs(HCR_EL2);
+ bool fiq_pending = false;
+
+ if (mrs(CNTP_CTL_EL02) == (CNTx_CTL_ISTATUS | CNTx_CTL_ENABLE)) {
+ fiq_pending = true;
+ reg_clr(SYS_IMP_APL_VM_TMR_FIQ_ENA_EL2, VM_TMR_FIQ_ENA_ENA_P);
+ } else {
+ reg_set(SYS_IMP_APL_VM_TMR_FIQ_ENA_EL2, VM_TMR_FIQ_ENA_ENA_P);
+ }
+
+ if (mrs(CNTV_CTL_EL02) == (CNTx_CTL_ISTATUS | CNTx_CTL_ENABLE)) {
+ fiq_pending = true;
+ reg_clr(SYS_IMP_APL_VM_TMR_FIQ_ENA_EL2, VM_TMR_FIQ_ENA_ENA_V);
+ } else {
+ reg_set(SYS_IMP_APL_VM_TMR_FIQ_ENA_EL2, VM_TMR_FIQ_ENA_ENA_V);
+ }
+
+ fiq_pending |= PERCPU(ipi_pending) || PERCPU(pmc_pending);
+
+ sysop("isb");
+
+ if ((hcr & HCR_VF) && !fiq_pending) {
+ hv_write_hcr(hcr & ~HCR_VF);
+ } else if (!(hcr & HCR_VF) && fiq_pending) {
+ hv_write_hcr(hcr | HCR_VF);
+ }
+}
+
+#define SYSREG_MAP(sr, to) \
+ case SYSREG_ISS(sr): \
+ if (is_read) \
+ regs[rt] = _mrs(sr_tkn(to)); \
+ else \
+ _msr(sr_tkn(to), regs[rt]); \
+ return true;
+
+#define SYSREG_PASS(sr) \
+ case SYSREG_ISS(sr): \
+ if (is_read) \
+ regs[rt] = _mrs(sr_tkn(sr)); \
+ else \
+ _msr(sr_tkn(sr), regs[rt]); \
+ return true;
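+
+/*
+ * These macros expand to case arms in hv_handle_msr()'s switch on the trapped
+ * sysreg: SYSREG_PASS executes the access on the real register unchanged, while
+ * SYSREG_MAP redirects it to a different register (usually the _EL12 alias).
+ */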
+
+static bool hv_handle_msr(struct exc_info *ctx, u64 iss)
+{
+ u64 reg = iss & (ESR_ISS_MSR_OP0 | ESR_ISS_MSR_OP2 | ESR_ISS_MSR_OP1 | ESR_ISS_MSR_CRn |
+ ESR_ISS_MSR_CRm);
+ u64 rt = FIELD_GET(ESR_ISS_MSR_Rt, iss);
+ bool is_read = iss & ESR_ISS_MSR_DIR;
+
+ u64 *regs = ctx->regs;
+
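+ // Rt == 31 encodes XZR; keep the slot zeroed so reads of regs[31] yield 0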
+ regs[31] = 0;
+
+ switch (reg) {
+ /* Some kind of timer */
+ SYSREG_PASS(sys_reg(3, 7, 15, 1, 1));
+ SYSREG_PASS(sys_reg(3, 7, 15, 3, 1));
+ /* Spammy stuff seen on t600x p-cores */
+ SYSREG_PASS(sys_reg(3, 2, 15, 12, 0));
+ SYSREG_PASS(sys_reg(3, 2, 15, 13, 0));
+ SYSREG_PASS(sys_reg(3, 2, 15, 14, 0));
+ SYSREG_PASS(sys_reg(3, 2, 15, 15, 0));
+ SYSREG_PASS(sys_reg(3, 1, 15, 7, 0));
+ SYSREG_PASS(sys_reg(3, 1, 15, 8, 0));
+ SYSREG_PASS(sys_reg(3, 1, 15, 9, 0));
+ SYSREG_PASS(sys_reg(3, 1, 15, 10, 0));
+ /* Noisy traps */
+ SYSREG_MAP(SYS_ACTLR_EL1, SYS_IMP_APL_ACTLR_EL12)
+ SYSREG_PASS(SYS_IMP_APL_HID4)
+ SYSREG_PASS(SYS_IMP_APL_EHID4)
+ /* We don't normally trap these, but if we do, they're noisy */
+ SYSREG_PASS(SYS_IMP_APL_GXF_STATUS_EL1)
+ SYSREG_PASS(SYS_IMP_APL_CNTVCT_ALIAS_EL0)
+ SYSREG_PASS(SYS_IMP_APL_TPIDR_GL1)
+ SYSREG_MAP(SYS_IMP_APL_SPSR_GL1, SYS_IMP_APL_SPSR_GL12)
+ SYSREG_MAP(SYS_IMP_APL_ASPSR_GL1, SYS_IMP_APL_ASPSR_GL12)
+ SYSREG_MAP(SYS_IMP_APL_ELR_GL1, SYS_IMP_APL_ELR_GL12)
+ SYSREG_MAP(SYS_IMP_APL_ESR_GL1, SYS_IMP_APL_ESR_GL12)
+ SYSREG_MAP(SYS_IMP_APL_SPRR_PERM_EL1, SYS_IMP_APL_SPRR_PERM_EL12)
+ SYSREG_MAP(SYS_IMP_APL_APCTL_EL1, SYS_IMP_APL_APCTL_EL12)
+ SYSREG_MAP(SYS_IMP_APL_AMX_CTL_EL1, SYS_IMP_APL_AMX_CTL_EL12)
+ /* FIXME: Might be wrong */
+ SYSREG_PASS(sys_reg(3, 4, 15, 1, 3))
+ /* pass through PMU handling */
+ SYSREG_PASS(SYS_IMP_APL_PMCR1)
+ SYSREG_PASS(SYS_IMP_APL_PMCR2)
+ SYSREG_PASS(SYS_IMP_APL_PMCR3)
+ SYSREG_PASS(SYS_IMP_APL_PMCR4)
+ SYSREG_PASS(SYS_IMP_APL_PMESR0)
+ SYSREG_PASS(SYS_IMP_APL_PMESR1)
+ SYSREG_PASS(SYS_IMP_APL_PMSR)
+#ifndef DEBUG_PMU_IRQ
+ SYSREG_PASS(SYS_IMP_APL_PMC0)
+#endif
+ SYSREG_PASS(SYS_IMP_APL_PMC1)
+ SYSREG_PASS(SYS_IMP_APL_PMC2)
+ SYSREG_PASS(SYS_IMP_APL_PMC3)
+ SYSREG_PASS(SYS_IMP_APL_PMC4)
+ SYSREG_PASS(SYS_IMP_APL_PMC5)
+ SYSREG_PASS(SYS_IMP_APL_PMC6)
+ SYSREG_PASS(SYS_IMP_APL_PMC7)
+ SYSREG_PASS(SYS_IMP_APL_PMC8)
+ SYSREG_PASS(SYS_IMP_APL_PMC9)
+
+ /* Outer Shareable TLB maintenance instructions */
+ SYSREG_PASS(sys_reg(1, 0, 8, 1, 0)) // TLBI VMALLE1OS
+ SYSREG_PASS(sys_reg(1, 0, 8, 1, 1)) // TLBI VAE1OS
+ SYSREG_PASS(sys_reg(1, 0, 8, 1, 2)) // TLBI ASIDE1OS
+ SYSREG_PASS(sys_reg(1, 0, 8, 5, 1)) // TLBI RVAE1OS
+
+ /*
+ * Handle this one here because m1n1/Linux (will) use it for explicit cpuidle.
+ * We can pass it through; going into deep sleep doesn't break the HV since we
+ * don't do any WFIs that assume otherwise in m1n1. However, don't let macOS
+ * disable WFI ret (when going into systemwide sleep), since that breaks things.
+ */
+ case SYSREG_ISS(SYS_IMP_APL_CYC_OVRD):
+ if (is_read) {
+ regs[rt] = mrs(SYS_IMP_APL_CYC_OVRD);
+ } else {
+ msr(SYS_IMP_APL_CYC_OVRD, regs[rt] & ~CYC_OVRD_DISABLE_WFI_RET);
+ if (regs[rt] & CYC_OVRD_DISABLE_WFI_RET)
+ printf("msr(SYS_IMP_APL_CYC_OVRD, 0x%08lx): Filtered WFI RET disable\n",
+ regs[rt]);
+ }
+ return true;
+ /* clang-format off */
+
+ /* IPI handling */
+ SYSREG_PASS(SYS_IMP_APL_IPI_CR_EL1)
+ /* clang-format on */
+ case SYSREG_ISS(SYS_IMP_APL_IPI_RR_LOCAL_EL1): {
+ assert(!is_read);
+ u64 mpidr = (regs[rt] & 0xff) | (mrs(MPIDR_EL1) & 0xffff00);
+ msr(SYS_IMP_APL_IPI_RR_LOCAL_EL1, regs[rt]);
+ for (int i = 0; i < MAX_CPUS; i++)
+ if (mpidr == smp_get_mpidr(i))
+ pcpu[i].ipi_queued = true;
+ return true;
+ }
+ case SYSREG_ISS(SYS_IMP_APL_IPI_RR_GLOBAL_EL1): {
+ assert(!is_read);
+ u64 mpidr = (regs[rt] & 0xff) | ((regs[rt] & 0xff0000) >> 8);
+ msr(SYS_IMP_APL_IPI_RR_GLOBAL_EL1, regs[rt]);
+ for (int i = 0; i < MAX_CPUS; i++) {
+ if (mpidr == (smp_get_mpidr(i) & 0xffff))
+ pcpu[i].ipi_queued = true;
+ }
+ return true;
+ }
+ case SYSREG_ISS(SYS_IMP_APL_IPI_SR_EL1):
+ if (is_read)
+ regs[rt] = PERCPU(ipi_pending) ? IPI_SR_PENDING : 0;
+ else if (regs[rt] & IPI_SR_PENDING)
+ PERCPU(ipi_pending) = false;
+ return true;
+ /* shadow the interrupt mode and state flag */
+ case SYSREG_ISS(SYS_IMP_APL_PMCR0):
+ if (is_read) {
+ u64 val = (mrs(SYS_IMP_APL_PMCR0) & ~PMCR0_IMODE_MASK) | PERCPU(pmc_irq_mode);
+ regs[rt] =
+ val | (PERCPU(pmc_pending) ? PMCR0_IACT : 0) | PERCPU(exc_entry_pmcr0_cnt);
+ } else {
+ PERCPU(pmc_pending) = !!(regs[rt] & PMCR0_IACT);
+ PERCPU(pmc_irq_mode) = regs[rt] & PMCR0_IMODE_MASK;
+ PERCPU(exc_entry_pmcr0_cnt) = regs[rt] & PMCR0_CNT_MASK;
+ msr(SYS_IMP_APL_PMCR0, regs[rt] & ~PERCPU(exc_entry_pmcr0_cnt));
+ }
+ return true;
+#ifdef DEBUG_PMU_IRQ
+ case SYSREG_ISS(SYS_IMP_APL_PMC0):
+ if (is_read) {
+ regs[rt] = mrs(SYS_IMP_APL_PMC0);
+ } else {
+ msr(SYS_IMP_APL_PMC0, regs[rt]);
+ printf("msr(SYS_IMP_APL_PMC0, 0x%04lx_%08lx)\n", regs[rt] >> 32,
+ regs[rt] & 0xFFFFFFFF);
+ }
+ return true;
+#endif
+ /* M1RACLES reg, handle here due to silly 12.0 "mitigation" */
+ case SYSREG_ISS(sys_reg(3, 5, 15, 10, 1)):
+ if (is_read)
+ regs[rt] = 0;
+ return true;
+ }
+
+ return false;
+}
+
+static void hv_exc_entry(struct exc_info *ctx)
+{
+ ctx->spsr = hv_get_spsr();
+ ctx->elr = hv_get_elr();
+ ctx->esr = hv_get_esr();
+ ctx->far = hv_get_far();
+ ctx->afsr1 = hv_get_afsr1();
+ ctx->sp[0] = mrs(SP_EL0);
+ ctx->sp[1] = mrs(SP_EL1);
+ ctx->sp[2] = (u64)ctx;
+ ctx->cpu_id = smp_id();
+ ctx->mpidr = mrs(MPIDR_EL1);
+
+ sysop("isb");
+
+ // Enable SErrors in the HV, but only if not already pending
+ if (!(mrs(ISR_EL1) & 0x100))
+ sysop("msr daifclr, 4");
+
+ __atomic_sub_fetch(&hv_cpus_in_guest, 1, __ATOMIC_ACQUIRE);
+ spin_lock(&bhl);
+ hv_wdt_breadcrumb('X');
+ exc_entry_time = mrs(CNTPCT_EL0);
+ /* disable PMU counters in the hypervisor */
+ u64 pmcr0 = mrs(SYS_IMP_APL_PMCR0);
+ PERCPU(exc_entry_pmcr0_cnt) = pmcr0 & PMCR0_CNT_MASK;
+ msr(SYS_IMP_APL_PMCR0, pmcr0 & ~PMCR0_CNT_MASK);
+}
+
+static void hv_exc_exit(struct exc_info *ctx)
+{
+ hv_wdt_breadcrumb('x');
+ hv_update_fiq();
+ /* reenable PMU counters */
+ reg_set(SYS_IMP_APL_PMCR0, PERCPU(exc_entry_pmcr0_cnt));
+ msr(CNTVOFF_EL2, stolen_time);
+ spin_unlock(&bhl);
+ __atomic_add_fetch(&hv_cpus_in_guest, 1, __ATOMIC_ACQUIRE);
+
+ hv_set_spsr(ctx->spsr);
+ hv_set_elr(ctx->elr);
+ msr(SP_EL0, ctx->sp[0]);
+ msr(SP_EL1, ctx->sp[1]);
+}
+
+void hv_exc_sync(struct exc_info *ctx)
+{
+ hv_wdt_breadcrumb('S');
+ hv_exc_entry(ctx);
+ bool handled = false;
+ u32 ec = FIELD_GET(ESR_EC, ctx->esr);
+
+ switch (ec) {
+ case ESR_EC_DABORT_LOWER:
+ hv_wdt_breadcrumb('D');
+ handled = hv_handle_dabort(ctx);
+ break;
+ case ESR_EC_MSR:
+ hv_wdt_breadcrumb('M');
+ handled = hv_handle_msr(ctx, FIELD_GET(ESR_ISS, ctx->esr));
+ break;
+ case ESR_EC_IMPDEF:
+ hv_wdt_breadcrumb('A');
+ switch (FIELD_GET(ESR_ISS, ctx->esr)) {
+ case ESR_ISS_IMPDEF_MSR:
+ handled = hv_handle_msr(ctx, ctx->afsr1);
+ break;
+ }
+ break;
+ }
+
+ if (handled) {
+ hv_wdt_breadcrumb('+');
+ ctx->elr += 4;
+ } else {
+ hv_wdt_breadcrumb('-');
+ // VM code can forward a nested SError exception here
+ if (FIELD_GET(ESR_EC, ctx->esr) == ESR_EC_SERROR)
+ hv_exc_proxy(ctx, START_EXCEPTION_LOWER, EXC_SERROR, NULL);
+ else
+ hv_exc_proxy(ctx, START_EXCEPTION_LOWER, EXC_SYNC, NULL);
+ }
+
+ hv_exc_exit(ctx);
+ hv_wdt_breadcrumb('s');
+}
+
+void hv_exc_irq(struct exc_info *ctx)
+{
+ hv_wdt_breadcrumb('I');
+ hv_exc_entry(ctx);
+ hv_exc_proxy(ctx, START_EXCEPTION_LOWER, EXC_IRQ, NULL);
+ hv_exc_exit(ctx);
+ hv_wdt_breadcrumb('i');
+}
+
+void hv_exc_fiq(struct exc_info *ctx)
+{
+ bool tick = false;
+
+ hv_maybe_exit();
+
+ if (mrs(CNTP_CTL_EL0) == (CNTx_CTL_ISTATUS | CNTx_CTL_ENABLE)) {
+ msr(CNTP_CTL_EL0, CNTx_CTL_ISTATUS | CNTx_CTL_IMASK | CNTx_CTL_ENABLE);
+ tick = true;
+ }
+
+ int interruptible_cpu = hv_pinned_cpu;
+ if (interruptible_cpu == -1)
+ interruptible_cpu = 0;
+
+ if (smp_id() != interruptible_cpu && !(mrs(ISR_EL1) & 0x40) && hv_want_cpu == -1) {
+ // Non-interruptible CPU and it was just a timer tick (or spurious), so just update FIQs
+ hv_update_fiq();
+ hv_arm_tick();
+ return;
+ }
+
+ // Slow (single threaded) path
+ hv_wdt_breadcrumb('F');
+ hv_exc_entry(ctx);
+
+ // Only poll for HV events in the interruptible CPU
+ if (tick) {
+ if (smp_id() == interruptible_cpu)
+ hv_tick(ctx);
+ hv_arm_tick();
+ }
+
+ if (mrs(CNTV_CTL_EL0) == (CNTx_CTL_ISTATUS | CNTx_CTL_ENABLE)) {
+ msr(CNTV_CTL_EL0, CNTx_CTL_ISTATUS | CNTx_CTL_IMASK | CNTx_CTL_ENABLE);
+ hv_exc_proxy(ctx, START_HV, HV_VTIMER, NULL);
+ }
+
+ u64 reg = mrs(SYS_IMP_APL_PMCR0);
+ if ((reg & (PMCR0_IMODE_MASK | PMCR0_IACT)) == (PMCR0_IMODE_FIQ | PMCR0_IACT)) {
+#ifdef DEBUG_PMU_IRQ
+ printf("[FIQ] PMC IRQ, masking and delivering to the guest\n");
+#endif
+ reg_clr(SYS_IMP_APL_PMCR0, PMCR0_IACT | PMCR0_IMODE_MASK);
+ PERCPU(pmc_pending) = true;
+ }
+
+ reg = mrs(SYS_IMP_APL_UPMCR0);
+ if ((reg & UPMCR0_IMODE_MASK) == UPMCR0_IMODE_FIQ && (mrs(SYS_IMP_APL_UPMSR) & UPMSR_IACT)) {
+ printf("[FIQ] UPMC IRQ, masking");
+ reg_clr(SYS_IMP_APL_UPMCR0, UPMCR0_IMODE_MASK);
+ hv_exc_proxy(ctx, START_EXCEPTION_LOWER, EXC_FIQ, NULL);
+ }
+
+ if (mrs(SYS_IMP_APL_IPI_SR_EL1) & IPI_SR_PENDING) {
+ if (PERCPU(ipi_queued)) {
+ PERCPU(ipi_pending) = true;
+ PERCPU(ipi_queued) = false;
+ }
+ msr(SYS_IMP_APL_IPI_SR_EL1, IPI_SR_PENDING);
+ sysop("isb");
+ }
+
+ hv_maybe_switch_cpu(ctx, START_HV, HV_CPU_SWITCH, NULL);
+
+ // Handles guest timers
+ hv_exc_exit(ctx);
+ hv_wdt_breadcrumb('f');
+}
+
+void hv_exc_serr(struct exc_info *ctx)
+{
+ hv_wdt_breadcrumb('E');
+ hv_exc_entry(ctx);
+ hv_exc_proxy(ctx, START_EXCEPTION_LOWER, EXC_SERROR, NULL);
+ hv_exc_exit(ctx);
+ hv_wdt_breadcrumb('e');
+}
diff --git a/tools/src/hv_virtio.c b/tools/src/hv_virtio.c
new file mode 100644
index 0000000..abe4582
--- /dev/null
+++ b/tools/src/hv_virtio.c
@@ -0,0 +1,308 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "hv.h"
+#include "aic.h"
+#include "iodev.h"
+#include "malloc.h"
+
+#define MAGIC 0x000
+#define VERSION 0x004
+#define DEVID 0x008
+#define VENDID 0x00c
+#define FEAT_HOST 0x010
+#define FEAT_HOST_SEL 0x014
+#define FEAT_GUEST 0x020
+#define FEAT_GUEST_SEL 0x024
+
+#define QSEL 0x030
+#define QMAX 0x034
+#define QSIZE 0x038
+#define QREADY 0x044
+#define QNOTIFY 0x050
+
+#define QDESC 0x080
+#define QGUESTAREA 0x090
+#define QHOSTAREA 0x0a0
+
+#define IRQ_STATUS 0x060
+#define USED_BUFFER BIT(0)
+#define CFG_CHANGE BIT(1)
+#define IRQ_ACK 0x064
+#define DEV_STATUS 0x070
+
+#define DESC_NEXT BIT(0)
+#define DESC_WRITE BIT(1)
+
+struct availring {
+ u16 flags;
+ u16 idx;
+ u16 ring[];
+};
+
+struct usedring {
+ u16 flags;
+ u16 idx;
+ struct {
+ u32 id;
+ u32 len;
+ } ring[];
+};
+
+struct desc {
+ u64 addr;
+ u32 len;
+ u16 flags;
+ u16 id;
+};
+
+struct virtio_q {
+ struct virtio_dev *host;
+ int idx;
+ u32 max;
+ u32 size;
+ bool ready;
+ struct desc *desc;
+
+ u16 avail_seen;
+ struct availring *avail;
+ struct usedring *used;
+
+ u64 area_regs[(QHOSTAREA + 8 - QDESC) / 4];
+};
+
+struct virtio_conf {
+ s32 irq;
+ u32 devid;
+ u64 feats;
+ u32 num_qus;
+ void *config;
+ u64 config_len;
+ u8 verbose;
+} PACKED;
+
+struct virtio_dev {
+ struct virtio_dev *next;
+ u64 base;
+ int irq;
+ int num_qus;
+ u32 devid;
+ u64 feats;
+ uint8_t *config;
+ size_t config_len;
+ bool verbose;
+
+ u32 feat_host_sel;
+ u32 status;
+ u32 irqstatus;
+
+ struct virtio_q *currq;
+ struct virtio_q qs[];
+};
+
+static struct virtio_dev *devlist;
+
+static void notify_avail(struct exc_info *ctx, struct virtio_q *q, int idx)
+{
+ struct desc *d = &q->desc[idx];
+ struct {
+ u64 devbase;
+ u16 qu;
+ u16 idx;
+ u32 pad;
+ u64 descbase;
+ } PACKED info = {
+ q->host->base, q->idx, idx, 0, (u64)q->desc,
+ };
+
+ if (q->host->verbose)
+ printf("virtio @ %lx: available %s buffer at %lx, size %x, flags %x\n", q->host->base,
+ (d->flags & DESC_WRITE) ? "device" : "driver", d->addr, d->len, d->flags);
+
+ hv_exc_proxy(ctx, START_HV, HV_VIRTIO, &info);
+}
+
+static void notify_buffers(struct exc_info *ctx, struct virtio_dev *dev, u32 qidx)
+{
+ // Validate the queue index before touching the queue state
+ if (qidx >= (u32)dev->num_qus)
+ return;
+
+ struct virtio_q *q = &dev->qs[qidx];
+ struct availring *avail = q->avail;
+
+ for (; avail->idx != q->avail_seen; q->avail_seen++)
+ notify_avail(ctx, q, avail->ring[q->avail_seen % q->size]);
+}
+
+static struct virtio_dev *dev_by_base(u64 base)
+{
+ struct virtio_dev *dev;
+
+ for (dev = devlist; dev; dev = dev->next)
+ if (dev->base == base)
+ break;
+
+ return dev;
+}
+
+void virtio_put_buffer(u64 base, int qu, u32 id, u32 len)
+{
+ struct virtio_dev *dev = dev_by_base(base);
+ struct virtio_q *q;
+ struct usedring *used;
+
+ if (!dev) {
+ printf("virtio_put_buffer: no device at %lx\n", base);
+ return;
+ }
+
+ q = &dev->qs[qu];
+ used = q->used;
+
+ used->ring[used->idx % q->size].id = id;
+ used->ring[used->idx % q->size].len = len;
+ used->idx++;
+
+ dev->irqstatus |= USED_BUFFER;
+ aic_set_sw(dev->irq, true);
+}
+
+static bool handle_virtio(struct exc_info *ctx, u64 addr, u64 *val, bool write, int width)
+{
+ struct virtio_dev *dev;
+ struct virtio_q *q;
+ UNUSED(ctx);
+ UNUSED(width);
+
+ dev = dev_by_base(addr & ~0xfff);
+ if (!dev)
+ return false;
+
+ addr &= 0xfff;
+
+ if (write) {
+ if (dev->verbose)
+ printf("virtio @ %lx: W 0x%lx <- 0x%lx (%d)\n", dev->base, addr, *val, width);
+
+ switch (addr) {
+ case DEV_STATUS:
+ dev->status = *val;
+ break;
+ case QSEL:
+ if (((int)*val) < dev->num_qus)
+ dev->currq = &dev->qs[*val];
+ else
+ dev->currq = NULL;
+ break;
+ case QNOTIFY:
+ notify_buffers(ctx, dev, *val);
+ break;
+ case FEAT_HOST_SEL:
+ dev->feat_host_sel = *val;
+ break;
+ case IRQ_ACK:
+ dev->irqstatus &= ~(*val);
+ if (!dev->irqstatus)
+ aic_set_sw(dev->irq, false);
+ break;
+ }
+
+ q = dev->currq;
+ if (!q)
+ return true;
+
+ switch (addr) {
+ case QSIZE:
+ q->size = *val;
+ break;
+ case QREADY:
+ q->ready = *val & 1;
+ break;
+ case QDESC ... QHOSTAREA + 4:
+ addr -= QDESC;
+ addr /= 4;
+ q->area_regs[addr] = *val;
+
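+ // The area registers are written as 32-bit lo/hi halves; reassemble the
+ // 64-bit descriptor/driver/device ring pointers from them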
+ q->desc = (void *)(q->area_regs[1] << 32 | q->area_regs[0]);
+ q->avail = (void *)(q->area_regs[5] << 32 | q->area_regs[4]);
+ q->used = (void *)(q->area_regs[9] << 32 | q->area_regs[8]);
+ break;
+ }
+ } else {
+ switch (addr) {
+ case MAGIC:
+ *val = 0x74726976;
+ break;
+ case VERSION:
+ *val = 2;
+ break;
+ case DEVID:
+ *val = dev->devid;
+ break;
+ case DEV_STATUS:
+ *val = dev->status;
+ break;
+ case FEAT_HOST:
+ *val = dev->feats >> (dev->feat_host_sel * 32);
+ break;
+ case IRQ_STATUS:
+ *val = dev->irqstatus;
+ break;
+ case 0x100 ... 0x1000:
+ if (addr - 0x100 < dev->config_len)
+ *val = dev->config[addr - 0x100];
+ else
+ *val = 0;
+ break;
+ default:
+ q = dev->currq;
+ if (!q) {
+ *val = 0;
+ goto rdone;
+ }
+ }
+
+ switch (addr) {
+ case QMAX:
+ *val = q->max;
+ break;
+ case QREADY:
+ *val = q->ready;
+ break;
+ }
+ rdone:
+ if (dev->verbose)
+ printf("virtio @ %lx: R 0x%lx -> 0x%lx (%d)\n", dev->base, addr, *val, width);
+ }
+
+ return true;
+}
+
+void hv_map_virtio(u64 base, struct virtio_conf *conf)
+{
+ struct virtio_dev *dev;
+ int i;
+
+ dev = malloc(sizeof(*dev) + sizeof(struct virtio_q) * conf->num_qus);
+ dev->num_qus = conf->num_qus;
+ dev->base = base;
+ dev->irq = conf->irq;
+ dev->devid = conf->devid;
+ dev->currq = NULL;
+ dev->feats = conf->feats | BIT(32); /* always set: VIRTIO_F_VERSION_1 */
+ dev->config = conf->config;
+ dev->config_len = conf->config_len;
+ dev->verbose = conf->verbose;
+ for (i = 0; i < dev->num_qus; i++) {
+ dev->qs[i].host = dev;
+ dev->qs[i].idx = i;
+ dev->qs[i].max = 256;
+ dev->qs[i].avail_seen = 0;
+ dev->qs[i].ready = 0;
+ }
+
+ dev->next = devlist;
+ devlist = dev;
+
+ hv_map_hook(base, handle_virtio, 0x1000);
+}
diff --git a/tools/src/hv_vm.c b/tools/src/hv_vm.c
new file mode 100644
index 0000000..671ef70
--- /dev/null
+++ b/tools/src/hv_vm.c
@@ -0,0 +1,1278 @@
+/* SPDX-License-Identifier: MIT */
+
+// #define DEBUG
+
+#include "hv.h"
+#include "assert.h"
+#include "cpu_regs.h"
+#include "exception.h"
+#include "iodev.h"
+#include "malloc.h"
+#include "smp.h"
+#include "string.h"
+#include "types.h"
+#include "uartproxy.h"
+#include "utils.h"
+
+#define PAGE_SIZE 0x4000
+#define CACHE_LINE_SIZE 64
+#define CACHE_LINE_LOG2 6
+
+#define PTE_ACCESS BIT(10)
+#define PTE_SH_NS (0b11L << 8)
+#define PTE_S2AP_RW (0b11L << 6)
+#define PTE_MEMATTR_UNCHANGED (0b1111L << 2)
+
+#define PTE_ATTRIBUTES (PTE_ACCESS | PTE_SH_NS | PTE_S2AP_RW | PTE_MEMATTR_UNCHANGED)
+
+#define PTE_LOWER_ATTRIBUTES GENMASK(13, 2)
+
+#define PTE_VALID BIT(0)
+#define PTE_TYPE BIT(1)
+#define PTE_BLOCK 0
+#define PTE_TABLE 1
+#define PTE_PAGE 1
+
+#define VADDR_L4_INDEX_BITS 12
+#define VADDR_L3_INDEX_BITS 11
+#define VADDR_L2_INDEX_BITS 11
+#define VADDR_L1_INDEX_BITS 8
+
+#define VADDR_L4_OFFSET_BITS 2
+#define VADDR_L3_OFFSET_BITS 14
+#define VADDR_L2_OFFSET_BITS 25
+#define VADDR_L1_OFFSET_BITS 36
+
+#define VADDR_L2_ALIGN_MASK GENMASK(VADDR_L2_OFFSET_BITS - 1, VADDR_L3_OFFSET_BITS)
+#define VADDR_L3_ALIGN_MASK GENMASK(VADDR_L3_OFFSET_BITS - 1, VADDR_L4_OFFSET_BITS)
+#define PTE_TARGET_MASK GENMASK(49, VADDR_L3_OFFSET_BITS)
+#define PTE_TARGET_MASK_L4 GENMASK(49, VADDR_L4_OFFSET_BITS)
+
+#define ENTRIES_PER_L1_TABLE BIT(VADDR_L1_INDEX_BITS)
+#define ENTRIES_PER_L2_TABLE BIT(VADDR_L2_INDEX_BITS)
+#define ENTRIES_PER_L3_TABLE BIT(VADDR_L3_INDEX_BITS)
+#define ENTRIES_PER_L4_TABLE BIT(VADDR_L4_INDEX_BITS)
+
+#define SPTE_TRACE_READ BIT(63)
+#define SPTE_TRACE_WRITE BIT(62)
+#define SPTE_TRACE_UNBUF BIT(61)
+#define SPTE_TYPE GENMASK(52, 50)
+#define SPTE_MAP 0
+#define SPTE_HOOK 1
+#define SPTE_PROXY_HOOK_R 2
+#define SPTE_PROXY_HOOK_W 3
+#define SPTE_PROXY_HOOK_RW 4
+
+#define IS_HW(pte) ((pte) && pte & PTE_VALID)
+#define IS_SW(pte) ((pte) && !(pte & PTE_VALID))
+
+#define L1_IS_TABLE(pte) ((pte) && FIELD_GET(PTE_TYPE, pte) == PTE_TABLE)
+
+#define L2_IS_TABLE(pte) ((pte) && FIELD_GET(PTE_TYPE, pte) == PTE_TABLE)
+#define L2_IS_NOT_TABLE(pte) ((pte) && !L2_IS_TABLE(pte))
+#define L2_IS_HW_BLOCK(pte) (IS_HW(pte) && FIELD_GET(PTE_TYPE, pte) == PTE_BLOCK)
+#define L2_IS_SW_BLOCK(pte) \
+ (IS_SW(pte) && FIELD_GET(PTE_TYPE, pte) == PTE_BLOCK && FIELD_GET(SPTE_TYPE, pte) == SPTE_MAP)
+#define L3_IS_TABLE(pte) (IS_SW(pte) && FIELD_GET(PTE_TYPE, pte) == PTE_TABLE)
+#define L3_IS_NOT_TABLE(pte) ((pte) && !L3_IS_TABLE(pte))
+#define L3_IS_HW_BLOCK(pte) (IS_HW(pte) && FIELD_GET(PTE_TYPE, pte) == PTE_PAGE)
+#define L3_IS_SW_BLOCK(pte) \
+ (IS_SW(pte) && FIELD_GET(PTE_TYPE, pte) == PTE_BLOCK && FIELD_GET(SPTE_TYPE, pte) == SPTE_MAP)
+
+uint64_t vaddr_bits;
+
+/*
+ * We use 16KB page tables for stage 2 translation, and a 64GB (36-bit) guest
+ * PA size, which results in the following virtual address space:
+ *
+ * [L2 index] [L3 index] [page offset]
+ * 11 bits 11 bits 14 bits
+ *
+ * 32MB L2 mappings look like this:
+ * [L2 index] [page offset]
+ * 11 bits 25 bits
+ *
+ * We implement sub-page granularity mappings for software MMIO hooks, which behave
+ * as an additional page table level used only by software. This works like this:
+ *
+ * [L2 index] [L3 index] [L4 index] [Word offset]
+ * 11 bits 11 bits 12 bits 2 bits
+ *
+ * Thus, L4 sub-page tables are twice the size.
+ *
+ * We use invalid mappings (PTE_VALID == 0) to represent mmiotrace descriptors, but
+ * otherwise the page table format is the same. The PTE_TYPE bit is weird, as 0 means
+ * block but 1 means both table (at L<3) and page (at L3). For mmiotrace, this is
+ * pushed to L4.
+ *
+ * On SoCs with more than 36-bit PA sizes there is an additional L1 translation level,
+ * but no blocks or software mappings are allowed there. This level can have up to 8 bits
+ * at this time.
+ */
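+
+/*
+ * Example decomposition with a 36-bit guest PA space: IPA 0x12345678 has
+ * L2 index 0x12345678 >> 25 = 9, L3 index (0x12345678 >> 14) & 0x7ff = 0xd1,
+ * and page offset 0x12345678 & 0x3fff = 0x1678.
+ */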
+
+static u64 *hv_Ltop;
+
+void hv_pt_init(void)
+{
+ const uint64_t pa_bits[] = {32, 36, 40, 42, 44, 48, 52};
+ uint64_t pa_range = FIELD_GET(ID_AA64MMFR0_PARange, mrs(ID_AA64MMFR0_EL1));
+
+ vaddr_bits = min(44, pa_bits[pa_range]);
+
+ printf("HV: Initializing for %ld-bit PA range\n", vaddr_bits);
+
+ hv_Ltop = memalign(PAGE_SIZE, sizeof(u64) * ENTRIES_PER_L2_TABLE);
+ memset(hv_Ltop, 0, sizeof(u64) * ENTRIES_PER_L2_TABLE);
+
+ u64 sl0 = vaddr_bits > 36 ? 2 : 1;
+
+ msr(VTCR_EL2, FIELD_PREP(VTCR_PS, pa_range) | // Full PA size
+ FIELD_PREP(VTCR_TG0, 2) | // 16KB page size
+ FIELD_PREP(VTCR_SH0, 3) | // PTWs Inner Sharable
+ FIELD_PREP(VTCR_ORGN0, 1) | // PTWs Cacheable
+ FIELD_PREP(VTCR_IRGN0, 1) | // PTWs Cacheable
+ FIELD_PREP(VTCR_SL0, sl0) | // Start level
+ FIELD_PREP(VTCR_T0SZ, 64 - vaddr_bits)); // Translation region == PA
+
+ msr(VTTBR_EL2, hv_Ltop);
+}
+
+static u64 *hv_pt_get_l2(u64 from)
+{
+ u64 l1idx = from >> VADDR_L1_OFFSET_BITS;
+
+ if (vaddr_bits <= 36) {
+ assert(l1idx == 0);
+ return hv_Ltop;
+ }
+
+ u64 l1d = hv_Ltop[l1idx];
+
+ if (L1_IS_TABLE(l1d))
+ return (u64 *)(l1d & PTE_TARGET_MASK);
+
+ u64 *l2 = (u64 *)memalign(PAGE_SIZE, ENTRIES_PER_L2_TABLE * sizeof(u64));
+ memset64(l2, 0, ENTRIES_PER_L2_TABLE * sizeof(u64));
+
+ l1d = ((u64)l2) | FIELD_PREP(PTE_TYPE, PTE_TABLE) | PTE_VALID;
+ hv_Ltop[l1idx] = l1d;
+ return l2;
+}
+
+static void hv_pt_free_l3(u64 *l3)
+{
+ if (!l3)
+ return;
+
+ for (u64 idx = 0; idx < ENTRIES_PER_L3_TABLE; idx++)
+ if (IS_SW(l3[idx]) && FIELD_GET(PTE_TYPE, l3[idx]) == PTE_TABLE)
+ free((void *)(l3[idx] & PTE_TARGET_MASK));
+ free(l3);
+}
+
+static void hv_pt_map_l2(u64 from, u64 to, u64 size, u64 incr)
+{
+ assert((from & MASK(VADDR_L2_OFFSET_BITS)) == 0);
+ assert(IS_SW(to) || (to & PTE_TARGET_MASK & MASK(VADDR_L2_OFFSET_BITS)) == 0);
+ assert((size & MASK(VADDR_L2_OFFSET_BITS)) == 0);
+
+ to |= FIELD_PREP(PTE_TYPE, PTE_BLOCK);
+
+ for (; size; size -= BIT(VADDR_L2_OFFSET_BITS)) {
+ u64 *l2 = hv_pt_get_l2(from);
+ u64 idx = (from >> VADDR_L2_OFFSET_BITS) & MASK(VADDR_L2_INDEX_BITS);
+
+ if (L2_IS_TABLE(l2[idx]))
+ hv_pt_free_l3((u64 *)(l2[idx] & PTE_TARGET_MASK));
+
+ l2[idx] = to;
+ from += BIT(VADDR_L2_OFFSET_BITS);
+ to += incr * BIT(VADDR_L2_OFFSET_BITS);
+ }
+}
+
+static u64 *hv_pt_get_l3(u64 from)
+{
+ u64 *l2 = hv_pt_get_l2(from);
+ u64 l2idx = (from >> VADDR_L2_OFFSET_BITS) & MASK(VADDR_L2_INDEX_BITS);
+ u64 l2d = l2[l2idx];
+
+ if (L2_IS_TABLE(l2d))
+ return (u64 *)(l2d & PTE_TARGET_MASK);
+
+ u64 *l3 = (u64 *)memalign(PAGE_SIZE, ENTRIES_PER_L3_TABLE * sizeof(u64));
+ if (l2d) {
+ u64 incr = 0;
+ u64 l3d = l2d;
+ if (IS_HW(l2d)) {
+ l3d &= ~PTE_TYPE;
+ l3d |= FIELD_PREP(PTE_TYPE, PTE_PAGE);
+ incr = BIT(VADDR_L3_OFFSET_BITS);
+ } else if (IS_SW(l2d) && FIELD_GET(SPTE_TYPE, l3d) == SPTE_MAP) {
+ incr = BIT(VADDR_L3_OFFSET_BITS);
+ }
+ for (u64 idx = 0; idx < ENTRIES_PER_L3_TABLE; idx++, l3d += incr)
+ l3[idx] = l3d;
+ } else {
+ memset64(l3, 0, ENTRIES_PER_L3_TABLE * sizeof(u64));
+ }
+
+ l2d = ((u64)l3) | FIELD_PREP(PTE_TYPE, PTE_TABLE) | PTE_VALID;
+ l2[l2idx] = l2d;
+ return l3;
+}
+
+static void hv_pt_map_l3(u64 from, u64 to, u64 size, u64 incr)
+{
+ assert((from & MASK(VADDR_L3_OFFSET_BITS)) == 0);
+ assert(IS_SW(to) || (to & PTE_TARGET_MASK & MASK(VADDR_L3_OFFSET_BITS)) == 0);
+ assert((size & MASK(VADDR_L3_OFFSET_BITS)) == 0);
+
+ if (IS_HW(to))
+ to |= FIELD_PREP(PTE_TYPE, PTE_PAGE);
+ else
+ to |= FIELD_PREP(PTE_TYPE, PTE_BLOCK);
+
+ for (; size; size -= BIT(VADDR_L3_OFFSET_BITS)) {
+ u64 idx = (from >> VADDR_L3_OFFSET_BITS) & MASK(VADDR_L3_INDEX_BITS);
+ u64 *l3 = hv_pt_get_l3(from);
+
+ if (L3_IS_TABLE(l3[idx]))
+ free((void *)(l3[idx] & PTE_TARGET_MASK));
+
+ l3[idx] = to;
+ from += BIT(VADDR_L3_OFFSET_BITS);
+ to += incr * BIT(VADDR_L3_OFFSET_BITS);
+ }
+}
+
+static u64 *hv_pt_get_l4(u64 from)
+{
+ u64 *l3 = hv_pt_get_l3(from);
+ u64 l3idx = (from >> VADDR_L3_OFFSET_BITS) & MASK(VADDR_L3_INDEX_BITS);
+ u64 l3d = l3[l3idx];
+
+ if (L3_IS_TABLE(l3d)) {
+ return (u64 *)(l3d & PTE_TARGET_MASK);
+ }
+
+ if (IS_HW(l3d)) {
+ assert(FIELD_GET(PTE_TYPE, l3d) == PTE_PAGE);
+ l3d &= PTE_TARGET_MASK;
+ l3d |= FIELD_PREP(PTE_TYPE, PTE_BLOCK) | FIELD_PREP(SPTE_TYPE, SPTE_MAP);
+ }
+
+ u64 *l4 = (u64 *)memalign(PAGE_SIZE, ENTRIES_PER_L4_TABLE * sizeof(u64));
+ if (l3d) {
+ u64 incr = 0;
+ u64 l4d = l3d;
+ l4d &= ~PTE_TYPE;
+ l4d |= FIELD_PREP(PTE_TYPE, PTE_PAGE);
+ if (FIELD_GET(SPTE_TYPE, l4d) == SPTE_MAP)
+ incr = BIT(VADDR_L4_OFFSET_BITS);
+ for (u64 idx = 0; idx < ENTRIES_PER_L4_TABLE; idx++, l4d += incr)
+ l4[idx] = l4d;
+ } else {
+ memset64(l4, 0, ENTRIES_PER_L4_TABLE * sizeof(u64));
+ }
+
+ l3d = ((u64)l4) | FIELD_PREP(PTE_TYPE, PTE_TABLE);
+ l3[l3idx] = l3d;
+ return l4;
+}
+
+static void hv_pt_map_l4(u64 from, u64 to, u64 size, u64 incr)
+{
+ assert((from & MASK(VADDR_L4_OFFSET_BITS)) == 0);
+ assert((size & MASK(VADDR_L4_OFFSET_BITS)) == 0);
+
+ assert(!IS_HW(to));
+
+ if (IS_SW(to))
+ to |= FIELD_PREP(PTE_TYPE, PTE_PAGE);
+
+ for (; size; size -= BIT(VADDR_L4_OFFSET_BITS)) {
+ u64 idx = (from >> VADDR_L4_OFFSET_BITS) & MASK(VADDR_L4_INDEX_BITS);
+ u64 *l4 = hv_pt_get_l4(from);
+
+ l4[idx] = to;
+ from += BIT(VADDR_L4_OFFSET_BITS);
+ to += incr * BIT(VADDR_L4_OFFSET_BITS);
+ }
+}
+
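+/*
+ * Map a range using the largest granule available at each step: leading L4
+ * entries up to a 16KB boundary, L3 pages up to a 32MB boundary, 32MB L2
+ * blocks, then trailing L3/L4 entries. E.g. a SW mapping of [0x3000, 0x8000)
+ * becomes L4 entries for 0x3000-0x4000 and an L3 page for 0x4000-0x8000.
+ */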
+int hv_map(u64 from, u64 to, u64 size, u64 incr)
+{
+ u64 chunk;
+ bool hw = IS_HW(to);
+
+ if (from & MASK(VADDR_L4_OFFSET_BITS) || size & MASK(VADDR_L4_OFFSET_BITS))
+ return -1;
+
+ if (hw && (from & MASK(VADDR_L3_OFFSET_BITS) || size & MASK(VADDR_L3_OFFSET_BITS))) {
+ printf("HV: cannot use L4 pages with HW mappings (0x%lx -> 0x%lx)\n", from, to);
+ return -1;
+ }
+
+ // L4 mappings to boundary
+ chunk = min(size, ALIGN_UP(from, BIT(VADDR_L3_OFFSET_BITS)) - from);
+ if (chunk) {
+ assert(!hw);
+ hv_pt_map_l4(from, to, chunk, incr);
+ from += chunk;
+ to += incr * chunk;
+ size -= chunk;
+ }
+
+ // L3 mappings to boundary
+ chunk = ALIGN_DOWN(min(size, ALIGN_UP(from, BIT(VADDR_L2_OFFSET_BITS)) - from),
+ BIT(VADDR_L3_OFFSET_BITS));
+ if (chunk) {
+ hv_pt_map_l3(from, to, chunk, incr);
+ from += chunk;
+ to += incr * chunk;
+ size -= chunk;
+ }
+
+ // L2 mappings
+ chunk = ALIGN_DOWN(size, BIT(VADDR_L2_OFFSET_BITS));
+ if (chunk && (!hw || (to & VADDR_L2_ALIGN_MASK) == 0)) {
+ hv_pt_map_l2(from, to, chunk, incr);
+ from += chunk;
+ to += incr * chunk;
+ size -= chunk;
+ }
+
+ // L3 mappings to end
+ chunk = ALIGN_DOWN(size, BIT(VADDR_L3_OFFSET_BITS));
+ if (chunk) {
+ hv_pt_map_l3(from, to, chunk, incr);
+ from += chunk;
+ to += incr * chunk;
+ size -= chunk;
+ }
+
+ // L4 mappings to end
+ if (size) {
+ assert(!hw);
+ hv_pt_map_l4(from, to, size, incr);
+ }
+
+ return 0;
+}
+
+int hv_unmap(u64 from, u64 size)
+{
+ return hv_map(from, 0, size, 0);
+}
+
+int hv_map_hw(u64 from, u64 to, u64 size)
+{
+ return hv_map(from, to | PTE_ATTRIBUTES | PTE_VALID, size, 1);
+}
+
+int hv_map_sw(u64 from, u64 to, u64 size)
+{
+ return hv_map(from, to | FIELD_PREP(SPTE_TYPE, SPTE_MAP), size, 1);
+}
+
+int hv_map_hook(u64 from, hv_hook_t *hook, u64 size)
+{
+ return hv_map(from, ((u64)hook) | FIELD_PREP(SPTE_TYPE, SPTE_HOOK), size, 0);
+}
+
+u64 hv_translate(u64 addr, bool s1, bool w, u64 *par_out)
+{
+ if (!(mrs(SCTLR_EL12) & SCTLR_M))
+ return addr; // MMU off
+
+ u64 el = FIELD_GET(SPSR_M, hv_get_spsr()) >> 2;
+ u64 save = mrs(PAR_EL1);
+
+ if (w) {
+ if (s1) {
+ if (el == 0)
+ asm("at s1e0w, %0" : : "r"(addr));
+ else
+ asm("at s1e1w, %0" : : "r"(addr));
+ } else {
+ if (el == 0)
+ asm("at s12e0w, %0" : : "r"(addr));
+ else
+ asm("at s12e1w, %0" : : "r"(addr));
+ }
+ } else {
+ if (s1) {
+ if (el == 0)
+ asm("at s1e0r, %0" : : "r"(addr));
+ else
+ asm("at s1e1r, %0" : : "r"(addr));
+ } else {
+ if (el == 0)
+ asm("at s12e0r, %0" : : "r"(addr));
+ else
+ asm("at s12e1r, %0" : : "r"(addr));
+ }
+ }
+
+ u64 par = mrs(PAR_EL1);
+ if (par_out)
+ *par_out = par;
+ msr(PAR_EL1, save);
+
+ if (par & PAR_F) {
+ dprintf("hv_translate(0x%lx, %d, %d): fault 0x%lx\n", addr, s1, w, par);
+ return 0; // fault
+ } else {
+ return (par & PAR_PA) | (addr & 0xfff);
+ }
+}
+
+u64 hv_pt_walk(u64 addr)
+{
+ dprintf("hv_pt_walk(0x%lx)\n", addr);
+
+ u64 idx = addr >> VADDR_L1_OFFSET_BITS;
+ u64 *l2;
+ if (vaddr_bits > 36) {
+ assert(idx < ENTRIES_PER_L1_TABLE);
+
+ u64 l1d = hv_Ltop[idx];
+
+ dprintf(" l1d = 0x%lx\n", l2d);
+
+ if (!L1_IS_TABLE(l1d)) {
+ dprintf(" result: 0x%lx\n", l1d);
+ return l1d;
+ }
+ l2 = (u64 *)(l1d & PTE_TARGET_MASK);
+ } else {
+ assert(idx == 0);
+ l2 = hv_Ltop;
+ }
+
+ idx = (addr >> VADDR_L2_OFFSET_BITS) & MASK(VADDR_L2_INDEX_BITS);
+ u64 l2d = l2[idx];
+ dprintf(" l2d = 0x%lx\n", l2d);
+
+ if (!L2_IS_TABLE(l2d)) {
+ if (L2_IS_SW_BLOCK(l2d))
+ l2d += addr & (VADDR_L2_ALIGN_MASK | VADDR_L3_ALIGN_MASK);
+ if (L2_IS_HW_BLOCK(l2d)) {
+ l2d &= ~PTE_LOWER_ATTRIBUTES;
+ l2d |= addr & (VADDR_L2_ALIGN_MASK | VADDR_L3_ALIGN_MASK);
+ }
+
+ dprintf(" result: 0x%lx\n", l2d);
+ return l2d;
+ }
+
+ idx = (addr >> VADDR_L3_OFFSET_BITS) & MASK(VADDR_L3_INDEX_BITS);
+ u64 l3d = ((u64 *)(l2d & PTE_TARGET_MASK))[idx];
+ dprintf(" l3d = 0x%lx\n", l3d);
+
+ if (!L3_IS_TABLE(l3d)) {
+ if (L3_IS_SW_BLOCK(l3d))
+ l3d += addr & VADDR_L3_ALIGN_MASK;
+ if (L3_IS_HW_BLOCK(l3d)) {
+ l3d &= ~PTE_LOWER_ATTRIBUTES;
+ l3d |= addr & VADDR_L3_ALIGN_MASK;
+ }
+ dprintf(" result: 0x%lx\n", l3d);
+ return l3d;
+ }
+
+ idx = (addr >> VADDR_L4_OFFSET_BITS) & MASK(VADDR_L4_INDEX_BITS);
+ dprintf(" l4 idx = 0x%lx\n", idx);
+ u64 l4d = ((u64 *)(l3d & PTE_TARGET_MASK))[idx];
+ dprintf(" l4d = 0x%lx\n", l4d);
+ return l4d;
+}
+
+#define CHECK_RN \
+ if (Rn == 31) \
+ return false
+#define DECODE_OK \
+ if (!val) \
+ return true
+
+#define EXT(n, b) (((s32)(((u32)(n)) << (32 - (b)))) >> (32 - (b)))
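+// Sign-extend the low b bits of n, e.g. EXT(0x1ff, 9) == -1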
+
+union simd_reg {
+ u64 d[2];
+ u32 s[4];
+ u16 h[8];
+ u8 b[16];
+};
+
+static bool emulate_load(struct exc_info *ctx, u32 insn, u64 *val, u64 *width, u64 *vaddr)
+{
+ u64 Rt = insn & 0x1f;
+ u64 Rn = (insn >> 5) & 0x1f;
+ u64 imm12 = EXT((insn >> 10) & 0xfff, 12);
+ u64 imm9 = EXT((insn >> 12) & 0x1ff, 9);
+ u64 imm7 = EXT((insn >> 15) & 0x7f, 7);
+ u64 *regs = ctx->regs;
+
+ union simd_reg simd[32];
+
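+ // Bits [31:30] of the encoding give log2 of the access size in bytes;
+ // the pair/SIMD cases below override this as needed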
+ *width = insn >> 30;
+
+ if (val)
+ dprintf("emulate_load(%p, 0x%08x, 0x%08lx, %ld\n", regs, insn, *val, *width);
+
+ if ((insn & 0x3fe00400) == 0x38400400) {
+ // LDRx (immediate) Pre/Post-index
+ CHECK_RN;
+ DECODE_OK;
+ regs[Rn] += imm9;
+ regs[Rt] = *val;
+ } else if ((insn & 0x3fc00000) == 0x39400000) {
+ // LDRx (immediate) Unsigned offset
+ DECODE_OK;
+ regs[Rt] = *val;
+ } else if ((insn & 0x3fa00400) == 0x38800400) {
+ // LDRSx (immediate) Pre/Post-index
+ CHECK_RN;
+ DECODE_OK;
+ regs[Rn] += imm9;
+ regs[Rt] = (s64)EXT(*val, 8 << *width);
+ if (insn & (1 << 22))
+ regs[Rt] &= 0xffffffff;
+ } else if ((insn & 0x3fa00000) == 0x39800000) {
+ // LDRSx (immediate) Unsigned offset
+ DECODE_OK;
+ regs[Rt] = (s64)EXT(*val, 8 << *width);
+ if (insn & (1 << 22))
+ regs[Rt] &= 0xffffffff;
+ } else if ((insn & 0x3fe04c00) == 0x38604800) {
+ // LDRx (register)
+ DECODE_OK;
+ regs[Rt] = *val;
+ } else if ((insn & 0x3fa04c00) == 0x38a04800) {
+ // LDRSx (register)
+ DECODE_OK;
+ regs[Rt] = (s64)EXT(*val, 8 << *width);
+ if (insn & (1 << 22))
+ regs[Rt] &= 0xffffffff;
+ } else if ((insn & 0x3fe00c00) == 0x38400000) {
+ // LDURx (unscaled)
+ DECODE_OK;
+ regs[Rt] = *val;
+ } else if ((insn & 0x3fa00c00) == 0x38a00000) {
+ // LDURSx (unscaled)
+ DECODE_OK;
+ regs[Rt] = (s64)EXT(*val, (8 << *width));
+ if (insn & (1 << 22))
+ regs[Rt] &= 0xffffffff;
+ } else if ((insn & 0xffc00000) == 0x29400000) {
+ // LDP (Signed offset, 32-bit)
+ *width = 3;
+ *vaddr = regs[Rn] + (imm7 * 4);
+ DECODE_OK;
+ u64 Rt2 = (insn >> 10) & 0x1f;
+ regs[Rt] = val[0] & 0xffffffff;
+ regs[Rt2] = val[0] >> 32;
+ } else if ((insn & 0xffc00000) == 0xa9400000) {
+ // LDP (Signed offset, 64-bit)
+ *width = 4;
+ *vaddr = regs[Rn] + (imm7 * 8);
+ DECODE_OK;
+ u64 Rt2 = (insn >> 10) & 0x1f;
+ regs[Rt] = val[0];
+ regs[Rt2] = val[1];
+ } else if ((insn & 0xfec00000) == 0xa8c00000) {
+ // LDP (pre/post-increment, 64-bit)
+ *width = 4;
+ *vaddr = regs[Rn] + ((insn & BIT(24)) ? (imm7 * 8) : 0);
+ DECODE_OK;
+ regs[Rn] += imm7 * 8;
+ u64 Rt2 = (insn >> 10) & 0x1f;
+ regs[Rt] = val[0];
+ regs[Rt2] = val[1];
+ } else if ((insn & 0xfec00000) == 0xac400000) {
+ // LD[N]P (SIMD&FP, 128-bit) Signed offset
+ *width = 5;
+ *vaddr = regs[Rn] + (imm7 * 16);
+ DECODE_OK;
+ u64 Rt2 = (insn >> 10) & 0x1f;
+ get_simd_state(simd);
+ simd[Rt].d[0] = val[0];
+ simd[Rt].d[1] = val[1];
+ simd[Rt2].d[0] = val[2];
+ simd[Rt2].d[1] = val[3];
+ put_simd_state(simd);
+ } else if ((insn & 0x3fc00000) == 0x3d400000) {
+ // LDR (immediate, SIMD&FP) Unsigned offset
+ *vaddr = regs[Rn] + (imm12 << *width);
+ DECODE_OK;
+ get_simd_state(simd);
+ simd[Rt].d[0] = val[0];
+ simd[Rt].d[1] = 0;
+ put_simd_state(simd);
+ } else if ((insn & 0xffc00000) == 0x3dc00000) {
+ // LDR (immediate, SIMD&FP) Unsigned offset, 128-bit
+ *width = 4;
+ *vaddr = regs[Rn] + (imm12 << *width);
+ DECODE_OK;
+ get_simd_state(simd);
+ simd[Rt].d[0] = val[0];
+ simd[Rt].d[1] = val[1];
+ put_simd_state(simd);
+ } else if ((insn & 0xffe00c00) == 0x3cc00000) {
+ // LDURx (unscaled, SIMD&FP, 128-bit)
+ *width = 4;
+ *vaddr = regs[Rn] + (imm9 << *width);
+ DECODE_OK;
+ get_simd_state(simd);
+ simd[Rt].d[0] = val[0];
+ simd[Rt].d[1] = val[1];
+ put_simd_state(simd);
+ } else if ((insn & 0x3fe00400) == 0x3c400400) {
+ // LDR (immediate, SIMD&FP) Pre/Post-index
+ CHECK_RN;
+ DECODE_OK;
+ regs[Rn] += imm9;
+ get_simd_state(simd);
+ simd[Rt].d[0] = val[0];
+ simd[Rt].d[1] = 0;
+ put_simd_state(simd);
+ } else if ((insn & 0xffe00400) == 0x3cc00400) {
+ // LDR (immediate, SIMD&FP) Pre/Post-index, 128-bit
+ *width = 4;
+ CHECK_RN;
+ DECODE_OK;
+ regs[Rn] += imm9;
+ get_simd_state(simd);
+ simd[Rt].d[0] = val[0];
+ simd[Rt].d[1] = val[1];
+ put_simd_state(simd);
+ } else if ((insn & 0x3fe04c00) == 0x3c604800) {
+ // LDR (register, SIMD&FP)
+ DECODE_OK;
+ get_simd_state(simd);
+ simd[Rt].d[0] = val[0];
+ simd[Rt].d[1] = 0;
+ put_simd_state(simd);
+ } else if ((insn & 0xffe04c00) == 0x3ce04800) {
+ // LDR (register, SIMD&FP), 128-bit
+ *width = 4;
+ DECODE_OK;
+ get_simd_state(simd);
+ simd[Rt].d[0] = val[0];
+ simd[Rt].d[1] = val[1];
+ put_simd_state(simd);
+ } else if ((insn & 0xbffffc00) == 0x0d408400) {
+ // LD1 (single structure) No offset, 64-bit
+ *width = 3;
+ DECODE_OK;
+ u64 index = (insn >> 30) & 1;
+ get_simd_state(simd);
+ simd[Rt].d[index] = val[0];
+ put_simd_state(simd);
+ } else if ((insn & 0x3ffffc00) == 0x08dffc00) {
+ // LDAR*
+ DECODE_OK;
+ regs[Rt] = *val;
+ } else {
+ return false;
+ }
+ return true;
+}
+
+static bool emulate_store(struct exc_info *ctx, u32 insn, u64 *val, u64 *width, u64 *vaddr)
+{
+ u64 Rt = insn & 0x1f;
+ u64 Rn = (insn >> 5) & 0x1f;
+ u64 imm9 = EXT((insn >> 12) & 0x1ff, 9);
+ u64 imm7 = EXT((insn >> 15) & 0x7f, 7);
+ u64 *regs = ctx->regs;
+
+ union simd_reg simd[32];
+
+ *width = insn >> 30;
+
+ dprintf("emulate_store(%p, 0x%08x, ..., %ld) = ", regs, insn, *width);
+
+ regs[31] = 0;
+
+ u64 mask = 0xffffffffffffffffUL;
+
+ if (*width < 3)
+ mask = (1UL << (8 << *width)) - 1;
+
+ if ((insn & 0x3fe00400) == 0x38000400) {
+ // STRx (immediate) Pre/Post-index
+ CHECK_RN;
+ regs[Rn] += imm9;
+ *val = regs[Rt] & mask;
+ } else if ((insn & 0x3fc00000) == 0x39000000) {
+ // STRx (immediate) Unsigned offset
+ *val = regs[Rt] & mask;
+ } else if ((insn & 0x3fe04c00) == 0x38204800) {
+ // STRx (register)
+ *val = regs[Rt] & mask;
+ } else if ((insn & 0xfec00000) == 0x28000000) {
+ // ST[N]P (Signed offset, 32-bit)
+ *vaddr = regs[Rn] + (imm7 * 4);
+ u64 Rt2 = (insn >> 10) & 0x1f;
+ val[0] = (regs[Rt] & 0xffffffff) | (regs[Rt2] << 32);
+ *width = 3;
+ } else if ((insn & 0xfec00000) == 0xa8000000) {
+ // ST[N]P (Signed offset, 64-bit)
+ *vaddr = regs[Rn] + (imm7 * 8);
+ u64 Rt2 = (insn >> 10) & 0x1f;
+ val[0] = regs[Rt];
+ val[1] = regs[Rt2];
+ *width = 4;
+ } else if ((insn & 0xfec00000) == 0xa8800000) {
+ // ST[N]P (immediate, 64-bit, pre/post-index)
+ CHECK_RN;
+ *vaddr = regs[Rn] + ((insn & BIT(24)) ? (imm7 * 8) : 0);
+ regs[Rn] += (imm7 * 8);
+ u64 Rt2 = (insn >> 10) & 0x1f;
+ val[0] = regs[Rt];
+ val[1] = regs[Rt2];
+ *width = 4;
+ } else if ((insn & 0x3fc00000) == 0x3d000000) {
+ // STR (immediate, SIMD&FP) Unsigned offset, 8..64-bit
+ get_simd_state(simd);
+ *val = simd[Rt].d[0];
+ } else if ((insn & 0x3fe04c00) == 0x3c204800) {
+ // STR (register, SIMD&FP) 8..64-bit
+ get_simd_state(simd);
+ *val = simd[Rt].d[0];
+ } else if ((insn & 0xffe04c00) == 0x3ca04800) {
+ // STR (register, SIMD&FP) 128-bit
+ get_simd_state(simd);
+ val[0] = simd[Rt].d[0];
+ val[1] = simd[Rt].d[1];
+ *width = 4;
+ } else if ((insn & 0xffc00000) == 0x3d800000) {
+ // STR (immediate, SIMD&FP) Unsigned offset, 128-bit
+ get_simd_state(simd);
+ val[0] = simd[Rt].d[0];
+ val[1] = simd[Rt].d[1];
+ *width = 4;
+ } else if ((insn & 0xffe00000) == 0xbc000000) {
+ // STUR (immediate, SIMD&FP) 32-bit
+ get_simd_state(simd);
+ val[0] = simd[Rt].s[0];
+ *width = 2;
+ } else if ((insn & 0xffe00000) == 0xfc000000) {
+ // STUR (immediate, SIMD&FP) 64-bit
+ get_simd_state(simd);
+ val[0] = simd[Rt].d[0];
+ *width = 3;
+ } else if ((insn & 0xffe00000) == 0x3c800000) {
+ // STUR (immediate, SIMD&FP) 128-bit
+ get_simd_state(simd);
+ val[0] = simd[Rt].d[0];
+ val[1] = simd[Rt].d[1];
+ *width = 4;
+ } else if ((insn & 0xffc00000) == 0x2d000000) {
+ // STP (SIMD&FP, 32-bit) Signed offset
+ *vaddr = regs[Rn] + (imm7 * 4);
+ u64 Rt2 = (insn >> 10) & 0x1f;
+ get_simd_state(simd);
+ val[0] = simd[Rt].s[0] | (((u64)simd[Rt2].s[0]) << 32);
+ *width = 3;
+ } else if ((insn & 0xffc00000) == 0xad000000) {
+ // STP (SIMD&FP, 128-bit) Signed offset
+ *vaddr = regs[Rn] + (imm7 * 16);
+ u64 Rt2 = (insn >> 10) & 0x1f;
+ get_simd_state(simd);
+ val[0] = simd[Rt].d[0];
+ val[1] = simd[Rt].d[1];
+ val[2] = simd[Rt2].d[0];
+ val[3] = simd[Rt2].d[1];
+ *width = 5;
+ } else if ((insn & 0x3fe00c00) == 0x38000000) {
+ // STURx (unscaled)
+ *val = regs[Rt] & mask;
+ } else if ((insn & 0xffffffe0) == 0xd50b7420) {
+ // DC ZVA
+ *vaddr = regs[Rt];
+ memset(val, 0, CACHE_LINE_SIZE);
+ *width = CACHE_LINE_LOG2;
+ } else if ((insn & 0x3ffffc00) == 0x089ffc00) {
+ // STLR*
+ *val = regs[Rt] & mask;
+ } else {
+ return false;
+ }
+
+ dprintf("0x%lx\n", *width);
+
+ return true;
+}
+
+static void emit_mmiotrace(u64 pc, u64 addr, u64 *data, u64 width, u64 flags, bool sync)
+{
+ struct hv_evt_mmiotrace evt = {
+ .flags = flags | FIELD_PREP(MMIO_EVT_CPU, smp_id()),
+ .pc = pc,
+ .addr = addr,
+ };
+
+ if (width > 3)
+ evt.flags |= FIELD_PREP(MMIO_EVT_WIDTH, 3) | MMIO_EVT_MULTI;
+ else
+ evt.flags |= FIELD_PREP(MMIO_EVT_WIDTH, width);
+
+ for (int i = 0; i < (1 << width); i += 8) {
+ evt.data = *data++;
+ hv_wdt_suspend();
+ uartproxy_send_event(EVT_MMIOTRACE, &evt, sizeof(evt));
+ if (sync) {
+ iodev_flush(uartproxy_iodev);
+ }
+ hv_wdt_resume();
+ evt.addr += 8;
+ }
+}
+
+bool hv_pa_write(struct exc_info *ctx, u64 addr, u64 *val, int width)
+{
+ sysop("dsb sy");
+ exc_count = 0;
+ exc_guard = GUARD_SKIP;
+ switch (width) {
+ case 0:
+ write8(addr, val[0]);
+ break;
+ case 1:
+ write16(addr, val[0]);
+ break;
+ case 2:
+ write32(addr, val[0]);
+ break;
+ case 3:
+ write64(addr, val[0]);
+ break;
+ case 4:
+ case 5:
+ case 6:
+ for (u64 i = 0; i < (1UL << (width - 3)); i++)
+ write64(addr + 8 * i, val[i]);
+ break;
+ default:
+ dprintf("HV: unsupported write width %ld\n", width);
+ exc_guard = GUARD_OFF;
+ return false;
+ }
+ // Make sure we catch SErrors here
+ sysop("dsb sy");
+ sysop("isb");
+ exc_guard = GUARD_OFF;
+ if (exc_count) {
+ printf("HV: Exception during write to 0x%lx (width: %d)\n", addr, width);
+ // Update exception info with "real" cause
+ ctx->esr = hv_get_esr();
+ ctx->far = hv_get_far();
+ return false;
+ }
+ return true;
+}
+
+bool hv_pa_read(struct exc_info *ctx, u64 addr, u64 *val, int width)
+{
+ sysop("dsb sy");
+ exc_count = 0;
+ exc_guard = GUARD_SKIP;
+ switch (width) {
+ case 0:
+ val[0] = read8(addr);
+ break;
+ case 1:
+ val[0] = read16(addr);
+ break;
+ case 2:
+ val[0] = read32(addr);
+ break;
+ case 3:
+ val[0] = read64(addr);
+ break;
+ case 4:
+ val[0] = read64(addr);
+ val[1] = read64(addr + 8);
+ break;
+ case 5:
+ val[0] = read64(addr);
+ val[1] = read64(addr + 8);
+ val[2] = read64(addr + 16);
+ val[3] = read64(addr + 24);
+ break;
+ default:
+ dprintf("HV: unsupported read width %ld\n", width);
+ exc_guard = GUARD_OFF;
+ return false;
+ }
+ sysop("dsb sy");
+ exc_guard = GUARD_OFF;
+ if (exc_count) {
+ dprintf("HV: Exception during read from 0x%lx (width: %d)\n", addr, width);
+ // Update exception info with "real" cause
+ ctx->esr = hv_get_esr();
+ ctx->far = hv_get_far();
+ return false;
+ }
+ return true;
+}
+
+bool hv_pa_rw(struct exc_info *ctx, u64 addr, u64 *val, bool write, int width)
+{
+ if (write)
+ return hv_pa_write(ctx, addr, val, width);
+ else
+ return hv_pa_read(ctx, addr, val, width);
+}
+
+static bool hv_emulate_rw_aligned(struct exc_info *ctx, u64 pte, u64 vaddr, u64 ipa, u64 *val,
+ bool is_write, u64 width, u64 elr, u64 par)
+{
+ assert(pte);
+ assert(((ipa & 0x3fff) + (1 << width)) <= 0x4000);
+
+ u64 target = pte & PTE_TARGET_MASK_L4;
+ u64 paddr = target | (vaddr & MASK(VADDR_L4_OFFSET_BITS));
+ u64 flags = FIELD_PREP(MMIO_EVT_ATTR, FIELD_GET(PAR_ATTR, par)) |
+ FIELD_PREP(MMIO_EVT_SH, FIELD_GET(PAR_SH, par));
+
+ // For split ops, treat hardware mapped pages as SPTE_MAP
+ if (IS_HW(pte))
+ pte = target | FIELD_PREP(PTE_TYPE, PTE_BLOCK) | FIELD_PREP(SPTE_TYPE, SPTE_MAP);
+
+ if (is_write) {
+ // Write
+ hv_wdt_breadcrumb('3');
+
+ if (pte & SPTE_TRACE_WRITE)
+ emit_mmiotrace(elr, ipa, val, width, flags | MMIO_EVT_WRITE, pte & SPTE_TRACE_UNBUF);
+
+ hv_wdt_breadcrumb('4');
+
+ switch (FIELD_GET(SPTE_TYPE, pte)) {
+ case SPTE_PROXY_HOOK_R:
+ paddr = ipa;
+ // fallthrough
+ case SPTE_MAP:
+ hv_wdt_breadcrumb('5');
+ dprintf("HV: SPTE_MAP[W] @0x%lx 0x%lx -> 0x%lx (w=%d): 0x%lx\n", elr, ipa, paddr,
+ 1 << width, val[0]);
+ if (!hv_pa_write(ctx, paddr, val, width))
+ return false;
+ break;
+ case SPTE_HOOK: {
+ hv_wdt_breadcrumb('6');
+ hv_hook_t *hook = (hv_hook_t *)target;
+ if (!hook(ctx, ipa, val, true, width))
+ return false;
+ dprintf("HV: SPTE_HOOK[W] @0x%lx 0x%lx -> 0x%lx (w=%d) @%p: 0x%lx\n", elr, far, ipa,
+ 1 << width, hook, wval);
+ break;
+ }
+ case SPTE_PROXY_HOOK_RW:
+ case SPTE_PROXY_HOOK_W: {
+ hv_wdt_breadcrumb('7');
+ struct hv_vm_proxy_hook_data hook = {
+ .flags = FIELD_PREP(MMIO_EVT_WIDTH, width) | MMIO_EVT_WRITE | flags,
+ .id = FIELD_GET(PTE_TARGET_MASK_L4, pte),
+ .addr = ipa,
+ .data = {0},
+ };
+ memcpy(hook.data, val, 1 << width);
+ hv_exc_proxy(ctx, START_HV, HV_HOOK_VM, &hook);
+ break;
+ }
+ default:
+ printf("HV: invalid SPTE 0x%016lx for IPA 0x%lx\n", pte, ipa);
+ return false;
+ }
+ } else {
+ hv_wdt_breadcrumb('3');
+ switch (FIELD_GET(SPTE_TYPE, pte)) {
+ case SPTE_PROXY_HOOK_W:
+ paddr = ipa;
+ // fallthrough
+ case SPTE_MAP:
+ hv_wdt_breadcrumb('4');
+ if (!hv_pa_read(ctx, paddr, val, width))
+ return false;
+ dprintf("HV: SPTE_MAP[R] @0x%lx 0x%lx -> 0x%lx (w=%d): 0x%lx\n", elr, ipa, paddr,
+ 1 << width, val[0]);
+ break;
+ case SPTE_HOOK: {
+ hv_wdt_breadcrumb('5');
+ hv_hook_t *hook = (hv_hook_t *)target;
+ if (!hook(ctx, ipa, val, false, width))
+ return false;
+ dprintf("HV: SPTE_HOOK[R] @0x%lx 0x%lx -> 0x%lx (w=%d) @%p: 0x%lx\n", elr, far, ipa,
+ 1 << width, hook, val);
+ break;
+ }
+ case SPTE_PROXY_HOOK_RW:
+ case SPTE_PROXY_HOOK_R: {
+ hv_wdt_breadcrumb('6');
+ struct hv_vm_proxy_hook_data hook = {
+ .flags = FIELD_PREP(MMIO_EVT_WIDTH, width) | flags,
+ .id = FIELD_GET(PTE_TARGET_MASK_L4, pte),
+ .addr = ipa,
+ };
+ hv_exc_proxy(ctx, START_HV, HV_HOOK_VM, &hook);
+ memcpy(val, hook.data, 1 << width);
+ break;
+ }
+ default:
+ printf("HV: invalid SPTE 0x%016lx for IPA 0x%lx\n", pte, ipa);
+ return false;
+ }
+
+ hv_wdt_breadcrumb('7');
+ if (pte & SPTE_TRACE_READ)
+ emit_mmiotrace(elr, ipa, val, width, flags, pte & SPTE_TRACE_UNBUF);
+ }
+
+ hv_wdt_breadcrumb('*');
+
+ return true;
+}
+
+static bool hv_emulate_rw(struct exc_info *ctx, u64 pte, u64 vaddr, u64 ipa, u8 *val, bool is_write,
+ u64 bytes, u64 elr, u64 par)
+{
+ u64 aval[HV_MAX_RW_WORDS];
+
+ bool advance = (IS_HW(pte) || (IS_SW(pte) && FIELD_GET(SPTE_TYPE, pte) == SPTE_MAP)) ? 1 : 0;
+ u64 off = 0;
+ u64 width;
+
+ bool first = true;
+
+ u64 left = bytes;
+ u64 paddr = (pte & PTE_TARGET_MASK_L4) | (vaddr & MASK(VADDR_L4_OFFSET_BITS));
+
+ while (left > 0) {
+ memset(aval, 0, sizeof(aval));
+
+ if (left >= 64 && (ipa & 63) == 0)
+ width = 6;
+ else if (left >= 32 && (ipa & 31) == 0)
+ width = 5;
+ else if (left >= 16 && (ipa & 15) == 0)
+ width = 4;
+ else if (left >= 8 && (ipa & 7) == 0)
+ width = 3;
+ else if (left >= 4 && (ipa & 3) == 0)
+ width = 2;
+ else if (left >= 2 && (ipa & 1) == 0)
+ width = 1;
+ else
+ width = 0;
+
+ u64 chunk = 1 << width;
+
+ /*
+ if (chunk != bytes)
+ printf("HV: Splitting unaligned %ld-byte %s: %ld bytes @ 0x%lx\n", bytes,
+ is_write ? "write" : "read", chunk, vaddr);
+ */
+
+ if (is_write)
+ memcpy(aval, val + off, chunk);
+
+ if (advance)
+ pte = (paddr & PTE_TARGET_MASK_L4) | (pte & ~PTE_TARGET_MASK_L4);
+
+ if (!hv_emulate_rw_aligned(ctx, pte, vaddr, ipa, aval, is_write, width, elr, par)) {
+ if (!first)
+ printf("HV: WARNING: Failed to emulate split op but part of it did commit!\n");
+ return false;
+ }
+
+ if (!is_write)
+ memcpy(val + off, aval, chunk);
+
+ left -= chunk;
+ off += chunk;
+
+ ipa += chunk;
+ vaddr += chunk;
+ if (advance)
+ paddr += chunk;
+
+ first = 0;
+ }
+
+ return true;
+}
+
+bool hv_handle_dabort(struct exc_info *ctx)
+{
+ hv_wdt_breadcrumb('0');
+ u64 esr = hv_get_esr();
+ bool is_write = esr & ESR_ISS_DABORT_WnR;
+
+ u64 far = hv_get_far();
+ u64 par;
+ u64 ipa = hv_translate(far, true, is_write, &par);
+
+ dprintf("hv_handle_abort(): stage 1 0x%0lx -> 0x%lx\n", far, ipa);
+
+ if (!ipa) {
+ printf("HV: stage 1 translation failed at VA 0x%0lx\n", far);
+ return false;
+ }
+
+ if (ipa >= BIT(vaddr_bits)) {
+ printf("hv_handle_abort(): IPA out of bounds: 0x%0lx -> 0x%lx\n", far, ipa);
+ return false;
+ }
+
+ u64 pte = hv_pt_walk(ipa);
+
+ if (!pte) {
+ printf("HV: Unmapped IPA 0x%lx\n", ipa);
+ return false;
+ }
+
+ if (IS_HW(pte)) {
+ printf("HV: Data abort on mapped page (0x%lx -> 0x%lx)\n", far, pte);
+ // Try again, this is usually a race
+ ctx->elr -= 4;
+ return true;
+ }
+
+ hv_wdt_breadcrumb('1');
+
+ assert(IS_SW(pte));
+
+ u64 elr = ctx->elr;
+ u64 elr_pa = hv_translate(elr, false, false, NULL);
+ if (!elr_pa) {
+ printf("HV: Failed to fetch instruction for data abort at 0x%lx\n", elr);
+ return false;
+ }
+
+ u32 insn = read32(elr_pa);
+ u64 width;
+
+ hv_wdt_breadcrumb('2');
+
+ u64 vaddr = far;
+
+ u8 val[HV_MAX_RW_SIZE] ALIGNED(HV_MAX_RW_SIZE);
+ memset(val, 0, sizeof(val));
+
+ if (is_write) {
+ hv_wdt_breadcrumb('W');
+
+ if (!emulate_store(ctx, insn, (u64 *)val, &width, &vaddr)) {
+ printf("HV: store not emulated: 0x%08x at 0x%lx\n", insn, ipa);
+ return false;
+ }
+ } else {
+ hv_wdt_breadcrumb('R');
+
+ if (!emulate_load(ctx, insn, NULL, &width, &vaddr)) {
+ printf("HV: load not emulated: 0x%08x at 0x%lx\n", insn, ipa);
+ return false;
+ }
+ }
+
+ /*
+ Check for HW page-straddling conditions
+ Right now we only support the case where the page boundary is exactly halfway
+ through the read/write.
+ */
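+ // e.g. an 8-byte access at page offset 0x3ffc (16KB pages): 4 bytes land in each page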
+ u64 bytes = 1 << width;
+ u64 vaddrp0 = vaddr & ~MASK(VADDR_L3_OFFSET_BITS);
+ u64 vaddrp1 = (vaddr + bytes - 1) & ~MASK(VADDR_L3_OFFSET_BITS);
+
+ if (vaddrp0 == vaddrp1) {
+ // Easy case, no page straddle
+ if (far != vaddr) {
+ printf("HV: faulted at 0x%lx, but expecting 0x%lx\n", far, vaddr);
+ return false;
+ }
+
+ if (!hv_emulate_rw(ctx, pte, vaddr, ipa, val, is_write, bytes, elr, par))
+ return false;
+ } else {
+ // Oops, we're straddling a page boundary
+ // Treat it as two separate loads or stores
+
+ assert(bytes > 1);
+ hv_wdt_breadcrumb('s');
+
+ u64 off = vaddrp1 - vaddr;
+
+ u64 vaddr2;
+ const char *other;
+ if (far == vaddr) {
+ other = "upper";
+ vaddr2 = vaddrp1;
+ } else {
+ if (far != vaddrp1) {
+ printf("HV: faulted at 0x%lx, but expecting 0x%lx\n", far, vaddrp1);
+ return false;
+ }
+ other = "lower";
+ vaddr2 = vaddr;
+ }
+
+ u64 par2;
+ u64 ipa2 = hv_translate(vaddr2, true, esr & ESR_ISS_DABORT_WnR, &par2);
+ if (!ipa2) {
+ printf("HV: %s half stage 1 translation failed at VA 0x%0lx\n", other, vaddr2);
+ return false;
+ }
+ if (ipa2 >= BIT(vaddr_bits)) {
+ printf("hv_handle_abort(): %s half IPA out of bounds: 0x%0lx -> 0x%lx\n", other, vaddr2,
+ ipa2);
+ return false;
+ }
+
+ u64 pte2 = hv_pt_walk(ipa2);
+ if (!pte2) {
+ printf("HV: Unmapped %s half IPA 0x%lx\n", other, ipa2);
+ return false;
+ }
+
+ hv_wdt_breadcrumb('S');
+
+ printf("HV: Emulating %s straddling page boundary as two ops @ 0x%lx (%ld bytes)\n",
+ is_write ? "write" : "read", vaddr, bytes);
+
+ bool upper_ret;
+ if (far == vaddr) {
+ if (!hv_emulate_rw(ctx, pte, vaddr, ipa, val, is_write, off, elr, par))
+ return false;
+ upper_ret =
+ hv_emulate_rw(ctx, pte2, vaddr2, ipa2, val + off, is_write, bytes - off, elr, par2);
+ } else {
+ if (!hv_emulate_rw(ctx, pte2, vaddr2, ipa2, val, is_write, off, elr, par2))
+ return false;
+ upper_ret =
+ hv_emulate_rw(ctx, pte, vaddrp1, ipa, val + off, is_write, bytes - off, elr, par);
+ }
+
+ if (!upper_ret) {
+ printf("HV: WARNING: Failed to emulate upper half but lower half did commit!\n");
+ return false;
+ }
+ }
+
+ if (vaddrp0 != vaddrp1) {
+ printf("HV: Straddled r/w data:\n");
+ hexdump(val, bytes);
+ }
+
+ hv_wdt_breadcrumb('8');
+ if (!is_write && !emulate_load(ctx, insn, (u64 *)val, &width, &vaddr))
+ return false;
+
+ hv_wdt_breadcrumb('9');
+
+ return true;
+}
diff --git a/tools/src/hv_vuart.c b/tools/src/hv_vuart.c
new file mode 100644
index 0000000..595c031
--- /dev/null
+++ b/tools/src/hv_vuart.c
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "hv.h"
+#include "aic.h"
+#include "iodev.h"
+#include "uart.h"
+#include "uart_regs.h"
+#include "usb.h"
+
+bool active = false;
+
+u32 ucon = 0;
+u32 utrstat = 0;
+u32 ufstat = 0;
+
+int vuart_irq = 0;
+
+static void update_irq(void)
+{
+ ssize_t rx_queued;
+
+ iodev_handle_events(IODEV_USB_VUART);
+
+ utrstat |= UTRSTAT_TXBE | UTRSTAT_TXE;
+ utrstat &= ~UTRSTAT_RXD;
+
+ ufstat = 0;
+ if ((rx_queued = iodev_can_read(IODEV_USB_VUART))) {
+ utrstat |= UTRSTAT_RXD;
+ if (rx_queued > 15)
+ ufstat = FIELD_PREP(UFSTAT_RXCNT, 15) | UFSTAT_RXFULL;
+ else
+ ufstat = FIELD_PREP(UFSTAT_RXCNT, rx_queued);
+
+ if (FIELD_GET(UCON_RXMODE, ucon) == UCON_MODE_IRQ && ucon & UCON_RXTO_ENA) {
+ utrstat |= UTRSTAT_RXTO;
+ }
+ }
+
+ if (FIELD_GET(UCON_TXMODE, ucon) == UCON_MODE_IRQ && ucon & UCON_TXTHRESH_ENA) {
+ utrstat |= UTRSTAT_TXTHRESH;
+ }
+
+ if (vuart_irq) {
+ uart_clear_irqs();
+ if (utrstat & (UTRSTAT_TXTHRESH | UTRSTAT_RXTHRESH | UTRSTAT_RXTO)) {
+ aic_set_sw(vuart_irq, true);
+ } else {
+ aic_set_sw(vuart_irq, false);
+ }
+ }
+
+ // printf("HV: vuart UTRSTAT=0x%x UFSTAT=0x%x UCON=0x%x\n", utrstat, ufstat, ucon);
+}
+
+static bool handle_vuart(struct exc_info *ctx, u64 addr, u64 *val, bool write, int width)
+{
+ UNUSED(ctx);
+ UNUSED(width);
+
+ addr &= 0xfff;
+
+ update_irq();
+
+ if (write) {
+ // printf("HV: vuart W 0x%lx <- 0x%lx (%d)\n", addr, *val, width);
+ switch (addr) {
+ case UCON:
+ ucon = *val;
+ break;
+ case UTXH: {
+ uint8_t b = *val;
+ if (iodev_can_write(IODEV_USB_VUART))
+ iodev_write(IODEV_USB_VUART, &b, 1);
+ break;
+ }
+ case UTRSTAT:
+ utrstat &= ~(*val & (UTRSTAT_TXTHRESH | UTRSTAT_RXTHRESH | UTRSTAT_RXTO));
+ break;
+ }
+ } else {
+ switch (addr) {
+ case UCON:
+ *val = ucon;
+ break;
+ case URXH:
+ if (iodev_can_read(IODEV_USB_VUART)) {
+ uint8_t c;
+ iodev_read(IODEV_USB_VUART, &c, 1);
+ *val = c;
+ } else {
+ *val = 0;
+ }
+ break;
+ case UTRSTAT:
+ *val = utrstat;
+ break;
+ case UFSTAT:
+ *val = ufstat;
+ break;
+ default:
+ *val = 0;
+ break;
+ }
+ // printf("HV: vuart R 0x%lx -> 0x%lx (%d)\n", addr, *val, width);
+ }
+
+ return true;
+}
+
+void hv_vuart_poll(void)
+{
+ if (!active)
+ return;
+
+ update_irq();
+}
+
+void hv_map_vuart(u64 base, int irq, iodev_id_t iodev)
+{
+ hv_map_hook(base, handle_vuart, 0x1000);
+ usb_iodev_vuart_setup(iodev);
+ vuart_irq = irq;
+ active = true;
+}
diff --git a/tools/src/hv_wdt.c b/tools/src/hv_wdt.c
new file mode 100644
index 0000000..6010412
--- /dev/null
+++ b/tools/src/hv_wdt.c
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "hv.h"
+#include "adt.h"
+#include "smp.h"
+#include "uart.h"
+#include "utils.h"
+
+#define WDT_TIMEOUT 1
+
+static bool hv_wdt_active = false;
+static bool hv_wdt_enabled = false;
+static volatile u64 hv_wdt_timestamp = 0;
+static u64 hv_wdt_timeout = 0;
+static volatile u64 hv_wdt_breadcrumbs;
+
+static int hv_wdt_cpu;
+
+static u64 cpu_dbg_base = 0;
+
+void hv_wdt_bark(void)
+{
+ u64 tmp = hv_wdt_breadcrumbs;
+ uart_puts("HV watchdog: bark!");
+
+ uart_printf("Breadcrumbs: ");
+ for (int i = 56; i >= 0; i -= 8) {
+ char c = (tmp >> i) & 0xff;
+ if (c)
+ uart_putchar(c);
+ }
+ uart_putchar('\n');
+
+ uart_puts("Attempting to enter proxy");
+
+ struct uartproxy_msg_start start = {
+ .reason = START_HV,
+ .code = HV_WDT_BARK,
+ };
+
+ uartproxy_run(&start);
+ reboot();
+}
+
+void hv_wdt_main(void)
+{
+ while (hv_wdt_active) {
+ if (hv_wdt_enabled) {
+ sysop("dmb ish");
+ u64 timestamp = hv_wdt_timestamp;
+ sysop("isb");
+ u64 now = mrs(CNTPCT_EL0);
+ sysop("isb");
+ if ((now - timestamp) > hv_wdt_timeout)
+ hv_wdt_bark();
+ }
+
+ udelay(1000);
+
+ sysop("dmb ish");
+ }
+}
+
+void hv_wdt_pet(void)
+{
+ hv_wdt_timestamp = mrs(CNTPCT_EL0);
+ sysop("dmb ish");
+}
+
+void hv_wdt_suspend(void)
+{
+ hv_wdt_enabled = false;
+ sysop("dsb ish");
+}
+
+void hv_wdt_resume(void)
+{
+ hv_wdt_pet();
+ hv_wdt_enabled = true;
+ sysop("dsb ish");
+}
+
+void hv_wdt_breadcrumb(char c)
+{
+ u64 tmp = hv_wdt_breadcrumbs;
+ tmp <<= 8;
+ tmp |= c;
+ hv_wdt_breadcrumbs = tmp;
+ sysop("dmb ish");
+}
+
+void hv_wdt_init(void)
+{
+ int node = adt_path_offset(adt, "/cpus/cpu0");
+ if (node < 0) {
+ printf("Error getting /cpus/cpu0 node\n");
+ return;
+ }
+
+ u64 reg[2];
+ if (ADT_GETPROP_ARRAY(adt, node, "cpu-uttdbg-reg", reg) < 0) {
+ printf("Error getting cpu-uttdbg-reg property\n");
+ return;
+ }
+
+ cpu_dbg_base = reg[0];
+}
+
+void hv_wdt_start(int cpu)
+{
+ if (hv_wdt_active)
+ return;
+
+ hv_wdt_cpu = cpu;
+ hv_wdt_breadcrumbs = 0;
+ hv_wdt_timeout = mrs(CNTFRQ_EL0) * WDT_TIMEOUT;
+ hv_wdt_pet();
+ hv_wdt_active = true;
+ hv_wdt_enabled = true;
+ smp_call4(hv_wdt_cpu, hv_wdt_main, 0, 0, 0, 0);
+}
+
+void hv_wdt_stop(void)
+{
+ if (!hv_wdt_active)
+ return;
+
+ hv_wdt_active = false;
+ smp_wait(hv_wdt_cpu);
+}
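[Editor's note: a sketch, under the assumption that these calls sit in an HV exception path, of how the watchdog above is meant to be used. hv_wdt_bark() dumps the last eight breadcrumb characters, so one marker per phase localizes a hang.]

    hv_wdt_start(1);           /* run hv_wdt_main() on secondary CPU 1 (placeholder) */

    hv_wdt_pet();              /* on entry: reset the 1-second timeout */
    hv_wdt_breadcrumb('E');    /* phase marker: entered exception handler */
    /* ... guest exit emulation ... */
    hv_wdt_breadcrumb('X');    /* phase marker: done */

    hv_wdt_suspend();          /* pause checking around long host-side work */
    hv_wdt_resume();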
diff --git a/tools/src/i2c.c b/tools/src/i2c.c
new file mode 100644
index 0000000..942ef1e
--- /dev/null
+++ b/tools/src/i2c.c
@@ -0,0 +1,216 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "adt.h"
+#include "i2c.h"
+#include "malloc.h"
+#include "pmgr.h"
+#include "types.h"
+#include "utils.h"
+
+#define PASEMI_FIFO_TX 0x00
+#define PASEMI_TX_FLAG_READ BIT(10)
+#define PASEMI_TX_FLAG_STOP BIT(9)
+#define PASEMI_TX_FLAG_START BIT(8)
+
+#define PASEMI_FIFO_RX 0x04
+#define PASEMI_RX_FLAG_EMPTY BIT(8)
+
+#define PASEMI_STATUS 0x14
+#define PASEMI_STATUS_XFER_BUSY BIT(28)
+#define PASEMI_STATUS_XFER_ENDED BIT(27)
+
+#define PASEMI_CONTROL 0x1c
+#define PASEMI_CONTROL_CLEAR_RX BIT(10)
+#define PASEMI_CONTROL_CLEAR_TX BIT(9)
+
+struct i2c_dev {
+ uintptr_t base;
+};
+
+i2c_dev_t *i2c_init(const char *adt_node)
+{
+ int adt_path[8];
+ int adt_offset;
+ adt_offset = adt_path_offset_trace(adt, adt_node, adt_path);
+ if (adt_offset < 0) {
+ printf("i2c: Error getting %s node\n", adt_node);
+ return NULL;
+ }
+
+ u64 base;
+ if (adt_get_reg(adt, adt_path, "reg", 0, &base, NULL) < 0) {
+ printf("i2c: Error getting %s regs\n", adt_node);
+ return NULL;
+ }
+
+ if (pmgr_adt_power_enable(adt_node)) {
+ printf("i2c: Error enabling power for %s\n", adt_node);
+ return NULL;
+ }
+
+ i2c_dev_t *dev = malloc(sizeof(*dev));
+ if (!dev)
+ return NULL;
+
+ dev->base = base;
+ return dev;
+}
+
+void i2c_shutdown(i2c_dev_t *dev)
+{
+ free(dev);
+}
+
+static void i2c_clear_fifos(i2c_dev_t *dev)
+{
+ set32(dev->base + PASEMI_CONTROL, PASEMI_CONTROL_CLEAR_TX | PASEMI_CONTROL_CLEAR_RX);
+}
+
+static void i2c_clear_status(i2c_dev_t *dev)
+{
+ write32(dev->base + PASEMI_STATUS, 0xffffffff);
+}
+
+static void i2c_xfer_start_read(i2c_dev_t *dev, u8 addr, size_t len)
+{
+ write32(dev->base + PASEMI_FIFO_TX, PASEMI_TX_FLAG_START | (addr << 1) | 1);
+ write32(dev->base + PASEMI_FIFO_TX, PASEMI_TX_FLAG_READ | PASEMI_TX_FLAG_STOP | len);
+}
+
+static size_t i2c_xfer_read(i2c_dev_t *dev, u8 *bfr, size_t len)
+{
+ for (size_t i = 0; i < len; ++i) {
+ u32 timeout = 5000;
+ u32 val;
+
+ do {
+ val = read32(dev->base + PASEMI_FIFO_RX);
+ if (!(val & PASEMI_RX_FLAG_EMPTY))
+ break;
+ udelay(10);
+ } while (--timeout);
+
+ if (val & PASEMI_RX_FLAG_EMPTY) {
+ printf("i2c: timeout while reading (got %lu, expected %lu bytes)\n", i, len);
+ return i;
+ }
+
+ bfr[i] = val;
+ }
+
+ return len;
+}
+
+static int i2c_xfer_write(i2c_dev_t *dev, u8 addr, u32 start, u32 stop, const u8 *bfr, size_t len)
+{
+ if (start)
+ write32(dev->base + PASEMI_FIFO_TX, PASEMI_TX_FLAG_START | (addr << 1));
+
+ for (size_t i = 0; i < len; ++i) {
+ u32 data = bfr[i];
+ if (i == (len - 1) && stop)
+ data |= PASEMI_TX_FLAG_STOP;
+
+ write32(dev->base + PASEMI_FIFO_TX, data);
+ }
+
+ if (!stop)
+ return 0;
+
+ if (poll32(dev->base + PASEMI_STATUS, PASEMI_STATUS_XFER_BUSY, 0, 50000)) {
+ printf(
+ "i2c: timeout while waiting for PASEMI_STATUS_XFER_BUSY to clear after write xfer\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+int i2c_smbus_read(i2c_dev_t *dev, u8 addr, u8 reg, u8 *bfr, size_t len)
+{
+ int ret = -1;
+
+ i2c_clear_fifos(dev);
+ i2c_clear_status(dev);
+
+ if (i2c_xfer_write(dev, addr, 1, 0, &reg, 1))
+ goto err;
+
+ i2c_xfer_start_read(dev, addr, len + 1);
+ u8 len_reply;
+ if (i2c_xfer_read(dev, &len_reply, 1) != 1)
+ goto err;
+
+ if (len_reply < len)
+ printf("i2c: want to read %ld bytes from addr %d but can only read %d\n", len, addr,
+ len_reply);
+ if (len_reply > len)
+ printf("i2c: want to read %ld bytes from addr %d but device wants to send %d\n", len, addr,
+ len_reply);
+
+ ret = i2c_xfer_read(dev, bfr, min(len, len_reply));
+
+err:
+ if (poll32(dev->base + PASEMI_STATUS, PASEMI_STATUS_XFER_BUSY, 0, 50000)) {
+ printf("i2c: timeout while waiting for PASEMI_STATUS_XFER_BUSY to clear after read xfer\n");
+ return -1;
+ }
+
+ return ret;
+}
+
+int i2c_smbus_write(i2c_dev_t *dev, u8 addr, u8 reg, const u8 *bfr, size_t len)
+{
+ i2c_clear_fifos(dev);
+ i2c_clear_status(dev);
+
+ if (i2c_xfer_write(dev, addr, 1, 0, &reg, 1))
+ return -1;
+
+ u8 len_send = len;
+ if (i2c_xfer_write(dev, addr, 0, 0, &len_send, 1))
+ return -1;
+ if (i2c_xfer_write(dev, addr, 0, 1, bfr, len))
+ return -1;
+
+ return len_send;
+}
+
+int i2c_smbus_read32(i2c_dev_t *dev, u8 addr, u8 reg, u32 *val)
+{
+ u8 bfr[4];
+ if (i2c_smbus_read(dev, addr, reg, bfr, 4) != 4)
+ return -1;
+
+ *val = (bfr[0]) | (bfr[1] << 8) | (bfr[2] << 16) | (bfr[3] << 24);
+ return 0;
+}
+
+int i2c_smbus_read16(i2c_dev_t *dev, u8 addr, u8 reg, u16 *val)
+{
+ u8 bfr[2];
+ if (i2c_smbus_read(dev, addr, reg, bfr, 2) != 2)
+ return -1;
+
+ *val = (bfr[0]) | (bfr[1] << 8);
+ return 0;
+}
+
+int i2c_smbus_write32(i2c_dev_t *dev, u8 addr, u8 reg, u32 val)
+{
+ u8 bfr[4];
+
+ bfr[0] = val;
+ bfr[1] = val >> 8;
+ bfr[2] = val >> 16;
+ bfr[3] = val >> 24;
+
+ return i2c_smbus_write(dev, addr, reg, bfr, 4);
+}
+
+int i2c_smbus_read8(i2c_dev_t *dev, u8 addr, u8 reg, u8 *val)
+{
+ if (i2c_smbus_read(dev, addr, reg, val, 1) != 1)
+ return -1;
+ return 0;
+}
diff --git a/tools/src/i2c.h b/tools/src/i2c.h
new file mode 100644
index 0000000..cbfc119
--- /dev/null
+++ b/tools/src/i2c.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef I2C_H
+#define I2C_H
+
+#include "types.h"
+
+typedef struct i2c_dev i2c_dev_t;
+
+i2c_dev_t *i2c_init(const char *adt_node);
+void i2c_shutdown(i2c_dev_t *dev);
+
+int i2c_smbus_read(i2c_dev_t *dev, u8 addr, u8 reg, u8 *bfr, size_t len);
+int i2c_smbus_write(i2c_dev_t *dev, u8 addr, u8 reg, const u8 *bfr, size_t len);
+
+int i2c_smbus_read32(i2c_dev_t *dev, u8 addr, u8 reg, u32 *val);
+int i2c_smbus_write32(i2c_dev_t *dev, u8 addr, u8 reg, u32 val);
+
+int i2c_smbus_read16(i2c_dev_t *dev, u8 addr, u8 reg, u16 *val);
+int i2c_smbus_read8(i2c_dev_t *dev, u8 addr, u8 reg, u8 *val);
+
+#endif
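[Editor's note: a minimal sketch of the SMBus API above; the ADT node path, device address, and register number are placeholders.]

    i2c_dev_t *bus = i2c_init("/arm-io/i2c0"); /* placeholder ADT node */
    if (bus) {
        u16 val;
        /* i2c_smbus_read() issues a register write followed by an SMBus
         * block read, so the device's first returned byte is a length. */
        if (i2c_smbus_read16(bus, 0x38 /* placeholder addr */, 0x01, &val) == 0)
            printf("i2c: reg 0x01 = 0x%x\n", val);
        i2c_shutdown(bus);
    }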
diff --git a/tools/src/iodev.c b/tools/src/iodev.c
new file mode 100644
index 0000000..32b5831
--- /dev/null
+++ b/tools/src/iodev.c
@@ -0,0 +1,319 @@
+/* SPDX-License-Identifier: MIT */
+
+// #define DEBUG_IODEV
+
+#include "iodev.h"
+#include "memory.h"
+#include "string.h"
+
+#ifdef DEBUG_IODEV
+#define dprintf printf
+#else
+#define dprintf(...) \
+ do { \
+ } while (0)
+#endif
+
+#define CONSOLE_BUFFER_SIZE 8192
+
+extern struct iodev iodev_uart;
+extern struct iodev iodev_fb;
+extern struct iodev iodev_usb_vuart;
+
+struct iodev *iodevs[IODEV_MAX] = {
+ [IODEV_UART] = &iodev_uart,
+ [IODEV_FB] = &iodev_fb,
+ [IODEV_USB_VUART] = &iodev_usb_vuart,
+};
+
+char con_buf[CONSOLE_BUFFER_SIZE];
+size_t con_wp;
+size_t con_rp[IODEV_MAX];
+
+void iodev_register_device(iodev_id_t id, struct iodev *dev)
+{
+ if (id >= IODEV_MAX)
+ return;
+ iodevs[id] = dev;
+}
+
+struct iodev *iodev_unregister_device(iodev_id_t id)
+{
+ if (id < IODEV_USB0 || id >= IODEV_MAX)
+ return NULL;
+
+ struct iodev *dev = iodevs[id];
+ iodevs[id] = NULL;
+ return dev;
+}
+
+ssize_t iodev_can_read(iodev_id_t id)
+{
+ if (!iodevs[id] || !iodevs[id]->ops->can_read)
+ return 0;
+
+ if (mmu_active())
+ spin_lock(&iodevs[id]->lock);
+ ssize_t ret = iodevs[id]->ops->can_read(iodevs[id]->opaque);
+ if (mmu_active())
+ spin_unlock(&iodevs[id]->lock);
+ return ret;
+}
+
+bool iodev_can_write(iodev_id_t id)
+{
+ if (!iodevs[id] || !iodevs[id]->ops->can_write)
+ return false;
+
+ if (mmu_active())
+ spin_lock(&iodevs[id]->lock);
+ bool ret = iodevs[id]->ops->can_write(iodevs[id]->opaque);
+ if (mmu_active())
+ spin_unlock(&iodevs[id]->lock);
+ return ret;
+}
+
+ssize_t iodev_read(iodev_id_t id, void *buf, size_t length)
+{
+ if (!iodevs[id] || !iodevs[id]->ops->read)
+ return -1;
+
+ if (mmu_active())
+ spin_lock(&iodevs[id]->lock);
+ ssize_t ret = iodevs[id]->ops->read(iodevs[id]->opaque, buf, length);
+ if (mmu_active())
+ spin_unlock(&iodevs[id]->lock);
+ return ret;
+}
+
+ssize_t iodev_write(iodev_id_t id, const void *buf, size_t length)
+{
+ if (!iodevs[id] || !iodevs[id]->ops->write)
+ return -1;
+
+ if (mmu_active())
+ spin_lock(&iodevs[id]->lock);
+ ssize_t ret = iodevs[id]->ops->write(iodevs[id]->opaque, buf, length);
+ if (mmu_active())
+ spin_unlock(&iodevs[id]->lock);
+ return ret;
+}
+
+ssize_t iodev_queue(iodev_id_t id, const void *buf, size_t length)
+{
+ if (!iodevs[id] || !iodevs[id]->ops->queue)
+ return iodev_write(id, buf, length);
+
+ if (mmu_active())
+ spin_lock(&iodevs[id]->lock);
+ ssize_t ret = iodevs[id]->ops->queue(iodevs[id]->opaque, buf, length);
+ if (mmu_active())
+ spin_unlock(&iodevs[id]->lock);
+ return ret;
+}
+
+void iodev_flush(iodev_id_t id)
+{
+ if (!iodevs[id] || !iodevs[id]->ops->flush)
+ return;
+
+ if (mmu_active())
+ spin_lock(&iodevs[id]->lock);
+ iodevs[id]->ops->flush(iodevs[id]->opaque);
+ if (mmu_active())
+ spin_unlock(&iodevs[id]->lock);
+}
+
+void iodev_lock(iodev_id_t id)
+{
+ if (!iodevs[id])
+ return;
+
+ if (mmu_active())
+ spin_lock(&iodevs[id]->lock);
+}
+
+void iodev_unlock(iodev_id_t id)
+{
+ if (!iodevs[id])
+ return;
+
+ if (mmu_active())
+ spin_unlock(&iodevs[id]->lock);
+}
+
+int in_iodev = 0;
+
+static DECLARE_SPINLOCK(console_lock);
+
+void iodev_console_write(const void *buf, size_t length)
+{
+ bool do_lock = mmu_active();
+
+ if (!do_lock && !is_primary_core()) {
+ if (length && iodevs[IODEV_UART]->usage & USAGE_CONSOLE) {
+ iodevs[IODEV_UART]->ops->write(iodevs[IODEV_UART]->opaque, "*", 1);
+ iodevs[IODEV_UART]->ops->write(iodevs[IODEV_UART]->opaque, buf, length);
+ }
+ return;
+ }
+
+ if (do_lock)
+ spin_lock(&console_lock);
+
+ if (in_iodev) {
+ if (length && iodevs[IODEV_UART]->usage & USAGE_CONSOLE) {
+ iodevs[IODEV_UART]->ops->write(iodevs[IODEV_UART]->opaque, "+", 1);
+ iodevs[IODEV_UART]->ops->write(iodevs[IODEV_UART]->opaque, buf, length);
+ }
+ if (do_lock)
+ spin_unlock(&console_lock);
+ return;
+ }
+ in_iodev++;
+
+ dprintf(" iodev_console_write() wp=%d\n", con_wp);
+ for (iodev_id_t id = 0; id < IODEV_MAX; id++) {
+ if (!iodevs[id])
+ continue;
+
+ if (!(iodevs[id]->usage & USAGE_CONSOLE)) {
+ /* Drop buffer */
+ con_rp[id] = con_wp + length;
+ continue;
+ }
+
+ if (!iodev_can_write(id))
+ continue;
+
+ if (con_wp > CONSOLE_BUFFER_SIZE)
+ con_rp[id] = max(con_wp - CONSOLE_BUFFER_SIZE, con_rp[id]);
+
+ dprintf(" rp=%d\n", con_rp[id]);
+ // Flush existing buffer to device if possible
+ while (con_rp[id] < con_wp) {
+ size_t buf_rp = con_rp[id] % CONSOLE_BUFFER_SIZE;
+ size_t block = min(con_wp - con_rp[id], CONSOLE_BUFFER_SIZE - buf_rp);
+
+ dprintf(" write buf %d\n", block);
+ ssize_t ret = iodev_write(id, &con_buf[buf_rp], block);
+
+ if (ret <= 0)
+ goto next_dev;
+
+ con_rp[id] += ret;
+ }
+
+ const u8 *p = buf;
+ size_t wrote = 0;
+
+ // Write the current buffer
+ while (wrote < length) {
+ ssize_t ret = iodev_write(id, p, length - wrote);
+
+ if (ret <= 0)
+ goto next_dev;
+
+ con_rp[id] += ret;
+ wrote += ret;
+ p += ret;
+ }
+
+ next_dev:;
+ }
+
+ // Update console buffer
+
+ if (length > CONSOLE_BUFFER_SIZE) {
+ buf += (length - CONSOLE_BUFFER_SIZE);
+ con_wp += (length - CONSOLE_BUFFER_SIZE);
+ length = CONSOLE_BUFFER_SIZE;
+ }
+
+ while (length) {
+ size_t buf_wp = con_wp % CONSOLE_BUFFER_SIZE;
+ size_t block = min(length, CONSOLE_BUFFER_SIZE - buf_wp);
+ memcpy(&con_buf[buf_wp], buf, block);
+ buf += block;
+ con_wp += block;
+ length -= block;
+ }
+
+ in_iodev--;
+ if (do_lock)
+ spin_unlock(&console_lock);
+}
+
+void iodev_handle_events(iodev_id_t id)
+{
+ bool do_lock = mmu_active();
+
+ if (do_lock)
+ spin_lock(&console_lock);
+
+ if (in_iodev) {
+ if (do_lock)
+ spin_unlock(&console_lock);
+ return;
+ }
+
+ in_iodev++;
+
+ if (iodevs[id]->ops->handle_events)
+ iodevs[id]->ops->handle_events(iodevs[id]->opaque);
+
+ in_iodev--;
+
+ if (iodev_can_write(id))
+ iodev_console_write(NULL, 0);
+
+ if (do_lock)
+ spin_unlock(&console_lock);
+}
+
+void iodev_console_kick(void)
+{
+ iodev_console_write(NULL, 0);
+
+ for (iodev_id_t id = 0; id < IODEV_MAX; id++) {
+ if (!iodevs[id])
+ continue;
+ if (!(iodevs[id]->usage & USAGE_CONSOLE))
+ continue;
+
+ iodev_handle_events(id);
+ }
+}
+
+void iodev_console_flush(void)
+{
+ for (iodev_id_t id = 0; id < IODEV_MAX; id++) {
+ if (!iodevs[id])
+ continue;
+ if (!(iodevs[id]->usage & USAGE_CONSOLE))
+ continue;
+
+ iodev_flush(id);
+ }
+}
+
+void iodev_set_usage(iodev_id_t id, iodev_usage_t usage)
+{
+ if (iodevs[id])
+ iodevs[id]->usage = usage;
+}
+
+iodev_usage_t iodev_get_usage(iodev_id_t id)
+{
+ if (iodevs[id])
+ return iodevs[id]->usage;
+ return 0;
+}
+
+void *iodev_get_opaque(iodev_id_t id)
+{
+ if (id >= IODEV_MAX || !iodevs[id])
+ return NULL;
+
+ return iodevs[id]->opaque;
+}

diff --git a/tools/src/iodev.h b/tools/src/iodev.h
new file mode 100644
index 0000000..24187c7
--- /dev/null
+++ b/tools/src/iodev.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef IODEV_H
+#define IODEV_H
+
+#include "types.h"
+#include "utils.h"
+
+#define USB_IODEV_COUNT 8
+
+typedef enum _iodev_id_t {
+ IODEV_UART,
+ IODEV_FB,
+ IODEV_USB_VUART,
+ IODEV_USB0,
+ IODEV_MAX = IODEV_USB0 + USB_IODEV_COUNT,
+} iodev_id_t;
+
+typedef enum _iodev_usage_t {
+ USAGE_CONSOLE = BIT(0),
+ USAGE_UARTPROXY = BIT(1),
+} iodev_usage_t;
+
+struct iodev_ops {
+ ssize_t (*can_read)(void *opaque);
+ bool (*can_write)(void *opaque);
+ ssize_t (*read)(void *opaque, void *buf, size_t length);
+ ssize_t (*write)(void *opaque, const void *buf, size_t length);
+ ssize_t (*queue)(void *opaque, const void *buf, size_t length);
+ void (*flush)(void *opaque);
+ void (*handle_events)(void *opaque);
+};
+
+struct iodev {
+ const struct iodev_ops *ops;
+
+ spinlock_t lock;
+ iodev_usage_t usage;
+ void *opaque;
+};
+
+void iodev_register_device(iodev_id_t id, struct iodev *dev);
+struct iodev *iodev_unregister_device(iodev_id_t id);
+
+ssize_t iodev_can_read(iodev_id_t id);
+bool iodev_can_write(iodev_id_t id);
+ssize_t iodev_read(iodev_id_t id, void *buf, size_t length);
+ssize_t iodev_write(iodev_id_t id, const void *buf, size_t length);
+ssize_t iodev_queue(iodev_id_t id, const void *buf, size_t length);
+void iodev_flush(iodev_id_t id);
+void iodev_handle_events(iodev_id_t id);
+void iodev_lock(iodev_id_t id);
+void iodev_unlock(iodev_id_t id);
+
+void iodev_console_write(const void *buf, size_t length);
+void iodev_console_kick(void);
+void iodev_console_flush(void);
+
+iodev_usage_t iodev_get_usage(iodev_id_t id);
+void iodev_set_usage(iodev_id_t id, iodev_usage_t usage);
+void *iodev_get_opaque(iodev_id_t id);
+
+#endif
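[Editor's note: a sketch of registering a custom backend in one of the USB slots; the callbacks are illustrative stubs. Per iodev_unregister_device(), only IODEV_USB0 and up can be unregistered again.]

    static ssize_t my_can_read(void *opaque)
    {
        (void)opaque;
        return 0; /* no RX data queued */
    }

    static bool my_can_write(void *opaque)
    {
        (void)opaque;
        return true;
    }

    static const struct iodev_ops my_ops = {
        .can_read = my_can_read,
        .can_write = my_can_write,
    };

    static struct iodev my_dev = {
        .ops = &my_ops,
        .usage = USAGE_CONSOLE, /* mirror console output to this device */
    };

    iodev_register_device(IODEV_USB0, &my_dev);
    iodev_console_kick(); /* replay the buffered console backlog to it */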
diff --git a/tools/src/iova.c b/tools/src/iova.c
new file mode 100644
index 0000000..c3146cd
--- /dev/null
+++ b/tools/src/iova.c
@@ -0,0 +1,233 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "iova.h"
+#include "malloc.h"
+#include "string.h"
+#include "utils.h"
+
+struct iova_block {
+ u64 iova;
+ size_t sz;
+ struct iova_block *next;
+};
+
+struct iova_domain {
+ u64 base;
+ u64 limit;
+ struct iova_block *free_list;
+};
+
+iova_domain_t *iovad_init(u64 base, u64 limit)
+{
+ if (base != ALIGN_UP(base, SZ_32M)) {
+ printf("iovad_init: base it not is not aligned to SZ_32M\n");
+ return NULL;
+ }
+
+ iova_domain_t *iovad = malloc(sizeof(*iovad));
+ if (!iovad)
+ return NULL;
+
+ memset(iovad, 0, sizeof(*iovad));
+
+ struct iova_block *blk = malloc(sizeof(*blk));
+ if (!blk) {
+ free(iovad);
+ return NULL;
+ }
+
+ /* don't hand out NULL pointers */
+ blk->iova = base ? base : SZ_16K;
+ blk->sz = limit - blk->iova;
+ blk->next = NULL;
+ iovad->base = base;
+ iovad->limit = limit;
+ iovad->free_list = blk;
+
+ return iovad;
+}
+
+void iovad_shutdown(iova_domain_t *iovad, dart_dev_t *dart)
+{
+ struct iova_block *blk = iovad->free_list;
+
+ while (blk != NULL) {
+ struct iova_block *blk_free = blk;
+ blk = blk->next;
+
+ free(blk_free);
+ }
+
+ if (dart)
+ for (u64 addr = iovad->base; addr < iovad->limit; addr += SZ_32M)
+ dart_free_l2(dart, addr);
+
+ free(iovad);
+}
+
+bool iova_reserve(iova_domain_t *iovad, u64 iova, size_t sz)
+{
+ iova = ALIGN_DOWN(iova, SZ_16K);
+ sz = ALIGN_UP(sz, SZ_16K);
+
+ if (iova == 0) {
+ iova += SZ_16K;
+ sz -= SZ_16K;
+ }
+ if (sz == 0)
+ return true;
+
+ if (!iovad->free_list) {
+ printf("iova_reserve: trying to reserve iova range but empty free list\n");
+ return false;
+ }
+
+ struct iova_block *blk = iovad->free_list;
+ struct iova_block *blk_prev = NULL;
+ while (blk != NULL) {
+ if (iova >= blk->iova && iova < (blk->iova + blk->sz)) {
+ if (iova + sz > (blk->iova + blk->sz)) {
+ printf("iova_reserve: tried to reserve [%lx; +%lx] but block in free list has "
+ "range [%lx; +%lx]\n",
+ iova, sz, blk->iova, blk->sz);
+ return false;
+ }
+
+ if (iova == blk->iova && sz == blk->sz) {
+ /* if the to-be-reserved range is present as a single block in the free list we just
+ * need to remove it */
+ if (blk_prev)
+ blk_prev->next = blk->next;
+ else
+ iovad->free_list = blk->next;
+
+ free(blk);
+ return true;
+ } else if (iova == blk->iova) {
+ /* cut off the reserved range from the beginning */
+ blk->iova += sz;
+ blk->sz -= sz;
+ return true;
+ } else if (iova + sz == blk->iova + blk->sz) {
+ /* cut off the reserved range from the end */
+ blk->sz -= sz;
+ return true;
+ } else {
+ /* the to-be-reserved range is in the middle and we'll have to split this block */
+ struct iova_block *blk_new = malloc(sizeof(*blk_new));
+ if (!blk_new) {
+ printf("iova_reserve: out of memory.\n");
+ return false;
+ }
+
+ blk_new->iova = iova + sz;
+ blk_new->sz = blk->iova + blk->sz - blk_new->iova;
+ blk_new->next = blk->next;
+ blk->next = blk_new;
+ blk->sz = iova - blk->iova;
+ return true;
+ }
+ }
+
+ blk_prev = blk;
+ blk = blk->next;
+ }
+
+ printf("iova_reserve: tried to reserve [%lx; +%lx] but range is already used.\n", iova, sz);
+ return false;
+}
+
+u64 iova_alloc(iova_domain_t *iovad, size_t sz)
+{
+ sz = ALIGN_UP(sz, SZ_16K);
+
+ struct iova_block *blk_prev = NULL;
+ struct iova_block *blk = iovad->free_list;
+ while (blk != NULL) {
+ if (blk->sz == sz) {
+ u64 iova = blk->iova;
+
+ if (blk_prev)
+ blk_prev->next = blk->next;
+ else
+ iovad->free_list = blk->next;
+
+ free(blk);
+ return iova;
+ } else if (blk->sz > sz) {
+ u64 iova = blk->iova;
+
+ blk->iova += sz;
+ blk->sz -= sz;
+
+ return iova;
+ }
+
+ blk_prev = blk;
+ blk = blk->next;
+ }
+
+ return 0;
+}
+
+void iova_free(iova_domain_t *iovad, u64 iova, size_t sz)
+{
+ sz = ALIGN_UP(sz, SZ_16K);
+
+ struct iova_block *blk_prev = NULL;
+ struct iova_block *blk = iovad->free_list;
+
+ /* create a new free list if it's empty */
+ if (!blk) {
+ blk = malloc(sizeof(*blk));
+ if (!blk)
+ panic("out of memory in iovad_free");
+ blk->iova = iova;
+ blk->sz = sz;
+ blk->next = NULL;
+ iovad->free_list = blk;
+ return;
+ }
+
+ while (blk != NULL) {
+ if ((iova + sz) == blk->iova) {
+ /* extend the block at the beginning */
+ blk->iova -= sz;
+ blk->sz += sz;
+
+ /* if we have just extended the start of the free list we're already done */
+ if (!blk_prev)
+ return;
+
+ /* check if we can merge two blocks otherwise */
+ if ((blk_prev->iova + blk_prev->sz) == blk->iova) {
+ blk_prev->sz += blk->sz;
+ blk_prev->next = blk->next;
+ free(blk);
+ }
+
+ return;
+ } else if ((iova + sz) < blk->iova) {
+ /* create a new block */
+ struct iova_block *blk_new = malloc(sizeof(*blk_new));
+ if (!blk_new)
+ panic("iova_free: out of memory\n");
+
+ blk_new->iova = iova;
+ blk_new->sz = sz;
+ blk_new->next = blk;
+
+ if (blk_prev)
+ blk_prev->next = blk_new;
+ else
+ iovad->free_list = blk_new;
+
+ return;
+ }
+
+ blk_prev = blk;
+ blk = blk->next;
+ }
+
+ panic("iovad_free: corruption detected, unable to insert freed range\n");
+}
diff --git a/tools/src/iova.h b/tools/src/iova.h
new file mode 100644
index 0000000..1637be4
--- /dev/null
+++ b/tools/src/iova.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef IOVA_H
+#define IOVA_H
+
+#include "dart.h"
+#include "types.h"
+
+typedef struct iova_domain iova_domain_t;
+
+iova_domain_t *iovad_init(u64 base, u64 limit);
+void iovad_shutdown(iova_domain_t *iovad, dart_dev_t *dart);
+
+bool iova_reserve(iova_domain_t *iovad, u64 iova, size_t sz);
+u64 iova_alloc(iova_domain_t *iovad, size_t sz);
+void iova_free(iova_domain_t *iovad, u64 iova, size_t sz);
+
+#endif
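[Editor's note: a sketch of the allocator's lifecycle; base, limit, and the reserved range are placeholder values. iovad_init() requires a 32MB-aligned base, and all sizes round to 16k granules.]

    iova_domain_t *iovad = iovad_init(SZ_32M, 0x100000000); /* placeholders */
    if (iovad) {
        /* carve out a firmware-owned range before handing out addresses */
        iova_reserve(iovad, 0x8000000, 4 * SZ_16K);

        u64 iova = iova_alloc(iovad, 0x10000); /* returns 0 when out of space */
        if (iova)
            iova_free(iovad, iova, 0x10000);

        iovad_shutdown(iovad, NULL); /* NULL: no DART L2 tables to free */
    }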
diff --git a/tools/src/kboot.c b/tools/src/kboot.c
new file mode 100644
index 0000000..c56c0e7
--- /dev/null
+++ b/tools/src/kboot.c
@@ -0,0 +1,1937 @@
+/* SPDX-License-Identifier: MIT */
+
+#include <stdint.h>
+
+#include "kboot.h"
+#include "adt.h"
+#include "assert.h"
+#include "dapf.h"
+#include "devicetree.h"
+#include "exception.h"
+#include "firmware.h"
+#include "malloc.h"
+#include "memory.h"
+#include "pcie.h"
+#include "pmgr.h"
+#include "sep.h"
+#include "smp.h"
+#include "types.h"
+#include "usb.h"
+#include "utils.h"
+#include "xnuboot.h"
+
+#include "libfdt/libfdt.h"
+
+#define MAX_CHOSEN_PARAMS 16
+
+#define MAX_ATC_DEVS 8
+#define MAX_CIO_DEVS 8
+
+#define MAX_DISP_MAPPINGS 8
+
+static void *dt = NULL;
+static int dt_bufsize = 0;
+static void *initrd_start = NULL;
+static size_t initrd_size = 0;
+static char *chosen_params[MAX_CHOSEN_PARAMS][2];
+
+extern const char *const m1n1_version;
+
+int dt_set_gpu(void *dt);
+
+#define DT_ALIGN 16384
+
+#define bail(...) \
+ do { \
+ printf(__VA_ARGS__); \
+ return -1; \
+ } while (0)
+
+#define bail_cleanup(...) \
+ do { \
+ printf(__VA_ARGS__); \
+ ret = -1; \
+ goto err; \
+ } while (0)
+
+void get_notchless_fb(u64 *fb_base, u64 *fb_height)
+{
+ *fb_base = cur_boot_args.video.base;
+ *fb_height = cur_boot_args.video.height;
+
+ int node = adt_path_offset(adt, "/product");
+
+ if (node < 0) {
+ printf("FDT: /product node not found\n");
+ return;
+ }
+
+ u32 val;
+
+ if (ADT_GETPROP(adt, node, "partially-occluded-display", &val) < 0 || !val) {
+ printf("FDT: No notch detected\n");
+ return;
+ }
+
+ u64 hfrac = cur_boot_args.video.height * 16 / cur_boot_args.video.width;
+ u64 new_height = cur_boot_args.video.width * hfrac / 16;
+
+ if (new_height == cur_boot_args.video.height) {
+ printf("FDT: Notch detected, but display aspect is already 16:%lu?\n", hfrac);
+ return;
+ }
+
+ u64 offset = cur_boot_args.video.height - new_height;
+
+ printf("display: Hiding notch, %lux%lu -> %lux%lu (+%lu, 16:%lu)\n", cur_boot_args.video.width,
+ cur_boot_args.video.height, cur_boot_args.video.width, new_height, offset, hfrac);
+
+ *fb_base += cur_boot_args.video.stride * offset;
+ *fb_height = new_height;
+}
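[Editor's note: a worked example of the arithmetic above, using an illustrative panel size not taken from the patch: for a 3456x2234 notched panel, hfrac = 2234 * 16 / 3456 = 10, new_height = 3456 * 10 / 16 = 2160, so offset = 2234 - 2160 = 74 rows; the framebuffer base is advanced by stride * 74 so the visible 16:10 region starts below the notch.]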
+
+static int dt_set_rng_seed_sep(int node)
+{
+ u64 kaslr_seed;
+ uint8_t rng_seed[128]; // same size used by Linux for kexec
+
+ if (sep_get_random(&kaslr_seed, sizeof(kaslr_seed)) != sizeof(kaslr_seed))
+ bail("SEP: couldn't get enough random bytes for KASLR seed");
+ if (sep_get_random(rng_seed, sizeof(rng_seed)) != sizeof(rng_seed))
+ bail("SEP: couldn't get enough random bytes for RNG seed");
+
+ if (fdt_setprop_u64(dt, node, "kaslr-seed", kaslr_seed))
+ bail("FDT: couldn't set kaslr-seed\n");
+ if (fdt_setprop(dt, node, "rng-seed", rng_seed, sizeof(rng_seed)))
+ bail("FDT: couldn't set rng-seed\n");
+
+ printf("FDT: Passing %ld bytes of KASLR seed and %ld bytes of random seed\n",
+ sizeof(kaslr_seed), sizeof(rng_seed));
+
+ return 0;
+}
+
+static int dt_set_rng_seed_adt(int node)
+{
+ int anode = adt_path_offset(adt, "/chosen");
+
+ if (anode < 0)
+ bail("ADT: /chosen not found\n");
+
+ const uint8_t *random_seed;
+ u32 seed_length;
+
+ random_seed = adt_getprop(adt, anode, "random-seed", &seed_length);
+ if (random_seed) {
+ printf("ADT: %d bytes of random seed available\n", seed_length);
+
+ if (seed_length >= sizeof(u64)) {
+ u64 kaslr_seed;
+
+ memcpy(&kaslr_seed, random_seed, sizeof(kaslr_seed));
+
+ // Ideally we would throw away the kaslr_seed part of random_seed
+ // and avoid reusing it. However, Linux wants 64 bytes of bootloader
+ // random seed to consider its CRNG initialized, which is exactly
+ // how much iBoot gives us. This probably doesn't matter, since
+ // that entropy is going to get shuffled together and Linux makes
+ // sure to clear the FDT randomness after using it anyway, but just
+ // in case let's mix in a few bits from our own KASLR base to make
+ // kaslr_seed unique.
+
+ kaslr_seed ^= (u64)cur_boot_args.virt_base;
+
+ if (fdt_setprop_u64(dt, node, "kaslr-seed", kaslr_seed))
+ bail("FDT: couldn't set kaslr-seed\n");
+
+ printf("FDT: KASLR seed initialized\n");
+ } else {
+ printf("ADT: not enough random data for kaslr-seed\n");
+ }
+
+ if (seed_length) {
+ if (fdt_setprop(dt, node, "rng-seed", random_seed, seed_length))
+ bail("FDT: couldn't set rng-seed\n");
+
+ printf("FDT: Passing %d bytes of random seed\n", seed_length);
+ }
+ } else {
+ printf("ADT: no random-seed available!\n");
+ }
+
+ return 0;
+}
+
+static int dt_set_chosen(void)
+{
+
+ int node = fdt_path_offset(dt, "/chosen");
+ if (node < 0)
+ bail("FDT: /chosen node not found in devtree\n");
+
+ for (int i = 0; i < MAX_CHOSEN_PARAMS; i++) {
+ if (!chosen_params[i][0])
+ break;
+
+ const char *name = chosen_params[i][0];
+ const char *value = chosen_params[i][1];
+ if (fdt_setprop(dt, node, name, value, strlen(value) + 1) < 0)
+ bail("FDT: couldn't set chosen.%s property\n", name);
+ printf("FDT: %s = '%s'\n", name, value);
+ }
+
+ if (initrd_start && initrd_size) {
+ if (fdt_setprop_u64(dt, node, "linux,initrd-start", (u64)initrd_start))
+ bail("FDT: couldn't set chosen.linux,initrd-start property\n");
+
+ u64 end = ((u64)initrd_start) + initrd_size;
+ if (fdt_setprop_u64(dt, node, "linux,initrd-end", end))
+ bail("FDT: couldn't set chosen.linux,initrd-end property\n");
+
+ if (fdt_add_mem_rsv(dt, (u64)initrd_start, initrd_size))
+ bail("FDT: couldn't add reservation for the initrd\n");
+
+ printf("FDT: initrd at %p size 0x%lx\n", initrd_start, initrd_size);
+ }
+
+ if (cur_boot_args.video.base) {
+ int fb = fdt_path_offset(dt, "/chosen/framebuffer");
+ if (fb < 0)
+ bail("FDT: /chosen node not found in devtree\n");
+
+ u64 fb_base, fb_height;
+ get_notchless_fb(&fb_base, &fb_height);
+ u64 fb_size = cur_boot_args.video.stride * fb_height;
+ u64 fbreg[2] = {cpu_to_fdt64(fb_base), cpu_to_fdt64(fb_size)};
+ char fbname[32];
+
+ snprintf(fbname, sizeof(fbname), "framebuffer@%lx", fb_base);
+
+ if (fdt_setprop(dt, fb, "reg", fbreg, sizeof(fbreg)))
+ bail("FDT: couldn't set framebuffer.reg property\n");
+
+ if (fdt_set_name(dt, fb, fbname))
+ bail("FDT: couldn't set framebuffer name\n");
+
+ if (fdt_setprop_u32(dt, fb, "width", cur_boot_args.video.width))
+ bail("FDT: couldn't set framebuffer width\n");
+
+ if (fdt_setprop_u32(dt, fb, "height", fb_height))
+ bail("FDT: couldn't set framebuffer height\n");
+
+ if (fdt_setprop_u32(dt, fb, "stride", cur_boot_args.video.stride))
+ bail("FDT: couldn't set framebuffer stride\n");
+
+ const char *format = NULL;
+
+ switch (cur_boot_args.video.depth & 0xff) {
+ case 32:
+ format = "x8r8g8b8";
+ break;
+ case 30:
+ format = "x2r10g10b10";
+ break;
+ case 16:
+ format = "r5g6b5";
+ break;
+ default:
+ printf("FDT: unsupported fb depth %lu, not enabling\n", cur_boot_args.video.depth);
+ return 0; // Do not error out, but don't set the FB
+ }
+
+ if (fdt_setprop_string(dt, fb, "format", format))
+ bail("FDT: couldn't set framebuffer format\n");
+
+ fdt_delprop(dt, fb, "status"); // may fail if it does not exist
+
+ printf("FDT: %s base 0x%lx size 0x%lx\n", fbname, fb_base, fb_size);
+
+ // We do not need to reserve the framebuffer, as it will be excluded from the usable RAM
+ // range already.
+
+ // save notch height in the dcp node if present
+ if (cur_boot_args.video.height - fb_height) {
+ int dcp = fdt_path_offset(dt, "dcp");
+ if (dcp >= 0)
+ if (fdt_appendprop_u32(dt, dcp, "apple,notch-height",
+ cur_boot_args.video.height - fb_height))
+ printf("FDT: couldn't set apple,notch-height\n");
+ }
+ }
+ node = fdt_path_offset(dt, "/chosen");
+ if (node < 0)
+ bail("FDT: /chosen node not found in devtree\n");
+
+ int ipd = adt_path_offset(adt, "/arm-io/spi3/ipd");
+ if (ipd < 0)
+ ipd = adt_path_offset(adt, "/arm-io/dockchannel-mtp/mtp-transport/keyboard");
+
+ if (ipd < 0) {
+ printf("ADT: no keyboard found\n");
+ } else {
+ u32 len;
+ const u8 *kblang = adt_getprop(adt, ipd, "kblang-calibration", &len);
+ if (kblang && len >= 2) {
+ if (fdt_setprop_u32(dt, node, "asahi,kblang-code", kblang[1]))
+ bail("FDT: couldn't set asahi,kblang-code");
+ } else {
+ printf("ADT: kblang-calibration not found, no keyboard layout\n");
+ }
+ }
+
+ if (fdt_setprop(dt, node, "asahi,iboot1-version", system_firmware.iboot,
+ strlen(system_firmware.iboot) + 1))
+ bail("FDT: couldn't set asahi,iboot1-version");
+
+ if (fdt_setprop(dt, node, "asahi,system-fw-version", system_firmware.string,
+ strlen(system_firmware.string) + 1))
+ bail("FDT: couldn't set asahi,system-fw-version");
+
+ if (fdt_setprop(dt, node, "asahi,iboot2-version", os_firmware.iboot,
+ strlen(os_firmware.iboot) + 1))
+ bail("FDT: couldn't set asahi,iboot2-version");
+
+ if (fdt_setprop(dt, node, "asahi,os-fw-version", os_firmware.string,
+ strlen(os_firmware.string) + 1))
+ bail("FDT: couldn't set asahi,os-fw-version");
+
+ if (fdt_setprop(dt, node, "asahi,m1n1-stage2-version", m1n1_version, strlen(m1n1_version) + 1))
+ bail("FDT: couldn't set asahi,m1n1-stage2-version");
+
+ if (dt_set_rng_seed_sep(node))
+ return dt_set_rng_seed_adt(node);
+
+ return 0;
+}
+
+static int dt_set_memory(void)
+{
+ int anode = adt_path_offset(adt, "/chosen");
+
+ if (anode < 0)
+ bail("ADT: /chosen not found\n");
+
+ u64 dram_base, dram_size;
+
+ if (ADT_GETPROP(adt, anode, "dram-base", &dram_base) < 0)
+ bail("ADT: Failed to get dram-base\n");
+ if (ADT_GETPROP(adt, anode, "dram-size", &dram_size) < 0)
+ bail("ADT: Failed to get dram-size\n");
+
+ // Tell the kernel our usable memory range. We cannot declare all of DRAM, and just reserve the
+ // bottom and top, because the kernel would still map it (and just not use it), which breaks
+ // ioremap (e.g. simplefb).
+
+ u64 dram_min = cur_boot_args.phys_base;
+ u64 dram_max = cur_boot_args.phys_base + cur_boot_args.mem_size;
+
+ printf("FDT: DRAM at 0x%lx size 0x%lx\n", dram_base, dram_size);
+ printf("FDT: Usable memory is 0x%lx..0x%lx (0x%lx)\n", dram_min, dram_max, dram_max - dram_min);
+
+ u64 memreg[2] = {cpu_to_fdt64(dram_min), cpu_to_fdt64(dram_max - dram_min)};
+
+ int node = fdt_path_offset(dt, "/memory");
+ if (node < 0)
+ bail("FDT: /memory node not found in devtree\n");
+
+ if (fdt_setprop(dt, node, "reg", memreg, sizeof(memreg)))
+ bail("FDT: couldn't set memory.reg property\n");
+
+ return 0;
+}
+
+static int dt_set_serial_number(void)
+{
+
+ int fdt_root = fdt_path_offset(dt, "/");
+ int adt_root = adt_path_offset(adt, "/");
+
+ if (fdt_root < 0)
+ bail("FDT: could not open a handle to FDT root.\n");
+ if (adt_root < 0)
+ bail("ADT: could not open a handle to ADT root.\n");
+
+ u32 sn_len;
+ const char *serial_number = adt_getprop(adt, adt_root, "serial-number", &sn_len);
+ if (!serial_number)
+ bail("ADT: could not read serial-number property\n");
+ if (fdt_setprop_string(dt, fdt_root, "serial-number", serial_number))
+ bail("FDT: unable to set device serial number!\n");
+ printf("FDT: reporting device serial number: %s\n", serial_number);
+
+ return 0;
+}
+
+static int dt_set_cpus(void)
+{
+ int ret = 0;
+
+ int cpus = fdt_path_offset(dt, "/cpus");
+ if (cpus < 0)
+ bail("FDT: /cpus node not found in devtree\n");
+
+ uint32_t *pruned_phandles = malloc(MAX_CPUS * sizeof(uint32_t));
+ size_t pruned = 0;
+ if (!pruned_phandles)
+ bail("FDT: out of memory\n");
+
+ /* Prune CPU nodes */
+ int node, cpu = 0;
+ for (node = fdt_first_subnode(dt, cpus); node >= 0;) {
+ const char *name = fdt_get_name(dt, node, NULL);
+ if (strncmp(name, "cpu@", 4))
+ goto next_node;
+
+ if (cpu >= MAX_CPUS)
+ bail_cleanup("Maximum number of CPUs exceeded, consider increasing MAX_CPUS\n");
+
+ const fdt64_t *prop = fdt_getprop(dt, node, "reg", NULL);
+ if (!prop)
+ bail_cleanup("FDT: failed to get reg property of CPU\n");
+
+ u64 dt_mpidr = fdt64_ld(prop);
+
+ if (dt_mpidr == (mrs(MPIDR_EL1) & 0xFFFFFF))
+ goto next_cpu;
+
+ if (!smp_is_alive(cpu)) {
+ printf("FDT: CPU %d is not alive, disabling...\n", cpu);
+ pruned_phandles[pruned++] = fdt_get_phandle(dt, node);
+
+ int next = fdt_next_subnode(dt, node);
+ fdt_nop_node(dt, node);
+ cpu++;
+ node = next;
+ continue;
+ }
+
+ u64 mpidr = smp_get_mpidr(cpu);
+
+ if (dt_mpidr != mpidr)
+ bail_cleanup("FDT: DT CPU %d MPIDR mismatch: 0x%lx != 0x%lx\n", cpu, dt_mpidr, mpidr);
+
+ u64 release_addr = smp_get_release_addr(cpu);
+ if (fdt_setprop_inplace_u64(dt, node, "cpu-release-addr", release_addr))
+ bail_cleanup("FDT: couldn't set cpu-release-addr property\n");
+
+ printf("FDT: CPU %d MPIDR=0x%lx release-addr=0x%lx\n", cpu, mpidr, release_addr);
+
+ next_cpu:
+ cpu++;
+ next_node:
+ node = fdt_next_subnode(dt, node);
+ }
+
+ if ((node < 0) && (node != -FDT_ERR_NOTFOUND)) {
+ bail_cleanup("FDT: error iterating through CPUs\n");
+ }
+
+ /* Prune AIC PMU affinities */
+ int aic = fdt_node_offset_by_compatible(dt, -1, "apple,aic");
+ if (aic == -FDT_ERR_NOTFOUND)
+ aic = fdt_node_offset_by_compatible(dt, -1, "apple,aic2");
+ if (aic < 0)
+ bail_cleanup("FDT: Failed to find AIC node\n");
+
+ int affinities = fdt_subnode_offset(dt, aic, "affinities");
+ if (affinities < 0) {
+ printf("FDT: Failed to find AIC affinities node, ignoring...\n");
+ } else {
+ int node;
+ for (node = fdt_first_subnode(dt, affinities); node >= 0;
+ node = fdt_next_subnode(dt, node)) {
+ int len;
+ const fdt32_t *phs = fdt_getprop(dt, node, "cpus", &len);
+ if (!phs)
+ bail_cleanup("FDT: Failed to find cpus property under AIC affinity\n");
+
+ fdt32_t *new_phs = malloc(len);
+ size_t index = 0;
+ size_t count = len / sizeof(fdt32_t);
+
+ for (size_t i = 0; i < count; i++) {
+ uint32_t phandle = fdt32_ld(&phs[i]);
+ bool prune = false;
+
+ for (size_t j = 0; j < pruned; j++) {
+ if (pruned_phandles[j] == phandle) {
+ prune = true;
+ break;
+ }
+ }
+ if (!prune)
+ new_phs[index++] = phs[i];
+ }
+
+ ret = fdt_setprop(dt, node, "cpus", new_phs, sizeof(fdt32_t) * index);
+ free(new_phs);
+
+ if (ret < 0)
+ bail_cleanup("FDT: Failed to set cpus property under AIC affinity\n");
+
+ const char *name = fdt_get_name(dt, node, NULL);
+ printf("FDT: Pruned %ld/%ld CPU references in [AIC]/affinities/%s\n", count - index,
+ count, name);
+ }
+
+ if ((node < 0) && (node != -FDT_ERR_NOTFOUND))
+ bail_cleanup("FDT: Error iterating through affinity nodes\n");
+ }
+
+ /* Prune CPU-map */
+ int cpu_map = fdt_path_offset(dt, "/cpus/cpu-map");
+ if (cpu_map < 0) {
+ printf("FDT: /cpus/cpu-map node not found in devtree, ignoring...\n");
+ free(pruned_phandles);
+ return 0;
+ }
+
+ int cluster_idx = 0;
+ int cluster_node;
+ for (cluster_node = fdt_first_subnode(dt, cpu_map); cluster_node >= 0;) {
+ const char *name = fdt_get_name(dt, cluster_node, NULL);
+ int cpu_idx = 0;
+
+ if (strncmp(name, "cluster", 7))
+ goto next_cluster;
+
+ int cpu_node;
+ for (cpu_node = fdt_first_subnode(dt, cluster_node); cpu_node >= 0;) {
+ const char *cpu_name = fdt_get_name(dt, cpu_node, NULL);
+
+ if (strncmp(cpu_name, "core", 4))
+ goto next_map_cpu;
+
+ int len;
+ const fdt32_t *cpu_ph = fdt_getprop(dt, cpu_node, "cpu", &len);
+
+ if (!cpu_ph || len != sizeof(*cpu_ph))
+ bail_cleanup("FDT: Failed to get cpu prop for /cpus/cpu-map/%s/%s\n", name,
+ cpu_name);
+
+ uint32_t phandle = fdt32_ld(cpu_ph);
+ bool prune = false;
+ for (size_t i = 0; i < pruned; i++) {
+ if (pruned_phandles[i] == phandle) {
+ prune = true;
+ break;
+ }
+ }
+
+ if (prune) {
+ printf("FDT: Pruning /cpus/cpu-map/%s/%s\n", name, cpu_name);
+
+ int next = fdt_next_subnode(dt, cpu_node);
+ fdt_nop_node(dt, cpu_node);
+ cpu_node = next;
+ continue;
+ } else {
+ char new_name[16];
+
+ snprintf(new_name, 16, "core%d", cpu_idx++);
+ fdt_set_name(dt, cpu_node, new_name);
+ }
+ next_map_cpu:
+ cpu_node = fdt_next_subnode(dt, cpu_node);
+ }
+
+ if ((cpu_node < 0) && (cpu_node != -FDT_ERR_NOTFOUND))
+ bail_cleanup("FDT: Error iterating through CPU nodes\n");
+
+ if (cpu_idx == 0) {
+ printf("FDT: Pruning /cpus/cpu-map/%s\n", name);
+
+ int next = fdt_next_subnode(dt, cluster_node);
+ fdt_nop_node(dt, cluster_node);
+ cluster_node = next;
+ continue;
+ } else {
+ char new_name[16];
+
+ snprintf(new_name, 16, "cluster%d", cluster_idx++);
+ fdt_set_name(dt, cluster_node, new_name);
+ }
+ next_cluster:
+ cluster_node = fdt_next_subnode(dt, cluster_node);
+ }
+
+ if ((cluster_node < 0) && (cluster_node != -FDT_ERR_NOTFOUND))
+ bail_cleanup("FDT: Error iterating through CPU clusters\n");
+
+ free(pruned_phandles);
+ return 0;
+
+err:
+ free(pruned_phandles);
+ return ret;
+}
+
+static struct {
+ const char *alias;
+ const char *fdt_property;
+ bool swap;
+} mac_address_devices[] = {
+ {
+ .alias = "bluetooth0",
+ .fdt_property = "local-bd-address",
+ .swap = true,
+ },
+ {
+ .alias = "ethernet0",
+ .fdt_property = "local-mac-address",
+ },
+ {
+ .alias = "wifi0",
+ .fdt_property = "local-mac-address",
+ },
+};
+
+static int dt_set_mac_addresses(void)
+{
+ int anode = adt_path_offset(adt, "/chosen");
+
+ if (anode < 0)
+ bail("ADT: /chosen not found\n");
+
+ for (size_t i = 0; i < sizeof(mac_address_devices) / sizeof(*mac_address_devices); i++) {
+ char propname[32];
+ snprintf(propname, sizeof(propname), "mac-address-%s", mac_address_devices[i].alias);
+
+ uint8_t addr[6];
+ if (ADT_GETPROP_ARRAY(adt, anode, propname, addr) < 0)
+ continue;
+
+ if (mac_address_devices[i].swap) {
+ for (size_t j = 0; j < sizeof(addr) / 2; ++j) {
+ uint8_t tmp = addr[j];
+ addr[j] = addr[sizeof(addr) - j - 1];
+ addr[sizeof(addr) - j - 1] = tmp;
+ }
+ }
+
+ const char *path = fdt_get_alias(dt, mac_address_devices[i].alias);
+ if (path == NULL)
+ continue;
+
+ int node = fdt_path_offset(dt, path);
+ if (node < 0)
+ continue;
+
+ fdt_setprop(dt, node, mac_address_devices[i].fdt_property, addr, sizeof(addr));
+ }
+
+ return 0;
+}
+
+static int dt_set_bluetooth_cal(int anode, int node, const char *adt_name, const char *fdt_name)
+{
+ u32 len;
+ const u8 *cal_blob = adt_getprop(adt, anode, adt_name, &len);
+
+ if (!cal_blob || !len)
+ bail("ADT: Failed to get %s", adt_name);
+
+ fdt_setprop(dt, node, fdt_name, cal_blob, len);
+ return 0;
+}
+
+static int dt_set_bluetooth(void)
+{
+ int ret;
+ int anode = adt_path_offset(adt, "/arm-io/bluetooth");
+
+ if (anode < 0)
+ bail("ADT: /arm-io/bluetooth not found\n");
+
+ const char *path = fdt_get_alias(dt, "bluetooth0");
+ if (path == NULL)
+ return 0;
+
+ int node = fdt_path_offset(dt, path);
+ if (node < 0)
+ return 0;
+
+ ret = dt_set_bluetooth_cal(anode, node, "bluetooth-taurus-calibration-bf",
+ "brcm,taurus-bf-cal-blob");
+ if (ret)
+ return ret;
+
+ ret = dt_set_bluetooth_cal(anode, node, "bluetooth-taurus-calibration", "brcm,taurus-cal-blob");
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int dt_set_multitouch(void)
+{
+ const char *path = fdt_get_alias(dt, "touchbar0");
+ if (path == NULL)
+ return 0;
+
+ int node = fdt_path_offset(dt, path);
+ if (node < 0)
+ bail("FDT: alias points at nonexistent node");
+
+ int anode = adt_path_offset(adt, "/arm-io/spi0/multi-touch");
+ if (anode < 0)
+ bail("ADT /arm-io/spi0/multi-touch not found\n");
+
+ u32 len;
+ const u8 *cal_blob = adt_getprop(adt, anode, "multi-touch-calibration", &len);
+ if (!cal_blob || !len)
+ bail("ADT: Failed to get multi-touch-calibration");
+
+ fdt_setprop(dt, node, "apple,z2-cal-blob", cal_blob, len);
+ return 0;
+}
+
+static int dt_set_wifi(void)
+{
+ int anode = adt_path_offset(adt, "/arm-io/wlan");
+
+ if (anode < 0)
+ bail("ADT: /arm-io/wlan not found\n");
+
+ uint8_t info[16];
+ if (ADT_GETPROP_ARRAY(adt, anode, "wifi-antenna-sku-info", info) < 0)
+ bail("ADT: Failed to get wifi-antenna-sku-info");
+
+ const char *path = fdt_get_alias(dt, "wifi0");
+ if (path == NULL)
+ return 0;
+
+ int node = fdt_path_offset(dt, path);
+ if (node < 0)
+ return 0;
+
+ char antenna[8];
+ memcpy(antenna, &info[8], sizeof(antenna));
+ fdt_setprop_string(dt, node, "apple,antenna-sku", antenna);
+
+ u32 len;
+ const u8 *cal_blob = adt_getprop(adt, anode, "wifi-calibration-msf", &len);
+
+ if (!cal_blob || !len)
+ bail("ADT: Failed to get wifi-calibration-msf");
+
+ fdt_setprop(dt, node, "brcm,cal-blob", cal_blob, len);
+
+ return 0;
+}
+
+static void dt_set_uboot_dm_preloc(int node)
+{
+ // Tell U-Boot to bind this node early
+ fdt_setprop_empty(dt, node, "u-boot,dm-pre-reloc");
+ fdt_setprop_empty(dt, node, "bootph-all");
+
+ // Make sure the power domains are bound early as well
+ int pds_size;
+ const fdt32_t *pds = fdt_getprop(dt, node, "power-domains", &pds_size);
+ if (!pds)
+ return;
+
+ fdt32_t *phandles = malloc(pds_size);
+ if (!phandles) {
+ printf("FDT: out of memory\n");
+ return;
+ }
+ memcpy(phandles, pds, pds_size);
+
+ for (int i = 0; i < pds_size / 4; i++) {
+ node = fdt_node_offset_by_phandle(dt, fdt32_ld(&phandles[i]));
+ if (node < 0)
+ continue;
+ dt_set_uboot_dm_preloc(node);
+
+ // restore node offset after DT update
+ node = fdt_node_offset_by_phandle(dt, fdt32_ld(&phandles[i]));
+ if (node < 0)
+ continue;
+
+ // And make sure the PMGR node is bound early too
+ node = fdt_parent_offset(dt, node);
+ if (node < 0)
+ continue;
+ dt_set_uboot_dm_preloc(node);
+ }
+
+ free(phandles);
+}
+
+static int dt_set_uboot(void)
+{
+ // Make sure that U-Boot can initialize the serial port in its
+ // pre-relocation phase by marking its node and the nodes of the
+ // power domains it depends on with a "u-boot,dm-pre-reloc"
+ // property.
+
+ const char *path = fdt_get_alias(dt, "serial0");
+ if (path == NULL)
+ return 0;
+
+ int node = fdt_path_offset(dt, path);
+ if (node < 0)
+ return 0;
+
+ dt_set_uboot_dm_preloc(node);
+ return 0;
+}
+
+struct atc_tunable {
+ u32 offset : 24;
+ u32 size : 8;
+ u32 mask;
+ u32 value;
+} PACKED;
+static_assert(sizeof(struct atc_tunable) == 12, "Invalid atc_tunable size");
+
+struct adt_tunable_info {
+ const char *adt_name;
+ const char *fdt_name;
+ size_t reg_offset;
+ size_t reg_size;
+ bool required;
+};
+
+static const struct adt_tunable_info atc_tunables[] = {
+ /* global tunables applied after power on or reset */
+ {"tunable_ATC0AXI2AF", "apple,tunable-axi2af", 0x0, 0x4000, true},
+ {"tunable_ATC_FABRIC", "apple,tunable-common", 0x45000, 0x4000, true},
+ {"tunable_AUS_CMN_TOP", "apple,tunable-common", 0x800, 0x4000, true},
+ {"tunable_AUS_CMN_SHM", "apple,tunable-common", 0xa00, 0x4000, true},
+ {"tunable_AUSPLL_CORE", "apple,tunable-common", 0x2200, 0x4000, true},
+ {"tunable_AUSPLL_TOP", "apple,tunable-common", 0x2000, 0x4000, true},
+ {"tunable_CIO3PLL_CORE", "apple,tunable-common", 0x2a00, 0x4000, true},
+ {"tunable_CIO3PLL_TOP", "apple,tunable-common", 0x2800, 0x4000, true},
+ {"tunable_CIO_CIO3PLL_TOP", "apple,tunable-common", 0x2800, 0x4000, false},
+ {"tunable_USB_ACIOPHY_TOP", "apple,tunable-common", 0x0, 0x4000, true},
+ /* lane-specific tunables applied after a cable is connected */
+ {"tunable_DP_LN0_AUSPMA_TX_TOP", "apple,tunable-lane0-dp", 0xc000, 0x1000, true},
+ {"tunable_DP_LN1_AUSPMA_TX_TOP", "apple,tunable-lane1-dp", 0x13000, 0x1000, true},
+ {"tunable_USB_LN0_AUSPMA_RX_TOP", "apple,tunable-lane0-usb", 0x9000, 0x1000, true},
+ {"tunable_USB_LN0_AUSPMA_RX_EQ", "apple,tunable-lane0-usb", 0xa000, 0x1000, true},
+ {"tunable_USB_LN0_AUSPMA_RX_SHM", "apple,tunable-lane0-usb", 0xb000, 0x1000, true},
+ {"tunable_USB_LN0_AUSPMA_TX_TOP", "apple,tunable-lane0-usb", 0xc000, 0x1000, true},
+ {"tunable_USB_LN1_AUSPMA_RX_TOP", "apple,tunable-lane1-usb", 0x10000, 0x1000, true},
+ {"tunable_USB_LN1_AUSPMA_RX_EQ", "apple,tunable-lane1-usb", 0x11000, 0x1000, true},
+ {"tunable_USB_LN1_AUSPMA_RX_SHM", "apple,tunable-lane1-usb", 0x12000, 0x1000, true},
+ {"tunable_USB_LN1_AUSPMA_TX_TOP", "apple,tunable-lane1-usb", 0x13000, 0x1000, true},
+ {"tunable_CIO_LN0_AUSPMA_RX_TOP", "apple,tunable-lane0-cio", 0x9000, 0x1000, true},
+ {"tunable_CIO_LN0_AUSPMA_RX_EQ", "apple,tunable-lane0-cio", 0xa000, 0x1000, true},
+ {"tunable_CIO_LN0_AUSPMA_RX_SHM", "apple,tunable-lane0-cio", 0xb000, 0x1000, true},
+ {"tunable_CIO_LN0_AUSPMA_TX_TOP", "apple,tunable-lane0-cio", 0xc000, 0x1000, true},
+ {"tunable_CIO_LN1_AUSPMA_RX_TOP", "apple,tunable-lane1-cio", 0x10000, 0x1000, true},
+ {"tunable_CIO_LN1_AUSPMA_RX_EQ", "apple,tunable-lane1-cio", 0x11000, 0x1000, true},
+ {"tunable_CIO_LN1_AUSPMA_RX_SHM", "apple,tunable-lane1-cio", 0x12000, 0x1000, true},
+ {"tunable_CIO_LN1_AUSPMA_TX_TOP", "apple,tunable-lane1-cio", 0x13000, 0x1000, true},
+};
+
+static int dt_append_atc_tunable(int adt_node, int fdt_node,
+ const struct adt_tunable_info *tunable_info)
+{
+ u32 tunables_len;
+ const struct atc_tunable *tunable_adt =
+ adt_getprop(adt, adt_node, tunable_info->adt_name, &tunables_len);
+
+ if (!tunable_adt) {
+ printf("ADT: tunable %s not found\n", tunable_info->adt_name);
+
+ if (tunable_info->required)
+ return -1;
+ else
+ return 0;
+ }
+
+ if (tunables_len % sizeof(*tunable_adt)) {
+ printf("ADT: tunable %s with invalid length %d\n", tunable_info->adt_name, tunables_len);
+ return -1;
+ }
+
+ u32 n_tunables = tunables_len / sizeof(*tunable_adt);
+ for (size_t j = 0; j < n_tunables; j++) {
+ const struct atc_tunable *tunable = &tunable_adt[j];
+
+ if (tunable->size != 32) {
+ printf("kboot: ATC tunable has invalid size %d\n", tunable->size);
+ return -1;
+ }
+
+ if (tunable->offset % (tunable->size / 8)) {
+ printf("kboot: ATC tunable has unaligned offset %x\n", tunable->offset);
+ return -1;
+ }
+
+ if (tunable->offset + (tunable->size / 8) > tunable_info->reg_size) {
+ printf("kboot: ATC tunable has invalid offset %x\n", tunable->offset);
+ return -1;
+ }
+
+ if (fdt_appendprop_u32(dt, fdt_node, tunable_info->fdt_name,
+ tunable->offset + tunable_info->reg_offset) < 0)
+ return -1;
+ if (fdt_appendprop_u32(dt, fdt_node, tunable_info->fdt_name, tunable->mask) < 0)
+ return -1;
+ if (fdt_appendprop_u32(dt, fdt_node, tunable_info->fdt_name, tunable->value) < 0)
+ return -1;
+ }
+
+ return 0;
+}
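[Editor's note: to make the encoding concrete, with a hypothetical entry not taken from any real ADT: an atc_tunable {offset = 0x10, size = 32, mask = 0xff, value = 0x5} under tunable_AUS_CMN_TOP (reg_offset 0x800) is appended to apple,tunable-common as the u32 triple <0x810 0xff 0x5>; the consumer is expected to apply reg[0x810] = (reg[0x810] & ~0xff) | 0x5.]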
+
+static void dt_copy_atc_tunables(const char *adt_path, const char *dt_alias)
+{
+ int ret;
+
+ int adt_node = adt_path_offset(adt, adt_path);
+ if (adt_node < 0)
+ return;
+
+ const char *fdt_path = fdt_get_alias(dt, dt_alias);
+ if (fdt_path == NULL) {
+ printf("FDT: Unable to find alias %s\n", dt_alias);
+ return;
+ }
+
+ int fdt_node = fdt_path_offset(dt, fdt_path);
+ if (fdt_node < 0) {
+ printf("FDT: Unable to find path %s for alias %s\n", fdt_path, dt_alias);
+ return;
+ }
+
+ for (size_t i = 0; i < sizeof(atc_tunables) / sizeof(*atc_tunables); ++i) {
+ ret = dt_append_atc_tunable(adt_node, fdt_node, &atc_tunables[i]);
+ if (ret)
+ goto cleanup;
+ }
+
+ return;
+
+cleanup:
+ /*
+ * USB3 and Thunderbolt won't work if something went wrong. Clean up to make
+ * sure we don't leave half-filled properties around so that we can at least
+ * try to boot with USB2 support only.
+ */
+ for (size_t i = 0; i < sizeof(atc_tunables) / sizeof(*atc_tunables); ++i)
+ fdt_delprop(dt, fdt_node, atc_tunables[i].fdt_name);
+
+ printf("FDT: Unable to setup ATC tunables for %s - USB3/Thunderbolt will not work\n", adt_path);
+}
+
+static int dt_set_atc_tunables(void)
+{
+ char adt_path[32];
+ char fdt_alias[32];
+
+ for (int i = 0; i < MAX_ATC_DEVS; ++i) {
+ memset(adt_path, 0, sizeof(adt_path));
+ snprintf(adt_path, sizeof(adt_path), "/arm-io/atc-phy%d", i);
+
+ memset(fdt_alias, 0, sizeof(fdt_alias));
+ snprintf(fdt_alias, sizeof(fdt_alias), "atcphy%d", i);
+
+ dt_copy_atc_tunables(adt_path, fdt_alias);
+ }
+
+ return 0;
+}
+
+static const struct adt_tunable_info acio_tunables[] = {
+ /* NHI tunables */
+ {"hi_up_tx_desc_fabric_tunables", "apple,tunable-nhi", 0xf0000, 0x4000, true},
+ {"hi_up_tx_data_fabric_tunables", "apple,tunable-nhi", 0xec000, 0x4000, true},
+ {"hi_up_rx_desc_fabric_tunables", "apple,tunable-nhi", 0xe8000, 0x4000, true},
+ {"hi_up_wr_fabric_tunables", "apple,tunable-nhi", 0xf4000, 0x4000, true},
+ {"hi_up_merge_fabric_tunables", "apple,tunable-nhi", 0xf8000, 0x4000, true},
+ {"hi_dn_merge_fabric_tunables", "apple,tunable-nhi", 0xfc000, 0x4000, true},
+ {"fw_int_ctl_management_tunables", "apple,tunable-nhi", 0x4000, 0x4000, true},
+ /* M3 tunables */
+ {"top_tunables", "apple,tunable-m3", 0x0, 0x4000, true},
+ {"hbw_fabric_tunables", "apple,tunable-m3", 0x4000, 0x4000, true},
+ {"lbw_fabric_tunables", "apple,tunable-m3", 0x8000, 0x4000, true},
+ /* PCIe adapter tunables */
+ {"pcie_adapter_regs_tunables", "apple,tunable-pcie-adapter", 0x0, 0x4000, true},
+};
+
+struct acio_tunable {
+ u32 offset;
+ u32 size;
+ u64 mask;
+ u64 value;
+} PACKED;
+static_assert(sizeof(struct acio_tunable) == 24, "Invalid acio_tunable size");
+
+/*
+ * This is *almost* identical to dt_append_atc_tunable except for the different
+ * tunable struct and that tunable->size is in bytes instead of bits.
+ * If only C had generics that aren't macros :-(
+ */
+static int dt_append_acio_tunable(int adt_node, int fdt_node,
+ const struct adt_tunable_info *tunable_info)
+{
+ u32 tunables_len;
+ const struct acio_tunable *tunable_adt =
+ adt_getprop(adt, adt_node, tunable_info->adt_name, &tunables_len);
+
+ if (!tunable_adt) {
+ printf("ADT: tunable %s not found\n", tunable_info->adt_name);
+
+ if (tunable_info->required)
+ return -1;
+ else
+ return 0;
+ }
+
+ if (tunables_len % sizeof(*tunable_adt)) {
+ printf("ADT: tunable %s with invalid length %d\n", tunable_info->adt_name, tunables_len);
+ return -1;
+ }
+
+ u32 n_tunables = tunables_len / sizeof(*tunable_adt);
+ for (size_t j = 0; j < n_tunables; j++) {
+ const struct acio_tunable *tunable = &tunable_adt[j];
+
+ if (tunable->size != 4) {
+ printf("kboot: ACIO tunable has invalid size %d\n", tunable->size);
+ return -1;
+ }
+
+ if (tunable->offset % tunable->size) {
+ printf("kboot: ACIO tunable has unaligned offset %x\n", tunable->offset);
+ return -1;
+ }
+
+ if (tunable->offset + tunable->size > tunable_info->reg_size) {
+ printf("kboot: ACIO tunable has invalid offset %x\n", tunable->offset);
+ return -1;
+ }
+
+ if (fdt_appendprop_u32(dt, fdt_node, tunable_info->fdt_name,
+ tunable->offset + tunable_info->reg_offset) < 0)
+ return -1;
+ if (fdt_appendprop_u32(dt, fdt_node, tunable_info->fdt_name, tunable->mask) < 0)
+ return -1;
+ if (fdt_appendprop_u32(dt, fdt_node, tunable_info->fdt_name, tunable->value) < 0)
+ return -1;
+ }
+
+ return 0;
+}
+
+static int dt_copy_acio_tunables(const char *adt_path, const char *dt_alias)
+{
+ int ret;
+ int adt_node = adt_path_offset(adt, adt_path);
+ if (adt_node < 0)
+ return -1;
+
+ const char *fdt_path = fdt_get_alias(dt, dt_alias);
+ if (fdt_path == NULL)
+ bail("FDT: Unable to find alias %s\n", dt_alias);
+
+ int fdt_node = fdt_path_offset(dt, fdt_path);
+ if (fdt_node < 0)
+ bail("FDT: Unable to find path %s for alias %s\n", fdt_path, dt_alias);
+
+ u32 drom_len;
+ const u8 *drom_blob = adt_getprop(adt, adt_node, "thunderbolt-drom", &drom_len);
+ if (!drom_blob || !drom_len)
+ bail("ADT: Failed to get thunderbolt-drom");
+
+ fdt_setprop(dt, fdt_node, "apple,thunderbolt-drom", drom_blob, drom_len);
+ for (size_t i = 0; i < sizeof(acio_tunables) / sizeof(*acio_tunables); ++i) {
+ ret = dt_append_acio_tunable(adt_node, fdt_node, &acio_tunables[i]);
+ if (ret)
+ bail_cleanup("ADT: unable to convert '%s' tunable", acio_tunables[i].adt_name);
+ }
+
+ return 0;
+
+err:
+ fdt_delprop(dt, fdt_node, "apple,thunderbolt-drom");
+ fdt_delprop(dt, fdt_node, "apple,tunable-nhi");
+ fdt_delprop(dt, fdt_node, "apple,tunable-m3");
+ fdt_delprop(dt, fdt_node, "apple,tunable-pcie-adapter");
+
+ return -1;
+}
+
+static int dt_set_acio_tunables(void)
+{
+ char adt_path[32];
+ char fdt_alias[32];
+
+ for (int i = 0; i < MAX_CIO_DEVS; ++i) {
+ memset(adt_path, 0, sizeof(adt_path));
+ snprintf(adt_path, sizeof(adt_path), "/arm-io/acio%d", i);
+
+ memset(fdt_alias, 0, sizeof(fdt_alias));
+ snprintf(fdt_alias, sizeof(fdt_alias), "acio%d", i);
+
+ dt_copy_acio_tunables(adt_path, fdt_alias);
+ }
+
+ return 0;
+}
+
+static int dt_get_iommu_node(int node, u32 num)
+{
+ int len;
+ assert(num < 32);
+ const void *prop = fdt_getprop(dt, node, "iommus", &len);
+ if (!prop || len < 0 || (u32)len < 8 * (num + 1)) {
+ printf("FDT: unexpected 'iommus' prop / len %d\n", len);
+ return -FDT_ERR_NOTFOUND;
+ }
+
+ const fdt32_t *iommus = prop;
+ uint32_t phandle = fdt32_ld(&iommus[num * 2]);
+
+ return fdt_node_offset_by_phandle(dt, phandle);
+}
+
+static dart_dev_t *dt_init_dart_by_node(int node, u32 num)
+{
+ int len;
+ assert(num < 32);
+ const void *prop = fdt_getprop(dt, node, "iommus", &len);
+ if (!prop || len < 0 || (u32)len < 8 * (num + 1)) {
+ printf("FDT: unexpected 'iommus' prop / len %d\n", len);
+ return NULL;
+ }
+
+ const fdt32_t *iommus = prop;
+ u32 iommu_phandle = fdt32_ld(&iommus[num * 2]);
+ u32 iommu_stream = fdt32_ld(&iommus[num * 2 + 1]);
+
+ printf("FDT: iommu phande:%u stream:%u\n", iommu_phandle, iommu_stream);
+
+ return dart_init_fdt(dt, iommu_phandle, iommu_stream, true);
+}
+
+static u64 dart_get_mapping(dart_dev_t *dart, const char *path, u64 paddr, size_t size)
+{
+ u64 iova = dart_search(dart, (void *)paddr);
+ if (DART_IS_ERR(iova)) {
+ printf("ADT: %s paddr: 0x%lx is not mapped\n", path, paddr);
+ return iova;
+ }
+
+ u64 pend = (u64)dart_translate(dart, iova + size - 1);
+ if (pend != (paddr + size - 1)) {
+ printf("ADT: %s is not continuously mapped: 0x%lx\n", path, pend);
+ return DART_PTR_ERR;
+ }
+
+ return iova;
+}
+
+static int dt_device_set_reserved_mem(int node, dart_dev_t *dart, const char *name,
+ uint32_t phandle, u64 paddr, u64 size)
+{
+ int ret;
+
+ u64 iova = dart_get_mapping(dart, name, paddr, size);
+ if (DART_IS_ERR(iova))
+ bail("ADT: no mapping found for '%s' 0x%012lx iova:0x%08lx)\n", name, paddr, iova);
+
+ ret = fdt_appendprop_u32(dt, node, "iommu-addresses", phandle);
+ if (ret != 0)
+ bail("DT: could not append phandle '%s.compatible' property: %d\n", name, ret);
+
+ ret = fdt_appendprop_u64(dt, node, "iommu-addresses", iova);
+ if (ret != 0)
+ bail("DT: could not append iova to '%s.iommu-addresses' property: %d\n", name, ret);
+
+ ret = fdt_appendprop_u64(dt, node, "iommu-addresses", size);
+ if (ret != 0)
+ bail("DT: could not append size to '%s.iommu-addresses' property: %d\n", name, ret);
+
+ return 0;
+}
+
+static int dt_get_or_add_reserved_mem(const char *node_name, const char *compat, u64 paddr,
+ size_t size)
+{
+ int ret;
+ int resv_node = fdt_path_offset(dt, "/reserved-memory");
+ if (resv_node < 0)
+ bail("DT: '/reserved-memory' not found\n");
+
+ int node = fdt_subnode_offset(dt, resv_node, node_name);
+ if (node >= 0)
+ return node;
+
+ node = fdt_add_subnode(dt, resv_node, node_name);
+ if (node < 0)
+ bail("DT: failed to add node '%s' to '/reserved-memory'\n", node_name);
+
+ uint32_t phandle;
+ ret = fdt_generate_phandle(dt, &phandle);
+ if (ret)
+ bail("DT: failed to generate phandle: %d\n", ret);
+
+ ret = fdt_setprop_u32(dt, node, "phandle", phandle);
+ if (ret != 0)
+ bail("DT: couldn't set '%s.phandle' property: %d\n", node_name, ret);
+
+ u64 reg[2] = {cpu_to_fdt64(paddr), cpu_to_fdt64(size)};
+ ret = fdt_setprop(dt, node, "reg", reg, sizeof(reg));
+ if (ret != 0)
+ bail("DT: couldn't set '%s.reg' property: %d\n", node_name, ret);
+
+ ret = fdt_setprop_string(dt, node, "compatible", compat);
+ if (ret != 0)
+ bail("DT: couldn't set '%s.compatible' property: %d\n", node_name, ret);
+
+ ret = fdt_setprop_empty(dt, node, "no-map");
+ if (ret != 0)
+ bail("DT: couldn't set '%s.no-map' property: %d\n", node_name, ret);
+
+ return node;
+}
+
+static int dt_device_add_mem_region(const char *alias, uint32_t phandle, const char *name)
+{
+ int ret;
+ int dev_node = fdt_path_offset(dt, alias);
+ if (dev_node < 0)
+ bail("DT: failed to get node for alias '%s'\n", alias);
+
+ ret = fdt_appendprop_u32(dt, dev_node, "memory-region", phandle);
+ if (ret != 0)
+ bail("DT: failed to append to 'memory-region' property\n");
+
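+    /* fdt_appendprop() can resize the blob and shift node offsets, so
+     * re-resolve the alias before the next write. */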
+ dev_node = fdt_path_offset(dt, alias);
+ if (dev_node < 0)
+ bail("DT: failed to update node for alias '%s'\n", alias);
+
+ ret = fdt_appendprop_string(dt, dev_node, "memory-region-names", name);
+ if (ret != 0)
+ bail("DT: failed to append to 'memory-region-names' property\n");
+
+ return 0;
+}
+
+static int dt_set_dcp_firmware(const char *alias)
+{
+ const char *path = fdt_get_alias(dt, alias);
+
+ if (!path)
+ return 0;
+
+ int node = fdt_path_offset(dt, path);
+ if (node < 0)
+ return 0;
+
+ if (firmware_set_fdt(dt, node, "apple,firmware-version", &os_firmware) < 0)
+ bail("FDT: Could not set apple,firmware-version for %s\n", path);
+
+ const struct fw_version_info *compat;
+
+ switch (os_firmware.version) {
+ case V12_3_1:
+ case V12_4:
+ compat = &fw_versions[V12_3];
+ break;
+ default:
+ compat = &os_firmware;
+ break;
+ }
+
+ if (firmware_set_fdt(dt, node, "apple,firmware-compat", compat) < 0)
+ bail("FDT: Could not set apple,firmware-compat for %s\n", path);
+
+ return 0;
+}
+
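+/*
+ * Maps one ADT carveout name (region_adt) to an FDT reserved-memory node name
+ * (mem_fdt), with flags selecting which DARTs (dcp / disp0 / piodma) the
+ * region must be mapped in.
+ */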
+struct disp_mapping {
+ char region_adt[24];
+ char mem_fdt[24];
+ bool map_dcp;
+ bool map_disp;
+ bool map_piodma;
+};
+
+struct mem_region {
+ u64 paddr;
+ u64 size;
+};
+
+static int dt_add_reserved_regions(const char *dcp_alias, const char *disp_alias,
+ const char *piodma_alias, const char *compat,
+ struct disp_mapping *maps, struct mem_region *region,
+ u32 num_maps)
+{
+ int ret = 0;
+ dart_dev_t *dart_dcp = NULL, *dart_disp = NULL, *dart_piodma = NULL;
+ uint32_t dcp_phandle = 0, disp_phandle = 0, piodma_phandle = 0;
+
+    /* Check for the display device aliases; if one is missing, assume this is
+     * an old DT without display nodes and return without error.
+     * Otherwise, init each DART and retrieve the node's phandle.
+     */
+ if (dcp_alias) {
+ int dcp_node = fdt_path_offset(dt, dcp_alias);
+ if (dcp_node < 0) {
+ printf("DT: could not resolve '%s' alias\n", dcp_alias);
+ goto err; // cleanup
+ }
+ dart_dcp = dt_init_dart_by_node(dcp_node, 0);
+ if (!dart_dcp)
+ bail_cleanup("DT: failed to init DART for '%s'\n", dcp_alias);
+ dcp_phandle = fdt_get_phandle(dt, dcp_node);
+ }
+
+ if (disp_alias) {
+ int disp_node = fdt_path_offset(dt, disp_alias);
+ if (disp_node < 0) {
+ printf("DT: could not resolve '%s' alias\n", disp_alias);
+ goto err; // cleanup
+ }
+ dart_disp = dt_init_dart_by_node(disp_node, 0);
+ if (!dart_disp)
+ bail_cleanup("DT: failed to init DART for '%s'\n", disp_alias);
+ disp_phandle = fdt_get_phandle(dt, disp_node);
+ }
+
+ if (piodma_alias) {
+ int piodma_node = fdt_path_offset(dt, piodma_alias);
+ if (piodma_node < 0) {
+ printf("DT: could not resolve '%s' alias\n", piodma_alias);
+ goto err; // cleanup
+ }
+
+ dart_piodma = dt_init_dart_by_node(piodma_node, 0);
+ if (!dart_piodma)
+ bail_cleanup("DT: failed to init DART for '%s'\n", piodma_alias);
+ piodma_phandle = fdt_get_phandle(dt, piodma_node);
+ }
+
+ for (unsigned i = 0; i < num_maps; i++) {
+ const char *name = maps[i].mem_fdt;
+ char node_name[64];
+
+ snprintf(node_name, sizeof(node_name), "%s@%lx", name, region[i].paddr);
+ int mem_node =
+ dt_get_or_add_reserved_mem(node_name, compat, region[i].paddr, region[i].size);
+ if (mem_node < 0)
+ goto err;
+
+ uint32_t mem_phandle = fdt_get_phandle(dt, mem_node);
+
+ if (maps[i].map_dcp && dart_dcp) {
+ ret = dt_device_set_reserved_mem(mem_node, dart_dcp, node_name, dcp_phandle,
+ region[i].paddr, region[i].size);
+ if (ret != 0)
+ goto err;
+ }
+ if (maps[i].map_disp && dart_disp) {
+ ret = dt_device_set_reserved_mem(mem_node, dart_disp, node_name, disp_phandle,
+ region[i].paddr, region[i].size);
+ if (ret != 0)
+ goto err;
+ }
+ if (maps[i].map_piodma && dart_piodma) {
+ ret = dt_device_set_reserved_mem(mem_node, dart_piodma, node_name, piodma_phandle,
+ region[i].paddr, region[i].size);
+ if (ret != 0)
+ goto err;
+ }
+
+        /* Modify the device nodes only after filling /reserved-memory, so we
+         * don't have to re-resolve mem_node's offset. */
+ if (maps[i].map_dcp && dcp_alias) {
+ ret = dt_device_add_mem_region(dcp_alias, mem_phandle, maps[i].mem_fdt);
+ if (ret < 0)
+ goto err;
+ }
+ if (maps[i].map_disp && disp_alias) {
+ ret = dt_device_add_mem_region(disp_alias, mem_phandle, maps[i].mem_fdt);
+ if (ret < 0)
+ goto err;
+ }
+ if (maps[i].map_piodma && piodma_alias) {
+ ret = dt_device_add_mem_region(piodma_alias, mem_phandle, maps[i].mem_fdt);
+ if (ret < 0)
+ goto err;
+ }
+ }
+
+    /* Enable dart-disp0: it is disabled in the device tree to avoid resetting
+     * it (and breaking display scanout) when booting with an old m1n1 that
+     * does not lock dart-disp0.
+     */
+ if (disp_alias) {
+ int disp_node = fdt_path_offset(dt, disp_alias);
+
+ int dart_disp0 = dt_get_iommu_node(disp_node, 0);
+ if (dart_disp0 < 0)
+ bail_cleanup("DT: failed to find 'dart-disp0'\n");
+
+ if (fdt_setprop_string(dt, dart_disp0, "status", "okay") < 0)
+ bail_cleanup("DT: failed to enable 'dart-disp0'\n");
+ }
+err:
+ if (dart_dcp)
+ dart_shutdown(dart_dcp);
+ if (dart_disp)
+ dart_shutdown(dart_disp);
+ if (dart_piodma)
+ dart_shutdown(dart_piodma);
+
+ return ret;
+}
+
+static int dt_carveout_reserved_regions(const char *dcp_alias, const char *disp_alias,
+ const char *piodma_alias, struct disp_mapping *maps,
+ u32 num_maps)
+{
+ int ret = 0;
+
+ struct mem_region region[MAX_DISP_MAPPINGS];
+
+ assert(num_maps <= MAX_DISP_MAPPINGS);
+
+    // return early if dcp_alias does not exist
+ if (!fdt_get_alias(dt, dcp_alias))
+ return 0;
+
+ ret = dt_set_dcp_firmware(dcp_alias);
+ if (ret)
+ return ret;
+
+ int node = adt_path_offset(adt, "/chosen/carveout-memory-map");
+ if (node < 0)
+ bail("ADT: '/chosen/carveout-memory-map' not found\n");
+
+    /* Read the physical addresses of the reserved memory regions up front,
+     * to avoid errors after the DT has already been modified. */
+ for (unsigned i = 0; i < num_maps; i++) {
+
+ int ret;
+ u64 phys_map[2];
+ struct disp_mapping *map = &maps[i];
+ const char *name = map->region_adt;
+
+ ret = ADT_GETPROP_ARRAY(adt, node, name, phys_map);
+ if (ret != sizeof(phys_map))
+ bail("ADT: could not get carveout memory '%s'\n", name);
+ if (!phys_map[0] || !phys_map[1])
+ bail("ADT: carveout memory '%s'\n", name);
+
+ region[i].paddr = phys_map[0];
+ region[i].size = phys_map[1];
+ }
+
+ return dt_add_reserved_regions(dcp_alias, disp_alias, piodma_alias, "apple,asc-mem", maps,
+ region, num_maps);
+}
+
+static struct disp_mapping disp_reserved_regions_vram[] = {
+ // boot framebuffer, mapped to dart-disp0 sid 0 and dart-dcp sid 0/5
+ {"vram", "framebuffer", true, true, false},
+};
+
+static int dt_vram_reserved_region(const char *dcp_alias, const char *disp_alias)
+{
+ int ret = 0;
+ int adt_path[4];
+ struct mem_region region;
+
+    // return early if dcp_alias does not exist
+ if (!fdt_get_alias(dt, dcp_alias))
+ return 0;
+
+ int node = adt_path_offset_trace(adt, "/vram", adt_path);
+
+ if (node < 0)
+ bail("ADT: '/vram' not found\n");
+
+ int pp = 0;
+ while (adt_path[pp])
+ pp++;
+ adt_path[pp + 1] = 0;
+
+ ret = adt_get_reg(adt, adt_path, "reg", 0, &region.paddr, &region.size);
+ if (ret < 0)
+ bail("ADT: failed to read /vram/reg\n");
+
+ return dt_add_reserved_regions(dcp_alias, disp_alias, NULL, "framebuffer",
+ disp_reserved_regions_vram, &region, 1);
+}
+
+static struct disp_mapping disp_reserved_regions_t8103[] = {
+ {"region-id-50", "dcp_data", true, false, false},
+ {"region-id-57", "region57", true, false, false},
+    // The following two regions are mapped in dart-dcp sid 0 and dart-disp0 sid 0 and 4
+ {"region-id-94", "region94", true, true, false},
+ {"region-id-95", "region95", true, false, true},
+};
+
+static struct disp_mapping dcpext_reserved_regions_t8103[] = {
+ {"region-id-73", "dcpext_data", true, false, false},
+ {"region-id-74", "region74", true, false, false},
+};
+
+static struct disp_mapping disp_reserved_regions_t8112[] = {
+ {"region-id-49", "dcp_txt", true, false, false},
+ {"region-id-50", "dcp_data", true, false, false},
+ {"region-id-57", "region57", true, false, false},
+    // The following two regions are mapped in dart-dcp sid 5 and dart-disp0 sid 0 and 4
+ {"region-id-94", "region94", true, true, false},
+ {"region-id-95", "region95", true, false, true},
+};
+
+static struct disp_mapping dcpext_reserved_regions_t8112[] = {
+ {"region-id-49", "dcp_txt", true, false, false},
+ {"region-id-73", "dcpext_data", true, false, false},
+ {"region-id-74", "region74", true, false, false},
+};
+
+static struct disp_mapping disp_reserved_regions_t600x[] = {
+ {"region-id-50", "dcp_data", true, false, false},
+ {"region-id-57", "region57", true, false, false},
+    // The following two regions are mapped in dart-dcp sid 0 and dart-disp0 sid 0 and 4
+ {"region-id-94", "region94", true, true, false},
+ {"region-id-95", "region95", true, false, true},
+ // used on M1 Pro/Max/Ultra, mapped to dcp and disp0
+ {"region-id-157", "region157", true, true, false},
+};
+
+#define MAX_DCPEXT 8
+
+static struct disp_mapping dcpext_reserved_regions_t600x[MAX_DCPEXT][2] = {
+ {
+ {"region-id-73", "dcpext0_data", true, false, false},
+ {"region-id-74", "", true, false, false},
+ },
+ {
+ {"region-id-88", "dcpext1_data", true, false, false},
+ {"region-id-89", "region89", true, false, false},
+ },
+ {
+ {"region-id-111", "dcpext2_data", true, false, false},
+ {"region-id-112", "region112", true, false, false},
+ },
+ {
+ {"region-id-119", "dcpext3_data", true, false, false},
+ {"region-id-120", "region120", true, false, false},
+ },
+ {
+ {"region-id-127", "dcpext4_data", true, false, false},
+ {"region-id-128", "region128", true, false, false},
+ },
+ {
+ {"region-id-135", "dcpext5_data", true, false, false},
+ {"region-id-136", "region136", true, false, false},
+ },
+ {
+ {"region-id-143", "dcpext6_data", true, false, false},
+ {"region-id-144", "region144", true, false, false},
+ },
+ {
+ {"region-id-151", "dcpext7_data", true, false, false},
+ {"region-id-152", "region152", true, false, false},
+ },
+};
+
+#define ARRAY_SIZE(s) (sizeof(s) / sizeof((s)[0]))
+
+static int dt_set_display(void)
+{
+ /* lock dart-disp0 to prevent old software from resetting it */
+ dart_lock_adt("/arm-io/dart-disp0", 0);
+
+ /* Add "/reserved-memory" nodes with iommu mapping and link them to their
+ * devices. The memory is already excluded from useable RAM so these nodes
+ * are only required to inform the OS about the existing mappings.
+ * Required for disp0, dcp and all dcpext.
+ * Checks for dcp* / disp*_piodma / disp* aliases and fails silently if
+ * they are missing. */
+
+ int ret = 0;
+
+ if (!fdt_node_check_compatible(dt, 0, "apple,t8103")) {
+ ret = dt_carveout_reserved_regions("dcp", "disp0", "disp0_piodma",
+ disp_reserved_regions_t8103,
+ ARRAY_SIZE(disp_reserved_regions_t8103));
+ if (ret)
+ return ret;
+
+ ret = dt_carveout_reserved_regions("dcpext", NULL, NULL, dcpext_reserved_regions_t8103,
+ ARRAY_SIZE(dcpext_reserved_regions_t8103));
+ } else if (!fdt_node_check_compatible(dt, 0, "apple,t8112")) {
+ ret = dt_carveout_reserved_regions("dcp", "disp0", "disp0_piodma",
+ disp_reserved_regions_t8112,
+ ARRAY_SIZE(disp_reserved_regions_t8112));
+ if (ret)
+ return ret;
+
+ ret = dt_carveout_reserved_regions("dcpext", NULL, NULL, dcpext_reserved_regions_t8112,
+ ARRAY_SIZE(dcpext_reserved_regions_t8112));
+ } else if (!fdt_node_check_compatible(dt, 0, "apple,t6000") ||
+ !fdt_node_check_compatible(dt, 0, "apple,t6001") ||
+ !fdt_node_check_compatible(dt, 0, "apple,t6002")) {
+ ret = dt_carveout_reserved_regions("dcp", "disp0", "disp0_piodma",
+ disp_reserved_regions_t600x,
+ ARRAY_SIZE(disp_reserved_regions_t600x));
+ if (ret)
+ return ret;
+
+ for (int n = 0; n < MAX_DCPEXT && ret == 0; n++) {
+ char dcpext_alias[16];
+
+ snprintf(dcpext_alias, sizeof(dcpext_alias), "dcpext%d", n);
+ ret = dt_carveout_reserved_regions(dcpext_alias, NULL, NULL,
+ dcpext_reserved_regions_t600x[n],
+ ARRAY_SIZE(dcpext_reserved_regions_t600x[n]));
+ }
+ } else {
+ printf("DT: unknown compatible, skip display reserved-memory setup\n");
+ return 0;
+ }
+ if (ret)
+ return ret;
+
+ return dt_vram_reserved_region("dcp", "disp0");
+}
+
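+/*
+ * Disable FDT nodes matching dt_prefix whose MMIO base has no counterpart ADT
+ * node matching adt_prefix, then disable the secondary nodes (e.g. DARTs,
+ * PHYs) that the missing devices referenced via their iommus/phys phandles.
+ */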
+static int dt_disable_missing_devs(const char *adt_prefix, const char *dt_prefix, int max_devs)
+{
+ int ret = -1;
+ int adt_prefix_len = strlen(adt_prefix);
+ int dt_prefix_len = strlen(dt_prefix);
+
+ int acnt = 0, phcnt = 0;
+ u64 *addrs = malloc(max_devs * sizeof(u64));
+    u32 *phandles = malloc(max_devs * sizeof(u32) * 8); // Allow up to 8 extra phandles per device (4 iommus + 4 phys)
+ if (!addrs || !phandles)
+ bail_cleanup("FDT: out of memory\n");
+
+ int path[8];
+ int node = adt_path_offset_trace(adt, "/arm-io", path);
+ if (node < 0)
+ bail_cleanup("ADT: /arm-io not found\n");
+
+ int pp = 0;
+ while (path[pp])
+ pp++;
+ path[pp + 1] = 0;
+
+ u32 die_count;
+ if (ADT_GETPROP(adt, node, "die-count", &die_count) < 0) {
+ die_count = 1;
+ }
+ if (die_count > 8) {
+ printf("ADT: limiting die-count from %u to 8\n", die_count);
+ die_count = 8;
+ }
+
+ /* Find ADT registers */
+ ADT_FOREACH_CHILD(adt, node)
+ {
+ const char *name = adt_get_name(adt, node);
+ if (strncmp(name, adt_prefix, adt_prefix_len))
+ continue;
+
+ path[pp] = node;
+ if (adt_get_reg(adt, path, "reg", 0, &addrs[acnt++], NULL) < 0)
+ bail_cleanup("Error getting /arm-io/%s regs\n", name);
+ }
+
+ for (u32 die = 0; die < die_count; ++die) {
+ char path[32] = "/soc";
+
+ if (die_count > 1) {
+            // pre-linux submission multi-die path
+            // can probably be removed the next time someone reads this comment.
+ snprintf(path, sizeof(path), "/soc/die%u", die);
+ int die_node = fdt_path_offset(dt, path);
+ if (die_node < 0) {
+ /* this should use aliases for the soc nodes */
+ u64 die_unit_addr = die * PMGR_DIE_OFFSET + 0x200000000;
+ snprintf(path, sizeof(path), "/soc@%lx", die_unit_addr);
+ }
+ }
+
+ int soc = fdt_path_offset(dt, path);
+        if (soc < 0)
+            bail_cleanup("FDT: %s node not found in devtree\n", path);
+
+ // parse ranges for address translation
+ struct dt_ranges_tbl ranges[DT_MAX_RANGES] = {0};
+ dt_parse_ranges(dt, soc, ranges);
+
+ /* Disable primary devices */
+ fdt_for_each_subnode(node, dt, soc)
+ {
+ const char *name = fdt_get_name(dt, node, NULL);
+ if (strncmp(name, dt_prefix, dt_prefix_len))
+ continue;
+
+ const fdt64_t *reg = fdt_getprop(dt, node, "reg", NULL);
+ if (!reg)
+ bail_cleanup("FDT: failed to get reg property of %s\n", name);
+
+ u64 addr = dt_translate(ranges, reg);
+
+ int i;
+ for (i = 0; i < acnt; i++)
+ if (addrs[i] == addr)
+ break;
+ if (i < acnt)
+ continue;
+
+ int iommus_size;
+ const fdt32_t *iommus = fdt_getprop(dt, node, "iommus", &iommus_size);
+ if (iommus) {
+ if (iommus_size & 7 || iommus_size > 4 * 8) {
+ printf("FDT: bad iommus property for %s/%s\n", path, name);
+ } else {
+ for (int i = 0; i < iommus_size / 8; i++)
+ phandles[phcnt++] = fdt32_ld(&iommus[i * 2]);
+ }
+ }
+
+ int phys_size;
+ const fdt32_t *phys = fdt_getprop(dt, node, "phys", &phys_size);
+ if (phys) {
+ if (phys_size & 7 || phys_size > 4 * 8) {
+ printf("FDT: bad phys property for %s/%s\n", path, name);
+ } else {
+ for (int i = 0; i < phys_size / 8; i++)
+ phandles[phcnt++] = fdt32_ld(&phys[i * 2]);
+ }
+ }
+
+ const char *status = fdt_getprop(dt, node, "status", NULL);
+ if (!status || strcmp(status, "disabled")) {
+ printf("FDT: Disabling missing device %s/%s\n", path, name);
+
+ if (fdt_setprop_string(dt, node, "status", "disabled") < 0)
+ bail_cleanup("FDT: failed to set status property of %s/%s\n", path, name);
+ }
+ }
+
+ /* Disable secondary devices */
+ fdt_for_each_subnode(node, dt, soc)
+ {
+ const char *name = fdt_get_name(dt, node, NULL);
+ u32 phandle = fdt_get_phandle(dt, node);
+
+ for (int i = 0; i < phcnt; i++) {
+ if (phandles[i] != phandle)
+ continue;
+
+ const char *status = fdt_getprop(dt, node, "status", NULL);
+ if (status && !strcmp(status, "disabled"))
+ continue;
+
+ printf("FDT: Disabling secondary device %s/%s\n", path, name);
+
+ if (fdt_setprop_string(dt, node, "status", "disabled") < 0)
+ bail_cleanup("FDT: failed to set status property of %s/%s\n", path, name);
+ break;
+ }
+ }
+ }
+
+ ret = 0;
+err:
+ free(phandles);
+ free(addrs);
+
+ return ret;
+}
+
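+/*
+ * Mirror /arm-io/virtioN nodes from the ADT (added e.g. by m1n1's hypervisor)
+ * into the FDT as "virtio,mmio" nodes wired to the AIC. Debug builds only.
+ */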
+static int dt_transfer_virtios(void)
+{
+ int path[3];
+ path[0] = adt_path_offset(adt, "/arm-io/");
+ if (path[0] < 0)
+ bail("ADT: /arm-io not found\n");
+
+ int aic = fdt_node_offset_by_compatible(dt, -1, "apple,aic");
+ if (aic == -FDT_ERR_NOTFOUND)
+ aic = fdt_node_offset_by_compatible(dt, -1, "apple,aic2");
+ if (aic < 0)
+ bail("FDT: failed to find AIC node\n");
+
+ u32 aic_phandle = fdt_get_phandle(dt, aic);
+ const fdt32_t *ic_prop = fdt_getprop(dt, aic, "#interrupt-cells", NULL);
+ u32 intcells = 0;
+ if (ic_prop)
+ intcells = fdt32_ld(ic_prop);
+ if (intcells < 3 || intcells > 4)
+ bail("FDT: bad '#interrupt-cells' on AIC node (%d)\n", intcells);
+
+ for (u32 i = 0; i < 16; i++) {
+ char name[16], fullname[32];
+ snprintf(name, sizeof(name) - 1, "virtio%d", i);
+
+ path[1] = adt_subnode_offset(adt, path[0], name);
+ if (path[1] < 0)
+ break;
+ path[2] = 0;
+
+ u64 addr, size;
+ if (adt_get_reg(adt, path, "reg", 0, &addr, &size) < 0)
+ bail("ADT: error getting /arm-io/%s regs\n", name);
+
+ u32 irq;
+ ADT_GETPROP(adt, path[1], "interrupts", &irq);
+
+ snprintf(fullname, sizeof(fullname) - 1, "virtio@%lx", addr);
+ printf("FDT: Adding %s found in ADT\n", name);
+
+ int fnode = fdt_add_subnode(dt, 0, fullname);
+ if (fnode < 0)
+ bail("FDT: failed to create %s\n", fullname);
+
+ if (fdt_setprop_string(dt, fnode, "compatible", "virtio,mmio"))
+ bail("FDT: couldn't set %s.compatible\n", fullname);
+
+ fdt64_t reg[2];
+ fdt64_st(reg + 0, addr);
+ fdt64_st(reg + 1, size);
+ if (fdt_setprop(dt, fnode, "reg", reg, sizeof(reg)))
+ bail("FDT: couldn't set %s.reg\n", fullname);
+
+ if (fdt_setprop_u32(dt, fnode, "interrupt-parent", aic_phandle))
+ bail("FDT: couldn't set %s.interrupt-parent\n", fullname);
+
+ fdt32_t intprop[4];
+ fdt32_st(intprop + 0, 0); // AIC_IRQ
+ fdt32_st(intprop + 1, 0);
+ fdt32_st(intprop + intcells - 2, irq);
+ fdt32_st(intprop + intcells - 1, 4); // IRQ_TYPE_LEVEL_HIGH
+ if (fdt_setprop(dt, fnode, "interrupts", intprop, 4 * intcells))
+ bail("FDT: couldn't set %s.interrupts\n", fullname);
+ }
+
+ return 0;
+}
+
+void kboot_set_initrd(void *start, size_t size)
+{
+ initrd_start = start;
+ initrd_size = size;
+}
+
+int kboot_set_chosen(const char *name, const char *value)
+{
+ int i = 0;
+
+ if (!name)
+ return -1;
+
+ for (i = 0; i < MAX_CHOSEN_PARAMS; i++) {
+ if (!chosen_params[i][0]) {
+ chosen_params[i][0] = malloc(strlen(name) + 1);
+ strcpy(chosen_params[i][0], name);
+ break;
+ }
+
+ if (!strcmp(name, chosen_params[i][0])) {
+ free(chosen_params[i][1]);
+ chosen_params[i][1] = NULL;
+ break;
+ }
+ }
+
+ if (i >= MAX_CHOSEN_PARAMS)
+ return -1;
+
+ if (value) {
+ chosen_params[i][1] = malloc(strlen(value) + 1);
+ strcpy(chosen_params[i][1], value);
+ }
+
+ return i;
+}
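+
+/*
+ * Example: kboot_set_chosen("bootargs", "debug") stores the pair so that
+ * dt_set_chosen() can copy it into /chosen when the DT is prepared; passing
+ * value == NULL clears a previously set parameter.
+ */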
+
+int kboot_prepare_dt(void *fdt)
+{
+ if (dt) {
+ free(dt);
+ dt = NULL;
+ }
+
+ dt_bufsize = fdt_totalsize(fdt);
+ assert(dt_bufsize);
+
+ dt_bufsize += 64 * 1024; // Add 64K of buffer for modifications
+    dt = memalign(DT_ALIGN, dt_bufsize);
+    if (!dt)
+        bail("FDT: couldn't allocate devtree buffer\n");
+
+ if (fdt_open_into(fdt, dt, dt_bufsize) < 0)
+ bail("FDT: fdt_open_into() failed\n");
+
+ if (fdt_add_mem_rsv(dt, (u64)dt, dt_bufsize))
+ bail("FDT: couldn't add reservation for the devtree\n");
+
+ if (fdt_add_mem_rsv(dt, (u64)_base, ((u64)_end) - ((u64)_base)))
+ bail("FDT: couldn't add reservation for m1n1\n");
+
+ if (dt_set_chosen())
+ return -1;
+ if (dt_set_serial_number())
+ return -1;
+ if (dt_set_memory())
+ return -1;
+ if (dt_set_cpus())
+ return -1;
+ if (dt_set_mac_addresses())
+ return -1;
+ if (dt_set_wifi())
+ return -1;
+ if (dt_set_bluetooth())
+ return -1;
+ if (dt_set_uboot())
+ return -1;
+ if (dt_set_atc_tunables())
+ return -1;
+ if (dt_set_acio_tunables())
+ return -1;
+ if (dt_set_display())
+ return -1;
+ if (dt_set_gpu(dt))
+ return -1;
+ if (dt_set_multitouch())
+ return -1;
+ if (dt_disable_missing_devs("usb-drd", "usb@", 8))
+ return -1;
+ if (dt_disable_missing_devs("i2c", "i2c@", 8))
+ return -1;
+#ifndef RELEASE
+ if (dt_transfer_virtios())
+        return -1;
+#endif
+
+ if (fdt_pack(dt))
+ bail("FDT: fdt_pack() failed\n");
+
+ printf("FDT prepared at %p\n", dt);
+
+ return 0;
+}
+
+int kboot_boot(void *kernel)
+{
+ usb_init();
+ pcie_init();
+ dapf_init_all();
+
+ printf("Setting SMP mode to WFE...\n");
+ smp_set_wfe_mode(true);
+ printf("Preparing to boot kernel at %p with fdt at %p\n", kernel, dt);
+
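+    /* arm64 Linux boot protocol: x0 = physical address of the DTB, x1..x3 = 0 */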
+ next_stage.entry = kernel;
+ next_stage.args[0] = (u64)dt;
+ next_stage.args[1] = 0;
+ next_stage.args[2] = 0;
+ next_stage.args[3] = 0;
+ next_stage.args[4] = 0;
+ next_stage.restore_logo = false;
+
+ return 0;
+}
diff --git a/tools/src/kboot.h b/tools/src/kboot.h
new file mode 100644
index 0000000..44a8740
--- /dev/null
+++ b/tools/src/kboot.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef KBOOT_H
+#define KBOOT_H
+
+#include "types.h"
+
+struct kernel_header {
+ u32 code[2]; /* Executable code */
+ u64 text_offset; /* Image load offset, little endian */
+ u64 image_size; /* Effective Image size, little endian */
+ u64 flags; /* kernel flags, little endian */
+ u64 res2; /* reserved */
+ u64 res3; /* reserved */
+ u64 res4; /* reserved */
+ u32 magic; /* Magic number, little endian, "ARM\x64" */
+ u32 res5; /* reserved (used for PE COFF offset) */
+};
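+
+/* A valid arm64 Image has magic == 0x644d5241 ("ARM\x64", little endian). */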
+
+void kboot_set_initrd(void *start, size_t size);
+int kboot_set_chosen(const char *name, const char *value);
+int kboot_prepare_dt(void *fdt);
+int kboot_boot(void *kernel);
+
+#endif
diff --git a/tools/src/kboot_gpu.c b/tools/src/kboot_gpu.c
new file mode 100644
index 0000000..54e6d03
--- /dev/null
+++ b/tools/src/kboot_gpu.c
@@ -0,0 +1,452 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "kboot.h"
+#include "adt.h"
+#include "assert.h"
+#include "firmware.h"
+#include "math.h"
+#include "pmgr.h"
+#include "soc.h"
+#include "utils.h"
+
+#include "libfdt/libfdt.h"
+
+#define bail(...) \
+ do { \
+ printf(__VA_ARGS__); \
+ return -1; \
+ } while (0)
+
+#define MAX_PSTATES 16
+#define MAX_CLUSTERS 8
+
+struct perf_state {
+ u32 freq;
+ u32 volt;
+};
+
+static int get_core_counts(u32 *count, u32 nclusters, u32 ncores)
+{
+ u64 base;
+ pmgr_adt_power_enable("/arm-io/sgx");
+
+ int adt_sgx_path[8];
+ if (adt_path_offset_trace(adt, "/arm-io/sgx", adt_sgx_path) < 0)
+ bail("ADT: GPU: Failed to get sgx\n");
+
+ if (adt_get_reg(adt, adt_sgx_path, "reg", 0, &base, NULL) < 0)
+ bail("ADT: GPU: Failed to get sgx reg 0\n");
+
+ u32 cores_lo = read32(base + 0xd01500);
+ u32 cores_hi = read32(base + 0xd01514);
+
+ u64 cores = (((u64)cores_hi) << 32) | cores_lo;
+
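+    /* Each cluster occupies ncores bits of the enable mask; the popcount of a
+     * cluster's slice is its active core count. */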
+ for (u32 i = 0; i < nclusters; i++) {
+ count[i] = __builtin_popcount(cores & MASK(ncores));
+ cores >>= ncores;
+ }
+
+ return 0;
+}
+
+static void adjust_leakage(float *val, u32 clusters, u32 *cores, u32 max, float uncore_fraction)
+{
+ for (u32 i = 0; i < clusters; i++) {
+ float uncore = val[i] * uncore_fraction;
+ float core = val[i] - uncore;
+
+ val[i] = uncore + (cores[i] / (float)max) * core;
+ }
+}
+
+static void load_fuses(float *out, u32 count, u64 base, u32 start, u32 width, float scale,
+ float offset, bool flip)
+{
+ for (u32 i = 0; i < count; i++) {
+ base += (start / 32) * 4;
+ start &= 31;
+
+ u32 low = read32(base);
+ u32 high = read32(base + 4);
+ u32 val = (((((u64)high) << 32) | low) >> start) & MASK(width);
+
+ float fval = (float)val * scale + offset;
+
+ if (flip)
+ out[count - i - 1] = fval;
+ else
+ out[i] = fval;
+
+ start += width;
+ }
+}
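+
+/*
+ * Example (illustrative): a 13-bit fuse field loaded with scale 2 and
+ * offset 2 maps a raw value v to the coefficient 2 * v + 2; `flip`
+ * reverses the output order for fuse words stored high-cluster-first.
+ */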
+
+static u32 t8103_pwr_scale[] = {0, 63, 80, 108, 150, 198, 210};
+
+static int calc_power_t8103(u32 count, u32 table_count, const struct perf_state *core,
+ const struct perf_state *sram, u32 *max_pwr, float *core_leak,
+ float *sram_leak)
+{
+ UNUSED(sram);
+ u32 *pwr_scale;
+ u32 pwr_scale_count;
+ u32 core_count;
+ u32 max_cores;
+
+ switch (chip_id) {
+ case T8103:
+ pwr_scale = t8103_pwr_scale;
+ pwr_scale_count = ARRAY_SIZE(t8103_pwr_scale);
+ max_cores = 8;
+ break;
+ default:
+ bail("ADT: GPU: Unsupported chip\n");
+ }
+
+ if (get_core_counts(&core_count, 1, max_cores))
+ return -1;
+
+ if (table_count != 1)
+ bail("ADT: GPU: expected 1 perf state table but got %d\n", table_count);
+
+ if (count != pwr_scale_count)
+ bail("ADT: GPU: expected %d perf states but got %d\n", pwr_scale_count, count);
+
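+    /* core[].volt is in mV; the product below is in µW as consumed by
+     * opp-microwatt (e.g. 1000 mV * 210 * 100 = 21,000,000 µW = 21 W). */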
+ for (u32 i = 0; i < pwr_scale_count; i++)
+ max_pwr[i] = (u32)core[i].volt * (u32)pwr_scale[i] * 100;
+
+ core_leak[0] = 1000.0;
+ sram_leak[0] = 45.0;
+
+ adjust_leakage(core_leak, 1, &core_count, max_cores, 0.12);
+ adjust_leakage(sram_leak, 1, &core_count, max_cores, 0.2);
+
+ return 0;
+}
+
+static int calc_power_t600x(u32 count, u32 table_count, const struct perf_state *core,
+ const struct perf_state *sram, u32 *max_pwr, float *core_leak,
+ float *sram_leak)
+{
+ float s_sram, k_sram, s_core, k_core;
+ float dk_core, dk_sram;
+ float imax = 1000;
+
+ u32 nclusters = 0;
+ u32 ncores = 0;
+ u32 core_count[MAX_CLUSTERS];
+
+ bool simple_exps = false;
+ bool adjust_leakages = true;
+
+ switch (chip_id) {
+ case T6002:
+ nclusters += 4;
+ load_fuses(core_leak + 4, 4, 0x22922bc1b8, 25, 13, 2, 2, true);
+ load_fuses(sram_leak + 4, 4, 0x22922bc1cc, 4, 9, 1, 1, true);
+ // fallthrough
+    case T6001:
+        nclusters += 2;
+        // fallthrough
+    case T6000:
+ nclusters += 2;
+ load_fuses(core_leak + 0, min(4, nclusters), 0x2922bc1b8, 25, 13, 2, 2, false);
+ load_fuses(sram_leak + 0, min(4, nclusters), 0x2922bc1cc, 4, 9, 1, 1, false);
+
+ s_sram = 4.3547606;
+ k_sram = 0.024927923;
+ // macOS difference: macOS uses a misbehaved piecewise function here
+ // Since it's obviously wrong, let's just use only the first component
+ s_core = 1.48461742;
+ k_core = 0.39013552;
+ dk_core = 8.558;
+ dk_sram = 0.05;
+
+ ncores = 8;
+ adjust_leakages = true;
+ imax = 26.0;
+ break;
+ case T8112:
+ nclusters = 1;
+ load_fuses(core_leak, 1, 0x23d2c84dc, 30, 13, 2, 2, false);
+ load_fuses(sram_leak, 1, 0x23d2c84b0, 15, 9, 1, 1, false);
+
+ s_sram = 3.61619841;
+ k_sram = 0.0529281;
+ // macOS difference: macOS uses a misbehaved piecewise function here
+ // Since it's obviously wrong, let's just use only the first component
+ s_core = 1.21356187;
+ k_core = 0.43328839;
+ dk_core = 9.83196;
+ dk_sram = 0.07828;
+
+ simple_exps = true;
+ ncores = 10;
+ adjust_leakages = false; // pre-adjusted?
+ imax = 24.0;
+ break;
+ }
+
+ if (get_core_counts(core_count, nclusters, ncores))
+ return -1;
+
+ printf("FDT: GPU: Core counts: ");
+ for (u32 i = 0; i < nclusters; i++) {
+ printf("%d ", core_count[i]);
+ }
+ printf("\n");
+
+ if (adjust_leakages) {
+ adjust_leakage(core_leak, nclusters, core_count, ncores, 0.0825);
+ adjust_leakage(sram_leak, nclusters, core_count, ncores, 0.2247);
+ }
+
+ if (table_count != nclusters)
+ bail("ADT: GPU: expected %d perf state tables but got %d\n", nclusters, table_count);
+
+ max_pwr[0] = 0;
+
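+    /*
+     * Per-cluster model (as implemented below): leakage power
+     * V * leak * k * e^(V * s), plus dynamic power dk * f * (V / 0.75)^p
+     * with p = 2 or 3 (simple_exps / voltage threshold), capped at imax * V.
+     */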
+ for (u32 i = 1; i < count; i++) {
+ u32 total_mw = 0;
+
+ for (u32 j = 0; j < nclusters; j++) {
+ // macOS difference: macOS truncates Hz to integer MHz before doing this math.
+ // That's probably wrong, so let's not do that.
+
+ float mw = 0;
+ size_t idx = j * count + i;
+
+ mw += sram[idx].volt / 1000.f * sram_leak[j] * k_sram *
+ expf(sram[idx].volt / 1000.f * s_sram);
+ mw += core[idx].volt / 1000.f * core_leak[j] * k_core *
+ expf(core[idx].volt / 1000.f * s_core);
+
+ float sbase = sram[idx].volt / 750.f;
+ float sram_v_p;
+ if (simple_exps)
+ sram_v_p = sbase * sbase; // v ^ 2
+ else
+ sram_v_p = sbase * sbase * sbase; // v ^ 3
+ mw += dk_sram * (sram[idx].freq / 1000000.f) * sram_v_p;
+
+ float cbase = core[idx].volt / 750.f;
+ float core_v_p;
+ if (simple_exps || core[idx].volt < 750)
+ core_v_p = cbase * cbase; // v ^ 2
+ else
+ core_v_p = cbase * cbase * cbase; // v ^ 3
+ mw += dk_core * (core[idx].freq / 1000000.f) * core_v_p;
+
+ if (mw > imax * core[idx].volt)
+ mw = imax * core[idx].volt;
+
+ total_mw += mw;
+ }
+
+ max_pwr[i] = total_mw * 1000;
+ }
+
+ return 0;
+}
+
+static int dt_set_region(void *dt, int sgx, const char *name, const char *path)
+{
+ u64 base, size;
+ char prop[64];
+
+ snprintf(prop, sizeof(prop), "%s-base", name);
+ if (ADT_GETPROP(adt, sgx, prop, &base) < 0 || !base)
+ bail("ADT: GPU: failed to find %s property\n", prop);
+
+ snprintf(prop, sizeof(prop), "%s-size", name);
+    if (ADT_GETPROP(adt, sgx, prop, &size) < 0 || !size)
+ bail("ADT: GPU: failed to find %s property\n", prop);
+
+ int node = fdt_path_offset(dt, path);
+ if (node < 0)
+ bail("FDT: GPU: failed to find %s node\n", path);
+
+ fdt64_t reg[2];
+
+ fdt64_st(&reg[0], base);
+ fdt64_st(&reg[1], size);
+
+ if (fdt_setprop_inplace(dt, node, "reg", reg, sizeof(reg)))
+ bail("FDT: GPU: failed to set reg prop for %s\n", path);
+
+ return 0;
+}
+
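+/*
+ * Stores IEEE-754 binary32 values bit-for-bit as big-endian u32 cells; the
+ * consumer is expected to reinterpret the cells as floats.
+ */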
+int fdt_set_float_array(void *dt, int node, const char *name, float *val, int count)
+{
+ fdt32_t data[MAX_CLUSTERS];
+
+ if (count > MAX_CLUSTERS)
+ bail("FDT: GPU: fdt_set_float_array() with too many values\n");
+
+ memcpy(data, val, sizeof(float) * count);
+ for (int i = 0; i < count; i++) {
+ data[i] = cpu_to_fdt32(data[i]);
+ }
+
+ if (fdt_setprop_inplace(dt, node, name, data, sizeof(u32) * count))
+ bail("FDT: GPU: Failed to set %s\n", name);
+
+ return 0;
+}
+
+int dt_set_gpu(void *dt)
+{
+ int (*calc_power)(u32 count, u32 table_count, const struct perf_state *perf,
+ const struct perf_state *sram, u32 *max_pwr, float *core_leak,
+ float *sram_leak);
+
+ printf("FDT: GPU: Initializing GPU info\n");
+
+ switch (chip_id) {
+ case T8103:
+ calc_power = calc_power_t8103;
+ break;
+ case T6000:
+ case T6001:
+ case T6002:
+ case T8112:
+ calc_power = calc_power_t600x;
+ break;
+ default:
+ printf("ADT: GPU: unsupported chip!\n");
+ return 0;
+ }
+
+ int gpu = fdt_path_offset(dt, "gpu");
+ if (gpu < 0) {
+ printf("FDT: GPU: gpu alias not found in device tree\n");
+ return 0;
+ }
+
+ int len;
+ const fdt32_t *opps_ph = fdt_getprop(dt, gpu, "operating-points-v2", &len);
+ if (!opps_ph || len != 4)
+ bail("FDT: GPU: operating-points-v2 not found\n");
+
+ int opps = fdt_node_offset_by_phandle(dt, fdt32_ld(opps_ph));
+ if (opps < 0)
+ bail("FDT: GPU: node for phandle %u not found\n", fdt32_ld(opps_ph));
+
+ int sgx = adt_path_offset(adt, "/arm-io/sgx");
+ if (sgx < 0)
+ bail("ADT: GPU: /arm-io/sgx node not found\n");
+
+ u32 perf_state_count;
+ if (ADT_GETPROP(adt, sgx, "perf-state-count", &perf_state_count) < 0 || !perf_state_count)
+ bail("ADT: GPU: missing perf-state-count\n");
+
+ u32 perf_state_table_count;
+ if (ADT_GETPROP(adt, sgx, "perf-state-table-count", &perf_state_table_count) < 0 ||
+ !perf_state_table_count)
+ bail("ADT: GPU: missing perf-state-table-count\n");
+
+ if (perf_state_count > MAX_PSTATES)
+ bail("ADT: GPU: perf-state-count too large\n");
+
+ if (perf_state_table_count > MAX_CLUSTERS)
+ bail("ADT: GPU: perf-state-table-count too large\n");
+
+ u32 perf_states_len;
+ const struct perf_state *perf_states, *perf_states_sram;
+
+ perf_states = adt_getprop(adt, sgx, "perf-states", &perf_states_len);
+ if (!perf_states ||
+ perf_states_len != sizeof(*perf_states) * perf_state_count * perf_state_table_count)
+ bail("ADT: GPU: invalid perf-states length\n");
+
+ perf_states_sram = adt_getprop(adt, sgx, "perf-states-sram", &perf_states_len);
+ if (perf_states_sram &&
+ perf_states_len != sizeof(*perf_states) * perf_state_count * perf_state_table_count)
+ bail("ADT: GPU: invalid perf-states-sram length\n");
+
+ u32 max_pwr[MAX_PSTATES];
+ float core_leak[MAX_CLUSTERS];
+ float sram_leak[MAX_CLUSTERS];
+
+ if (calc_power(perf_state_count, perf_state_table_count, perf_states, perf_states_sram, max_pwr,
+ core_leak, sram_leak))
+ return -1;
+
+ printf("FDT: GPU: Max power table: ");
+ for (u32 i = 0; i < perf_state_count; i++) {
+ printf("%d ", max_pwr[i]);
+ }
+ printf("\nFDT: GPU: Core leakage table: ");
+ for (u32 i = 0; i < perf_state_table_count; i++) {
+ printf("%d.%03d ", (int)core_leak[i], ((int)(core_leak[i] * 1000) % 1000));
+ }
+ printf("\nFDT: GPU: SRAM leakage table: ");
+ for (u32 i = 0; i < perf_state_table_count; i++) {
+ printf("%d.%03d ", (int)sram_leak[i], ((int)(sram_leak[i] * 1000) % 1000));
+ }
+ printf("\n");
+
+ if (fdt_set_float_array(dt, gpu, "apple,core-leak-coef", core_leak, perf_state_table_count))
+ return -1;
+
+ if (fdt_set_float_array(dt, gpu, "apple,sram-leak-coef", sram_leak, perf_state_table_count))
+ return -1;
+
+ if (firmware_set_fdt(dt, gpu, "apple,firmware-version", &os_firmware))
+ return -1;
+
+ const struct fw_version_info *compat;
+
+ switch (os_firmware.version) {
+ case V12_3_1:
+ compat = &fw_versions[V12_3];
+ break;
+ default:
+ compat = &os_firmware;
+ break;
+ }
+
+ if (firmware_set_fdt(dt, gpu, "apple,firmware-compat", compat))
+ return -1;
+
+ u32 i = 0;
+ int opp;
+ fdt_for_each_subnode(opp, dt, opps)
+ {
+ fdt32_t volts[MAX_CLUSTERS];
+
+        /* Bounds-check i before indexing perf_states below */
+        if (i >= perf_state_count)
+            bail("FDT: GPU: Expected %d operating points, but found more\n", perf_state_count);
+
+        for (u32 j = 0; j < perf_state_table_count; j++) {
+            volts[j] = cpu_to_fdt32(perf_states[i + j * perf_state_count].volt * 1000);
+        }
+
+ if (fdt_setprop_inplace(dt, opp, "opp-microvolt", &volts,
+ sizeof(u32) * perf_state_table_count))
+ bail("FDT: GPU: Failed to set opp-microvolt for PS %d\n", i);
+
+ if (fdt_setprop_inplace_u64(dt, opp, "opp-hz", perf_states[i].freq))
+ bail("FDT: GPU: Failed to set opp-hz for PS %d\n", i);
+
+ if (fdt_setprop_inplace_u32(dt, opp, "opp-microwatt", max_pwr[i]))
+ bail("FDT: GPU: Failed to set opp-microwatt for PS %d\n", i);
+
+ i++;
+ }
+
+ if (i != perf_state_count)
+ bail("FDT: GPU: Expected %d operating points, but found %d\n", perf_state_count, i);
+
+ if (dt_set_region(dt, sgx, "gfx-handoff", "/reserved-memory/uat-handoff"))
+ return -1;
+ if (dt_set_region(dt, sgx, "gfx-shared-region", "/reserved-memory/uat-pagetables"))
+ return -1;
+ if (dt_set_region(dt, sgx, "gpu-region", "/reserved-memory/uat-ttbs"))
+ return -1;
+
+ return 0;
+}
diff --git a/tools/src/libfdt/fdt.c b/tools/src/libfdt/fdt.c
new file mode 100644
index 0000000..ebd163a
--- /dev/null
+++ b/tools/src/libfdt/fdt.c
@@ -0,0 +1,327 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause)
+/*
+ * libfdt - Flat Device Tree manipulation
+ * Copyright (C) 2006 David Gibson, IBM Corporation.
+ */
+#include "libfdt_env.h"
+
+#include "fdt.h"
+#include "libfdt.h"
+
+#include "libfdt_internal.h"
+
+/*
+ * Minimal sanity check for a read-only tree. fdt_ro_probe_() checks
+ * that the given buffer contains what appears to be a flattened
+ * device tree with sane information in its header.
+ */
+int32_t fdt_ro_probe_(const void *fdt)
+{
+ uint32_t totalsize = fdt_totalsize(fdt);
+
+ if (can_assume(VALID_DTB))
+ return totalsize;
+
+ if (fdt_magic(fdt) == FDT_MAGIC) {
+ /* Complete tree */
+ if (!can_assume(LATEST)) {
+ if (fdt_version(fdt) < FDT_FIRST_SUPPORTED_VERSION)
+ return -FDT_ERR_BADVERSION;
+ if (fdt_last_comp_version(fdt) >
+ FDT_LAST_SUPPORTED_VERSION)
+ return -FDT_ERR_BADVERSION;
+ }
+ } else if (fdt_magic(fdt) == FDT_SW_MAGIC) {
+ /* Unfinished sequential-write blob */
+ if (!can_assume(VALID_INPUT) && fdt_size_dt_struct(fdt) == 0)
+ return -FDT_ERR_BADSTATE;
+ } else {
+ return -FDT_ERR_BADMAGIC;
+ }
+
+ if (totalsize < INT32_MAX)
+ return totalsize;
+ else
+ return -FDT_ERR_TRUNCATED;
+}
+
+static int check_off_(uint32_t hdrsize, uint32_t totalsize, uint32_t off)
+{
+ return (off >= hdrsize) && (off <= totalsize);
+}
+
+static int check_block_(uint32_t hdrsize, uint32_t totalsize,
+ uint32_t base, uint32_t size)
+{
+ if (!check_off_(hdrsize, totalsize, base))
+ return 0; /* block start out of bounds */
+ if ((base + size) < base)
+ return 0; /* overflow */
+ if (!check_off_(hdrsize, totalsize, base + size))
+ return 0; /* block end out of bounds */
+ return 1;
+}
+
+size_t fdt_header_size_(uint32_t version)
+{
+ if (version <= 1)
+ return FDT_V1_SIZE;
+ else if (version <= 2)
+ return FDT_V2_SIZE;
+ else if (version <= 3)
+ return FDT_V3_SIZE;
+ else if (version <= 16)
+ return FDT_V16_SIZE;
+ else
+ return FDT_V17_SIZE;
+}
+
+size_t fdt_header_size(const void *fdt)
+{
+ return can_assume(LATEST) ? FDT_V17_SIZE :
+ fdt_header_size_(fdt_version(fdt));
+}
+
+int fdt_check_header(const void *fdt)
+{
+ size_t hdrsize;
+
+ if (fdt_magic(fdt) != FDT_MAGIC)
+ return -FDT_ERR_BADMAGIC;
+ if (!can_assume(LATEST)) {
+ if ((fdt_version(fdt) < FDT_FIRST_SUPPORTED_VERSION)
+ || (fdt_last_comp_version(fdt) >
+ FDT_LAST_SUPPORTED_VERSION))
+ return -FDT_ERR_BADVERSION;
+ if (fdt_version(fdt) < fdt_last_comp_version(fdt))
+ return -FDT_ERR_BADVERSION;
+ }
+ hdrsize = fdt_header_size(fdt);
+ if (!can_assume(VALID_DTB)) {
+
+ if ((fdt_totalsize(fdt) < hdrsize)
+ || (fdt_totalsize(fdt) > INT_MAX))
+ return -FDT_ERR_TRUNCATED;
+
+ /* Bounds check memrsv block */
+ if (!check_off_(hdrsize, fdt_totalsize(fdt),
+ fdt_off_mem_rsvmap(fdt)))
+ return -FDT_ERR_TRUNCATED;
+ }
+
+ if (!can_assume(VALID_DTB)) {
+ /* Bounds check structure block */
+ if (!can_assume(LATEST) && fdt_version(fdt) < 17) {
+ if (!check_off_(hdrsize, fdt_totalsize(fdt),
+ fdt_off_dt_struct(fdt)))
+ return -FDT_ERR_TRUNCATED;
+ } else {
+ if (!check_block_(hdrsize, fdt_totalsize(fdt),
+ fdt_off_dt_struct(fdt),
+ fdt_size_dt_struct(fdt)))
+ return -FDT_ERR_TRUNCATED;
+ }
+
+ /* Bounds check strings block */
+ if (!check_block_(hdrsize, fdt_totalsize(fdt),
+ fdt_off_dt_strings(fdt),
+ fdt_size_dt_strings(fdt)))
+ return -FDT_ERR_TRUNCATED;
+ }
+
+ return 0;
+}
+
+const void *fdt_offset_ptr(const void *fdt, int offset, unsigned int len)
+{
+ unsigned int uoffset = offset;
+ unsigned int absoffset = offset + fdt_off_dt_struct(fdt);
+
+ if (offset < 0)
+ return NULL;
+
+ if (!can_assume(VALID_INPUT))
+ if ((absoffset < uoffset)
+ || ((absoffset + len) < absoffset)
+ || (absoffset + len) > fdt_totalsize(fdt))
+ return NULL;
+
+ if (can_assume(LATEST) || fdt_version(fdt) >= 0x11)
+ if (((uoffset + len) < uoffset)
+ || ((offset + len) > fdt_size_dt_struct(fdt)))
+ return NULL;
+
+ return fdt_offset_ptr_(fdt, offset);
+}
+
+uint32_t fdt_next_tag(const void *fdt, int startoffset, int *nextoffset)
+{
+ const fdt32_t *tagp, *lenp;
+ uint32_t tag;
+ int offset = startoffset;
+ const char *p;
+
+ *nextoffset = -FDT_ERR_TRUNCATED;
+ tagp = fdt_offset_ptr(fdt, offset, FDT_TAGSIZE);
+ if (!can_assume(VALID_DTB) && !tagp)
+ return FDT_END; /* premature end */
+ tag = fdt32_to_cpu(*tagp);
+ offset += FDT_TAGSIZE;
+
+ *nextoffset = -FDT_ERR_BADSTRUCTURE;
+ switch (tag) {
+ case FDT_BEGIN_NODE:
+ /* skip name */
+ do {
+ p = fdt_offset_ptr(fdt, offset++, 1);
+ } while (p && (*p != '\0'));
+ if (!can_assume(VALID_DTB) && !p)
+ return FDT_END; /* premature end */
+ break;
+
+ case FDT_PROP:
+ lenp = fdt_offset_ptr(fdt, offset, sizeof(*lenp));
+ if (!can_assume(VALID_DTB) && !lenp)
+ return FDT_END; /* premature end */
+ /* skip-name offset, length and value */
+ offset += sizeof(struct fdt_property) - FDT_TAGSIZE
+ + fdt32_to_cpu(*lenp);
+ if (!can_assume(LATEST) &&
+ fdt_version(fdt) < 0x10 && fdt32_to_cpu(*lenp) >= 8 &&
+ ((offset - fdt32_to_cpu(*lenp)) % 8) != 0)
+ offset += 4;
+ break;
+
+ case FDT_END:
+ case FDT_END_NODE:
+ case FDT_NOP:
+ break;
+
+ default:
+ return FDT_END;
+ }
+
+ if (!fdt_offset_ptr(fdt, startoffset, offset - startoffset))
+ return FDT_END; /* premature end */
+
+ *nextoffset = FDT_TAGALIGN(offset);
+ return tag;
+}
+
+int fdt_check_node_offset_(const void *fdt, int offset)
+{
+ if (!can_assume(VALID_INPUT)
+ && ((offset < 0) || (offset % FDT_TAGSIZE)))
+ return -FDT_ERR_BADOFFSET;
+
+ if (fdt_next_tag(fdt, offset, &offset) != FDT_BEGIN_NODE)
+ return -FDT_ERR_BADOFFSET;
+
+ return offset;
+}
+
+int fdt_check_prop_offset_(const void *fdt, int offset)
+{
+ if (!can_assume(VALID_INPUT)
+ && ((offset < 0) || (offset % FDT_TAGSIZE)))
+ return -FDT_ERR_BADOFFSET;
+
+ if (fdt_next_tag(fdt, offset, &offset) != FDT_PROP)
+ return -FDT_ERR_BADOFFSET;
+
+ return offset;
+}
+
+int fdt_next_node(const void *fdt, int offset, int *depth)
+{
+ int nextoffset = 0;
+ uint32_t tag;
+
+ if (offset >= 0)
+ if ((nextoffset = fdt_check_node_offset_(fdt, offset)) < 0)
+ return nextoffset;
+
+ do {
+ offset = nextoffset;
+ tag = fdt_next_tag(fdt, offset, &nextoffset);
+
+ switch (tag) {
+ case FDT_PROP:
+ case FDT_NOP:
+ break;
+
+ case FDT_BEGIN_NODE:
+ if (depth)
+ (*depth)++;
+ break;
+
+ case FDT_END_NODE:
+ if (depth && ((--(*depth)) < 0))
+ return nextoffset;
+ break;
+
+ case FDT_END:
+ if ((nextoffset >= 0)
+ || ((nextoffset == -FDT_ERR_TRUNCATED) && !depth))
+ return -FDT_ERR_NOTFOUND;
+ else
+ return nextoffset;
+ }
+ } while (tag != FDT_BEGIN_NODE);
+
+ return offset;
+}
+
+int fdt_first_subnode(const void *fdt, int offset)
+{
+ int depth = 0;
+
+ offset = fdt_next_node(fdt, offset, &depth);
+ if (offset < 0 || depth != 1)
+ return -FDT_ERR_NOTFOUND;
+
+ return offset;
+}
+
+int fdt_next_subnode(const void *fdt, int offset)
+{
+ int depth = 1;
+
+ /*
+ * With respect to the parent, the depth of the next subnode will be
+ * the same as the last.
+ */
+ do {
+ offset = fdt_next_node(fdt, offset, &depth);
+ if (offset < 0 || depth < 1)
+ return -FDT_ERR_NOTFOUND;
+ } while (depth > 1);
+
+ return offset;
+}
+
+const char *fdt_find_string_(const char *strtab, int tabsize, const char *s)
+{
+ int len = strlen(s) + 1;
+ const char *last = strtab + tabsize - len;
+ const char *p;
+
+ for (p = strtab; p <= last; p++)
+ if (memcmp(p, s, len) == 0)
+ return p;
+ return NULL;
+}
+
+int fdt_move(const void *fdt, void *buf, int bufsize)
+{
+ if (!can_assume(VALID_INPUT) && bufsize < 0)
+ return -FDT_ERR_NOSPACE;
+
+ FDT_RO_PROBE(fdt);
+
+ if (fdt_totalsize(fdt) > (unsigned int)bufsize)
+ return -FDT_ERR_NOSPACE;
+
+ memmove(buf, fdt, fdt_totalsize(fdt));
+ return 0;
+}
diff --git a/tools/src/libfdt/fdt.h b/tools/src/libfdt/fdt.h
new file mode 100644
index 0000000..f2e6880
--- /dev/null
+++ b/tools/src/libfdt/fdt.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) */
+#ifndef FDT_H
+#define FDT_H
+/*
+ * libfdt - Flat Device Tree manipulation
+ * Copyright (C) 2006 David Gibson, IBM Corporation.
+ * Copyright 2012 Kim Phillips, Freescale Semiconductor.
+ */
+
+#ifndef __ASSEMBLY__
+
+struct fdt_header {
+ fdt32_t magic; /* magic word FDT_MAGIC */
+ fdt32_t totalsize; /* total size of DT block */
+ fdt32_t off_dt_struct; /* offset to structure */
+ fdt32_t off_dt_strings; /* offset to strings */
+ fdt32_t off_mem_rsvmap; /* offset to memory reserve map */
+ fdt32_t version; /* format version */
+ fdt32_t last_comp_version; /* last compatible version */
+
+ /* version 2 fields below */
+ fdt32_t boot_cpuid_phys; /* Which physical CPU id we're
+ booting on */
+ /* version 3 fields below */
+ fdt32_t size_dt_strings; /* size of the strings block */
+
+ /* version 17 fields below */
+ fdt32_t size_dt_struct; /* size of the structure block */
+};
+
+struct fdt_reserve_entry {
+ fdt64_t address;
+ fdt64_t size;
+};
+
+struct fdt_node_header {
+ fdt32_t tag;
+ char name[0];
+};
+
+struct fdt_property {
+ fdt32_t tag;
+ fdt32_t len;
+ fdt32_t nameoff;
+ char data[0];
+};
+
+#endif /* !__ASSEMBLY */
+
+#define FDT_MAGIC 0xd00dfeed /* 4: version, 4: total size */
+#define FDT_TAGSIZE sizeof(fdt32_t)
+
+#define FDT_BEGIN_NODE 0x1 /* Start node: full name */
+#define FDT_END_NODE 0x2 /* End node */
+#define FDT_PROP 0x3 /* Property: name off,
+ size, content */
+#define FDT_NOP 0x4 /* nop */
+#define FDT_END 0x9
+
+#define FDT_V1_SIZE (7*sizeof(fdt32_t))
+#define FDT_V2_SIZE (FDT_V1_SIZE + sizeof(fdt32_t))
+#define FDT_V3_SIZE (FDT_V2_SIZE + sizeof(fdt32_t))
+#define FDT_V16_SIZE FDT_V3_SIZE
+#define FDT_V17_SIZE (FDT_V16_SIZE + sizeof(fdt32_t))
+
+#endif /* FDT_H */
diff --git a/tools/src/libfdt/fdt_addresses.c b/tools/src/libfdt/fdt_addresses.c
new file mode 100644
index 0000000..6357859
--- /dev/null
+++ b/tools/src/libfdt/fdt_addresses.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause)
+/*
+ * libfdt - Flat Device Tree manipulation
+ * Copyright (C) 2014 David Gibson <david@gibson.dropbear.id.au>
+ * Copyright (C) 2018 embedded brains GmbH
+ */
+#include "libfdt_env.h"
+
+#include "fdt.h"
+#include "libfdt.h"
+
+#include "libfdt_internal.h"
+
+static int fdt_cells(const void *fdt, int nodeoffset, const char *name)
+{
+ const fdt32_t *c;
+ uint32_t val;
+ int len;
+
+ c = fdt_getprop(fdt, nodeoffset, name, &len);
+ if (!c)
+ return len;
+
+ if (len != sizeof(*c))
+ return -FDT_ERR_BADNCELLS;
+
+ val = fdt32_to_cpu(*c);
+ if (val > FDT_MAX_NCELLS)
+ return -FDT_ERR_BADNCELLS;
+
+ return (int)val;
+}
+
+int fdt_address_cells(const void *fdt, int nodeoffset)
+{
+ int val;
+
+ val = fdt_cells(fdt, nodeoffset, "#address-cells");
+ if (val == 0)
+ return -FDT_ERR_BADNCELLS;
+ if (val == -FDT_ERR_NOTFOUND)
+ return 2;
+ return val;
+}
+
+int fdt_size_cells(const void *fdt, int nodeoffset)
+{
+ int val;
+
+ val = fdt_cells(fdt, nodeoffset, "#size-cells");
+ if (val == -FDT_ERR_NOTFOUND)
+ return 1;
+ return val;
+}
+
+/* This function assumes that [address|size]_cells is 1 or 2 */
+int fdt_appendprop_addrrange(void *fdt, int parent, int nodeoffset,
+ const char *name, uint64_t addr, uint64_t size)
+{
+ int addr_cells, size_cells, ret;
+ uint8_t data[sizeof(fdt64_t) * 2], *prop;
+
+ ret = fdt_address_cells(fdt, parent);
+ if (ret < 0)
+ return ret;
+ addr_cells = ret;
+
+ ret = fdt_size_cells(fdt, parent);
+ if (ret < 0)
+ return ret;
+ size_cells = ret;
+
+ /* check validity of address */
+ prop = data;
+ if (addr_cells == 1) {
+ if ((addr > UINT32_MAX) || ((UINT32_MAX + 1 - addr) < size))
+ return -FDT_ERR_BADVALUE;
+
+ fdt32_st(prop, (uint32_t)addr);
+ } else if (addr_cells == 2) {
+ fdt64_st(prop, addr);
+ } else {
+ return -FDT_ERR_BADNCELLS;
+ }
+
+ /* check validity of size */
+ prop += addr_cells * sizeof(fdt32_t);
+ if (size_cells == 1) {
+ if (size > UINT32_MAX)
+ return -FDT_ERR_BADVALUE;
+
+ fdt32_st(prop, (uint32_t)size);
+ } else if (size_cells == 2) {
+ fdt64_st(prop, size);
+ } else {
+ return -FDT_ERR_BADNCELLS;
+ }
+
+ return fdt_appendprop(fdt, nodeoffset, name, data,
+ (addr_cells + size_cells) * sizeof(fdt32_t));
+}
diff --git a/tools/src/libfdt/fdt_empty_tree.c b/tools/src/libfdt/fdt_empty_tree.c
new file mode 100644
index 0000000..15f0cd7
--- /dev/null
+++ b/tools/src/libfdt/fdt_empty_tree.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause)
+/*
+ * libfdt - Flat Device Tree manipulation
+ * Copyright (C) 2012 David Gibson, IBM Corporation.
+ */
+#include "libfdt_env.h"
+
+#include "fdt.h"
+#include "libfdt.h"
+
+#include "libfdt_internal.h"
+
+int fdt_create_empty_tree(void *buf, int bufsize)
+{
+ int err;
+
+ err = fdt_create(buf, bufsize);
+ if (err)
+ return err;
+
+ err = fdt_finish_reservemap(buf);
+ if (err)
+ return err;
+
+ err = fdt_begin_node(buf, "");
+ if (err)
+ return err;
+
+ err = fdt_end_node(buf);
+ if (err)
+ return err;
+
+ err = fdt_finish(buf);
+ if (err)
+ return err;
+
+ return fdt_open_into(buf, buf, bufsize);
+}
diff --git a/tools/src/libfdt/fdt_overlay.c b/tools/src/libfdt/fdt_overlay.c
new file mode 100644
index 0000000..1fc78d4
--- /dev/null
+++ b/tools/src/libfdt/fdt_overlay.c
@@ -0,0 +1,882 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause)
+/*
+ * libfdt - Flat Device Tree manipulation
+ * Copyright (C) 2016 Free Electrons
+ * Copyright (C) 2016 NextThing Co.
+ */
+#include "libfdt_env.h"
+
+#include "fdt.h"
+#include "libfdt.h"
+
+#include "libfdt_internal.h"
+
+/**
+ * overlay_get_target_phandle - retrieves the target phandle of a fragment
+ * @fdto: pointer to the device tree overlay blob
+ * @fragment: node offset of the fragment in the overlay
+ *
+ * overlay_get_target_phandle() retrieves the target phandle of an
+ * overlay fragment when that fragment uses a phandle (target
+ * property) instead of a path (target-path property).
+ *
+ * returns:
+ * the phandle pointed by the target property
+ * 0, if the phandle was not found
+ * -1, if the phandle was malformed
+ */
+static uint32_t overlay_get_target_phandle(const void *fdto, int fragment)
+{
+ const fdt32_t *val;
+ int len;
+
+ val = fdt_getprop(fdto, fragment, "target", &len);
+ if (!val)
+ return 0;
+
+ if ((len != sizeof(*val)) || (fdt32_to_cpu(*val) == (uint32_t)-1))
+ return (uint32_t)-1;
+
+ return fdt32_to_cpu(*val);
+}
+
+/**
+ * overlay_get_target - retrieves the offset of a fragment's target
+ * @fdt: Base device tree blob
+ * @fdto: Device tree overlay blob
+ * @fragment: node offset of the fragment in the overlay
+ * @pathp: pointer which receives the path of the target (or NULL)
+ *
+ * overlay_get_target() retrieves the target offset in the base
+ * device tree of a fragment, no matter how the actual targeting is
+ * done (through a phandle or a path)
+ *
+ * returns:
+ * the targeted node offset in the base device tree
+ * Negative error code on error
+ */
+static int overlay_get_target(const void *fdt, const void *fdto,
+ int fragment, char const **pathp)
+{
+ uint32_t phandle;
+ const char *path = NULL;
+ int path_len = 0, ret;
+
+ /* Try first to do a phandle based lookup */
+ phandle = overlay_get_target_phandle(fdto, fragment);
+ if (phandle == (uint32_t)-1)
+ return -FDT_ERR_BADPHANDLE;
+
+ /* no phandle, try path */
+ if (!phandle) {
+ /* And then a path based lookup */
+ path = fdt_getprop(fdto, fragment, "target-path", &path_len);
+ if (path)
+ ret = fdt_path_offset(fdt, path);
+ else
+ ret = path_len;
+ } else
+ ret = fdt_node_offset_by_phandle(fdt, phandle);
+
+ /*
+ * If we haven't found either a target or a
+ * target-path property in a node that contains a
+ * __overlay__ subnode (we wouldn't be called
+ * otherwise), consider it an improperly written
+ * overlay
+ */
+ if (ret < 0 && path_len == -FDT_ERR_NOTFOUND)
+ ret = -FDT_ERR_BADOVERLAY;
+
+ /* return on error */
+ if (ret < 0)
+ return ret;
+
+ /* return pointer to path (if available) */
+ if (pathp)
+ *pathp = path ? path : NULL;
+
+ return ret;
+}
+
+/**
+ * overlay_phandle_add_offset - Increases a phandle by an offset
+ * @fdt: Base device tree blob
+ * @node: Node offset of the node whose phandle should be adjusted
+ * @name: Name of the property to modify (phandle or linux,phandle)
+ * @delta: offset to apply
+ *
+ * overlay_phandle_add_offset() increments a node phandle by a given
+ * offset.
+ *
+ * returns:
+ * 0 on success.
+ * Negative error code on error
+ */
+static int overlay_phandle_add_offset(void *fdt, int node,
+ const char *name, uint32_t delta)
+{
+ const fdt32_t *val;
+ uint32_t adj_val;
+ int len;
+
+ val = fdt_getprop(fdt, node, name, &len);
+ if (!val)
+ return len;
+
+ if (len != sizeof(*val))
+ return -FDT_ERR_BADPHANDLE;
+
+ adj_val = fdt32_to_cpu(*val);
+ if ((adj_val + delta) < adj_val)
+ return -FDT_ERR_NOPHANDLES;
+
+ adj_val += delta;
+ if (adj_val == (uint32_t)-1)
+ return -FDT_ERR_NOPHANDLES;
+
+ return fdt_setprop_inplace_u32(fdt, node, name, adj_val);
+}
+
+/**
+ * overlay_adjust_node_phandles - Offsets the phandles of a node
+ * @fdto: Device tree overlay blob
+ * @node: Offset of the node we want to adjust
+ * @delta: Offset to shift the phandles of
+ *
+ * overlay_adjust_node_phandles() adds a constant to all the phandles
+ * of a given node. This is mainly used as part of the overlay
+ * application process, when we want to update all the overlay
+ * phandles to not conflict with the overlays of the base device tree.
+ *
+ * returns:
+ * 0 on success
+ * Negative error code on failure
+ */
+static int overlay_adjust_node_phandles(void *fdto, int node,
+ uint32_t delta)
+{
+ int child;
+ int ret;
+
+ ret = overlay_phandle_add_offset(fdto, node, "phandle", delta);
+ if (ret && ret != -FDT_ERR_NOTFOUND)
+ return ret;
+
+ ret = overlay_phandle_add_offset(fdto, node, "linux,phandle", delta);
+ if (ret && ret != -FDT_ERR_NOTFOUND)
+ return ret;
+
+ fdt_for_each_subnode(child, fdto, node) {
+ ret = overlay_adjust_node_phandles(fdto, child, delta);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * overlay_adjust_local_phandles - Adjust the phandles of a whole overlay
+ * @fdto: Device tree overlay blob
+ * @delta: Offset to shift the phandles of
+ *
+ * overlay_adjust_local_phandles() adds a constant to all the
+ * phandles of an overlay. This is mainly used as part of the overlay
+ * application process, when we want to update all the overlay
+ * phandles to not conflict with the overlays of the base device tree.
+ *
+ * returns:
+ * 0 on success
+ * Negative error code on failure
+ */
+static int overlay_adjust_local_phandles(void *fdto, uint32_t delta)
+{
+ /*
+ * Start adjusting the phandles from the overlay root
+ */
+ return overlay_adjust_node_phandles(fdto, 0, delta);
+}
+
+/**
+ * overlay_update_local_node_references - Adjust the overlay references
+ * @fdto: Device tree overlay blob
+ * @tree_node: Node offset of the node to operate on
+ * @fixup_node: Node offset of the matching local fixups node
+ * @delta: Offset to shift the phandles of
+ *
+ * overlay_update_local_node_references() updates the phandles
+ * pointing to a node within the device tree overlay by adding a
+ * constant delta.
+ *
+ * This is mainly used as part of a device tree application process,
+ * where you want the device tree overlays phandles to not conflict
+ * with the ones from the base device tree before merging them.
+ *
+ * returns:
+ * 0 on success
+ * Negative error code on failure
+ */
+static int overlay_update_local_node_references(void *fdto,
+ int tree_node,
+ int fixup_node,
+ uint32_t delta)
+{
+ int fixup_prop;
+ int fixup_child;
+ int ret;
+
+ fdt_for_each_property_offset(fixup_prop, fdto, fixup_node) {
+ const fdt32_t *fixup_val;
+ const char *tree_val;
+ const char *name;
+ int fixup_len;
+ int tree_len;
+ int i;
+
+ fixup_val = fdt_getprop_by_offset(fdto, fixup_prop,
+ &name, &fixup_len);
+ if (!fixup_val)
+ return fixup_len;
+
+ if (fixup_len % sizeof(uint32_t))
+ return -FDT_ERR_BADOVERLAY;
+ fixup_len /= sizeof(uint32_t);
+
+ tree_val = fdt_getprop(fdto, tree_node, name, &tree_len);
+ if (!tree_val) {
+ if (tree_len == -FDT_ERR_NOTFOUND)
+ return -FDT_ERR_BADOVERLAY;
+
+ return tree_len;
+ }
+
+ for (i = 0; i < fixup_len; i++) {
+ fdt32_t adj_val;
+ uint32_t poffset;
+
+ poffset = fdt32_to_cpu(fixup_val[i]);
+
+ /*
+ * phandles to fixup can be unaligned.
+ *
+ * Use a memcpy for the architectures that do
+ * not support unaligned accesses.
+ */
+ memcpy(&adj_val, tree_val + poffset, sizeof(adj_val));
+
+ adj_val = cpu_to_fdt32(fdt32_to_cpu(adj_val) + delta);
+
+ ret = fdt_setprop_inplace_namelen_partial(fdto,
+ tree_node,
+ name,
+ strlen(name),
+ poffset,
+ &adj_val,
+ sizeof(adj_val));
+ if (ret == -FDT_ERR_NOSPACE)
+ return -FDT_ERR_BADOVERLAY;
+
+ if (ret)
+ return ret;
+ }
+ }
+
+ fdt_for_each_subnode(fixup_child, fdto, fixup_node) {
+ const char *fixup_child_name = fdt_get_name(fdto, fixup_child,
+ NULL);
+ int tree_child;
+
+ tree_child = fdt_subnode_offset(fdto, tree_node,
+ fixup_child_name);
+ if (tree_child == -FDT_ERR_NOTFOUND)
+ return -FDT_ERR_BADOVERLAY;
+ if (tree_child < 0)
+ return tree_child;
+
+ ret = overlay_update_local_node_references(fdto,
+ tree_child,
+ fixup_child,
+ delta);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * overlay_update_local_references - Adjust the overlay references
+ * @fdto: Device tree overlay blob
+ * @delta: Offset to shift the phandles of
+ *
+ * overlay_update_local_references() updates all the phandles pointing
+ * to a node within the device tree overlay by adding a constant
+ * delta to not conflict with the base overlay.
+ *
+ * This is mainly used as part of a device tree application process,
+ * where you want the device tree overlays phandles to not conflict
+ * with the ones from the base device tree before merging them.
+ *
+ * returns:
+ * 0 on success
+ * Negative error code on failure
+ */
+static int overlay_update_local_references(void *fdto, uint32_t delta)
+{
+ int fixups;
+
+ fixups = fdt_path_offset(fdto, "/__local_fixups__");
+ if (fixups < 0) {
+		/* There are no local phandles to adjust, bail out */
+ if (fixups == -FDT_ERR_NOTFOUND)
+ return 0;
+
+ return fixups;
+ }
+
+ /*
+ * Update our local references from the root of the tree
+ */
+ return overlay_update_local_node_references(fdto, 0, fixups,
+ delta);
+}
+
+/**
+ * overlay_fixup_one_phandle - Set an overlay phandle to the base one
+ * @fdt: Base Device Tree blob
+ * @fdto: Device tree overlay blob
+ * @symbols_off: Node offset of the symbols node in the base device tree
+ * @path: Path to a node holding a phandle in the overlay
+ * @path_len: number of path characters to consider
+ * @name: Name of the property holding the phandle reference in the overlay
+ * @name_len: number of name characters to consider
+ * @poffset: Offset within the overlay property where the phandle is stored
+ * @label: Label of the node referenced by the phandle
+ *
+ * overlay_fixup_one_phandle() resolves an overlay phandle pointing to
+ * a node in the base device tree.
+ *
+ * This is part of the device tree overlay application process, when
+ * you want all the phandles in the overlay to point to the actual
+ * base dt nodes.
+ *
+ * returns:
+ * 0 on success
+ * Negative error code on failure
+ */
+static int overlay_fixup_one_phandle(void *fdt, void *fdto,
+ int symbols_off,
+ const char *path, uint32_t path_len,
+ const char *name, uint32_t name_len,
+ int poffset, const char *label)
+{
+ const char *symbol_path;
+ uint32_t phandle;
+ fdt32_t phandle_prop;
+ int symbol_off, fixup_off;
+ int prop_len;
+
+ if (symbols_off < 0)
+ return symbols_off;
+
+ symbol_path = fdt_getprop(fdt, symbols_off, label,
+ &prop_len);
+ if (!symbol_path)
+ return prop_len;
+
+ symbol_off = fdt_path_offset(fdt, symbol_path);
+ if (symbol_off < 0)
+ return symbol_off;
+
+ phandle = fdt_get_phandle(fdt, symbol_off);
+ if (!phandle)
+ return -FDT_ERR_NOTFOUND;
+
+ fixup_off = fdt_path_offset_namelen(fdto, path, path_len);
+ if (fixup_off == -FDT_ERR_NOTFOUND)
+ return -FDT_ERR_BADOVERLAY;
+ if (fixup_off < 0)
+ return fixup_off;
+
+ phandle_prop = cpu_to_fdt32(phandle);
+ return fdt_setprop_inplace_namelen_partial(fdto, fixup_off,
+ name, name_len, poffset,
+ &phandle_prop,
+ sizeof(phandle_prop));
+}
+
+/**
+ * overlay_fixup_phandle - Set an overlay phandle to the base one
+ * @fdt: Base Device Tree blob
+ * @fdto: Device tree overlay blob
+ * @symbols_off: Node offset of the symbols node in the base device tree
+ * @property: Property offset in the overlay holding the list of fixups
+ *
+ * overlay_fixup_phandle() resolves all the overlay phandles pointed
+ * to in a __fixups__ property, and updates them to match the phandles
+ * in use in the base device tree.
+ *
+ * This is part of the device tree overlay application process, when
+ * you want all the phandles in the overlay to point to the actual
+ * base dt nodes.
+ *
+ * returns:
+ * 0 on success
+ * Negative error code on failure
+ */
+static int overlay_fixup_phandle(void *fdt, void *fdto, int symbols_off,
+ int property)
+{
+ const char *value;
+ const char *label;
+ int len;
+
+ value = fdt_getprop_by_offset(fdto, property,
+ &label, &len);
+ if (!value) {
+ if (len == -FDT_ERR_NOTFOUND)
+ return -FDT_ERR_INTERNAL;
+
+ return len;
+ }
+
+ do {
+ const char *path, *name, *fixup_end;
+ const char *fixup_str = value;
+ uint32_t path_len, name_len;
+ uint32_t fixup_len;
+ char *sep, *endptr;
+ int poffset, ret;
+
+ fixup_end = memchr(value, '\0', len);
+ if (!fixup_end)
+ return -FDT_ERR_BADOVERLAY;
+ fixup_len = fixup_end - fixup_str;
+
+ len -= fixup_len + 1;
+ value += fixup_len + 1;
+
+ path = fixup_str;
+ sep = memchr(fixup_str, ':', fixup_len);
+ if (!sep || *sep != ':')
+ return -FDT_ERR_BADOVERLAY;
+
+ path_len = sep - path;
+ if (path_len == (fixup_len - 1))
+ return -FDT_ERR_BADOVERLAY;
+
+ fixup_len -= path_len + 1;
+ name = sep + 1;
+ sep = memchr(name, ':', fixup_len);
+ if (!sep || *sep != ':')
+ return -FDT_ERR_BADOVERLAY;
+
+ name_len = sep - name;
+ if (!name_len)
+ return -FDT_ERR_BADOVERLAY;
+
+ poffset = strtoul(sep + 1, &endptr, 10);
+ if ((*endptr != '\0') || (endptr <= (sep + 1)))
+ return -FDT_ERR_BADOVERLAY;
+
+ ret = overlay_fixup_one_phandle(fdt, fdto, symbols_off,
+ path, path_len, name, name_len,
+ poffset, label);
+ if (ret)
+ return ret;
+ } while (len > 0);
+
+ return 0;
+}
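Each entry in a __fixups__ property, as parsed above, is a NUL-separated string of the form "<path>:<property>:<offset>". A hypothetical illustration (the label and node names are invented for this sketch):

    /*
     * In the base tree:      serial0: uart@1000 { ... };
     * In the overlay source: clocks = <&serial0>;
     *
     * which compiles to:
     *
     * __fixups__ {
     *         serial0 = "/fragment@0/__overlay__/node:clocks:0";
     * };
     *
     * i.e. "patch the 32-bit cell at byte offset 0 of the 'clocks'
     * property of that overlay node with the phandle that the base
     * tree's __symbols__ node records under the label 'serial0'".
     */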
+
+/**
+ * overlay_fixup_phandles - Resolve the overlay phandles to the base
+ * device tree
+ * @fdt: Base Device Tree blob
+ * @fdto: Device tree overlay blob
+ *
+ * overlay_fixup_phandles() resolves all the overlay phandles pointing
+ * to nodes in the base device tree.
+ *
+ * This is one of the steps of the device tree overlay application
+ * process, when you want all the phandles in the overlay to point to
+ * the actual base dt nodes.
+ *
+ * returns:
+ * 0 on success
+ * Negative error code on failure
+ */
+static int overlay_fixup_phandles(void *fdt, void *fdto)
+{
+ int fixups_off, symbols_off;
+ int property;
+
+ /* We can have overlays without any fixups */
+ fixups_off = fdt_path_offset(fdto, "/__fixups__");
+ if (fixups_off == -FDT_ERR_NOTFOUND)
+ return 0; /* nothing to do */
+ if (fixups_off < 0)
+ return fixups_off;
+
+ /* And base DTs without symbols */
+ symbols_off = fdt_path_offset(fdt, "/__symbols__");
+ if ((symbols_off < 0 && (symbols_off != -FDT_ERR_NOTFOUND)))
+ return symbols_off;
+
+ fdt_for_each_property_offset(property, fdto, fixups_off) {
+ int ret;
+
+ ret = overlay_fixup_phandle(fdt, fdto, symbols_off, property);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * overlay_apply_node - Merges a node into the base device tree
+ * @fdt: Base Device Tree blob
+ * @target: Node offset in the base device tree to apply the fragment to
+ * @fdto: Device tree overlay blob
+ * @node: Node offset in the overlay holding the changes to merge
+ *
+ * overlay_apply_node() merges an overlay node into the target node
+ * of the base device tree.
+ *
+ * This is part of the final step in the device tree overlay
+ * application process, when all the phandles have been adjusted and
+ * resolved and you just have to merge the overlay into the base device
+ * tree.
+ *
+ * returns:
+ * 0 on success
+ * Negative error code on failure
+ */
+static int overlay_apply_node(void *fdt, int target,
+ void *fdto, int node)
+{
+ int property;
+ int subnode;
+
+ fdt_for_each_property_offset(property, fdto, node) {
+ const char *name;
+ const void *prop;
+ int prop_len;
+ int ret;
+
+ prop = fdt_getprop_by_offset(fdto, property, &name,
+ &prop_len);
+ if (prop_len == -FDT_ERR_NOTFOUND)
+ return -FDT_ERR_INTERNAL;
+ if (prop_len < 0)
+ return prop_len;
+
+ ret = fdt_setprop(fdt, target, name, prop, prop_len);
+ if (ret)
+ return ret;
+ }
+
+ fdt_for_each_subnode(subnode, fdto, node) {
+ const char *name = fdt_get_name(fdto, subnode, NULL);
+ int nnode;
+ int ret;
+
+ nnode = fdt_add_subnode(fdt, target, name);
+ if (nnode == -FDT_ERR_EXISTS) {
+ nnode = fdt_subnode_offset(fdt, target, name);
+ if (nnode == -FDT_ERR_NOTFOUND)
+ return -FDT_ERR_INTERNAL;
+ }
+
+ if (nnode < 0)
+ return nnode;
+
+ ret = overlay_apply_node(fdt, nnode, fdto, subnode);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * overlay_merge - Merge an overlay into its base device tree
+ * @fdt: Base Device Tree blob
+ * @fdto: Device tree overlay blob
+ *
+ * overlay_merge() merges an overlay into its base device tree.
+ *
+ * This is the next to last step in the device tree overlay application
+ * process, when all the phandles have been adjusted and resolved and
+ * you just have to merge the overlay into the base device tree.
+ *
+ * returns:
+ * 0 on success
+ * Negative error code on failure
+ */
+static int overlay_merge(void *fdt, void *fdto)
+{
+ int fragment;
+
+ fdt_for_each_subnode(fragment, fdto, 0) {
+ int overlay;
+ int target;
+ int ret;
+
+ /*
+ * Each fragment will have an __overlay__ node. If
+ * it doesn't, it's not supposed to be merged.
+ */
+ overlay = fdt_subnode_offset(fdto, fragment, "__overlay__");
+ if (overlay == -FDT_ERR_NOTFOUND)
+ continue;
+
+ if (overlay < 0)
+ return overlay;
+
+ target = overlay_get_target(fdt, fdto, fragment, NULL);
+ if (target < 0)
+ return target;
+
+ ret = overlay_apply_node(fdt, target, fdto, overlay);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int get_path_len(const void *fdt, int nodeoffset)
+{
+ int len = 0, namelen;
+ const char *name;
+
+ FDT_RO_PROBE(fdt);
+
+ for (;;) {
+ name = fdt_get_name(fdt, nodeoffset, &namelen);
+ if (!name)
+ return namelen;
+
+ /* root? we're done */
+ if (namelen == 0)
+ break;
+
+ nodeoffset = fdt_parent_offset(fdt, nodeoffset);
+ if (nodeoffset < 0)
+ return nodeoffset;
+ len += namelen + 1;
+ }
+
+ /* in case of root pretend it's "/" */
+ if (len == 0)
+ len++;
+ return len;
+}
+
+/**
+ * overlay_symbol_update - Update the symbols of base tree after a merge
+ * @fdt: Base Device Tree blob
+ * @fdto: Device tree overlay blob
+ *
+ * overlay_symbol_update() updates the symbols of the base tree with the
+ * symbols of the applied overlay
+ *
+ * This is the last step in the device tree overlay application
+ * process, allowing the reference of overlay symbols by subsequent
+ * overlay operations.
+ *
+ * returns:
+ * 0 on success
+ * Negative error code on failure
+ */
+static int overlay_symbol_update(void *fdt, void *fdto)
+{
+ int root_sym, ov_sym, prop, path_len, fragment, target;
+ int len, frag_name_len, ret, rel_path_len;
+ const char *s, *e;
+ const char *path;
+ const char *name;
+ const char *frag_name;
+ const char *rel_path;
+ const char *target_path;
+ char *buf;
+ void *p;
+
+ ov_sym = fdt_subnode_offset(fdto, 0, "__symbols__");
+
+ /* if no overlay symbols exist, there is nothing to do */
+ if (ov_sym < 0)
+ return 0;
+
+ root_sym = fdt_subnode_offset(fdt, 0, "__symbols__");
+
+ /* if no root symbols exist we should create them */
+ if (root_sym == -FDT_ERR_NOTFOUND)
+ root_sym = fdt_add_subnode(fdt, 0, "__symbols__");
+
+ /* any error is fatal now */
+ if (root_sym < 0)
+ return root_sym;
+
+ /* iterate over each overlay symbol */
+ fdt_for_each_property_offset(prop, fdto, ov_sym) {
+ path = fdt_getprop_by_offset(fdto, prop, &name, &path_len);
+ if (!path)
+ return path_len;
+
+ /* verify it's a string property (terminated by a single \0) */
+ if (path_len < 1 || memchr(path, '\0', path_len) != &path[path_len - 1])
+ return -FDT_ERR_BADVALUE;
+
+ /* keep end marker to avoid strlen() */
+ e = path + path_len;
+
+ if (*path != '/')
+ return -FDT_ERR_BADVALUE;
+
+ /* get fragment name first */
+ s = strchr(path + 1, '/');
+ if (!s) {
+ /* Symbol refers to something that won't end
+ * up in the target tree */
+ continue;
+ }
+
+ frag_name = path + 1;
+ frag_name_len = s - path - 1;
+
+ /* verify format; safe since "s" lies in \0 terminated prop */
+ len = sizeof("/__overlay__/") - 1;
+ if ((e - s) > len && (memcmp(s, "/__overlay__/", len) == 0)) {
+ /* /<fragment-name>/__overlay__/<relative-subnode-path> */
+ rel_path = s + len;
+ rel_path_len = e - rel_path - 1;
+ } else if ((e - s) == len
+ && (memcmp(s, "/__overlay__", len - 1) == 0)) {
+ /* /<fragment-name>/__overlay__ */
+ rel_path = "";
+ rel_path_len = 0;
+ } else {
+ /* Symbol refers to something that won't end
+ * up in the target tree */
+ continue;
+ }
+
+ /* find the fragment index in which the symbol lies */
+ ret = fdt_subnode_offset_namelen(fdto, 0, frag_name,
+ frag_name_len);
+ /* not found? */
+ if (ret < 0)
+ return -FDT_ERR_BADOVERLAY;
+ fragment = ret;
+
+ /* an __overlay__ subnode must exist */
+ ret = fdt_subnode_offset(fdto, fragment, "__overlay__");
+ if (ret < 0)
+ return -FDT_ERR_BADOVERLAY;
+
+ /* get the target of the fragment */
+ ret = overlay_get_target(fdt, fdto, fragment, &target_path);
+ if (ret < 0)
+ return ret;
+ target = ret;
+
+ /* if we have a target path, use its length; else compute it */
+ if (!target_path) {
+ ret = get_path_len(fdt, target);
+ if (ret < 0)
+ return ret;
+ len = ret;
+ } else {
+ len = strlen(target_path);
+ }
+
+ ret = fdt_setprop_placeholder(fdt, root_sym, name,
+ len + (len > 1) + rel_path_len + 1, &p);
+ if (ret < 0)
+ return ret;
+
+ if (!target_path) {
+ /* look the target up again, in case fdt_setprop_placeholder() moved nodes */
+ ret = overlay_get_target(fdt, fdto, fragment, &target_path);
+ if (ret < 0)
+ return ret;
+ target = ret;
+ }
+
+ buf = p;
+ if (len > 1) { /* target is not root */
+ if (!target_path) {
+ ret = fdt_get_path(fdt, target, buf, len + 1);
+ if (ret < 0)
+ return ret;
+ } else
+ memcpy(buf, target_path, len + 1);
+
+ } else
+ len--;
+
+ buf[len] = '/';
+ memcpy(buf + len + 1, rel_path, rel_path_len);
+ buf[len + 1 + rel_path_len] = '\0';
+ }
+
+ return 0;
+}
+
+int fdt_overlay_apply(void *fdt, void *fdto)
+{
+ uint32_t delta;
+ int ret;
+
+ FDT_RO_PROBE(fdt);
+ FDT_RO_PROBE(fdto);
+
+ ret = fdt_find_max_phandle(fdt, &delta);
+ if (ret)
+ goto err;
+
+ ret = overlay_adjust_local_phandles(fdto, delta);
+ if (ret)
+ goto err;
+
+ ret = overlay_update_local_references(fdto, delta);
+ if (ret)
+ goto err;
+
+ ret = overlay_fixup_phandles(fdt, fdto);
+ if (ret)
+ goto err;
+
+ ret = overlay_merge(fdt, fdto);
+ if (ret)
+ goto err;
+
+ ret = overlay_symbol_update(fdt, fdto);
+ if (ret)
+ goto err;
+
+ /*
+ * Applying the overlay damaged it (its phandles and
+ * magic were rewritten), so erase its magic to keep
+ * it from being reused.
+ */
+ fdt_set_magic(fdto, ~0);
+
+ return 0;
+
+err:
+ /*
+ * The overlay might have been damaged, erase its magic.
+ */
+ fdt_set_magic(fdto, ~0);
+
+ /*
+ * The base device tree might have been damaged, erase its
+ * magic.
+ */
+ fdt_set_magic(fdt, ~0);
+
+ return ret;
+}
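A minimal sketch of the intended calling sequence, assuming libfdt.h is included (buffer size and error handling are illustrative; note that the overlay blob is clobbered even on success):

    static int apply_overlay(const void *base, void *overlay,
                             void *buf, int bufsize)
    {
        int ret;

        /* Copy the base tree into a larger buffer so the merge can grow */
        ret = fdt_open_into(base, buf, bufsize);
        if (ret)
            return ret;

        /* Merge; erases the overlay's magic whether or not it succeeds */
        return fdt_overlay_apply(buf, overlay);
    }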
diff --git a/tools/src/libfdt/fdt_ro.c b/tools/src/libfdt/fdt_ro.c
new file mode 100644
index 0000000..e7f8b62
--- /dev/null
+++ b/tools/src/libfdt/fdt_ro.c
@@ -0,0 +1,859 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause)
+/*
+ * libfdt - Flat Device Tree manipulation
+ * Copyright (C) 2006 David Gibson, IBM Corporation.
+ */
+#include "libfdt_env.h"
+
+#include "fdt.h"
+#include "libfdt.h"
+
+#include "libfdt_internal.h"
+
+static int fdt_nodename_eq_(const void *fdt, int offset,
+ const char *s, int len)
+{
+ int olen;
+ const char *p = fdt_get_name(fdt, offset, &olen);
+
+ if (!p || olen < len)
+ /* short match */
+ return 0;
+
+ if (memcmp(p, s, len) != 0)
+ return 0;
+
+ if (p[len] == '\0')
+ return 1;
+ else if (!memchr(s, '@', len) && (p[len] == '@'))
+ return 1;
+ else
+ return 0;
+}
+
+const char *fdt_get_string(const void *fdt, int stroffset, int *lenp)
+{
+ int32_t totalsize;
+ uint32_t absoffset;
+ size_t len;
+ int err;
+ const char *s, *n;
+
+ if (can_assume(VALID_INPUT)) {
+ s = (const char *)fdt + fdt_off_dt_strings(fdt) + stroffset;
+
+ if (lenp)
+ *lenp = strlen(s);
+ return s;
+ }
+ totalsize = fdt_ro_probe_(fdt);
+ err = totalsize;
+ if (totalsize < 0)
+ goto fail;
+
+ err = -FDT_ERR_BADOFFSET;
+ absoffset = stroffset + fdt_off_dt_strings(fdt);
+ if (absoffset >= (unsigned)totalsize)
+ goto fail;
+ len = totalsize - absoffset;
+
+ if (fdt_magic(fdt) == FDT_MAGIC) {
+ if (stroffset < 0)
+ goto fail;
+ if (can_assume(LATEST) || fdt_version(fdt) >= 17) {
+ if ((unsigned)stroffset >= fdt_size_dt_strings(fdt))
+ goto fail;
+ if ((fdt_size_dt_strings(fdt) - stroffset) < len)
+ len = fdt_size_dt_strings(fdt) - stroffset;
+ }
+ } else if (fdt_magic(fdt) == FDT_SW_MAGIC) {
+ unsigned int sw_stroffset = -stroffset;
+
+ if ((stroffset >= 0) ||
+ (sw_stroffset > fdt_size_dt_strings(fdt)))
+ goto fail;
+ if (sw_stroffset < len)
+ len = sw_stroffset;
+ } else {
+ err = -FDT_ERR_INTERNAL;
+ goto fail;
+ }
+
+ s = (const char *)fdt + absoffset;
+ n = memchr(s, '\0', len);
+ if (!n) {
+ /* missing terminating NUL */
+ err = -FDT_ERR_TRUNCATED;
+ goto fail;
+ }
+
+ if (lenp)
+ *lenp = n - s;
+ return s;
+
+fail:
+ if (lenp)
+ *lenp = err;
+ return NULL;
+}
+
+const char *fdt_string(const void *fdt, int stroffset)
+{
+ return fdt_get_string(fdt, stroffset, NULL);
+}
+
+static int fdt_string_eq_(const void *fdt, int stroffset,
+ const char *s, int len)
+{
+ int slen;
+ const char *p = fdt_get_string(fdt, stroffset, &slen);
+
+ return p && (slen == len) && (memcmp(p, s, len) == 0);
+}
+
+int fdt_find_max_phandle(const void *fdt, uint32_t *phandle)
+{
+ uint32_t max = 0;
+ int offset = -1;
+
+ while (true) {
+ uint32_t value;
+
+ offset = fdt_next_node(fdt, offset, NULL);
+ if (offset < 0) {
+ if (offset == -FDT_ERR_NOTFOUND)
+ break;
+
+ return offset;
+ }
+
+ value = fdt_get_phandle(fdt, offset);
+
+ if (value > max)
+ max = value;
+ }
+
+ if (phandle)
+ *phandle = max;
+
+ return 0;
+}
+
+int fdt_generate_phandle(const void *fdt, uint32_t *phandle)
+{
+ uint32_t max;
+ int err;
+
+ err = fdt_find_max_phandle(fdt, &max);
+ if (err < 0)
+ return err;
+
+ if (max == FDT_MAX_PHANDLE)
+ return -FDT_ERR_NOPHANDLES;
+
+ if (phandle)
+ *phandle = max + 1;
+
+ return 0;
+}
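A sketch of allocating a fresh phandle with the helper above and attaching it to a node (uses fdt_setprop() from fdt_rw.c; illustrative only):

    static int assign_phandle(void *fdt, int node)
    {
        uint32_t phandle;
        fdt32_t val;
        int err;

        err = fdt_generate_phandle(fdt, &phandle);
        if (err)
            return err;                  /* e.g. -FDT_ERR_NOPHANDLES */

        val = cpu_to_fdt32(phandle);     /* stored big-endian */
        return fdt_setprop(fdt, node, "phandle", &val, sizeof(val));
    }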
+
+static const struct fdt_reserve_entry *fdt_mem_rsv(const void *fdt, int n)
+{
+ unsigned int offset = n * sizeof(struct fdt_reserve_entry);
+ unsigned int absoffset = fdt_off_mem_rsvmap(fdt) + offset;
+
+ if (!can_assume(VALID_INPUT)) {
+ if (absoffset < fdt_off_mem_rsvmap(fdt))
+ return NULL;
+ if (absoffset > fdt_totalsize(fdt) -
+ sizeof(struct fdt_reserve_entry))
+ return NULL;
+ }
+ return fdt_mem_rsv_(fdt, n);
+}
+
+int fdt_get_mem_rsv(const void *fdt, int n, uint64_t *address, uint64_t *size)
+{
+ const struct fdt_reserve_entry *re;
+
+ FDT_RO_PROBE(fdt);
+ re = fdt_mem_rsv(fdt, n);
+ if (!can_assume(VALID_INPUT) && !re)
+ return -FDT_ERR_BADOFFSET;
+
+ *address = fdt64_ld(&re->address);
+ *size = fdt64_ld(&re->size);
+ return 0;
+}
+
+int fdt_num_mem_rsv(const void *fdt)
+{
+ int i;
+ const struct fdt_reserve_entry *re;
+
+ for (i = 0; (re = fdt_mem_rsv(fdt, i)) != NULL; i++) {
+ if (fdt64_ld(&re->size) == 0)
+ return i;
+ }
+ return -FDT_ERR_TRUNCATED;
+}
+
+static int nextprop_(const void *fdt, int offset)
+{
+ uint32_t tag;
+ int nextoffset;
+
+ do {
+ tag = fdt_next_tag(fdt, offset, &nextoffset);
+
+ switch (tag) {
+ case FDT_END:
+ if (nextoffset >= 0)
+ return -FDT_ERR_BADSTRUCTURE;
+ else
+ return nextoffset;
+
+ case FDT_PROP:
+ return offset;
+ }
+ offset = nextoffset;
+ } while (tag == FDT_NOP);
+
+ return -FDT_ERR_NOTFOUND;
+}
+
+int fdt_subnode_offset_namelen(const void *fdt, int offset,
+ const char *name, int namelen)
+{
+ int depth;
+
+ FDT_RO_PROBE(fdt);
+
+ for (depth = 0;
+ (offset >= 0) && (depth >= 0);
+ offset = fdt_next_node(fdt, offset, &depth))
+ if ((depth == 1)
+ && fdt_nodename_eq_(fdt, offset, name, namelen))
+ return offset;
+
+ if (depth < 0)
+ return -FDT_ERR_NOTFOUND;
+ return offset; /* error */
+}
+
+int fdt_subnode_offset(const void *fdt, int parentoffset,
+ const char *name)
+{
+ return fdt_subnode_offset_namelen(fdt, parentoffset, name, strlen(name));
+}
+
+int fdt_path_offset_namelen(const void *fdt, const char *path, int namelen)
+{
+ const char *end = path + namelen;
+ const char *p = path;
+ int offset = 0;
+
+ FDT_RO_PROBE(fdt);
+
+ /* see if we have an alias */
+ if (*path != '/') {
+ const char *q = memchr(path, '/', end - p);
+
+ if (!q)
+ q = end;
+
+ p = fdt_get_alias_namelen(fdt, p, q - p);
+ if (!p)
+ return -FDT_ERR_BADPATH;
+ offset = fdt_path_offset(fdt, p);
+
+ p = q;
+ }
+
+ while (p < end) {
+ const char *q;
+
+ while (*p == '/') {
+ p++;
+ if (p == end)
+ return offset;
+ }
+ q = memchr(p, '/', end - p);
+ if (! q)
+ q = end;
+
+ offset = fdt_subnode_offset_namelen(fdt, offset, p, q-p);
+ if (offset < 0)
+ return offset;
+
+ p = q;
+ }
+
+ return offset;
+}
+
+int fdt_path_offset(const void *fdt, const char *path)
+{
+ return fdt_path_offset_namelen(fdt, path, strlen(path));
+}
+
+const char *fdt_get_name(const void *fdt, int nodeoffset, int *len)
+{
+ const struct fdt_node_header *nh = fdt_offset_ptr_(fdt, nodeoffset);
+ const char *nameptr;
+ int err;
+
+ if (((err = fdt_ro_probe_(fdt)) < 0)
+ || ((err = fdt_check_node_offset_(fdt, nodeoffset)) < 0))
+ goto fail;
+
+ nameptr = nh->name;
+
+ if (!can_assume(LATEST) && fdt_version(fdt) < 0x10) {
+ /*
+ * For old FDT versions, match the naming conventions of V16:
+ * give only the leaf name (after all /). The actual tree
+ * contents are loosely checked.
+ */
+ const char *leaf;
+ leaf = strrchr(nameptr, '/');
+ if (leaf == NULL) {
+ err = -FDT_ERR_BADSTRUCTURE;
+ goto fail;
+ }
+ nameptr = leaf+1;
+ }
+
+ if (len)
+ *len = strlen(nameptr);
+
+ return nameptr;
+
+ fail:
+ if (len)
+ *len = err;
+ return NULL;
+}
+
+int fdt_first_property_offset(const void *fdt, int nodeoffset)
+{
+ int offset;
+
+ if ((offset = fdt_check_node_offset_(fdt, nodeoffset)) < 0)
+ return offset;
+
+ return nextprop_(fdt, offset);
+}
+
+int fdt_next_property_offset(const void *fdt, int offset)
+{
+ if ((offset = fdt_check_prop_offset_(fdt, offset)) < 0)
+ return offset;
+
+ return nextprop_(fdt, offset);
+}
+
+static const struct fdt_property *fdt_get_property_by_offset_(const void *fdt,
+ int offset,
+ int *lenp)
+{
+ int err;
+ const struct fdt_property *prop;
+
+ if (!can_assume(VALID_INPUT) &&
+ (err = fdt_check_prop_offset_(fdt, offset)) < 0) {
+ if (lenp)
+ *lenp = err;
+ return NULL;
+ }
+
+ prop = fdt_offset_ptr_(fdt, offset);
+
+ if (lenp)
+ *lenp = fdt32_ld(&prop->len);
+
+ return prop;
+}
+
+const struct fdt_property *fdt_get_property_by_offset(const void *fdt,
+ int offset,
+ int *lenp)
+{
+ /* Prior to version 16, properties may need realignment
+ * and this API does not work. fdt_getprop_*() will, however. */
+
+ if (!can_assume(LATEST) && fdt_version(fdt) < 0x10) {
+ if (lenp)
+ *lenp = -FDT_ERR_BADVERSION;
+ return NULL;
+ }
+
+ return fdt_get_property_by_offset_(fdt, offset, lenp);
+}
+
+static const struct fdt_property *fdt_get_property_namelen_(const void *fdt,
+ int offset,
+ const char *name,
+ int namelen,
+ int *lenp,
+ int *poffset)
+{
+ for (offset = fdt_first_property_offset(fdt, offset);
+ (offset >= 0);
+ (offset = fdt_next_property_offset(fdt, offset))) {
+ const struct fdt_property *prop;
+
+ prop = fdt_get_property_by_offset_(fdt, offset, lenp);
+ if (!can_assume(LIBFDT_FLAWLESS) && !prop) {
+ offset = -FDT_ERR_INTERNAL;
+ break;
+ }
+ if (fdt_string_eq_(fdt, fdt32_ld(&prop->nameoff),
+ name, namelen)) {
+ if (poffset)
+ *poffset = offset;
+ return prop;
+ }
+ }
+
+ if (lenp)
+ *lenp = offset;
+ return NULL;
+}
+
+
+const struct fdt_property *fdt_get_property_namelen(const void *fdt,
+ int offset,
+ const char *name,
+ int namelen, int *lenp)
+{
+ /* Prior to version 16, properties may need realignment
+ * and this API does not work. fdt_getprop_*() will, however. */
+ if (!can_assume(LATEST) && fdt_version(fdt) < 0x10) {
+ if (lenp)
+ *lenp = -FDT_ERR_BADVERSION;
+ return NULL;
+ }
+
+ return fdt_get_property_namelen_(fdt, offset, name, namelen, lenp,
+ NULL);
+}
+
+
+const struct fdt_property *fdt_get_property(const void *fdt,
+ int nodeoffset,
+ const char *name, int *lenp)
+{
+ return fdt_get_property_namelen(fdt, nodeoffset, name,
+ strlen(name), lenp);
+}
+
+const void *fdt_getprop_namelen(const void *fdt, int nodeoffset,
+ const char *name, int namelen, int *lenp)
+{
+ int poffset;
+ const struct fdt_property *prop;
+
+ prop = fdt_get_property_namelen_(fdt, nodeoffset, name, namelen, lenp,
+ &poffset);
+ if (!prop)
+ return NULL;
+
+ /* Handle realignment */
+ if (!can_assume(LATEST) && fdt_version(fdt) < 0x10 &&
+ (poffset + sizeof(*prop)) % 8 && fdt32_ld(&prop->len) >= 8)
+ return prop->data + 4;
+ return prop->data;
+}
+
+const void *fdt_getprop_by_offset(const void *fdt, int offset,
+ const char **namep, int *lenp)
+{
+ const struct fdt_property *prop;
+
+ prop = fdt_get_property_by_offset_(fdt, offset, lenp);
+ if (!prop)
+ return NULL;
+ if (namep) {
+ const char *name;
+ int namelen;
+
+ if (!can_assume(VALID_INPUT)) {
+ name = fdt_get_string(fdt, fdt32_ld(&prop->nameoff),
+ &namelen);
+ if (!name) {
+ if (lenp)
+ *lenp = namelen;
+ return NULL;
+ }
+ *namep = name;
+ } else {
+ *namep = fdt_string(fdt, fdt32_ld(&prop->nameoff));
+ }
+ }
+
+ /* Handle realignment */
+ if (!can_assume(LATEST) && fdt_version(fdt) < 0x10 &&
+ (offset + sizeof(*prop)) % 8 && fdt32_ld(&prop->len) >= 8)
+ return prop->data + 4;
+ return prop->data;
+}
+
+const void *fdt_getprop(const void *fdt, int nodeoffset,
+ const char *name, int *lenp)
+{
+ return fdt_getprop_namelen(fdt, nodeoffset, name, strlen(name), lenp);
+}
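Typical read-side usage, as a sketch (the path and property names are illustrative); on failure the length out-parameter carries the negative FDT_ERR_* code:

    static int read_timebase(const void *fdt, uint32_t *freq)
    {
        const fdt32_t *prop;
        int node, len;

        node = fdt_path_offset(fdt, "/cpus");
        if (node < 0)
            return node;

        prop = fdt_getprop(fdt, node, "timebase-frequency", &len);
        if (!prop)
            return len;                     /* negative FDT_ERR_* code */
        if (len != (int)sizeof(*prop))
            return -FDT_ERR_BADVALUE;

        *freq = fdt32_ld(prop);             /* cells are big-endian */
        return 0;
    }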
+
+uint32_t fdt_get_phandle(const void *fdt, int nodeoffset)
+{
+ const fdt32_t *php;
+ int len;
+
+ /* FIXME: This is a bit sub-optimal, since we potentially scan
+ * over all the properties twice. */
+ php = fdt_getprop(fdt, nodeoffset, "phandle", &len);
+ if (!php || (len != sizeof(*php))) {
+ php = fdt_getprop(fdt, nodeoffset, "linux,phandle", &len);
+ if (!php || (len != sizeof(*php)))
+ return 0;
+ }
+
+ return fdt32_ld(php);
+}
+
+const char *fdt_get_alias_namelen(const void *fdt,
+ const char *name, int namelen)
+{
+ int aliasoffset;
+
+ aliasoffset = fdt_path_offset(fdt, "/aliases");
+ if (aliasoffset < 0)
+ return NULL;
+
+ return fdt_getprop_namelen(fdt, aliasoffset, name, namelen, NULL);
+}
+
+const char *fdt_get_alias(const void *fdt, const char *name)
+{
+ return fdt_get_alias_namelen(fdt, name, strlen(name));
+}
+
+int fdt_get_path(const void *fdt, int nodeoffset, char *buf, int buflen)
+{
+ int pdepth = 0, p = 0;
+ int offset, depth, namelen;
+ const char *name;
+
+ FDT_RO_PROBE(fdt);
+
+ if (buflen < 2)
+ return -FDT_ERR_NOSPACE;
+
+ for (offset = 0, depth = 0;
+ (offset >= 0) && (offset <= nodeoffset);
+ offset = fdt_next_node(fdt, offset, &depth)) {
+ while (pdepth > depth) {
+ do {
+ p--;
+ } while (buf[p-1] != '/');
+ pdepth--;
+ }
+
+ if (pdepth >= depth) {
+ name = fdt_get_name(fdt, offset, &namelen);
+ if (!name)
+ return namelen;
+ if ((p + namelen + 1) <= buflen) {
+ memcpy(buf + p, name, namelen);
+ p += namelen;
+ buf[p++] = '/';
+ pdepth++;
+ }
+ }
+
+ if (offset == nodeoffset) {
+ if (pdepth < (depth + 1))
+ return -FDT_ERR_NOSPACE;
+
+ if (p > 1) /* special case so that root path is "/", not "" */
+ p--;
+ buf[p] = '\0';
+ return 0;
+ }
+ }
+
+ if ((offset == -FDT_ERR_NOTFOUND) || (offset >= 0))
+ return -FDT_ERR_BADOFFSET;
+ else if (offset == -FDT_ERR_BADOFFSET)
+ return -FDT_ERR_BADSTRUCTURE;
+
+ return offset; /* error from fdt_next_node() */
+}
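Usage sketch for fdt_get_path(); the buffer size and the printf() are illustrative assumptions:

    static int print_node_path(const void *fdt, int nodeoffset)
    {
        char path[256];
        int err;

        err = fdt_get_path(fdt, nodeoffset, path, sizeof(path));
        if (err)
            return err;   /* -FDT_ERR_NOSPACE if 256 bytes is too small */

        printf("%s\n", path);             /* "/" for the root node */
        return 0;
    }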
+
+int fdt_supernode_atdepth_offset(const void *fdt, int nodeoffset,
+ int supernodedepth, int *nodedepth)
+{
+ int offset, depth;
+ int supernodeoffset = -FDT_ERR_INTERNAL;
+
+ FDT_RO_PROBE(fdt);
+
+ if (supernodedepth < 0)
+ return -FDT_ERR_NOTFOUND;
+
+ for (offset = 0, depth = 0;
+ (offset >= 0) && (offset <= nodeoffset);
+ offset = fdt_next_node(fdt, offset, &depth)) {
+ if (depth == supernodedepth)
+ supernodeoffset = offset;
+
+ if (offset == nodeoffset) {
+ if (nodedepth)
+ *nodedepth = depth;
+
+ if (supernodedepth > depth)
+ return -FDT_ERR_NOTFOUND;
+ else
+ return supernodeoffset;
+ }
+ }
+
+ if (!can_assume(VALID_INPUT)) {
+ if ((offset == -FDT_ERR_NOTFOUND) || (offset >= 0))
+ return -FDT_ERR_BADOFFSET;
+ else if (offset == -FDT_ERR_BADOFFSET)
+ return -FDT_ERR_BADSTRUCTURE;
+ }
+
+ return offset; /* error from fdt_next_node() */
+}
+
+int fdt_node_depth(const void *fdt, int nodeoffset)
+{
+ int nodedepth;
+ int err;
+
+ err = fdt_supernode_atdepth_offset(fdt, nodeoffset, 0, &nodedepth);
+ if (err)
+ return (can_assume(LIBFDT_FLAWLESS) || err < 0) ? err :
+ -FDT_ERR_INTERNAL;
+ return nodedepth;
+}
+
+int fdt_parent_offset(const void *fdt, int nodeoffset)
+{
+ int nodedepth = fdt_node_depth(fdt, nodeoffset);
+
+ if (nodedepth < 0)
+ return nodedepth;
+ return fdt_supernode_atdepth_offset(fdt, nodeoffset,
+ nodedepth - 1, NULL);
+}
+
+int fdt_node_offset_by_prop_value(const void *fdt, int startoffset,
+ const char *propname,
+ const void *propval, int proplen)
+{
+ int offset;
+ const void *val;
+ int len;
+
+ FDT_RO_PROBE(fdt);
+
+ /* FIXME: The algorithm here is pretty horrible: we scan each
+ * property of a node in fdt_getprop(), then if that didn't
+ * find what we want, we scan over them again making our way
+ * to the next node. Still it's the easiest to implement
+ * approach; performance can come later. */
+ for (offset = fdt_next_node(fdt, startoffset, NULL);
+ offset >= 0;
+ offset = fdt_next_node(fdt, offset, NULL)) {
+ val = fdt_getprop(fdt, offset, propname, &len);
+ if (val && (len == proplen)
+ && (memcmp(val, propval, len) == 0))
+ return offset;
+ }
+
+ return offset; /* error from fdt_next_node() */
+}
+
+int fdt_node_offset_by_phandle(const void *fdt, uint32_t phandle)
+{
+ int offset;
+
+ if ((phandle == 0) || (phandle == ~0U))
+ return -FDT_ERR_BADPHANDLE;
+
+ FDT_RO_PROBE(fdt);
+
+ /* FIXME: The algorithm here is pretty horrible: we
+ * potentially scan each property of a node in
+ * fdt_get_phandle(), then if that didn't find what
+ * we want, we scan over them again making our way to the next
+ * node. Still it's the easiest to implement approach;
+ * performance can come later. */
+ for (offset = fdt_next_node(fdt, -1, NULL);
+ offset >= 0;
+ offset = fdt_next_node(fdt, offset, NULL)) {
+ if (fdt_get_phandle(fdt, offset) == phandle)
+ return offset;
+ }
+
+ return offset; /* error from fdt_next_node() */
+}
+
+int fdt_stringlist_contains(const char *strlist, int listlen, const char *str)
+{
+ int len = strlen(str);
+ const char *p;
+
+ while (listlen >= len) {
+ if (memcmp(str, strlist, len+1) == 0)
+ return 1;
+ p = memchr(strlist, '\0', listlen);
+ if (!p)
+ return 0; /* malformed strlist */
+ listlen -= (p-strlist) + 1;
+ strlist = p + 1;
+ }
+ return 0;
+}
+
+int fdt_stringlist_count(const void *fdt, int nodeoffset, const char *property)
+{
+ const char *list, *end;
+ int length, count = 0;
+
+ list = fdt_getprop(fdt, nodeoffset, property, &length);
+ if (!list)
+ return length;
+
+ end = list + length;
+
+ while (list < end) {
+ length = strnlen(list, end - list) + 1;
+
+ /* Abort if the last string isn't properly NUL-terminated. */
+ if (list + length > end)
+ return -FDT_ERR_BADVALUE;
+
+ list += length;
+ count++;
+ }
+
+ return count;
+}
+
+int fdt_stringlist_search(const void *fdt, int nodeoffset, const char *property,
+ const char *string)
+{
+ int length, len, idx = 0;
+ const char *list, *end;
+
+ list = fdt_getprop(fdt, nodeoffset, property, &length);
+ if (!list)
+ return length;
+
+ len = strlen(string) + 1;
+ end = list + length;
+
+ while (list < end) {
+ length = strnlen(list, end - list) + 1;
+
+ /* Abort if the last string isn't properly NUL-terminated. */
+ if (list + length > end)
+ return -FDT_ERR_BADVALUE;
+
+ if (length == len && memcmp(list, string, length) == 0)
+ return idx;
+
+ list += length;
+ idx++;
+ }
+
+ return -FDT_ERR_NOTFOUND;
+}
+
+const char *fdt_stringlist_get(const void *fdt, int nodeoffset,
+ const char *property, int idx,
+ int *lenp)
+{
+ const char *list, *end;
+ int length;
+
+ list = fdt_getprop(fdt, nodeoffset, property, &length);
+ if (!list) {
+ if (lenp)
+ *lenp = length;
+
+ return NULL;
+ }
+
+ end = list + length;
+
+ while (list < end) {
+ length = strnlen(list, end - list) + 1;
+
+ /* Abort if the last string isn't properly NUL-terminated. */
+ if (list + length > end) {
+ if (lenp)
+ *lenp = -FDT_ERR_BADVALUE;
+
+ return NULL;
+ }
+
+ if (idx == 0) {
+ if (lenp)
+ *lenp = length - 1;
+
+ return list;
+ }
+
+ list += length;
+ idx--;
+ }
+
+ if (lenp)
+ *lenp = -FDT_ERR_NOTFOUND;
+
+ return NULL;
+}
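The three stringlist helpers combine naturally when inspecting "compatible" properties; a sketch (the compatible string and the printf() are illustrative):

    static int match_and_list(const void *fdt, int node)
    {
        int i, n;

        /* index within the list, or a negative FDT_ERR_* code */
        if (fdt_stringlist_search(fdt, node, "compatible", "acme,uart") < 0)
            return 0;

        n = fdt_stringlist_count(fdt, node, "compatible");
        for (i = 0; i < n; i++)
            printf("%s\n",
                   fdt_stringlist_get(fdt, node, "compatible", i, NULL));
        return 1;
    }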
+
+int fdt_node_check_compatible(const void *fdt, int nodeoffset,
+ const char *compatible)
+{
+ const void *prop;
+ int len;
+
+ prop = fdt_getprop(fdt, nodeoffset, "compatible", &len);
+ if (!prop)
+ return len;
+
+ return !fdt_stringlist_contains(prop, len, compatible);
+}
+
+int fdt_node_offset_by_compatible(const void *fdt, int startoffset,
+ const char *compatible)
+{
+ int offset, err;
+
+ FDT_RO_PROBE(fdt);
+
+ /* FIXME: The algorithm here is pretty horrible: we scan each
+ * property of a node in fdt_node_check_compatible(), then if
+ * that didn't find what we want, we scan over them again
+ * making our way to the next node. Still it's the easiest to
+ * implement approach; performance can come later. */
+ for (offset = fdt_next_node(fdt, startoffset, NULL);
+ offset >= 0;
+ offset = fdt_next_node(fdt, offset, NULL)) {
+ err = fdt_node_check_compatible(fdt, offset, compatible);
+ if ((err < 0) && (err != -FDT_ERR_NOTFOUND))
+ return err;
+ else if (err == 0)
+ return offset;
+ }
+
+ return offset; /* error from fdt_next_node() */
+}
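The usual idiom for walking every node with a given compatible: a start offset of -1 begins the scan at the root, and each match seeds the next call (the string is illustrative):

    static int count_uarts(const void *fdt)
    {
        int node, count = 0;

        for (node = fdt_node_offset_by_compatible(fdt, -1, "acme,uart");
             node >= 0;
             node = fdt_node_offset_by_compatible(fdt, node, "acme,uart"))
            count++;

        return count;
    }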
diff --git a/tools/src/libfdt/fdt_rw.c b/tools/src/libfdt/fdt_rw.c
new file mode 100644
index 0000000..dd5c93e
--- /dev/null
+++ b/tools/src/libfdt/fdt_rw.c
@@ -0,0 +1,492 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause)
+/*
+ * libfdt - Flat Device Tree manipulation
+ * Copyright (C) 2006 David Gibson, IBM Corporation.
+ */
+#include "libfdt_env.h"
+
+#include "fdt.h"
+#include "libfdt.h"
+
+#include "libfdt_internal.h"
+
+static int fdt_blocks_misordered_(const void *fdt,
+ int mem_rsv_size, int struct_size)
+{
+ return (fdt_off_mem_rsvmap(fdt) < FDT_ALIGN(sizeof(struct fdt_header), 8))
+ || (fdt_off_dt_struct(fdt) <
+ (fdt_off_mem_rsvmap(fdt) + mem_rsv_size))
+ || (fdt_off_dt_strings(fdt) <
+ (fdt_off_dt_struct(fdt) + struct_size))
+ || (fdt_totalsize(fdt) <
+ (fdt_off_dt_strings(fdt) + fdt_size_dt_strings(fdt)));
+}
+
+static int fdt_rw_probe_(void *fdt)
+{
+ if (can_assume(VALID_DTB))
+ return 0;
+ FDT_RO_PROBE(fdt);
+
+ if (!can_assume(LATEST) && fdt_version(fdt) < 17)
+ return -FDT_ERR_BADVERSION;
+ if (fdt_blocks_misordered_(fdt, sizeof(struct fdt_reserve_entry),
+ fdt_size_dt_struct(fdt)))
+ return -FDT_ERR_BADLAYOUT;
+ if (!can_assume(LATEST) && fdt_version(fdt) > 17)
+ fdt_set_version(fdt, 17);
+
+ return 0;
+}
+
+#define FDT_RW_PROBE(fdt) \
+ { \
+ int err_; \
+ if ((err_ = fdt_rw_probe_(fdt)) != 0) \
+ return err_; \
+ }
+
+static inline unsigned int fdt_data_size_(void *fdt)
+{
+ return fdt_off_dt_strings(fdt) + fdt_size_dt_strings(fdt);
+}
+
+static int fdt_splice_(void *fdt, void *splicepoint, int oldlen, int newlen)
+{
+ char *p = splicepoint;
+ unsigned int dsize = fdt_data_size_(fdt);
+ size_t soff = p - (char *)fdt;
+
+ if ((oldlen < 0) || (soff + oldlen < soff) || (soff + oldlen > dsize))
+ return -FDT_ERR_BADOFFSET;
+ if ((p < (char *)fdt) || (dsize + newlen < (unsigned)oldlen))
+ return -FDT_ERR_BADOFFSET;
+ if (dsize - oldlen + newlen > fdt_totalsize(fdt))
+ return -FDT_ERR_NOSPACE;
+ memmove(p + newlen, p + oldlen, ((char *)fdt + dsize) - (p + oldlen));
+ return 0;
+}
+
+static int fdt_splice_mem_rsv_(void *fdt, struct fdt_reserve_entry *p,
+ int oldn, int newn)
+{
+ int delta = (newn - oldn) * sizeof(*p);
+ int err;
+ err = fdt_splice_(fdt, p, oldn * sizeof(*p), newn * sizeof(*p));
+ if (err)
+ return err;
+ fdt_set_off_dt_struct(fdt, fdt_off_dt_struct(fdt) + delta);
+ fdt_set_off_dt_strings(fdt, fdt_off_dt_strings(fdt) + delta);
+ return 0;
+}
+
+static int fdt_splice_struct_(void *fdt, void *p,
+ int oldlen, int newlen)
+{
+ int delta = newlen - oldlen;
+ int err;
+
+ if ((err = fdt_splice_(fdt, p, oldlen, newlen)))
+ return err;
+
+ fdt_set_size_dt_struct(fdt, fdt_size_dt_struct(fdt) + delta);
+ fdt_set_off_dt_strings(fdt, fdt_off_dt_strings(fdt) + delta);
+ return 0;
+}
+
+/* Must only be used to roll back in case of error */
+static void fdt_del_last_string_(void *fdt, const char *s)
+{
+ int newlen = strlen(s) + 1;
+
+ fdt_set_size_dt_strings(fdt, fdt_size_dt_strings(fdt) - newlen);
+}
+
+static int fdt_splice_string_(void *fdt, int newlen)
+{
+ void *p = (char *)fdt
+ + fdt_off_dt_strings(fdt) + fdt_size_dt_strings(fdt);
+ int err;
+
+ if ((err = fdt_splice_(fdt, p, 0, newlen)))
+ return err;
+
+ fdt_set_size_dt_strings(fdt, fdt_size_dt_strings(fdt) + newlen);
+ return 0;
+}
+
+/**
+ * fdt_find_add_string_() - Find or allocate a string
+ *
+ * @fdt: pointer to the device tree to check/adjust
+ * @s: string to find/add
+ * @allocated: Set to 0 if the string was found, 1 if not found and so
+ * allocated. Ignored if can_assume(NO_ROLLBACK)
+ * @return offset of string in the string table (whether found or added)
+ */
+static int fdt_find_add_string_(void *fdt, const char *s, int *allocated)
+{
+ char *strtab = (char *)fdt + fdt_off_dt_strings(fdt);
+ const char *p;
+ char *new;
+ int len = strlen(s) + 1;
+ int err;
+
+ if (!can_assume(NO_ROLLBACK))
+ *allocated = 0;
+
+ p = fdt_find_string_(strtab, fdt_size_dt_strings(fdt), s);
+ if (p)
+ /* found it */
+ return (p - strtab);
+
+ new = strtab + fdt_size_dt_strings(fdt);
+ err = fdt_splice_string_(fdt, len);
+ if (err)
+ return err;
+
+ if (!can_assume(NO_ROLLBACK))
+ *allocated = 1;
+
+ memcpy(new, s, len);
+ return (new - strtab);
+}
+
+int fdt_add_mem_rsv(void *fdt, uint64_t address, uint64_t size)
+{
+ struct fdt_reserve_entry *re;
+ int err;
+
+ FDT_RW_PROBE(fdt);
+
+ re = fdt_mem_rsv_w_(fdt, fdt_num_mem_rsv(fdt));
+ err = fdt_splice_mem_rsv_(fdt, re, 0, 1);
+ if (err)
+ return err;
+
+ re->address = cpu_to_fdt64(address);
+ re->size = cpu_to_fdt64(size);
+ return 0;
+}
+
+int fdt_del_mem_rsv(void *fdt, int n)
+{
+ struct fdt_reserve_entry *re = fdt_mem_rsv_w_(fdt, n);
+
+ FDT_RW_PROBE(fdt);
+
+ if (n >= fdt_num_mem_rsv(fdt))
+ return -FDT_ERR_NOTFOUND;
+
+ return fdt_splice_mem_rsv_(fdt, re, 1, 0);
+}
+
+static int fdt_resize_property_(void *fdt, int nodeoffset, const char *name,
+ int len, struct fdt_property **prop)
+{
+ int oldlen;
+ int err;
+
+ *prop = fdt_get_property_w(fdt, nodeoffset, name, &oldlen);
+ if (!*prop)
+ return oldlen;
+
+ if ((err = fdt_splice_struct_(fdt, (*prop)->data, FDT_TAGALIGN(oldlen),
+ FDT_TAGALIGN(len))))
+ return err;
+
+ (*prop)->len = cpu_to_fdt32(len);
+ return 0;
+}
+
+static int fdt_add_property_(void *fdt, int nodeoffset, const char *name,
+ int len, struct fdt_property **prop)
+{
+ int proplen;
+ int nextoffset;
+ int namestroff;
+ int err;
+ int allocated;
+
+ if ((nextoffset = fdt_check_node_offset_(fdt, nodeoffset)) < 0)
+ return nextoffset;
+
+ namestroff = fdt_find_add_string_(fdt, name, &allocated);
+ if (namestroff < 0)
+ return namestroff;
+
+ *prop = fdt_offset_ptr_w_(fdt, nextoffset);
+ proplen = sizeof(**prop) + FDT_TAGALIGN(len);
+
+ err = fdt_splice_struct_(fdt, *prop, 0, proplen);
+ if (err) {
+ /* Delete the string if we failed to add it */
+ if (!can_assume(NO_ROLLBACK) && allocated)
+ fdt_del_last_string_(fdt, name);
+ return err;
+ }
+
+ (*prop)->tag = cpu_to_fdt32(FDT_PROP);
+ (*prop)->nameoff = cpu_to_fdt32(namestroff);
+ (*prop)->len = cpu_to_fdt32(len);
+ return 0;
+}
+
+int fdt_set_name(void *fdt, int nodeoffset, const char *name)
+{
+ char *namep;
+ int oldlen, newlen;
+ int err;
+
+ FDT_RW_PROBE(fdt);
+
+ namep = (char *)(uintptr_t)fdt_get_name(fdt, nodeoffset, &oldlen);
+ if (!namep)
+ return oldlen;
+
+ newlen = strlen(name);
+
+ err = fdt_splice_struct_(fdt, namep, FDT_TAGALIGN(oldlen+1),
+ FDT_TAGALIGN(newlen+1));
+ if (err)
+ return err;
+
+ memcpy(namep, name, newlen+1);
+ return 0;
+}
+
+int fdt_setprop_placeholder(void *fdt, int nodeoffset, const char *name,
+ int len, void **prop_data)
+{
+ struct fdt_property *prop;
+ int err;
+
+ FDT_RW_PROBE(fdt);
+
+ err = fdt_resize_property_(fdt, nodeoffset, name, len, &prop);
+ if (err == -FDT_ERR_NOTFOUND)
+ err = fdt_add_property_(fdt, nodeoffset, name, len, &prop);
+ if (err)
+ return err;
+
+ *prop_data = prop->data;
+ return 0;
+}
+
+int fdt_setprop(void *fdt, int nodeoffset, const char *name,
+ const void *val, int len)
+{
+ void *prop_data;
+ int err;
+
+ err = fdt_setprop_placeholder(fdt, nodeoffset, name, len, &prop_data);
+ if (err)
+ return err;
+
+ if (len)
+ memcpy(prop_data, val, len);
+ return 0;
+}
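A write-side sketch (the property name and value are illustrative). Cell values must be stored big-endian, and -FDT_ERR_NOSPACE means the blob must first be re-opened into a larger buffer with fdt_open_into():

    static int set_clock_frequency(void *fdt, int node, uint32_t hz)
    {
        fdt32_t val = cpu_to_fdt32(hz);    /* FDT cells are big-endian */

        return fdt_setprop(fdt, node, "clock-frequency",
                           &val, sizeof(val));
    }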
+
+int fdt_appendprop(void *fdt, int nodeoffset, const char *name,
+ const void *val, int len)
+{
+ struct fdt_property *prop;
+ int err, oldlen, newlen;
+
+ FDT_RW_PROBE(fdt);
+
+ prop = fdt_get_property_w(fdt, nodeoffset, name, &oldlen);
+ if (prop) {
+ newlen = len + oldlen;
+ err = fdt_splice_struct_(fdt, prop->data,
+ FDT_TAGALIGN(oldlen),
+ FDT_TAGALIGN(newlen));
+ if (err)
+ return err;
+ prop->len = cpu_to_fdt32(newlen);
+ memcpy(prop->data + oldlen, val, len);
+ } else {
+ err = fdt_add_property_(fdt, nodeoffset, name, len, &prop);
+ if (err)
+ return err;
+ memcpy(prop->data, val, len);
+ }
+ return 0;
+}
+
+int fdt_delprop(void *fdt, int nodeoffset, const char *name)
+{
+ struct fdt_property *prop;
+ int len, proplen;
+
+ FDT_RW_PROBE(fdt);
+
+ prop = fdt_get_property_w(fdt, nodeoffset, name, &len);
+ if (!prop)
+ return len;
+
+ proplen = sizeof(*prop) + FDT_TAGALIGN(len);
+ return fdt_splice_struct_(fdt, prop, proplen, 0);
+}
+
+int fdt_add_subnode_namelen(void *fdt, int parentoffset,
+ const char *name, int namelen)
+{
+ struct fdt_node_header *nh;
+ int offset, nextoffset;
+ int nodelen;
+ int err;
+ uint32_t tag;
+ fdt32_t *endtag;
+
+ FDT_RW_PROBE(fdt);
+
+ offset = fdt_subnode_offset_namelen(fdt, parentoffset, name, namelen);
+ if (offset >= 0)
+ return -FDT_ERR_EXISTS;
+ else if (offset != -FDT_ERR_NOTFOUND)
+ return offset;
+
+ /* Try to place the new node after the parent's properties */
+ fdt_next_tag(fdt, parentoffset, &nextoffset); /* skip the BEGIN_NODE */
+ do {
+ offset = nextoffset;
+ tag = fdt_next_tag(fdt, offset, &nextoffset);
+ } while ((tag == FDT_PROP) || (tag == FDT_NOP));
+
+ nh = fdt_offset_ptr_w_(fdt, offset);
+ nodelen = sizeof(*nh) + FDT_TAGALIGN(namelen+1) + FDT_TAGSIZE;
+
+ err = fdt_splice_struct_(fdt, nh, 0, nodelen);
+ if (err)
+ return err;
+
+ nh->tag = cpu_to_fdt32(FDT_BEGIN_NODE);
+ memset(nh->name, 0, FDT_TAGALIGN(namelen+1));
+ memcpy(nh->name, name, namelen);
+ endtag = (fdt32_t *)((char *)nh + nodelen - FDT_TAGSIZE);
+ *endtag = cpu_to_fdt32(FDT_END_NODE);
+
+ return offset;
+}
+
+int fdt_add_subnode(void *fdt, int parentoffset, const char *name)
+{
+ return fdt_add_subnode_namelen(fdt, parentoffset, name, strlen(name));
+}
+
+int fdt_del_node(void *fdt, int nodeoffset)
+{
+ int endoffset;
+
+ FDT_RW_PROBE(fdt);
+
+ endoffset = fdt_node_end_offset_(fdt, nodeoffset);
+ if (endoffset < 0)
+ return endoffset;
+
+ return fdt_splice_struct_(fdt, fdt_offset_ptr_w_(fdt, nodeoffset),
+ endoffset - nodeoffset, 0);
+}
+
+static void fdt_packblocks_(const char *old, char *new,
+ int mem_rsv_size, int struct_size)
+{
+ int mem_rsv_off, struct_off, strings_off;
+
+ mem_rsv_off = FDT_ALIGN(sizeof(struct fdt_header), 8);
+ struct_off = mem_rsv_off + mem_rsv_size;
+ strings_off = struct_off + struct_size;
+
+ memmove(new + mem_rsv_off, old + fdt_off_mem_rsvmap(old), mem_rsv_size);
+ fdt_set_off_mem_rsvmap(new, mem_rsv_off);
+
+ memmove(new + struct_off, old + fdt_off_dt_struct(old), struct_size);
+ fdt_set_off_dt_struct(new, struct_off);
+ fdt_set_size_dt_struct(new, struct_size);
+
+ memmove(new + strings_off, old + fdt_off_dt_strings(old),
+ fdt_size_dt_strings(old));
+ fdt_set_off_dt_strings(new, strings_off);
+ fdt_set_size_dt_strings(new, fdt_size_dt_strings(old));
+}
+
+int fdt_open_into(const void *fdt, void *buf, int bufsize)
+{
+ int err;
+ int mem_rsv_size, struct_size;
+ int newsize;
+ const char *fdtstart = fdt;
+ const char *fdtend = fdtstart + fdt_totalsize(fdt);
+ char *tmp;
+
+ FDT_RO_PROBE(fdt);
+
+ mem_rsv_size = (fdt_num_mem_rsv(fdt)+1)
+ * sizeof(struct fdt_reserve_entry);
+
+ if (can_assume(LATEST) || fdt_version(fdt) >= 17) {
+ struct_size = fdt_size_dt_struct(fdt);
+ } else {
+ struct_size = 0;
+ while (fdt_next_tag(fdt, struct_size, &struct_size) != FDT_END)
+ ;
+ if (struct_size < 0)
+ return struct_size;
+ }
+
+ if (can_assume(LIBFDT_ORDER) ||
+ !fdt_blocks_misordered_(fdt, mem_rsv_size, struct_size)) {
+ /* no further work necessary */
+ err = fdt_move(fdt, buf, bufsize);
+ if (err)
+ return err;
+ fdt_set_version(buf, 17);
+ fdt_set_size_dt_struct(buf, struct_size);
+ fdt_set_totalsize(buf, bufsize);
+ return 0;
+ }
+
+ /* Need to reorder */
+ newsize = FDT_ALIGN(sizeof(struct fdt_header), 8) + mem_rsv_size
+ + struct_size + fdt_size_dt_strings(fdt);
+
+ if (bufsize < newsize)
+ return -FDT_ERR_NOSPACE;
+
+ /* First attempt to build converted tree at beginning of buffer */
+ tmp = buf;
+ /* But if that overlaps with the old tree... */
+ if (((tmp + newsize) > fdtstart) && (tmp < fdtend)) {
+ /* Try right after the old tree instead */
+ tmp = (char *)(uintptr_t)fdtend;
+ if ((tmp + newsize) > ((char *)buf + bufsize))
+ return -FDT_ERR_NOSPACE;
+ }
+
+ fdt_packblocks_(fdt, tmp, mem_rsv_size, struct_size);
+ memmove(buf, tmp, newsize);
+
+ fdt_set_magic(buf, FDT_MAGIC);
+ fdt_set_totalsize(buf, bufsize);
+ fdt_set_version(buf, 17);
+ fdt_set_last_comp_version(buf, 16);
+ fdt_set_boot_cpuid_phys(buf, fdt_boot_cpuid_phys(fdt));
+
+ return 0;
+}
+
+int fdt_pack(void *fdt)
+{
+ int mem_rsv_size;
+
+ FDT_RW_PROBE(fdt);
+
+ mem_rsv_size = (fdt_num_mem_rsv(fdt)+1)
+ * sizeof(struct fdt_reserve_entry);
+ fdt_packblocks_(fdt, fdt, mem_rsv_size, fdt_size_dt_struct(fdt));
+ fdt_set_totalsize(fdt, fdt_data_size_(fdt));
+
+ return 0;
+}
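The canonical read-modify-write cycle built from these two calls, as a sketch (sizes illustrative): expand into a work buffer, edit, then trim the slack before storing the result:

    static int edit_and_shrink(const void *dtb, void *work, int worksize)
    {
        int err;

        err = fdt_open_into(dtb, work, worksize);   /* v17, with slack */
        if (err)
            return err;

        /* ... fdt_setprop()/fdt_add_subnode()/... edits go here ... */

        err = fdt_pack(work);                       /* drop the slack */
        if (err)
            return err;

        /* fdt_totalsize(work) is now the minimal size to copy out */
        return 0;
    }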
diff --git a/tools/src/libfdt/fdt_strerror.c b/tools/src/libfdt/fdt_strerror.c
new file mode 100644
index 0000000..218b323
--- /dev/null
+++ b/tools/src/libfdt/fdt_strerror.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause)
+/*
+ * libfdt - Flat Device Tree manipulation
+ * Copyright (C) 2006 David Gibson, IBM Corporation.
+ */
+#include "libfdt_env.h"
+
+#include "fdt.h"
+#include "libfdt.h"
+
+#include "libfdt_internal.h"
+
+struct fdt_errtabent {
+ const char *str;
+};
+
+#define FDT_ERRTABENT(val) \
+ [(val)] = { .str = #val, }
+
+static struct fdt_errtabent fdt_errtable[] = {
+ FDT_ERRTABENT(FDT_ERR_NOTFOUND),
+ FDT_ERRTABENT(FDT_ERR_EXISTS),
+ FDT_ERRTABENT(FDT_ERR_NOSPACE),
+
+ FDT_ERRTABENT(FDT_ERR_BADOFFSET),
+ FDT_ERRTABENT(FDT_ERR_BADPATH),
+ FDT_ERRTABENT(FDT_ERR_BADPHANDLE),
+ FDT_ERRTABENT(FDT_ERR_BADSTATE),
+
+ FDT_ERRTABENT(FDT_ERR_TRUNCATED),
+ FDT_ERRTABENT(FDT_ERR_BADMAGIC),
+ FDT_ERRTABENT(FDT_ERR_BADVERSION),
+ FDT_ERRTABENT(FDT_ERR_BADSTRUCTURE),
+ FDT_ERRTABENT(FDT_ERR_BADLAYOUT),
+ FDT_ERRTABENT(FDT_ERR_INTERNAL),
+ FDT_ERRTABENT(FDT_ERR_BADNCELLS),
+ FDT_ERRTABENT(FDT_ERR_BADVALUE),
+ FDT_ERRTABENT(FDT_ERR_BADOVERLAY),
+ FDT_ERRTABENT(FDT_ERR_NOPHANDLES),
+ FDT_ERRTABENT(FDT_ERR_BADFLAGS),
+};
+#define FDT_ERRTABSIZE ((int)(sizeof(fdt_errtable) / sizeof(fdt_errtable[0])))
+
+const char *fdt_strerror(int errval)
+{
+ if (errval > 0)
+ return "<valid offset/length>";
+ else if (errval == 0)
+ return "<no error>";
+ else if (-errval < FDT_ERRTABSIZE) {
+ const char *s = fdt_errtable[-errval].str;
+
+ if (s)
+ return s;
+ }
+
+ return "<unknown error>";
+}
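Since nearly every libfdt call reports failure as a negative FDT_ERR_* value, fdt_strerror() slots directly into error paths; a sketch (the printf() is an assumption):

    static int find_chosen(const void *fdt)
    {
        int node = fdt_path_offset(fdt, "/chosen");

        if (node < 0)
            printf("cannot find /chosen: %s\n", fdt_strerror(node));
        return node;
    }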
diff --git a/tools/src/libfdt/fdt_sw.c b/tools/src/libfdt/fdt_sw.c
new file mode 100644
index 0000000..c0d9cd0
--- /dev/null
+++ b/tools/src/libfdt/fdt_sw.c
@@ -0,0 +1,384 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause)
+/*
+ * libfdt - Flat Device Tree manipulation
+ * Copyright (C) 2006 David Gibson, IBM Corporation.
+ */
+#include "libfdt_env.h"
+
+#include "fdt.h"
+#include "libfdt.h"
+
+#include "libfdt_internal.h"
+
+static int fdt_sw_probe_(void *fdt)
+{
+ if (!can_assume(VALID_INPUT)) {
+ if (fdt_magic(fdt) == FDT_MAGIC)
+ return -FDT_ERR_BADSTATE;
+ else if (fdt_magic(fdt) != FDT_SW_MAGIC)
+ return -FDT_ERR_BADMAGIC;
+ }
+
+ return 0;
+}
+
+#define FDT_SW_PROBE(fdt) \
+ { \
+ int err; \
+ if ((err = fdt_sw_probe_(fdt)) != 0) \
+ return err; \
+ }
+
+/* 'memrsv' state: Initial state after fdt_create()
+ *
+ * Allowed functions:
+ * fdt_add_reservemap_entry()
+ * fdt_finish_reservemap() [moves to 'struct' state]
+ */
+static int fdt_sw_probe_memrsv_(void *fdt)
+{
+ int err = fdt_sw_probe_(fdt);
+ if (err)
+ return err;
+
+ if (!can_assume(VALID_INPUT) && fdt_off_dt_strings(fdt) != 0)
+ return -FDT_ERR_BADSTATE;
+ return 0;
+}
+
+#define FDT_SW_PROBE_MEMRSV(fdt) \
+ { \
+ int err; \
+ if ((err = fdt_sw_probe_memrsv_(fdt)) != 0) \
+ return err; \
+ }
+
+/* 'struct' state: Enter this state after fdt_finish_reservemap()
+ *
+ * Allowed functions:
+ * fdt_begin_node()
+ * fdt_end_node()
+ * fdt_property*()
+ * fdt_finish() [moves to 'complete' state]
+ */
+static int fdt_sw_probe_struct_(void *fdt)
+{
+ int err = fdt_sw_probe_(fdt);
+ if (err)
+ return err;
+
+ if (!can_assume(VALID_INPUT) &&
+ fdt_off_dt_strings(fdt) != fdt_totalsize(fdt))
+ return -FDT_ERR_BADSTATE;
+ return 0;
+}
+
+#define FDT_SW_PROBE_STRUCT(fdt) \
+ { \
+ int err; \
+ if ((err = fdt_sw_probe_struct_(fdt)) != 0) \
+ return err; \
+ }
+
+static inline uint32_t sw_flags(void *fdt)
+{
+ /* assert: (fdt_magic(fdt) == FDT_SW_MAGIC) */
+ return fdt_last_comp_version(fdt);
+}
+
+/* 'complete' state: Enter this state after fdt_finish()
+ *
+ * Allowed functions: none
+ */
+
+static void *fdt_grab_space_(void *fdt, size_t len)
+{
+ unsigned int offset = fdt_size_dt_struct(fdt);
+ unsigned int spaceleft;
+
+ spaceleft = fdt_totalsize(fdt) - fdt_off_dt_struct(fdt)
+ - fdt_size_dt_strings(fdt);
+
+ if ((offset + len < offset) || (offset + len > spaceleft))
+ return NULL;
+
+ fdt_set_size_dt_struct(fdt, offset + len);
+ return fdt_offset_ptr_w_(fdt, offset);
+}
+
+int fdt_create_with_flags(void *buf, int bufsize, uint32_t flags)
+{
+ const int hdrsize = FDT_ALIGN(sizeof(struct fdt_header),
+ sizeof(struct fdt_reserve_entry));
+ void *fdt = buf;
+
+ if (bufsize < hdrsize)
+ return -FDT_ERR_NOSPACE;
+
+ if (flags & ~FDT_CREATE_FLAGS_ALL)
+ return -FDT_ERR_BADFLAGS;
+
+ memset(buf, 0, bufsize);
+
+ /*
+ * magic and last_comp_version keep intermediate state during the fdt
+ * creation process, which is replaced with the proper FDT format by
+ * fdt_finish().
+ *
+ * flags should be accessed with sw_flags().
+ */
+ fdt_set_magic(fdt, FDT_SW_MAGIC);
+ fdt_set_version(fdt, FDT_LAST_SUPPORTED_VERSION);
+ fdt_set_last_comp_version(fdt, flags);
+
+ fdt_set_totalsize(fdt, bufsize);
+
+ fdt_set_off_mem_rsvmap(fdt, hdrsize);
+ fdt_set_off_dt_struct(fdt, fdt_off_mem_rsvmap(fdt));
+ fdt_set_off_dt_strings(fdt, 0);
+
+ return 0;
+}
+
+int fdt_create(void *buf, int bufsize)
+{
+ return fdt_create_with_flags(buf, bufsize, 0);
+}
+
+int fdt_resize(void *fdt, void *buf, int bufsize)
+{
+ size_t headsize, tailsize;
+ char *oldtail, *newtail;
+
+ FDT_SW_PROBE(fdt);
+
+ if (bufsize < 0)
+ return -FDT_ERR_NOSPACE;
+
+ headsize = fdt_off_dt_struct(fdt) + fdt_size_dt_struct(fdt);
+ tailsize = fdt_size_dt_strings(fdt);
+
+ if (!can_assume(VALID_DTB) &&
+ headsize + tailsize > fdt_totalsize(fdt))
+ return -FDT_ERR_INTERNAL;
+
+ if ((headsize + tailsize) > (unsigned)bufsize)
+ return -FDT_ERR_NOSPACE;
+
+ oldtail = (char *)fdt + fdt_totalsize(fdt) - tailsize;
+ newtail = (char *)buf + bufsize - tailsize;
+
+ /* Two cases to avoid clobbering data if the old and new
+ * buffers partially overlap */
+ if (buf <= fdt) {
+ memmove(buf, fdt, headsize);
+ memmove(newtail, oldtail, tailsize);
+ } else {
+ memmove(newtail, oldtail, tailsize);
+ memmove(buf, fdt, headsize);
+ }
+
+ fdt_set_totalsize(buf, bufsize);
+ if (fdt_off_dt_strings(buf))
+ fdt_set_off_dt_strings(buf, bufsize);
+
+ return 0;
+}
+
+int fdt_add_reservemap_entry(void *fdt, uint64_t addr, uint64_t size)
+{
+ struct fdt_reserve_entry *re;
+ int offset;
+
+ FDT_SW_PROBE_MEMRSV(fdt);
+
+ offset = fdt_off_dt_struct(fdt);
+ if ((offset + sizeof(*re)) > fdt_totalsize(fdt))
+ return -FDT_ERR_NOSPACE;
+
+ re = (struct fdt_reserve_entry *)((char *)fdt + offset);
+ re->address = cpu_to_fdt64(addr);
+ re->size = cpu_to_fdt64(size);
+
+ fdt_set_off_dt_struct(fdt, offset + sizeof(*re));
+
+ return 0;
+}
+
+int fdt_finish_reservemap(void *fdt)
+{
+ int err = fdt_add_reservemap_entry(fdt, 0, 0);
+
+ if (err)
+ return err;
+
+ fdt_set_off_dt_strings(fdt, fdt_totalsize(fdt));
+ return 0;
+}
+
+int fdt_begin_node(void *fdt, const char *name)
+{
+ struct fdt_node_header *nh;
+ int namelen;
+
+ FDT_SW_PROBE_STRUCT(fdt);
+
+ namelen = strlen(name) + 1;
+ nh = fdt_grab_space_(fdt, sizeof(*nh) + FDT_TAGALIGN(namelen));
+ if (! nh)
+ return -FDT_ERR_NOSPACE;
+
+ nh->tag = cpu_to_fdt32(FDT_BEGIN_NODE);
+ memcpy(nh->name, name, namelen);
+ return 0;
+}
+
+int fdt_end_node(void *fdt)
+{
+ fdt32_t *en;
+
+ FDT_SW_PROBE_STRUCT(fdt);
+
+ en = fdt_grab_space_(fdt, FDT_TAGSIZE);
+ if (! en)
+ return -FDT_ERR_NOSPACE;
+
+ *en = cpu_to_fdt32(FDT_END_NODE);
+ return 0;
+}
+
+static int fdt_add_string_(void *fdt, const char *s)
+{
+ char *strtab = (char *)fdt + fdt_totalsize(fdt);
+ unsigned int strtabsize = fdt_size_dt_strings(fdt);
+ unsigned int len = strlen(s) + 1;
+ unsigned int struct_top, offset;
+
+ offset = strtabsize + len;
+ struct_top = fdt_off_dt_struct(fdt) + fdt_size_dt_struct(fdt);
+ if (fdt_totalsize(fdt) - offset < struct_top)
+ return 0; /* no more room :( */
+
+ memcpy(strtab - offset, s, len);
+ fdt_set_size_dt_strings(fdt, strtabsize + len);
+ return -offset;
+}
+
+/* Must only be used to roll back in case of error */
+static void fdt_del_last_string_(void *fdt, const char *s)
+{
+ int strtabsize = fdt_size_dt_strings(fdt);
+ int len = strlen(s) + 1;
+
+ fdt_set_size_dt_strings(fdt, strtabsize - len);
+}
+
+static int fdt_find_add_string_(void *fdt, const char *s, int *allocated)
+{
+ char *strtab = (char *)fdt + fdt_totalsize(fdt);
+ int strtabsize = fdt_size_dt_strings(fdt);
+ const char *p;
+
+ *allocated = 0;
+
+ p = fdt_find_string_(strtab - strtabsize, strtabsize, s);
+ if (p)
+ return p - strtab;
+
+ *allocated = 1;
+
+ return fdt_add_string_(fdt, s);
+}
+
+int fdt_property_placeholder(void *fdt, const char *name, int len, void **valp)
+{
+ struct fdt_property *prop;
+ int nameoff;
+ int allocated;
+
+ FDT_SW_PROBE_STRUCT(fdt);
+
+ /* String de-duplication can be slow, _NO_NAME_DEDUP skips it */
+ if (sw_flags(fdt) & FDT_CREATE_FLAG_NO_NAME_DEDUP) {
+ allocated = 1;
+ nameoff = fdt_add_string_(fdt, name);
+ } else {
+ nameoff = fdt_find_add_string_(fdt, name, &allocated);
+ }
+ if (nameoff == 0)
+ return -FDT_ERR_NOSPACE;
+
+ prop = fdt_grab_space_(fdt, sizeof(*prop) + FDT_TAGALIGN(len));
+ if (! prop) {
+ if (allocated)
+ fdt_del_last_string_(fdt, name);
+ return -FDT_ERR_NOSPACE;
+ }
+
+ prop->tag = cpu_to_fdt32(FDT_PROP);
+ prop->nameoff = cpu_to_fdt32(nameoff);
+ prop->len = cpu_to_fdt32(len);
+ *valp = prop->data;
+ return 0;
+}
+
+int fdt_property(void *fdt, const char *name, const void *val, int len)
+{
+ void *ptr;
+ int ret;
+
+ ret = fdt_property_placeholder(fdt, name, len, &ptr);
+ if (ret)
+ return ret;
+ memcpy(ptr, val, len);
+ return 0;
+}
+
+int fdt_finish(void *fdt)
+{
+ char *p = (char *)fdt;
+ fdt32_t *end;
+ int oldstroffset, newstroffset;
+ uint32_t tag;
+ int offset, nextoffset;
+
+ FDT_SW_PROBE_STRUCT(fdt);
+
+ /* Add terminator */
+ end = fdt_grab_space_(fdt, sizeof(*end));
+ if (! end)
+ return -FDT_ERR_NOSPACE;
+ *end = cpu_to_fdt32(FDT_END);
+
+ /* Relocate the string table */
+ oldstroffset = fdt_totalsize(fdt) - fdt_size_dt_strings(fdt);
+ newstroffset = fdt_off_dt_struct(fdt) + fdt_size_dt_struct(fdt);
+ memmove(p + newstroffset, p + oldstroffset, fdt_size_dt_strings(fdt));
+ fdt_set_off_dt_strings(fdt, newstroffset);
+
+ /* Walk the structure, correcting string offsets */
+ offset = 0;
+ while ((tag = fdt_next_tag(fdt, offset, &nextoffset)) != FDT_END) {
+ if (tag == FDT_PROP) {
+ struct fdt_property *prop =
+ fdt_offset_ptr_w_(fdt, offset);
+ int nameoff;
+
+ nameoff = fdt32_to_cpu(prop->nameoff);
+ nameoff += fdt_size_dt_strings(fdt);
+ prop->nameoff = cpu_to_fdt32(nameoff);
+ }
+ offset = nextoffset;
+ }
+ if (nextoffset < 0)
+ return nextoffset;
+
+ /* Finally, adjust the header */
+ fdt_set_totalsize(fdt, newstroffset + fdt_size_dt_strings(fdt));
+
+ /* And fix up fields that were keeping intermediate state. */
+ fdt_set_last_comp_version(fdt, FDT_FIRST_SUPPORTED_VERSION);
+ fdt_set_magic(fdt, FDT_MAGIC);
+
+ return 0;
+}
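The sequential-write states above correspond to this call order; a minimal sketch that builds a one-property tree (the buffer and property values are illustrative):

    static int build_minimal_dtb(void *buf, int bufsize)
    {
        int err;

        if ((err = fdt_create(buf, bufsize)))
            return err;
        if ((err = fdt_finish_reservemap(buf)))    /* 'memrsv' -> 'struct' */
            return err;
        if ((err = fdt_begin_node(buf, "")))       /* root node */
            return err;
        if ((err = fdt_property(buf, "compatible", "acme,board", 11)))
            return err;
        if ((err = fdt_end_node(buf)))
            return err;
        return fdt_finish(buf);                    /* now a regular DTB */
    }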
diff --git a/tools/src/libfdt/fdt_wip.c b/tools/src/libfdt/fdt_wip.c
new file mode 100644
index 0000000..44aed08
--- /dev/null
+++ b/tools/src/libfdt/fdt_wip.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause)
+/*
+ * libfdt - Flat Device Tree manipulation
+ * Copyright (C) 2006 David Gibson, IBM Corporation.
+ */
+#include "libfdt_env.h"
+
+#include "fdt.h"
+#include "libfdt.h"
+
+#include "libfdt_internal.h"
+
+int fdt_setprop_inplace_namelen_partial(void *fdt, int nodeoffset,
+ const char *name, int namelen,
+ uint32_t idx, const void *val,
+ int len)
+{
+ void *propval;
+ int proplen;
+
+ propval = fdt_getprop_namelen_w(fdt, nodeoffset, name, namelen,
+ &proplen);
+ if (!propval)
+ return proplen;
+
+ if ((unsigned)proplen < (len + idx))
+ return -FDT_ERR_NOSPACE;
+
+ memcpy((char *)propval + idx, val, len);
+ return 0;
+}
+
+int fdt_setprop_inplace(void *fdt, int nodeoffset, const char *name,
+ const void *val, int len)
+{
+ const void *propval;
+ int proplen;
+
+ propval = fdt_getprop(fdt, nodeoffset, name, &proplen);
+ if (!propval)
+ return proplen;
+
+ if (proplen != len)
+ return -FDT_ERR_NOSPACE;
+
+ return fdt_setprop_inplace_namelen_partial(fdt, nodeoffset, name,
+ strlen(name), 0,
+ val, len);
+}
+
+static void fdt_nop_region_(void *start, int len)
+{
+ fdt32_t *p;
+
+ for (p = start; (char *)p < ((char *)start + len); p++)
+ *p = cpu_to_fdt32(FDT_NOP);
+}
+
+int fdt_nop_property(void *fdt, int nodeoffset, const char *name)
+{
+ struct fdt_property *prop;
+ int len;
+
+ prop = fdt_get_property_w(fdt, nodeoffset, name, &len);
+ if (!prop)
+ return len;
+
+ fdt_nop_region_(prop, len + sizeof(*prop));
+
+ return 0;
+}
+
+int fdt_node_end_offset_(void *fdt, int offset)
+{
+ int depth = 0;
+
+ while ((offset >= 0) && (depth >= 0))
+ offset = fdt_next_node(fdt, offset, &depth);
+
+ return offset;
+}
+
+int fdt_nop_node(void *fdt, int nodeoffset)
+{
+ int endoffset;
+
+ endoffset = fdt_node_end_offset_(fdt, nodeoffset);
+ if (endoffset < 0)
+ return endoffset;
+
+ fdt_nop_region_(fdt_offset_ptr_w(fdt, nodeoffset, 0),
+ endoffset - nodeoffset);
+ return 0;
+}
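These write-in-place helpers never change the blob's layout, so they also work on trees that the read-write functions cannot edit; a sketch (the property and value are illustrative):

    static int disable_node(void *fdt, int node)
    {
        /* only succeeds if the new value has exactly the old length */
        int err = fdt_setprop_inplace(fdt, node, "status", "off", 4);

        if (err == -FDT_ERR_NOSPACE)
            /* cannot resize in place; blank out the whole node instead */
            err = fdt_nop_node(fdt, node);
        return err;
    }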
diff --git a/tools/src/libfdt/libfdt.h b/tools/src/libfdt/libfdt.h
new file mode 100644
index 0000000..fe49b5d
--- /dev/null
+++ b/tools/src/libfdt/libfdt.h
@@ -0,0 +1,2080 @@
+/* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) */
+#ifndef LIBFDT_H
+#define LIBFDT_H
+/*
+ * libfdt - Flat Device Tree manipulation
+ * Copyright (C) 2006 David Gibson, IBM Corporation.
+ */
+
+#include "libfdt_env.h"
+#include "fdt.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define FDT_FIRST_SUPPORTED_VERSION 0x02
+#define FDT_LAST_SUPPORTED_VERSION 0x11
+
+/* Error codes: informative error codes */
+#define FDT_ERR_NOTFOUND 1
+ /* FDT_ERR_NOTFOUND: The requested node or property does not exist */
+#define FDT_ERR_EXISTS 2
+ /* FDT_ERR_EXISTS: Attempted to create a node or property which
+ * already exists */
+#define FDT_ERR_NOSPACE 3
+ /* FDT_ERR_NOSPACE: Operation needed to expand the device
+ * tree, but its buffer did not have sufficient space to
+ * contain the expanded tree. Use fdt_open_into() to move the
+ * device tree to a buffer with more space. */
+
+/* Error codes: codes for bad parameters */
+#define FDT_ERR_BADOFFSET 4
+ /* FDT_ERR_BADOFFSET: Function was passed a structure block
+ * offset which is out-of-bounds, or which points to an
+ * unsuitable part of the structure for the operation. */
+#define FDT_ERR_BADPATH 5
+ /* FDT_ERR_BADPATH: Function was passed a badly formatted path
+ * (e.g. missing a leading / for a function which requires an
+ * absolute path) */
+#define FDT_ERR_BADPHANDLE 6
+ /* FDT_ERR_BADPHANDLE: Function was passed an invalid phandle.
+ * This can be caused either by an invalid phandle property
+ * length, or the phandle value was either 0 or -1, which are
+ * not permitted. */
+#define FDT_ERR_BADSTATE 7
+ /* FDT_ERR_BADSTATE: Function was passed an incomplete device
+ * tree created by the sequential-write functions, which is
+ * not sufficiently complete for the requested operation. */
+
+/* Error codes: codes for bad device tree blobs */
+#define FDT_ERR_TRUNCATED 8
+ /* FDT_ERR_TRUNCATED: FDT or a sub-block is improperly
+ * terminated (overflows, goes outside allowed bounds, or
+ * isn't properly terminated). */
+#define FDT_ERR_BADMAGIC 9
+ /* FDT_ERR_BADMAGIC: Given "device tree" appears not to be a
+ * device tree at all - it is missing the flattened device
+ * tree magic number. */
+#define FDT_ERR_BADVERSION 10
+ /* FDT_ERR_BADVERSION: Given device tree has a version which
+ * can't be handled by the requested operation. For
+ * read-write functions, this may mean that fdt_open_into() is
+ * required to convert the tree to the expected version. */
+#define FDT_ERR_BADSTRUCTURE 11
+ /* FDT_ERR_BADSTRUCTURE: Given device tree has a corrupt
+ * structure block or other serious error (e.g. misnested
+ * nodes, or subnodes preceding properties). */
+#define FDT_ERR_BADLAYOUT 12
+ /* FDT_ERR_BADLAYOUT: For read-write functions, the given
+ * device tree has its sub-blocks in an order that the
+ * function can't handle (memory reserve map, then structure,
+ * then strings). Use fdt_open_into() to reorganize the tree
+ * into a form suitable for the read-write operations. */
+
+/* "Can't happen" error indicating a bug in libfdt */
+#define FDT_ERR_INTERNAL 13
+ /* FDT_ERR_INTERNAL: libfdt has failed an internal assertion.
+ * Should never be returned, if it is, it indicates a bug in
+ * libfdt itself. */
+
+/* Errors in device tree content */
+#define FDT_ERR_BADNCELLS 14
+ /* FDT_ERR_BADNCELLS: Device tree has a #address-cells, #size-cells
+ * or similar property with a bad format or value */
+
+#define FDT_ERR_BADVALUE 15
+ /* FDT_ERR_BADVALUE: Device tree has a property with an unexpected
+ * value. For example: a property expected to contain a string list
+ * is not NUL-terminated within the length of its value. */
+
+#define FDT_ERR_BADOVERLAY 16
+ /* FDT_ERR_BADOVERLAY: The device tree overlay, while
+ * correctly structured, cannot be applied due to some
+ * unexpected or missing value, property or node. */
+
+#define FDT_ERR_NOPHANDLES 17
+ /* FDT_ERR_NOPHANDLES: The device tree doesn't have any
+ * phandle available anymore without causing an overflow */
+
+#define FDT_ERR_BADFLAGS 18
+ /* FDT_ERR_BADFLAGS: The function was passed a flags field that
+ * contains invalid flags or an invalid combination of flags. */
+
+#define FDT_ERR_MAX 18
+
+/* constants */
+#define FDT_MAX_PHANDLE 0xfffffffe
+ /* Valid values for phandles range from 1 to 2^32-2. */
+
+/**********************************************************************/
+/* Low-level functions (you probably don't need these) */
+/**********************************************************************/
+
+#ifndef SWIG /* This function is not useful in Python */
+const void *fdt_offset_ptr(const void *fdt, int offset, unsigned int checklen);
+#endif
+static inline void *fdt_offset_ptr_w(void *fdt, int offset, int checklen)
+{
+ return (void *)(uintptr_t)fdt_offset_ptr(fdt, offset, checklen);
+}
+
+uint32_t fdt_next_tag(const void *fdt, int offset, int *nextoffset);
+
+/*
+ * Alignment helpers:
+ * These helpers access words from a device tree blob. They're
+ * built to work even with unaligned pointers on platforms (like
+ * ARM) that don't like unaligned loads and stores
+ */
+
+static inline uint32_t fdt32_ld(const fdt32_t *p)
+{
+ const uint8_t *bp = (const uint8_t *)p;
+
+ return ((uint32_t)bp[0] << 24)
+ | ((uint32_t)bp[1] << 16)
+ | ((uint32_t)bp[2] << 8)
+ | bp[3];
+}
+
+static inline void fdt32_st(void *property, uint32_t value)
+{
+ uint8_t *bp = (uint8_t *)property;
+
+ bp[0] = value >> 24;
+ bp[1] = (value >> 16) & 0xff;
+ bp[2] = (value >> 8) & 0xff;
+ bp[3] = value & 0xff;
+}
+
+static inline uint64_t fdt64_ld(const fdt64_t *p)
+{
+ const uint8_t *bp = (const uint8_t *)p;
+
+ return ((uint64_t)bp[0] << 56)
+ | ((uint64_t)bp[1] << 48)
+ | ((uint64_t)bp[2] << 40)
+ | ((uint64_t)bp[3] << 32)
+ | ((uint64_t)bp[4] << 24)
+ | ((uint64_t)bp[5] << 16)
+ | ((uint64_t)bp[6] << 8)
+ | bp[7];
+}
+
+static inline void fdt64_st(void *property, uint64_t value)
+{
+ uint8_t *bp = (uint8_t *)property;
+
+ bp[0] = value >> 56;
+ bp[1] = (value >> 48) & 0xff;
+ bp[2] = (value >> 40) & 0xff;
+ bp[3] = (value >> 32) & 0xff;
+ bp[4] = (value >> 24) & 0xff;
+ bp[5] = (value >> 16) & 0xff;
+ bp[6] = (value >> 8) & 0xff;
+ bp[7] = value & 0xff;
+}
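+
+/*
+ * Example (an illustrative sketch, not part of the upstream API docs):
+ * the loaders above can safely read header words from a blob that may
+ * be unaligned in memory; 'blob' is an assumed caller-provided pointer.
+ *
+ *     const struct fdt_header *hdr = blob;
+ *     uint32_t size = fdt32_ld(&hdr->totalsize);
+ *
+ * fdt32_ld() assembles the big-endian value byte by byte, so this works
+ * even on platforms that fault on unaligned 32-bit loads.
+ */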
+
+/**********************************************************************/
+/* Traversal functions */
+/**********************************************************************/
+
+int fdt_next_node(const void *fdt, int offset, int *depth);
+
+/**
+ * fdt_first_subnode() - get offset of first direct subnode
+ *
+ * @fdt: FDT blob
+ * @offset: Offset of node to check
+ * @return offset of first subnode, or -FDT_ERR_NOTFOUND if there is none
+ */
+int fdt_first_subnode(const void *fdt, int offset);
+
+/**
+ * fdt_next_subnode() - get offset of next direct subnode
+ *
+ * After first calling fdt_first_subnode(), call this function repeatedly to
+ * get direct subnodes of a parent node.
+ *
+ * @fdt: FDT blob
+ * @offset: Offset of previous subnode
+ * @return offset of next subnode, or -FDT_ERR_NOTFOUND if there are no more
+ * subnodes
+ */
+int fdt_next_subnode(const void *fdt, int offset);
+
+/**
+ * fdt_for_each_subnode - iterate over all subnodes of a parent
+ *
+ * @node: child node (int, lvalue)
+ * @fdt: FDT blob (const void *)
+ * @parent: parent node (int)
+ *
+ * This is actually a wrapper around a for loop and would be used like so:
+ *
+ * fdt_for_each_subnode(node, fdt, parent) {
+ * Use node
+ * ...
+ * }
+ *
+ * if ((node < 0) && (node != -FDT_ERR_NOTFOUND)) {
+ * Error handling
+ * }
+ *
+ * Note that this is implemented as a macro and @node is used as the
+ * iterator in the loop. The parent variable can be constant or even a
+ * literal.
+ *
+ */
+#define fdt_for_each_subnode(node, fdt, parent) \
+ for (node = fdt_first_subnode(fdt, parent); \
+ node >= 0; \
+ node = fdt_next_subnode(fdt, node))
+
+/**********************************************************************/
+/* General functions */
+/**********************************************************************/
+#define fdt_get_header(fdt, field) \
+ (fdt32_ld(&((const struct fdt_header *)(fdt))->field))
+#define fdt_magic(fdt) (fdt_get_header(fdt, magic))
+#define fdt_totalsize(fdt) (fdt_get_header(fdt, totalsize))
+#define fdt_off_dt_struct(fdt) (fdt_get_header(fdt, off_dt_struct))
+#define fdt_off_dt_strings(fdt) (fdt_get_header(fdt, off_dt_strings))
+#define fdt_off_mem_rsvmap(fdt) (fdt_get_header(fdt, off_mem_rsvmap))
+#define fdt_version(fdt) (fdt_get_header(fdt, version))
+#define fdt_last_comp_version(fdt) (fdt_get_header(fdt, last_comp_version))
+#define fdt_boot_cpuid_phys(fdt) (fdt_get_header(fdt, boot_cpuid_phys))
+#define fdt_size_dt_strings(fdt) (fdt_get_header(fdt, size_dt_strings))
+#define fdt_size_dt_struct(fdt) (fdt_get_header(fdt, size_dt_struct))
+
+#define fdt_set_hdr_(name) \
+ static inline void fdt_set_##name(void *fdt, uint32_t val) \
+ { \
+ struct fdt_header *fdth = (struct fdt_header *)fdt; \
+ fdth->name = cpu_to_fdt32(val); \
+ }
+fdt_set_hdr_(magic);
+fdt_set_hdr_(totalsize);
+fdt_set_hdr_(off_dt_struct);
+fdt_set_hdr_(off_dt_strings);
+fdt_set_hdr_(off_mem_rsvmap);
+fdt_set_hdr_(version);
+fdt_set_hdr_(last_comp_version);
+fdt_set_hdr_(boot_cpuid_phys);
+fdt_set_hdr_(size_dt_strings);
+fdt_set_hdr_(size_dt_struct);
+#undef fdt_set_hdr_
+
+/**
+ * fdt_header_size - return the size of the tree's header
+ * @fdt: pointer to a flattened device tree
+ */
+size_t fdt_header_size(const void *fdt);
+
+/**
+ * fdt_header_size_ - internal function which takes a version number
+ */
+size_t fdt_header_size_(uint32_t version);
+
+/**
+ * fdt_check_header - sanity check a device tree header
+ *
+ * @fdt: pointer to data which might be a flattened device tree
+ *
+ * fdt_check_header() checks that the given buffer contains what
+ * appears to be a flattened device tree, and that the header contains
+ * valid information (to the extent that can be determined from the
+ * header alone).
+ *
+ * returns:
+ * 0, if the buffer appears to contain a valid device tree
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_TRUNCATED, standard meanings, as above
+ */
+int fdt_check_header(const void *fdt);
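+
+/*
+ * Example (an illustrative sketch): validating an untrusted buffer
+ * before treating it as a device tree; 'blob' is an assumed pointer.
+ *
+ *     int err = fdt_check_header(blob);
+ *
+ *     if (err < 0)
+ *             return err;
+ *
+ * A negative return here is one of the -FDT_ERR_* codes listed above.
+ */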
+
+/**
+ * fdt_move - move a device tree around in memory
+ * @fdt: pointer to the device tree to move
+ * @buf: pointer to memory where the device is to be moved
+ * @bufsize: size of the memory space at buf
+ *
+ * fdt_move() relocates, if possible, the device tree blob located at
+ * fdt to the buffer at buf of size bufsize. The buffer may overlap
+ * with the existing device tree blob at fdt. Therefore,
+ * fdt_move(fdt, fdt, fdt_totalsize(fdt))
+ * should always succeed.
+ *
+ * returns:
+ * 0, on success
+ * -FDT_ERR_NOSPACE, bufsize is insufficient to contain the device tree
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE, standard meanings
+ */
+int fdt_move(const void *fdt, void *buf, int bufsize);
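+
+/*
+ * Example (an illustrative sketch): relocating a blob into a new buffer
+ * 'newbuf' of 'newsize' bytes (both assumed for illustration).
+ *
+ *     int err = fdt_move(fdt, newbuf, newsize);
+ *
+ *     if (err < 0)
+ *             return err;
+ */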
+
+/**********************************************************************/
+/* Read-only functions */
+/**********************************************************************/
+
+int fdt_check_full(const void *fdt, size_t bufsize);
+
+/**
+ * fdt_get_string - retrieve a string from the strings block of a device tree
+ * @fdt: pointer to the device tree blob
+ * @stroffset: offset of the string within the strings block (native endian)
+ * @lenp: optional pointer to return the string's length
+ *
+ * fdt_get_string() retrieves a pointer to a single string from the
+ * strings block of the device tree blob at fdt, and optionally also
+ * returns the string's length in *lenp.
+ *
+ * returns:
+ * a pointer to the string, on success
+ * NULL, if stroffset is out of bounds, or doesn't point to a valid string
+ */
+const char *fdt_get_string(const void *fdt, int stroffset, int *lenp);
+
+/**
+ * fdt_string - retrieve a string from the strings block of a device tree
+ * @fdt: pointer to the device tree blob
+ * @stroffset: offset of the string within the strings block (native endian)
+ *
+ * fdt_string() retrieves a pointer to a single string from the
+ * strings block of the device tree blob at fdt.
+ *
+ * returns:
+ * a pointer to the string, on success
+ * NULL, if stroffset is out of bounds, or doesn't point to a valid string
+ */
+const char *fdt_string(const void *fdt, int stroffset);
+
+/**
+ * fdt_find_max_phandle - find and return the highest phandle in a tree
+ * @fdt: pointer to the device tree blob
+ * @phandle: return location for the highest phandle value found in the tree
+ *
+ * fdt_find_max_phandle() finds the highest phandle value in the given device
+ * tree. The value returned in @phandle is only valid if the function returns
+ * success.
+ *
+ * returns:
+ * 0 on success or a negative error code on failure
+ */
+int fdt_find_max_phandle(const void *fdt, uint32_t *phandle);
+
+/**
+ * fdt_get_max_phandle - retrieves the highest phandle in a tree
+ * @fdt: pointer to the device tree blob
+ *
+ * fdt_get_max_phandle retrieves the highest phandle in the given
+ * device tree. This will ignore badly formatted phandles, or phandles
+ * with a value of 0 or -1.
+ *
+ * This function is deprecated in favour of fdt_find_max_phandle().
+ *
+ * returns:
+ * the highest phandle on success
+ * 0, if no phandle was found in the device tree
+ * -1, if an error occurred
+ */
+static inline uint32_t fdt_get_max_phandle(const void *fdt)
+{
+ uint32_t phandle;
+ int err;
+
+ err = fdt_find_max_phandle(fdt, &phandle);
+ if (err < 0)
+ return (uint32_t)-1;
+
+ return phandle;
+}
+
+/**
+ * fdt_generate_phandle - return a new, unused phandle for a device tree blob
+ * @fdt: pointer to the device tree blob
+ * @phandle: return location for the new phandle
+ *
+ * Walks the device tree blob and looks for the highest phandle value. On
+ * success, the new, unused phandle value (one higher than the previously
+ * highest phandle value in the device tree blob) will be returned in the
+ * @phandle parameter.
+ *
+ * Returns:
+ * 0 on success or a negative error-code on failure
+ */
+int fdt_generate_phandle(const void *fdt, uint32_t *phandle);
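+
+/*
+ * Example (an illustrative sketch): allocating a phandle for a node
+ * about to be added to the tree.
+ *
+ *     uint32_t phandle;
+ *     int err = fdt_generate_phandle(fdt, &phandle);
+ *
+ *     if (err < 0)
+ *             return err;
+ *
+ * On success, phandle holds a value one higher than any already in use.
+ */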
+
+/**
+ * fdt_num_mem_rsv - retrieve the number of memory reserve map entries
+ * @fdt: pointer to the device tree blob
+ *
+ * Returns the number of entries in the device tree blob's memory
+ * reservation map. This does not include the terminating 0,0 entry
+ * or any other (0,0) entries reserved for expansion.
+ *
+ * returns:
+ * the number of entries
+ */
+int fdt_num_mem_rsv(const void *fdt);
+
+/**
+ * fdt_get_mem_rsv - retrieve one memory reserve map entry
+ * @fdt: pointer to the device tree blob
+ * @address, @size: pointers to 64-bit variables
+ *
+ * On success, *address and *size will contain the address and size of
+ * the n-th reserve map entry from the device tree blob, in
+ * native-endian format.
+ *
+ * returns:
+ * 0, on success
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE, standard meanings
+ */
+int fdt_get_mem_rsv(const void *fdt, int n, uint64_t *address, uint64_t *size);
+
+/**
+ * fdt_subnode_offset_namelen - find a subnode based on substring
+ * @fdt: pointer to the device tree blob
+ * @parentoffset: structure block offset of a node
+ * @name: name of the subnode to locate
+ * @namelen: number of characters of name to consider
+ *
+ * Identical to fdt_subnode_offset(), but only examine the first
+ * namelen characters of name for matching the subnode name. This is
+ * useful for finding subnodes based on a portion of a larger string,
+ * such as a full path.
+ */
+#ifndef SWIG /* Not available in Python */
+int fdt_subnode_offset_namelen(const void *fdt, int parentoffset,
+ const char *name, int namelen);
+#endif
+/**
+ * fdt_subnode_offset - find a subnode of a given node
+ * @fdt: pointer to the device tree blob
+ * @parentoffset: structure block offset of a node
+ * @name: name of the subnode to locate
+ *
+ * fdt_subnode_offset() finds a subnode of the node at structure block
+ * offset parentoffset with the given name. name may include a unit
+ * address, in which case fdt_subnode_offset() will find the subnode
+ * with that unit address, or the unit address may be omitted, in
+ * which case fdt_subnode_offset() will find an arbitrary subnode
+ * whose name excluding unit address matches the given name.
+ *
+ * returns:
+ * structure block offset of the requested subnode (>=0), on success
+ * -FDT_ERR_NOTFOUND, if the requested subnode does not exist
+ * -FDT_ERR_BADOFFSET, if parentoffset did not point to an FDT_BEGIN_NODE
+ * tag
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_TRUNCATED, standard meanings.
+ */
+int fdt_subnode_offset(const void *fdt, int parentoffset, const char *name);
+
+/**
+ * fdt_path_offset_namelen - find a tree node by its full path
+ * @fdt: pointer to the device tree blob
+ * @path: full path of the node to locate
+ * @namelen: number of characters of path to consider
+ *
+ * Identical to fdt_path_offset(), but only consider the first namelen
+ * characters of path as the path name.
+ */
+#ifndef SWIG /* Not available in Python */
+int fdt_path_offset_namelen(const void *fdt, const char *path, int namelen);
+#endif
+
+/**
+ * fdt_path_offset - find a tree node by its full path
+ * @fdt: pointer to the device tree blob
+ * @path: full path of the node to locate
+ *
+ * fdt_path_offset() finds a node of a given path in the device tree.
+ * Each path component may omit the unit address portion, but the
+ * results of this are undefined if any such path component is
+ * ambiguous (that is if there are multiple nodes at the relevant
+ * level matching the given component, differentiated only by unit
+ * address).
+ *
+ * returns:
+ * structure block offset of the node with the requested path (>=0), on
+ * success
+ * -FDT_ERR_BADPATH, given path does not begin with '/' or is invalid
+ * -FDT_ERR_NOTFOUND, if the requested node does not exist
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_TRUNCATED, standard meanings.
+ */
+int fdt_path_offset(const void *fdt, const char *path);
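+
+/*
+ * Example (an illustrative sketch; the path is an assumption made for
+ * illustration):
+ *
+ *     int node = fdt_path_offset(fdt, "/chosen");
+ *
+ *     if (node < 0)
+ *             return node;
+ *
+ * A negative result is an error code such as -FDT_ERR_NOTFOUND.
+ */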
+
+/**
+ * fdt_get_name - retrieve the name of a given node
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: structure block offset of the starting node
+ * @lenp: pointer to an integer variable (will be overwritten) or NULL
+ *
+ * fdt_get_name() retrieves the name (including unit address) of the
+ * device tree node at structure block offset nodeoffset. If lenp is
+ * non-NULL, the length of this name is also returned, in the integer
+ * pointed to by lenp.
+ *
+ * returns:
+ * pointer to the node's name, on success
+ * If lenp is non-NULL, *lenp contains the length of that name
+ * (>=0)
+ * NULL, on error
+ * if lenp is non-NULL *lenp contains an error code (<0):
+ * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE
+ * tag
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE, standard meanings
+ */
+const char *fdt_get_name(const void *fdt, int nodeoffset, int *lenp);
+
+/**
+ * fdt_first_property_offset - find the offset of a node's first property
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: structure block offset of a node
+ *
+ * fdt_first_property_offset() finds the first property of the node at
+ * the given structure block offset.
+ *
+ * returns:
+ * structure block offset of the property (>=0), on success
+ * -FDT_ERR_NOTFOUND, if the requested node has no properties
+ * -FDT_ERR_BADOFFSET, if nodeoffset did not point to an FDT_BEGIN_NODE tag
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_TRUNCATED, standard meanings.
+ */
+int fdt_first_property_offset(const void *fdt, int nodeoffset);
+
+/**
+ * fdt_next_property_offset - step through a node's properties
+ * @fdt: pointer to the device tree blob
+ * @offset: structure block offset of a property
+ *
+ * fdt_next_property_offset() finds the property immediately after the
+ * one at the given structure block offset. This will be a property
+ * of the same node as the given property.
+ *
+ * returns:
+ * structure block offset of the next property (>=0), on success
+ * -FDT_ERR_NOTFOUND, if the given property is the last in its node
+ * -FDT_ERR_BADOFFSET, if nodeoffset did not point to an FDT_PROP tag
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_TRUNCATED, standard meanings.
+ */
+int fdt_next_property_offset(const void *fdt, int offset);
+
+/**
+ * fdt_for_each_property_offset - iterate over all properties of a node
+ *
+ * @property_offset: property offset (int, lvalue)
+ * @fdt: FDT blob (const void *)
+ * @node: node offset (int)
+ *
+ * This is actually a wrapper around a for loop and would be used like so:
+ *
+ * fdt_for_each_property_offset(property, fdt, node) {
+ * Use property
+ * ...
+ * }
+ *
+ * if ((property < 0) && (property != -FDT_ERR_NOTFOUND)) {
+ * Error handling
+ * }
+ *
+ * Note that this is implemented as a macro and property is used as
+ * iterator in the loop. The node variable can be constant or even a
+ * literal.
+ */
+#define fdt_for_each_property_offset(property, fdt, node) \
+ for (property = fdt_first_property_offset(fdt, node); \
+ property >= 0; \
+ property = fdt_next_property_offset(fdt, property))
+
+/**
+ * fdt_get_property_by_offset - retrieve the property at a given offset
+ * @fdt: pointer to the device tree blob
+ * @offset: offset of the property to retrieve
+ * @lenp: pointer to an integer variable (will be overwritten) or NULL
+ *
+ * fdt_get_property_by_offset() retrieves a pointer to the
+ * fdt_property structure within the device tree blob at the given
+ * offset. If lenp is non-NULL, the length of the property value is
+ * also returned, in the integer pointed to by lenp.
+ *
+ * Note that this code only works on device tree versions >= 16. fdt_getprop()
+ * works on all versions.
+ *
+ * returns:
+ * pointer to the structure representing the property
+ * if lenp is non-NULL, *lenp contains the length of the property
+ * value (>=0)
+ * NULL, on error
+ * if lenp is non-NULL, *lenp contains an error code (<0):
+ * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_PROP tag
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_TRUNCATED, standard meanings
+ */
+const struct fdt_property *fdt_get_property_by_offset(const void *fdt,
+ int offset,
+ int *lenp);
+
+/**
+ * fdt_get_property_namelen - find a property based on substring
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node whose property to find
+ * @name: name of the property to find
+ * @namelen: number of characters of name to consider
+ * @lenp: pointer to an integer variable (will be overwritten) or NULL
+ *
+ * Identical to fdt_get_property(), but only examine the first namelen
+ * characters of name for matching the property name.
+ */
+#ifndef SWIG /* Not available in Python */
+const struct fdt_property *fdt_get_property_namelen(const void *fdt,
+ int nodeoffset,
+ const char *name,
+ int namelen, int *lenp);
+#endif
+
+/**
+ * fdt_get_property - find a given property in a given node
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node whose property to find
+ * @name: name of the property to find
+ * @lenp: pointer to an integer variable (will be overwritten) or NULL
+ *
+ * fdt_get_property() retrieves a pointer to the fdt_property
+ * structure within the device tree blob corresponding to the property
+ * named 'name' of the node at offset nodeoffset. If lenp is
+ * non-NULL, the length of the property value is also returned, in the
+ * integer pointed to by lenp.
+ *
+ * returns:
+ * pointer to the structure representing the property
+ * if lenp is non-NULL, *lenp contains the length of the property
+ * value (>=0)
+ * NULL, on error
+ * if lenp is non-NULL, *lenp contains an error code (<0):
+ * -FDT_ERR_NOTFOUND, node does not have named property
+ * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE
+ * tag
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_TRUNCATED, standard meanings
+ */
+const struct fdt_property *fdt_get_property(const void *fdt, int nodeoffset,
+ const char *name, int *lenp);
+static inline struct fdt_property *fdt_get_property_w(void *fdt, int nodeoffset,
+ const char *name,
+ int *lenp)
+{
+ return (struct fdt_property *)(uintptr_t)
+ fdt_get_property(fdt, nodeoffset, name, lenp);
+}
+
+/**
+ * fdt_getprop_by_offset - retrieve the value of a property at a given offset
+ * @fdt: pointer to the device tree blob
+ * @offset: offset of the property to read
+ * @namep: pointer to a string variable (will be overwritten) or NULL
+ * @lenp: pointer to an integer variable (will be overwritten) or NULL
+ *
+ * fdt_getprop_by_offset() retrieves a pointer to the value of the
+ * property at structure block offset 'offset' (this will be a pointer
+ * to within the device blob itself, not a copy of the value). If
+ * lenp is non-NULL, the length of the property value is also
+ * returned, in the integer pointed to by lenp. If namep is non-NULL,
+ * the property's name will also be returned in the char * pointed to
+ * by namep (this will be a pointer to within the device tree's string
+ * block, not a new copy of the name).
+ *
+ * returns:
+ * pointer to the property's value
+ * if lenp is non-NULL, *lenp contains the length of the property
+ * value (>=0)
+ * if namep is non-NULL *namep contains a pointer to the property
+ * name.
+ * NULL, on error
+ * if lenp is non-NULL, *lenp contains an error code (<0):
+ * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_PROP tag
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_TRUNCATED, standard meanings
+ */
+#ifndef SWIG /* This function is not useful in Python */
+const void *fdt_getprop_by_offset(const void *fdt, int offset,
+ const char **namep, int *lenp);
+#endif
+
+/**
+ * fdt_getprop_namelen - get property value based on substring
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node whose property to find
+ * @name: name of the property to find
+ * @namelen: number of characters of name to consider
+ * @lenp: pointer to an integer variable (will be overwritten) or NULL
+ *
+ * Identical to fdt_getprop(), but only examine the first namelen
+ * characters of name for matching the property name.
+ */
+#ifndef SWIG /* Not available in Python */
+const void *fdt_getprop_namelen(const void *fdt, int nodeoffset,
+ const char *name, int namelen, int *lenp);
+static inline void *fdt_getprop_namelen_w(void *fdt, int nodeoffset,
+ const char *name, int namelen,
+ int *lenp)
+{
+ return (void *)(uintptr_t)fdt_getprop_namelen(fdt, nodeoffset, name,
+ namelen, lenp);
+}
+#endif
+
+/**
+ * fdt_getprop - retrieve the value of a given property
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node whose property to find
+ * @name: name of the property to find
+ * @lenp: pointer to an integer variable (will be overwritten) or NULL
+ *
+ * fdt_getprop() retrieves a pointer to the value of the property
+ * named 'name' of the node at offset nodeoffset (this will be a
+ * pointer to within the device blob itself, not a copy of the value).
+ * If lenp is non-NULL, the length of the property value is also
+ * returned, in the integer pointed to by lenp.
+ *
+ * returns:
+ * pointer to the property's value
+ * if lenp is non-NULL, *lenp contains the length of the property
+ * value (>=0)
+ * NULL, on error
+ * if lenp is non-NULL, *lenp contains an error code (<0):
+ * -FDT_ERR_NOTFOUND, node does not have named property
+ * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE
+ * tag
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_TRUNCATED, standard meanings
+ */
+const void *fdt_getprop(const void *fdt, int nodeoffset,
+ const char *name, int *lenp);
+static inline void *fdt_getprop_w(void *fdt, int nodeoffset,
+ const char *name, int *lenp)
+{
+ return (void *)(uintptr_t)fdt_getprop(fdt, nodeoffset, name, lenp);
+}
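+
+/*
+ * Example (an illustrative sketch; the node path and property name are
+ * assumptions): reading a string property.
+ *
+ *     int node, len;
+ *     const char *bootargs;
+ *
+ *     node = fdt_path_offset(fdt, "/chosen");
+ *     if (node < 0)
+ *             return node;
+ *     bootargs = fdt_getprop(fdt, node, "bootargs", &len);
+ *     if (!bootargs)
+ *             return len;
+ *
+ * On failure, len holds the negative -FDT_ERR_* code.
+ */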
+
+/**
+ * fdt_get_phandle - retrieve the phandle of a given node
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: structure block offset of the node
+ *
+ * fdt_get_phandle() retrieves the phandle of the device tree node at
+ * structure block offset nodeoffset.
+ *
+ * returns:
+ * the phandle of the node at nodeoffset, on success (!= 0, != -1)
+ * 0, if the node has no phandle, or another error occurs
+ */
+uint32_t fdt_get_phandle(const void *fdt, int nodeoffset);
+
+/**
+ * fdt_get_alias_namelen - get alias based on substring
+ * @fdt: pointer to the device tree blob
+ * @name: name of the alias to look up
+ * @namelen: number of characters of name to consider
+ *
+ * Identical to fdt_get_alias(), but only examine the first namelen
+ * characters of name for matching the alias name.
+ */
+#ifndef SWIG /* Not available in Python */
+const char *fdt_get_alias_namelen(const void *fdt,
+ const char *name, int namelen);
+#endif
+
+/**
+ * fdt_get_alias - retrieve the path referenced by a given alias
+ * @fdt: pointer to the device tree blob
+ * @name: name of the alias to look up
+ *
+ * fdt_get_alias() retrieves the value of a given alias. That is, the
+ * value of the property named 'name' in the node /aliases.
+ *
+ * returns:
+ * a pointer to the expansion of the alias named 'name', if it exists
+ * NULL, if the given alias or the /aliases node does not exist
+ */
+const char *fdt_get_alias(const void *fdt, const char *name);
+
+/**
+ * fdt_get_path - determine the full path of a node
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node whose path to find
+ * @buf: character buffer to contain the returned path (will be overwritten)
+ * @buflen: size of the character buffer at buf
+ *
+ * fdt_get_path() computes the full path of the node at offset
+ * nodeoffset, and records that path in the buffer at buf.
+ *
+ * NOTE: This function is expensive, as it must scan the device tree
+ * structure from the start to nodeoffset.
+ *
+ * returns:
+ * 0, on success
+ * buf contains the absolute path of the node at
+ * nodeoffset, as a NUL-terminated string.
+ * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag
+ * -FDT_ERR_NOSPACE, the path of the given node is longer than (buflen-1)
+ * characters and will not fit in the given buffer.
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE, standard meanings
+ */
+int fdt_get_path(const void *fdt, int nodeoffset, char *buf, int buflen);
+
+/**
+ * fdt_supernode_atdepth_offset - find a specific ancestor of a node
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node whose parent to find
+ * @supernodedepth: depth of the ancestor to find
+ * @nodedepth: pointer to an integer variable (will be overwritten) or NULL
+ *
+ * fdt_supernode_atdepth_offset() finds an ancestor of the given node
+ * at a specific depth from the root (where the root itself has depth
+ * 0, its immediate subnodes depth 1 and so forth). So
+ * fdt_supernode_atdepth_offset(fdt, nodeoffset, 0, NULL);
+ * will always return 0, the offset of the root node. If the node at
+ * nodeoffset has depth D, then:
+ * fdt_supernode_atdepth_offset(fdt, nodeoffset, D, NULL);
+ * will return nodeoffset itself.
+ *
+ * NOTE: This function is expensive, as it must scan the device tree
+ * structure from the start to nodeoffset.
+ *
+ * returns:
+ * structure block offset of the node at node offset's ancestor
+ * of depth supernodedepth (>=0), on success
+ * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag
+ * -FDT_ERR_NOTFOUND, supernodedepth was greater than the depth of
+ * nodeoffset
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE, standard meanings
+ */
+int fdt_supernode_atdepth_offset(const void *fdt, int nodeoffset,
+ int supernodedepth, int *nodedepth);
+
+/**
+ * fdt_node_depth - find the depth of a given node
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node whose parent to find
+ *
+ * fdt_node_depth() finds the depth of a given node. The root node
+ * has depth 0, its immediate subnodes depth 1 and so forth.
+ *
+ * NOTE: This function is expensive, as it must scan the device tree
+ * structure from the start to nodeoffset.
+ *
+ * returns:
+ * depth of the node at nodeoffset (>=0), on success
+ * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE, standard meanings
+ */
+int fdt_node_depth(const void *fdt, int nodeoffset);
+
+/**
+ * fdt_parent_offset - find the parent of a given node
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node whose parent to find
+ *
+ * fdt_parent_offset() locates the parent node of a given node (that
+ * is, it finds the offset of the node which contains the node at
+ * nodeoffset as a subnode).
+ *
+ * NOTE: This function is expensive, as it must scan the device tree
+ * structure from the start to nodeoffset, *twice*.
+ *
+ * returns:
+ * structure block offset of the parent of the node at nodeoffset
+ * (>=0), on success
+ * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE, standard meanings
+ */
+int fdt_parent_offset(const void *fdt, int nodeoffset);
+
+/**
+ * fdt_node_offset_by_prop_value - find nodes with a given property value
+ * @fdt: pointer to the device tree blob
+ * @startoffset: only find nodes after this offset
+ * @propname: property name to check
+ * @propval: property value to search for
+ * @proplen: length of the value in propval
+ *
+ * fdt_node_offset_by_prop_value() returns the offset of the first
+ * node after startoffset, which has a property named propname whose
+ * value is of length proplen and has value equal to propval; or if
+ * startoffset is -1, the very first such node in the tree.
+ *
+ * To iterate through all nodes matching the criterion, the following
+ * idiom can be used:
+ * offset = fdt_node_offset_by_prop_value(fdt, -1, propname,
+ * propval, proplen);
+ * while (offset != -FDT_ERR_NOTFOUND) {
+ * // other code here
+ * offset = fdt_node_offset_by_prop_value(fdt, offset, propname,
+ * propval, proplen);
+ * }
+ *
+ * Note the -1 in the first call to the function: if 0 is used here
+ * instead, the function will never locate the root node, even if it
+ * matches the criterion.
+ *
+ * returns:
+ * structure block offset of the located node (>= 0, >startoffset),
+ * on success
+ * -FDT_ERR_NOTFOUND, no node matching the criterion exists in the
+ * tree after startoffset
+ * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE, standard meanings
+ */
+int fdt_node_offset_by_prop_value(const void *fdt, int startoffset,
+ const char *propname,
+ const void *propval, int proplen);
+
+/**
+ * fdt_node_offset_by_phandle - find the node with a given phandle
+ * @fdt: pointer to the device tree blob
+ * @phandle: phandle value
+ *
+ * fdt_node_offset_by_phandle() returns the offset of the node
+ * which has the given phandle value. If there is more than one node
+ * in the tree with the given phandle (an invalid tree), results are
+ * undefined.
+ *
+ * returns:
+ * structure block offset of the located node (>= 0), on success
+ * -FDT_ERR_NOTFOUND, no node with that phandle exists
+ * -FDT_ERR_BADPHANDLE, given phandle value was invalid (0 or -1)
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE, standard meanings
+ */
+int fdt_node_offset_by_phandle(const void *fdt, uint32_t phandle);
+
+/**
+ * fdt_node_check_compatible: check a node's compatible property
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of a tree node
+ * @compatible: string to match against
+ *
+ * fdt_node_check_compatible() returns 0 if the given node contains a
+ * 'compatible' property with the given string as one of its elements;
+ * it returns non-zero otherwise, or on error.
+ *
+ * returns:
+ * 0, if the node has a 'compatible' property listing the given string
+ * 1, if the node has a 'compatible' property, but it does not list
+ * the given string
+ * -FDT_ERR_NOTFOUND, if the given node has no 'compatible' property
+ * -FDT_ERR_BADOFFSET, if nodeoffset does not refer to a BEGIN_NODE tag
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE, standard meanings
+ */
+int fdt_node_check_compatible(const void *fdt, int nodeoffset,
+ const char *compatible);
+
+/**
+ * fdt_node_offset_by_compatible - find nodes with a given 'compatible' value
+ * @fdt: pointer to the device tree blob
+ * @startoffset: only find nodes after this offset
+ * @compatible: 'compatible' string to match against
+ *
+ * fdt_node_offset_by_compatible() returns the offset of the first
+ * node after startoffset, which has a 'compatible' property which
+ * lists the given compatible string; or if startoffset is -1, the
+ * very first such node in the tree.
+ *
+ * To iterate through all nodes matching the criterion, the following
+ * idiom can be used:
+ * offset = fdt_node_offset_by_compatible(fdt, -1, compatible);
+ * while (offset != -FDT_ERR_NOTFOUND) {
+ * // other code here
+ * offset = fdt_node_offset_by_compatible(fdt, offset, compatible);
+ * }
+ *
+ * Note the -1 in the first call to the function: if 0 is used here
+ * instead, the function will never locate the root node, even if it
+ * matches the criterion.
+ *
+ * returns:
+ * structure block offset of the located node (>= 0, >startoffset),
+ * on success
+ * -FDT_ERR_NOTFOUND, no node matching the criterion exists in the
+ * tree after startoffset
+ * -FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE, standard meanings
+ */
+int fdt_node_offset_by_compatible(const void *fdt, int startoffset,
+ const char *compatible);
+
+/**
+ * fdt_stringlist_contains - check a string list property for a string
+ * @strlist: Property containing a list of strings to check
+ * @listlen: Length of property
+ * @str: String to search for
+ *
+ * This is a utility function provided for convenience. The list contains
+ * one or more strings, each terminated by \0, as is found in a device tree
+ * "compatible" property.
+ *
+ * @return: 1 if the string is found in the list, 0 if it is not found
+ * or the list is invalid
+ */
+int fdt_stringlist_contains(const char *strlist, int listlen, const char *str);
+
+/**
+ * fdt_stringlist_count - count the number of strings in a string list
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of a tree node
+ * @property: name of the property containing the string list
+ * @return:
+ * the number of strings in the given property
+ * -FDT_ERR_BADVALUE if the property value is not NUL-terminated
+ * -FDT_ERR_NOTFOUND if the property does not exist
+ */
+int fdt_stringlist_count(const void *fdt, int nodeoffset, const char *property);
+
+/**
+ * fdt_stringlist_search - find a string in a string list and return its index
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of a tree node
+ * @property: name of the property containing the string list
+ * @string: string to look up in the string list
+ *
+ * Note that it is possible for this function to succeed on property values
+ * that are not NUL-terminated. That's because the function will stop after
+ * finding the first occurrence of @string. This can for example happen with
+ * small-valued cell properties, such as #address-cells, when searching for
+ * the empty string.
+ *
+ * @return:
+ * the index of the string in the list of strings
+ * -FDT_ERR_BADVALUE if the property value is not NUL-terminated
+ * -FDT_ERR_NOTFOUND if the property does not exist or does not contain
+ * the given string
+ */
+int fdt_stringlist_search(const void *fdt, int nodeoffset, const char *property,
+ const char *string);
+
+/**
+ * fdt_stringlist_get() - obtain the string at a given index in a string list
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of a tree node
+ * @property: name of the property containing the string list
+ * @index: index of the string to return
+ * @lenp: return location for the string length or an error code on failure
+ *
+ * Note that this will successfully extract strings from properties with
+ * non-NUL-terminated values. For example on small-valued cell properties
+ * this function will return the empty string.
+ *
+ * If non-NULL, the length of the string (on success) or a negative error-code
+ * (on failure) will be stored in the integer pointed to by lenp.
+ *
+ * @return:
+ * A pointer to the string at the given index in the string list or NULL on
+ * failure. On success the length of the string will be stored in the memory
+ * location pointed to by the lenp parameter, if non-NULL. On failure one of
+ * the following negative error codes will be returned in the lenp parameter
+ * (if non-NULL):
+ * -FDT_ERR_BADVALUE if the property value is not NUL-terminated
+ * -FDT_ERR_NOTFOUND if the property does not exist
+ */
+const char *fdt_stringlist_get(const void *fdt, int nodeoffset,
+ const char *property, int index,
+ int *lenp);
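+
+/*
+ * Example (an illustrative sketch; 'node' and the property name are
+ * assumptions): walking a string-list property with the helpers above.
+ *
+ *     int i, n = fdt_stringlist_count(fdt, node, "compatible");
+ *
+ *     for (i = 0; i < n; i++) {
+ *             int len;
+ *             const char *s = fdt_stringlist_get(fdt, node,
+ *                                                "compatible", i, &len);
+ *             if (s)
+ *                     ...use s and len...
+ *     }
+ */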
+
+/**********************************************************************/
+/* Read-only functions (addressing related) */
+/**********************************************************************/
+
+/**
+ * FDT_MAX_NCELLS - maximum value for #address-cells and #size-cells
+ *
+ * This is the maximum value for #address-cells, #size-cells and
+ * similar properties that will be processed by libfdt. IEEE 1275
+ * requires that OF implementations handle values up to 4.
+ * Implementations may support larger values, but in practice higher
+ * values aren't used.
+ */
+#define FDT_MAX_NCELLS 4
+
+/**
+ * fdt_address_cells - retrieve address size for a bus represented in the tree
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node to find the address size for
+ *
+ * When the node has a valid #address-cells property, returns its value.
+ *
+ * returns:
+ * 0 <= n < FDT_MAX_NCELLS, on success
+ * 2, if the node has no #address-cells property
+ * -FDT_ERR_BADNCELLS, if the node has a badly formatted or invalid
+ * #address-cells property
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_TRUNCATED, standard meanings
+ */
+int fdt_address_cells(const void *fdt, int nodeoffset);
+
+/**
+ * fdt_size_cells - retrieve address range size for a bus represented in the
+ * tree
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node to find the address range size for
+ *
+ * When the node has a valid #size-cells property, returns its value.
+ *
+ * returns:
+ * 0 <= n < FDT_MAX_NCELLS, on success
+ * 1, if the node has no #size-cells property
+ * -FDT_ERR_BADNCELLS, if the node has a badly formatted or invalid
+ * #size-cells property
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_TRUNCATED, standard meanings
+ */
+int fdt_size_cells(const void *fdt, int nodeoffset);
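+
+/*
+ * Example (an illustrative sketch; 'busnode' is an assumed node
+ * offset): reading a bus node's addressing geometry before parsing its
+ * children's "reg" properties.
+ *
+ *     int ac = fdt_address_cells(fdt, busnode);
+ *     int sc = fdt_size_cells(fdt, busnode);
+ *
+ *     if (ac < 0 || sc < 0)
+ *             return ac < 0 ? ac : sc;
+ *
+ * Each child "reg" entry then occupies (ac + sc) 32-bit cells.
+ */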
+
+
+/**********************************************************************/
+/* Write-in-place functions */
+/**********************************************************************/
+
+/**
+ * fdt_setprop_inplace_namelen_partial - change a property's value,
+ * but not its size
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node whose property to change
+ * @name: name of the property to change
+ * @namelen: number of characters of name to consider
+ * @idx: index of the property to change in the array
+ * @val: pointer to data to replace the property value with
+ * @len: length of the property value
+ *
+ * Identical to fdt_setprop_inplace(), but modifies the given property
+ * starting from the given index, and using only the first namelen
+ * characters of the name. It is useful when you want to manipulate only
+ * one value of an array and you have a string that doesn't end with \0.
+ */
+#ifndef SWIG /* Not available in Python */
+int fdt_setprop_inplace_namelen_partial(void *fdt, int nodeoffset,
+ const char *name, int namelen,
+ uint32_t idx, const void *val,
+ int len);
+#endif
+
+/**
+ * fdt_setprop_inplace - change a property's value, but not its size
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node whose property to change
+ * @name: name of the property to change
+ * @val: pointer to data to replace the property value with
+ * @len: length of the property value
+ *
+ * fdt_setprop_inplace() replaces the value of a given property with
+ * the data in val, of length len. This function cannot change the
+ * size of a property, and so will only work if len is equal to the
+ * current length of the property.
+ *
+ * This function will alter only the bytes in the blob which contain
+ * the given property value, and will not alter or move any other part
+ * of the tree.
+ *
+ * returns:
+ * 0, on success
+ * -FDT_ERR_NOSPACE, if len is not equal to the property's current length
+ * -FDT_ERR_NOTFOUND, node does not have the named property
+ * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_TRUNCATED, standard meanings
+ */
+#ifndef SWIG /* Not available in Python */
+int fdt_setprop_inplace(void *fdt, int nodeoffset, const char *name,
+ const void *val, int len);
+#endif
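+
+/*
+ * Example (an illustrative sketch; 'node', the property name and the
+ * value are assumptions): overwriting an existing 6-byte value without
+ * resizing the blob.
+ *
+ *     static const uint8_t mac[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
+ *     int err = fdt_setprop_inplace(fdt, node, "local-mac-address",
+ *                                   mac, sizeof(mac));
+ *
+ * This fails with -FDT_ERR_NOSPACE if the property is not already
+ * exactly 6 bytes long.
+ */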
+
+/**
+ * fdt_setprop_inplace_u32 - change the value of a 32-bit integer property
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node whose property to change
+ * @name: name of the property to change
+ * @val: 32-bit integer value to replace the property with
+ *
+ * fdt_setprop_inplace_u32() replaces the value of a given property
+ * with the 32-bit integer value in val, converting val to big-endian
+ * if necessary. This function cannot change the size of a property,
+ * and so will only work if the property already exists and has length
+ * 4.
+ *
+ * This function will alter only the bytes in the blob which contain
+ * the given property value, and will not alter or move any other part
+ * of the tree.
+ *
+ * returns:
+ * 0, on success
+ * -FDT_ERR_NOSPACE, if the property's length is not equal to 4
+ * -FDT_ERR_NOTFOUND, node does not have the named property
+ * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_TRUNCATED, standard meanings
+ */
+static inline int fdt_setprop_inplace_u32(void *fdt, int nodeoffset,
+ const char *name, uint32_t val)
+{
+ fdt32_t tmp = cpu_to_fdt32(val);
+ return fdt_setprop_inplace(fdt, nodeoffset, name, &tmp, sizeof(tmp));
+}
+
+/**
+ * fdt_setprop_inplace_u64 - change the value of a 64-bit integer property
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node whose property to change
+ * @name: name of the property to change
+ * @val: 64-bit integer value to replace the property with
+ *
+ * fdt_setprop_inplace_u64() replaces the value of a given property
+ * with the 64-bit integer value in val, converting val to big-endian
+ * if necessary. This function cannot change the size of a property,
+ * and so will only work if the property already exists and has length
+ * 8.
+ *
+ * This function will alter only the bytes in the blob which contain
+ * the given property value, and will not alter or move any other part
+ * of the tree.
+ *
+ * returns:
+ * 0, on success
+ * -FDT_ERR_NOSPACE, if the property's length is not equal to 8
+ * -FDT_ERR_NOTFOUND, node does not have the named property
+ * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_TRUNCATED, standard meanings
+ */
+static inline int fdt_setprop_inplace_u64(void *fdt, int nodeoffset,
+ const char *name, uint64_t val)
+{
+ fdt64_t tmp = cpu_to_fdt64(val);
+ return fdt_setprop_inplace(fdt, nodeoffset, name, &tmp, sizeof(tmp));
+}
+
+/**
+ * fdt_setprop_inplace_cell - change the value of a single-cell property
+ *
+ * This is an alternative name for fdt_setprop_inplace_u32()
+ */
+static inline int fdt_setprop_inplace_cell(void *fdt, int nodeoffset,
+ const char *name, uint32_t val)
+{
+ return fdt_setprop_inplace_u32(fdt, nodeoffset, name, val);
+}
+
+/**
+ * fdt_nop_property - replace a property with nop tags
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node whose property to nop
+ * @name: name of the property to nop
+ *
+ * fdt_nop_property() will replace a given property's representation
+ * in the blob with FDT_NOP tags, effectively removing it from the
+ * tree.
+ *
+ * This function will alter only the bytes in the blob which contain
+ * the property, and will not alter or move any other part of the
+ * tree.
+ *
+ * returns:
+ * 0, on success
+ * -FDT_ERR_NOTFOUND, node does not have the named property
+ * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_TRUNCATED, standard meanings
+ */
+int fdt_nop_property(void *fdt, int nodeoffset, const char *name);
+
+/**
+ * fdt_nop_node - replace a node (subtree) with nop tags
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node to nop
+ *
+ * fdt_nop_node() will replace a given node's representation in the
+ * blob, including all its subnodes, if any, with FDT_NOP tags,
+ * effectively removing it from the tree.
+ *
+ * This function will alter only the bytes in the blob which contain
+ * the node and its properties and subnodes, and will not alter or
+ * move any other part of the tree.
+ *
+ * returns:
+ * 0, on success
+ * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_TRUNCATED, standard meanings
+ */
+int fdt_nop_node(void *fdt, int nodeoffset);
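+
+/*
+ * Example (an illustrative sketch; the path is an assumption): dropping
+ * a whole subtree in place, without shifting any other offsets.
+ *
+ *     int node = fdt_path_offset(fdt, "/cpus/cpu@1");
+ *
+ *     if (node >= 0)
+ *             fdt_nop_node(fdt, node);
+ */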
+
+/**********************************************************************/
+/* Sequential write functions */
+/**********************************************************************/
+
+/* fdt_create_with_flags flags */
+#define FDT_CREATE_FLAG_NO_NAME_DEDUP 0x1
+ /* FDT_CREATE_FLAG_NO_NAME_DEDUP: Do not try to de-duplicate property
+ * names in the fdt. This can result in faster creation times, but
+ * a larger fdt. */
+
+#define FDT_CREATE_FLAGS_ALL (FDT_CREATE_FLAG_NO_NAME_DEDUP)
+
+/**
+ * fdt_create_with_flags - begin creation of a new fdt
+ * @fdt: pointer to memory allocated where fdt will be created
+ * @bufsize: size of the memory space at fdt
+ * @flags: a valid combination of FDT_CREATE_FLAG_ flags, or 0.
+ *
+ * fdt_create_with_flags() begins the process of creating a new fdt with
+ * the sequential write interface.
+ *
+ * The fdt creation process must end with fdt_finish() to produce a valid fdt.
+ *
+ * returns:
+ * 0, on success
+ * -FDT_ERR_NOSPACE, bufsize is insufficient for a minimal fdt
+ * -FDT_ERR_BADFLAGS, flags is not valid
+ */
+int fdt_create_with_flags(void *buf, int bufsize, uint32_t flags);
+
+/**
+ * fdt_create - begin creation of a new fdt
+ * @fdt: pointer to memory allocated where fdt will be created
+ * @bufsize: size of the memory space at fdt
+ *
+ * fdt_create() is equivalent to fdt_create_with_flags() with flags=0.
+ *
+ * returns:
+ * 0, on success
+ * -FDT_ERR_NOSPACE, bufsize is insufficient for a minimal fdt
+ */
+int fdt_create(void *buf, int bufsize);
+
+int fdt_resize(void *fdt, void *buf, int bufsize);
+int fdt_add_reservemap_entry(void *fdt, uint64_t addr, uint64_t size);
+int fdt_finish_reservemap(void *fdt);
+int fdt_begin_node(void *fdt, const char *name);
+int fdt_property(void *fdt, const char *name, const void *val, int len);
+static inline int fdt_property_u32(void *fdt, const char *name, uint32_t val)
+{
+ fdt32_t tmp = cpu_to_fdt32(val);
+ return fdt_property(fdt, name, &tmp, sizeof(tmp));
+}
+static inline int fdt_property_u64(void *fdt, const char *name, uint64_t val)
+{
+ fdt64_t tmp = cpu_to_fdt64(val);
+ return fdt_property(fdt, name, &tmp, sizeof(tmp));
+}
+
+#ifndef SWIG /* Not available in Python */
+static inline int fdt_property_cell(void *fdt, const char *name, uint32_t val)
+{
+ return fdt_property_u32(fdt, name, val);
+}
+#endif
+
+/**
+ * fdt_property_placeholder - add a new property and return a ptr to its value
+ *
+ * @fdt: pointer to the device tree blob
+ * @name: name of property to add
+ * @len: length of property value in bytes
+ * @valp: returns a pointer to where the value should be placed
+ *
+ * returns:
+ * 0, on success
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_NOSPACE, standard meanings
+ */
+int fdt_property_placeholder(void *fdt, const char *name, int len, void **valp);
+
+#define fdt_property_string(fdt, name, str) \
+ fdt_property(fdt, name, str, strlen(str)+1)
+int fdt_end_node(void *fdt);
+int fdt_finish(void *fdt);
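+
+/*
+ * Example (an illustrative sketch; 'buf'/'bufsize' and the property
+ * value are assumptions, and error checking is elided - each call
+ * returns 0 or a -FDT_ERR_* code): building a minimal tree with the
+ * sequential-write interface.
+ *
+ *     fdt_create(buf, bufsize);
+ *     fdt_finish_reservemap(buf);
+ *     fdt_begin_node(buf, "");
+ *     fdt_property_string(buf, "compatible", "example,board");
+ *     fdt_end_node(buf);
+ *     fdt_finish(buf);
+ */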
+
+/**********************************************************************/
+/* Read-write functions */
+/**********************************************************************/
+
+int fdt_create_empty_tree(void *buf, int bufsize);
+int fdt_open_into(const void *fdt, void *buf, int bufsize);
+int fdt_pack(void *fdt);
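+
+/*
+ * Example (an illustrative sketch; 'buf'/'bufsize' are assumptions):
+ * the usual read-write pattern is to expand the blob into a larger
+ * buffer, edit it, then pack it back down.
+ *
+ *     int err = fdt_open_into(fdt, buf, bufsize);
+ *
+ *     if (err < 0)
+ *             return err;
+ *     ...edit buf with fdt_setprop(), fdt_nop_node(), etc...
+ *     fdt_pack(buf);
+ */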
+
+/**
+ * fdt_add_mem_rsv - add one memory reserve map entry
+ * @fdt: pointer to the device tree blob
+ * @address, @size: 64-bit values (native endian)
+ *
+ * Adds a reserve map entry to the given blob reserving a region at
+ * address address of length size.
+ *
+ * This function will insert data into the reserve map and will
+ * therefore change the indexes of some entries in the table.
+ *
+ * returns:
+ * 0, on success
+ * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to
+ * contain the new reservation entry
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_BADLAYOUT,
+ * -FDT_ERR_TRUNCATED, standard meanings
+ */
+int fdt_add_mem_rsv(void *fdt, uint64_t address, uint64_t size);
+
+/**
+ * fdt_del_mem_rsv - remove a memory reserve map entry
+ * @fdt: pointer to the device tree blob
+ * @n: entry to remove
+ *
+ * fdt_del_mem_rsv() removes the n-th memory reserve map entry from
+ * the blob.
+ *
+ * This function will delete data from the reservation table and will
+ * therefore change the indexes of some entries in the table.
+ *
+ * returns:
+ * 0, on success
+ * -FDT_ERR_NOTFOUND, there is no entry with the given index (i.e. there
+ * are fewer than n+1 reserve map entries)
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_BADLAYOUT,
+ * -FDT_ERR_TRUNCATED, standard meanings
+ */
+int fdt_del_mem_rsv(void *fdt, int n);
+
+/**
+ * fdt_set_name - change the name of a given node
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: structure block offset of a node
+ * @name: name to give the node
+ *
+ * fdt_set_name() replaces the name (including unit address, if any)
+ * of the given node with the given string. NOTE: this function can't
+ * efficiently check if the new name is unique amongst the given
+ * node's siblings; results are undefined if this function is invoked
+ * with a name equal to one of the given node's siblings.
+ *
+ * This function may insert or delete data from the blob, and will
+ * therefore change the offsets of some existing nodes.
+ *
+ * returns:
+ * 0, on success
+ * -FDT_ERR_NOSPACE, there is insufficient free space in the blob
+ * to contain the new name
+ * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE, standard meanings
+ */
+int fdt_set_name(void *fdt, int nodeoffset, const char *name);
+
+/**
+ * fdt_setprop - create or change a property
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node whose property to change
+ * @name: name of the property to change
+ * @val: pointer to data to set the property value to
+ * @len: length of the property value
+ *
+ * fdt_setprop() sets the value of the named property in the given
+ * node to the given value and length, creating the property if it
+ * does not already exist.
+ *
+ * This function may insert or delete data from the blob, and will
+ * therefore change the offsets of some existing nodes.
+ *
+ * returns:
+ * 0, on success
+ * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to
+ * contain the new property value
+ * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag
+ * -FDT_ERR_BADLAYOUT,
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_TRUNCATED, standard meanings
+ */
+int fdt_setprop(void *fdt, int nodeoffset, const char *name,
+ const void *val, int len);
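+
+/*
+ * A minimal usage sketch (the path and property name are illustrative
+ * only; check the return values in real code):
+ *
+ *     uint8_t mac[6] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55};
+ *     int node = fdt_path_offset(fdt, "/soc/ethernet@0");
+ *     if (node >= 0)
+ *         fdt_setprop(fdt, node, "local-mac-address", mac, sizeof(mac));
+ */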
+
+/**
+ * fdt_setprop_placeholder - allocate space for a property
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node whose property to change
+ * @name: name of the property to change
+ * @len: length of the property value
+ * @prop_data: return pointer to property data
+ *
+ * fdt_setprop_placeholder() allocates the named property in the given node.
+ * If the property exists it is resized. In either case a pointer to the
+ * property data is returned.
+ *
+ * This function may insert or delete data from the blob, and will
+ * therefore change the offsets of some existing nodes.
+ *
+ * returns:
+ * 0, on success
+ * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to
+ * contain the new property value
+ * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag
+ * -FDT_ERR_BADLAYOUT,
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_TRUNCATED, standard meanings
+ */
+int fdt_setprop_placeholder(void *fdt, int nodeoffset, const char *name,
+ int len, void **prop_data);
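+
+/*
+ * A minimal sketch: reserve space first, then fill it in place, avoiding a
+ * separate staging buffer ("blob" and payload are illustrative names):
+ *
+ *     void *data;
+ *     int err = fdt_setprop_placeholder(fdt, node, "blob", 128, &data);
+ *     if (!err)
+ *         memcpy(data, payload, 128);   // data points into the fdt itself
+ */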
+
+/**
+ * fdt_setprop_u32 - set a property to a 32-bit integer
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node whose property to change
+ * @name: name of the property to change
+ * @val: 32-bit integer value for the property (native endian)
+ *
+ * fdt_setprop_u32() sets the value of the named property in the given
+ * node to the given 32-bit integer value (converting to big-endian if
+ * necessary), or creates a new property with that value if it does
+ * not already exist.
+ *
+ * This function may insert or delete data from the blob, and will
+ * therefore change the offsets of some existing nodes.
+ *
+ * returns:
+ * 0, on success
+ * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to
+ * contain the new property value
+ * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag
+ * -FDT_ERR_BADLAYOUT,
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_TRUNCATED, standard meanings
+ */
+static inline int fdt_setprop_u32(void *fdt, int nodeoffset, const char *name,
+ uint32_t val)
+{
+ fdt32_t tmp = cpu_to_fdt32(val);
+ return fdt_setprop(fdt, nodeoffset, name, &tmp, sizeof(tmp));
+}
+
+/**
+ * fdt_setprop_u64 - set a property to a 64-bit integer
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node whose property to change
+ * @name: name of the property to change
+ * @val: 64-bit integer value for the property (native endian)
+ *
+ * fdt_setprop_u64() sets the value of the named property in the given
+ * node to the given 64-bit integer value (converting to big-endian if
+ * necessary), or creates a new property with that value if it does
+ * not already exist.
+ *
+ * This function may insert or delete data from the blob, and will
+ * therefore change the offsets of some existing nodes.
+ *
+ * returns:
+ * 0, on success
+ * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to
+ * contain the new property value
+ * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag
+ * -FDT_ERR_BADLAYOUT,
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_TRUNCATED, standard meanings
+ */
+static inline int fdt_setprop_u64(void *fdt, int nodeoffset, const char *name,
+ uint64_t val)
+{
+ fdt64_t tmp = cpu_to_fdt64(val);
+ return fdt_setprop(fdt, nodeoffset, name, &tmp, sizeof(tmp));
+}
+
+/**
+ * fdt_setprop_cell - set a property to a single cell value
+ *
+ * This is an alternative name for fdt_setprop_u32()
+ */
+static inline int fdt_setprop_cell(void *fdt, int nodeoffset, const char *name,
+ uint32_t val)
+{
+ return fdt_setprop_u32(fdt, nodeoffset, name, val);
+}
+
+/**
+ * fdt_setprop_string - set a property to a string value
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node whose property to change
+ * @name: name of the property to change
+ * @str: string value for the property
+ *
+ * fdt_setprop_string() sets the value of the named property in the
+ * given node to the given string value (using the length of the
+ * string to determine the new length of the property), or creates a
+ * new property with that value if it does not already exist.
+ *
+ * This function may insert or delete data from the blob, and will
+ * therefore change the offsets of some existing nodes.
+ *
+ * returns:
+ * 0, on success
+ * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to
+ * contain the new property value
+ * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag
+ * -FDT_ERR_BADLAYOUT,
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_TRUNCATED, standard meanings
+ */
+#define fdt_setprop_string(fdt, nodeoffset, name, str) \
+ fdt_setprop((fdt), (nodeoffset), (name), (str), strlen(str)+1)
+
+
+/**
+ * fdt_setprop_empty - set a property to an empty value
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node whose property to change
+ * @name: name of the property to change
+ *
+ * fdt_setprop_empty() sets the value of the named property in the
+ * given node to an empty (zero length) value, or creates a new empty
+ * property if it does not already exist.
+ *
+ * This function may insert or delete data from the blob, and will
+ * therefore change the offsets of some existing nodes.
+ *
+ * returns:
+ * 0, on success
+ * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to
+ * contain the new property value
+ * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag
+ * -FDT_ERR_BADLAYOUT,
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_TRUNCATED, standard meanings
+ */
+#define fdt_setprop_empty(fdt, nodeoffset, name) \
+ fdt_setprop((fdt), (nodeoffset), (name), NULL, 0)
+
+/**
+ * fdt_appendprop - append to or create a property
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node whose property to change
+ * @name: name of the property to append to
+ * @val: pointer to data to append to the property value
+ * @len: length of the data to append to the property value
+ *
+ * fdt_appendprop() appends the value to the named property in the
+ * given node, creating the property if it does not already exist.
+ *
+ * This function may insert data into the blob, and will therefore
+ * change the offsets of some existing nodes.
+ *
+ * returns:
+ * 0, on success
+ * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to
+ * contain the new property value
+ * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag
+ * -FDT_ERR_BADLAYOUT,
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_TRUNCATED, standard meanings
+ */
+int fdt_appendprop(void *fdt, int nodeoffset, const char *name,
+ const void *val, int len);
+
+/**
+ * fdt_appendprop_u32 - append a 32-bit integer value to a property
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node whose property to change
+ * @name: name of the property to change
+ * @val: 32-bit integer value to append to the property (native endian)
+ *
+ * fdt_appendprop_u32() appends the given 32-bit integer value
+ * (converting to big-endian if necessary) to the value of the named
+ * property in the given node, or creates a new property with that
+ * value if it does not already exist.
+ *
+ * This function may insert data into the blob, and will therefore
+ * change the offsets of some existing nodes.
+ *
+ * returns:
+ * 0, on success
+ * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to
+ * contain the new property value
+ * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag
+ * -FDT_ERR_BADLAYOUT,
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_TRUNCATED, standard meanings
+ */
+static inline int fdt_appendprop_u32(void *fdt, int nodeoffset,
+ const char *name, uint32_t val)
+{
+ fdt32_t tmp = cpu_to_fdt32(val);
+ return fdt_appendprop(fdt, nodeoffset, name, &tmp, sizeof(tmp));
+}
+
+/**
+ * fdt_appendprop_u64 - append a 64-bit integer value to a property
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node whose property to change
+ * @name: name of the property to change
+ * @val: 64-bit integer value to append to the property (native endian)
+ *
+ * fdt_appendprop_u64() appends the given 64-bit integer value
+ * (converting to big-endian if necessary) to the value of the named
+ * property in the given node, or creates a new property with that
+ * value if it does not already exist.
+ *
+ * This function may insert data into the blob, and will therefore
+ * change the offsets of some existing nodes.
+ *
+ * returns:
+ * 0, on success
+ * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to
+ * contain the new property value
+ * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag
+ * -FDT_ERR_BADLAYOUT,
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_TRUNCATED, standard meanings
+ */
+static inline int fdt_appendprop_u64(void *fdt, int nodeoffset,
+ const char *name, uint64_t val)
+{
+ fdt64_t tmp = cpu_to_fdt64(val);
+ return fdt_appendprop(fdt, nodeoffset, name, &tmp, sizeof(tmp));
+}
+
+/**
+ * fdt_appendprop_cell - append a single cell value to a property
+ *
+ * This is an alternative name for fdt_appendprop_u32()
+ */
+static inline int fdt_appendprop_cell(void *fdt, int nodeoffset,
+ const char *name, uint32_t val)
+{
+ return fdt_appendprop_u32(fdt, nodeoffset, name, val);
+}
+
+/**
+ * fdt_appendprop_string - append a string to a property
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node whose property to change
+ * @name: name of the property to change
+ * @str: string value to append to the property
+ *
+ * fdt_appendprop_string() appends the given string to the value of
+ * the named property in the given node, or creates a new property
+ * with that value if it does not already exist.
+ *
+ * This function may insert data into the blob, and will therefore
+ * change the offsets of some existing nodes.
+ *
+ * returns:
+ * 0, on success
+ * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to
+ * contain the new property value
+ * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag
+ * -FDT_ERR_BADLAYOUT,
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_TRUNCATED, standard meanings
+ */
+#define fdt_appendprop_string(fdt, nodeoffset, name, str) \
+ fdt_appendprop((fdt), (nodeoffset), (name), (str), strlen(str)+1)
+
+/**
+ * fdt_appendprop_addrrange - append an address range property
+ * @fdt: pointer to the device tree blob
+ * @parent: offset of the parent node
+ * @nodeoffset: offset of the node to add a property at
+ * @name: name of property
+ * @addr: start address of a given range
+ * @size: size of a given range
+ *
+ * fdt_appendprop_addrrange() appends an address range value (start
+ * address and size) to the value of the named property in the given
+ * node, or creates a new property with that value if it does not
+ * already exist.
+ * If "name" is not specified, a default "reg" is used.
+ * Cell sizes are determined by parent's #address-cells and #size-cells.
+ *
+ * This function may insert data into the blob, and will therefore
+ * change the offsets of some existing nodes.
+ *
+ * returns:
+ * 0, on success
+ * -FDT_ERR_BADLAYOUT,
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADNCELLS, if the node has a badly formatted or invalid
+ * #address-cells property
+ * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADVALUE, addr or size doesn't fit to respective cells size
+ * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to
+ * contain a new property
+ * -FDT_ERR_TRUNCATED, standard meanings
+ */
+int fdt_appendprop_addrrange(void *fdt, int parent, int nodeoffset,
+ const char *name, uint64_t addr, uint64_t size);
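+
+/*
+ * A minimal sketch: append one address/size pair to /memory's "reg", with
+ * the cell layout taken from the root (parent) node; the values are
+ * illustrative only:
+ *
+ *     int mem = fdt_path_offset(fdt, "/memory");
+ *     if (mem >= 0)
+ *         fdt_appendprop_addrrange(fdt, 0, mem, "reg",
+ *                                  0x800000000, 0x200000000);
+ */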
+
+/**
+ * fdt_delprop - delete a property
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node whose property to nop
+ * @name: name of the property to nop
+ *
+ * fdt_delprop() will delete the given property.
+ *
+ * This function will delete data from the blob, and will therefore
+ * change the offsets of some existing nodes.
+ *
+ * returns:
+ * 0, on success
+ * -FDT_ERR_NOTFOUND, node does not have the named property
+ * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag
+ * -FDT_ERR_BADLAYOUT,
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_TRUNCATED, standard meanings
+ */
+int fdt_delprop(void *fdt, int nodeoffset, const char *name);
+
+/**
+ * fdt_add_subnode_namelen - creates a new node based on substring
+ * @fdt: pointer to the device tree blob
+ * @parentoffset: structure block offset of a node
+ * @name: name of the subnode to locate
+ * @namelen: number of characters of name to consider
+ *
+ * Identical to fdt_add_subnode(), but use only the first namelen
+ * characters of name as the name of the new node. This is useful for
+ * creating subnodes based on a portion of a larger string, such as a
+ * full path.
+ */
+#ifndef SWIG /* Not available in Python */
+int fdt_add_subnode_namelen(void *fdt, int parentoffset,
+ const char *name, int namelen);
+#endif
+
+/**
+ * fdt_add_subnode - creates a new node
+ * @fdt: pointer to the device tree blob
+ * @parentoffset: structure block offset of a node
+ * @name: name of the subnode to locate
+ *
+ * fdt_add_subnode() creates a new node as a subnode of the node at
+ * structure block offset parentoffset, with the given name (which
+ * should include the unit address, if any).
+ *
+ * This function will insert data into the blob, and will therefore
+ * change the offsets of some existing nodes.
+ *
+ * returns:
+ * structure block offset of the created subnode (>=0), on success
+ * -FDT_ERR_NOTFOUND, if the requested subnode does not exist
+ * -FDT_ERR_BADOFFSET, if parentoffset did not point to an FDT_BEGIN_NODE
+ * tag
+ * -FDT_ERR_EXISTS, if the node at parentoffset already has a subnode of
+ * the given name
+ * -FDT_ERR_NOSPACE, if there is insufficient free space in the
+ * blob to contain the new node
+ * -FDT_ERR_BADLAYOUT,
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_TRUNCATED, standard meanings.
+ */
+int fdt_add_subnode(void *fdt, int parentoffset, const char *name);
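+
+/*
+ * A minimal sketch: create a subnode of the root and populate it (offsets
+ * must be re-fetched after any call that may move data; names are
+ * illustrative):
+ *
+ *     int node = fdt_add_subnode(fdt, 0, "framebuffer@bc000000");
+ *     if (node >= 0)
+ *         fdt_setprop_string(fdt, node, "status", "okay");
+ */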
+
+/**
+ * fdt_del_node - delete a node (subtree)
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node to nop
+ *
+ * fdt_del_node() will remove the given node, including all its
+ * subnodes if any, from the blob.
+ *
+ * This function will delete data from the blob, and will therefore
+ * change the offsets of some existing nodes.
+ *
+ * returns:
+ * 0, on success
+ * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag
+ * -FDT_ERR_BADLAYOUT,
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_TRUNCATED, standard meanings
+ */
+int fdt_del_node(void *fdt, int nodeoffset);
+
+/**
+ * fdt_overlay_apply - Applies a DT overlay on a base DT
+ * @fdt: pointer to the base device tree blob
+ * @fdto: pointer to the device tree overlay blob
+ *
+ * fdt_overlay_apply() will apply the given device tree overlay on the
+ * given base device tree.
+ *
+ * Expect the base device tree to be modified, even if the function
+ * returns an error.
+ *
+ * returns:
+ * 0, on success
+ * -FDT_ERR_NOSPACE, there's not enough space in the base device tree
+ * -FDT_ERR_NOTFOUND, the overlay points to some nonexistent nodes or
+ * properties in the base DT
+ * -FDT_ERR_BADPHANDLE,
+ * -FDT_ERR_BADOVERLAY,
+ * -FDT_ERR_NOPHANDLES,
+ * -FDT_ERR_INTERNAL,
+ * -FDT_ERR_BADLAYOUT,
+ * -FDT_ERR_BADMAGIC,
+ * -FDT_ERR_BADOFFSET,
+ * -FDT_ERR_BADPATH,
+ * -FDT_ERR_BADVERSION,
+ * -FDT_ERR_BADSTRUCTURE,
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_TRUNCATED, standard meanings
+ */
+int fdt_overlay_apply(void *fdt, void *fdto);
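+
+/*
+ * A minimal sketch; per the note above, the base tree must be considered
+ * corrupted once this returns an error:
+ *
+ *     int err = fdt_overlay_apply(base, overlay);
+ *     if (err) {
+ *         printf("overlay failed: %s\n", fdt_strerror(err));
+ *         // reload `base` from a pristine copy before reusing it
+ *     }
+ */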
+
+/**********************************************************************/
+/* Debugging / informational functions */
+/**********************************************************************/
+
+const char *fdt_strerror(int errval);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* LIBFDT_H */
diff --git a/tools/src/libfdt/libfdt_env.h b/tools/src/libfdt/libfdt_env.h
new file mode 100644
index 0000000..6d028a4
--- /dev/null
+++ b/tools/src/libfdt/libfdt_env.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) */
+#ifndef LIBFDT_ENV_H
+#define LIBFDT_ENV_H
+/*
+ * libfdt - Flat Device Tree manipulation
+ * Copyright (C) 2006 David Gibson, IBM Corporation.
+ * Copyright 2012 Kim Phillips, Freescale Semiconductor.
+ */
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <limits.h>
+
+#ifdef __CHECKER__
+#define FDT_FORCE __attribute__((force))
+#define FDT_BITWISE __attribute__((bitwise))
+#else
+#define FDT_FORCE
+#define FDT_BITWISE
+#endif
+
+typedef uint16_t FDT_BITWISE fdt16_t;
+typedef uint32_t FDT_BITWISE fdt32_t;
+typedef uint64_t FDT_BITWISE fdt64_t;
+
+#define EXTRACT_BYTE(x, n) ((unsigned long long)((uint8_t *)&x)[n])
+#define CPU_TO_FDT16(x) ((EXTRACT_BYTE(x, 0) << 8) | EXTRACT_BYTE(x, 1))
+#define CPU_TO_FDT32(x) ((EXTRACT_BYTE(x, 0) << 24) | (EXTRACT_BYTE(x, 1) << 16) | \
+ (EXTRACT_BYTE(x, 2) << 8) | EXTRACT_BYTE(x, 3))
+#define CPU_TO_FDT64(x) ((EXTRACT_BYTE(x, 0) << 56) | (EXTRACT_BYTE(x, 1) << 48) | \
+ (EXTRACT_BYTE(x, 2) << 40) | (EXTRACT_BYTE(x, 3) << 32) | \
+ (EXTRACT_BYTE(x, 4) << 24) | (EXTRACT_BYTE(x, 5) << 16) | \
+ (EXTRACT_BYTE(x, 6) << 8) | EXTRACT_BYTE(x, 7))
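+
+/*
+ * These conversions produce big-endian (FDT) byte order regardless of host
+ * endianness: e.g. cpu_to_fdt32(0x11223344) always stores the bytes
+ * 0x11 0x22 0x33 0x44 in memory, so the macros byte-swap on little-endian
+ * hosts and are identities on big-endian ones.
+ */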
+
+static inline uint16_t fdt16_to_cpu(fdt16_t x)
+{
+ return (FDT_FORCE uint16_t)CPU_TO_FDT16(x);
+}
+static inline fdt16_t cpu_to_fdt16(uint16_t x)
+{
+ return (FDT_FORCE fdt16_t)CPU_TO_FDT16(x);
+}
+
+static inline uint32_t fdt32_to_cpu(fdt32_t x)
+{
+ return (FDT_FORCE uint32_t)CPU_TO_FDT32(x);
+}
+static inline fdt32_t cpu_to_fdt32(uint32_t x)
+{
+ return (FDT_FORCE fdt32_t)CPU_TO_FDT32(x);
+}
+
+static inline uint64_t fdt64_to_cpu(fdt64_t x)
+{
+ return (FDT_FORCE uint64_t)CPU_TO_FDT64(x);
+}
+static inline fdt64_t cpu_to_fdt64(uint64_t x)
+{
+ return (FDT_FORCE fdt64_t)CPU_TO_FDT64(x);
+}
+#undef CPU_TO_FDT64
+#undef CPU_TO_FDT32
+#undef CPU_TO_FDT16
+#undef EXTRACT_BYTE
+
+#ifdef __APPLE__
+#include <AvailabilityMacros.h>
+
+/* strnlen() is not available on Mac OS < 10.7 */
+# if !defined(MAC_OS_X_VERSION_10_7) || (MAC_OS_X_VERSION_MAX_ALLOWED < \
+ MAC_OS_X_VERSION_10_7)
+
+#define strnlen fdt_strnlen
+
+/*
+ * fdt_strnlen: returns the length of a string, or max_count, whichever is
+ * smaller.
+ * Input 1 string: the string whose length is to be determined
+ * Input 2 max_count: the maximum value returned by this function
+ * Output: length of the string or max_count (the smaller of the two)
+ */
+static inline size_t fdt_strnlen(const char *string, size_t max_count)
+{
+ const char *p = memchr(string, 0, max_count);
+ return p ? p - string : max_count;
+}
+
+#endif /* !defined(MAC_OS_X_VERSION_10_7) || (MAC_OS_X_VERSION_MAX_ALLOWED <
+ MAC_OS_X_VERSION_10_7) */
+
+#endif /* __APPLE__ */
+
+#endif /* LIBFDT_ENV_H */
diff --git a/tools/src/libfdt/libfdt_internal.h b/tools/src/libfdt/libfdt_internal.h
new file mode 100644
index 0000000..1a393d0
--- /dev/null
+++ b/tools/src/libfdt/libfdt_internal.h
@@ -0,0 +1,173 @@
+/* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) */
+#ifndef LIBFDT_INTERNAL_H
+#define LIBFDT_INTERNAL_H
+/*
+ * libfdt - Flat Device Tree manipulation
+ * Copyright (C) 2006 David Gibson, IBM Corporation.
+ */
+#include "fdt.h"
+
+#define FDT_ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))
+#define FDT_TAGALIGN(x) (FDT_ALIGN((x), FDT_TAGSIZE))
+
+int32_t fdt_ro_probe_(const void *fdt);
+#define FDT_RO_PROBE(fdt) \
+ { \
+ int32_t totalsize_; \
+ if ((totalsize_ = fdt_ro_probe_(fdt)) < 0) \
+ return totalsize_; \
+ }
+
+int fdt_check_node_offset_(const void *fdt, int offset);
+int fdt_check_prop_offset_(const void *fdt, int offset);
+const char *fdt_find_string_(const char *strtab, int tabsize, const char *s);
+int fdt_node_end_offset_(void *fdt, int nodeoffset);
+
+static inline const void *fdt_offset_ptr_(const void *fdt, int offset)
+{
+ return (const char *)fdt + fdt_off_dt_struct(fdt) + offset;
+}
+
+static inline void *fdt_offset_ptr_w_(void *fdt, int offset)
+{
+ return (void *)(uintptr_t)fdt_offset_ptr_(fdt, offset);
+}
+
+static inline const struct fdt_reserve_entry *fdt_mem_rsv_(const void *fdt, int n)
+{
+ const struct fdt_reserve_entry *rsv_table =
+ (const struct fdt_reserve_entry *)
+ ((const char *)fdt + fdt_off_mem_rsvmap(fdt));
+
+ return rsv_table + n;
+}
+static inline struct fdt_reserve_entry *fdt_mem_rsv_w_(void *fdt, int n)
+{
+ return (void *)(uintptr_t)fdt_mem_rsv_(fdt, n);
+}
+
+#define FDT_SW_MAGIC (~FDT_MAGIC)
+
+/**********************************************************************/
+/* Checking controls */
+/**********************************************************************/
+
+#ifndef FDT_ASSUME_MASK
+#define FDT_ASSUME_MASK 0
+#endif
+
+/*
+ * Defines assumptions which can be enabled. Each of these can be enabled
+ * individually. For maximum safety, don't enable any assumptions!
+ *
+ * For minimal code size and no safety, use ASSUME_PERFECT at your own risk.
+ * You should have another method of validating the device tree, such as a
+ * signature or hash check before using libfdt.
+ *
+ * For situations where security is not a concern it may be safe to enable
+ * ASSUME_SANE.
+ */
+enum {
+ /*
+ * This does essentially no checks. Only the latest device-tree
+ * version is correctly handled. Inconsistencies or errors in the device
+ * tree may cause undefined behaviour or crashes. Invalid parameters
+ * passed to libfdt may do the same.
+ *
+ * If an error occurs when modifying the tree it may leave the tree in
+ * an intermediate (but valid) state. As an example, adding a property
+ * where there is insufficient space may result in the property name
+ * being added to the string table even though the property itself is
+ * not added to the struct section.
+ *
+ * Only use this if you have a fully validated device tree with
+ * the latest supported version and wish to minimise code size.
+ */
+ ASSUME_PERFECT = 0xff,
+
+ /*
+ * This assumes that the device tree is sane, i.e. header metadata
+ * and basic hierarchy are correct.
+ *
+ * With this assumption enabled, normal device trees produced by libfdt
+ * and the compiler should be handled safely. Malicious device trees and
+ * complete garbage may cause libfdt to behave badly or crash. Truncated
+ * device trees (e.g. those only partially loaded) can also cause
+ * problems.
+ *
+ * Note: Only checks that relate exclusively to the device tree itself
+ * (not the parameters passed to libfdt) are disabled by this
+ * assumption. This includes checking headers, tags and the like.
+ */
+ ASSUME_VALID_DTB = 1 << 0,
+
+ /*
+ * This builds on ASSUME_VALID_DTB and further assumes that libfdt
+ * functions are called with valid parameters, i.e. they do not trigger
+ * FDT_ERR_BADOFFSET or pass offsets that are out of bounds. It disables any
+ * extensive checking of parameters and the device tree, making various
+ * assumptions about correctness.
+ *
+ * It doesn't make sense to enable this assumption unless
+ * ASSUME_VALID_DTB is also enabled.
+ */
+ ASSUME_VALID_INPUT = 1 << 1,
+
+ /*
+ * This disables checks for device-tree version and removes all code
+ * which handles older versions.
+ *
+ * Only enable this if you know you have a device tree with the latest
+ * version.
+ */
+ ASSUME_LATEST = 1 << 2,
+
+ /*
+ * This assumes that it is OK for a failed addition to the device tree,
+ * due to lack of space or some other problem, to skip any rollback
+ * steps (such as dropping the property name from the string table).
+ * This is safe to enable in most circumstances, even though it may
+ * leave the tree in a sub-optimal state.
+ */
+ ASSUME_NO_ROLLBACK = 1 << 3,
+
+ /*
+ * This assumes that the device tree components appear in a 'convenient'
+ * order, i.e. the memory reservation block first, then the structure
+ * block and finally the string block.
+ *
+ * This order is not specified by the device-tree specification,
+ * but is expected by libfdt. The device-tree compiler has always created
+ * device trees with this order.
+ *
+ * This assumption disables a check in fdt_open_into() and removes the
+ * ability to fix the problem there. This is safe if you know that the
+ * device tree is correctly ordered. See fdt_blocks_misordered_().
+ */
+ ASSUME_LIBFDT_ORDER = 1 << 4,
+
+ /*
+ * This assumes that libfdt itself does not have any internal bugs. It
+ * drops certain checks that should never be needed unless libfdt has an
+ * undiscovered bug.
+ *
+ * This can generally be considered safe to enable.
+ */
+ ASSUME_LIBFDT_FLAWLESS = 1 << 5,
+};
+
+/**
+ * can_assume_() - check if a particular assumption is enabled
+ *
+ * @mask: Mask to check (ASSUME_...)
+ * @return true if that assumption is enabled, else false
+ */
+static inline bool can_assume_(int mask)
+{
+ return FDT_ASSUME_MASK & mask;
+}
+
+/** helper macro for checking assumptions */
+#define can_assume(_assume) can_assume_(ASSUME_ ## _assume)
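+
+/*
+ * A minimal usage sketch: build with e.g.
+ * -DFDT_ASSUME_MASK='(ASSUME_VALID_DTB | ASSUME_LATEST)' and guard optional
+ * validation on the corresponding assumption:
+ *
+ *     if (!can_assume(VALID_DTB) && fdt_check_header(fdt))
+ *         return -FDT_ERR_BADMAGIC;
+ */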
+
+#endif /* LIBFDT_INTERNAL_H */
diff --git a/tools/src/main.c b/tools/src/main.c
new file mode 100644
index 0000000..8d69e03
--- /dev/null
+++ b/tools/src/main.c
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "../build/build_cfg.h"
+#include "../build/build_tag.h"
+
+#include "../config.h"
+
+#include "adt.h"
+#include "aic.h"
+#include "clk.h"
+#include "cpufreq.h"
+#include "display.h"
+#include "exception.h"
+#include "fb.h"
+#include "firmware.h"
+#include "gxf.h"
+#include "heapblock.h"
+#include "mcc.h"
+#include "memory.h"
+#include "nvme.h"
+#include "payload.h"
+#include "pcie.h"
+#include "pmgr.h"
+#include "sep.h"
+#include "smp.h"
+#include "string.h"
+#include "tunables.h"
+#include "uart.h"
+#include "uartproxy.h"
+#include "usb.h"
+#include "utils.h"
+#include "wdt.h"
+#include "xnuboot.h"
+
+struct vector_args next_stage;
+
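+// "##m1n1_ver##" is a 12-character marker that makes the version string easy
+// to locate in the binary; m1n1_version points just past it (hence the + 12).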
+const char version_tag[] = "##m1n1_ver##" BUILD_TAG;
+const char *const m1n1_version = version_tag + 12;
+
+u32 board_id = ~0, chip_id = ~0;
+
+void get_device_info(void)
+{
+ printf("Device info:\n");
+ printf(" Model: %s\n", (const char *)adt_getprop(adt, 0, "model", NULL));
+ printf(" Target: %s\n", (const char *)adt_getprop(adt, 0, "target-type", NULL));
+
+ int chosen = adt_path_offset(adt, "/chosen");
+ if (chosen > 0) {
+ if (ADT_GETPROP(adt, chosen, "board-id", &board_id) < 0)
+ printf("Failed to find board-id\n");
+ if (ADT_GETPROP(adt, chosen, "chip-id", &chip_id) < 0)
+ printf("Failed to find chip-id\n");
+
+ printf(" Board-ID: 0x%x\n", board_id);
+ printf(" Chip-ID: 0x%x\n", chip_id);
+ } else {
+ printf("No chosen node!\n");
+ }
+
+ printf("\n");
+}
+
+void run_actions(void)
+{
+ bool usb_up = false;
+
+#ifndef BRINGUP
+#ifdef EARLY_PROXY_TIMEOUT
+ int node = adt_path_offset(adt, "/chosen/asmb");
+ u64 lp_sip0 = 0;
+
+ if (node >= 0) {
+ ADT_GETPROP(adt, node, "lp-sip0", &lp_sip0);
+ printf("Boot policy: sip0 = %ld\n", lp_sip0);
+ }
+
+ if (!cur_boot_args.video.display && lp_sip0 == 127) {
+ printf("Bringing up USB for early debug...\n");
+
+ usb_init();
+ usb_iodev_init();
+
+ usb_up = true;
+
+ printf("Waiting for proxy connection... ");
+ for (int i = 0; i < EARLY_PROXY_TIMEOUT * 100; i++) {
+ for (int j = 0; j < USB_IODEV_COUNT; j++) {
+ iodev_id_t iodev = IODEV_USB0 + j;
+
+ if (!(iodev_get_usage(iodev) & USAGE_UARTPROXY))
+ continue;
+
+ usb_iodev_vuart_setup(iodev);
+ iodev_handle_events(iodev);
+ if (iodev_can_write(iodev) || iodev_can_write(IODEV_USB_VUART)) {
+ printf(" Connected!\n");
+ uartproxy_run(NULL);
+ return;
+ }
+ }
+
+ mdelay(10);
+ if (i % 100 == 99)
+ printf(".");
+ }
+ printf(" Timed out\n");
+ }
+#endif
+#endif
+
+ printf("Checking for payloads...\n");
+
+ if (payload_run() == 0) {
+ printf("Valid payload found\n");
+ return;
+ }
+
+ fb_set_active(true);
+
+ printf("No valid payload found\n");
+
+#ifndef BRINGUP
+ if (!usb_up) {
+ usb_init();
+ usb_iodev_init();
+ }
+#endif
+
+ printf("Running proxy...\n");
+
+ uartproxy_run(NULL);
+}
+
+void m1n1_main(void)
+{
+ printf("\n\nm1n1 %s\n", m1n1_version);
+ printf("Copyright The Asahi Linux Contributors\n");
+ printf("Licensed under the MIT license\n\n");
+
+ printf("Running in EL%lu\n\n", mrs(CurrentEL) >> 2);
+
+ get_device_info();
+ firmware_init();
+
+ heapblock_init();
+
+#ifndef BRINGUP
+ gxf_init();
+ mcc_init();
+ mmu_init();
+ aic_init();
+#endif
+ wdt_disable();
+#ifndef BRINGUP
+ pmgr_init();
+ tunables_apply_static();
+
+#ifdef USE_FB
+ display_init();
+ // Kick DCP to sleep, so dodgy monitors which cause reconnect cycles don't cause us to lose the
+ // framebuffer.
+ display_shutdown(DCP_SLEEP_IF_EXTERNAL);
+ fb_init(false);
+ fb_display_logo();
+#ifdef FB_SILENT_MODE
+ fb_set_active(!cur_boot_args.video.display);
+#else
+ fb_set_active(true);
+#endif
+#endif
+
+ clk_init();
+ cpufreq_init();
+ sep_init();
+#endif
+
+ printf("Initialization complete.\n");
+
+ run_actions();
+
+ if (!next_stage.entry) {
+ panic("Nothing to do!\n");
+ }
+
+ printf("Preparing to run next stage at %p...\n", next_stage.entry);
+
+ nvme_shutdown();
+ exception_shutdown();
+#ifndef BRINGUP
+ usb_iodev_shutdown();
+ display_shutdown(DCP_SLEEP_IF_EXTERNAL);
+#ifdef USE_FB
+ fb_shutdown(next_stage.restore_logo);
+#endif
+ mmu_shutdown();
+#endif
+
+ printf("Vectoring to next stage...\n");
+
+ next_stage.entry(next_stage.args[0], next_stage.args[1], next_stage.args[2], next_stage.args[3],
+ next_stage.args[4]);
+
+ panic("Next stage returned!\n");
+}
diff --git a/tools/src/math/exp2f_data.c b/tools/src/math/exp2f_data.c
new file mode 100644
index 0000000..38c9333
--- /dev/null
+++ b/tools/src/math/exp2f_data.c
@@ -0,0 +1,42 @@
+/*
+ * Shared data between expf, exp2f and powf.
+ *
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "exp2f_data.h"
+
+#define N (1 << EXP2F_TABLE_BITS)
+
+const struct exp2f_data __exp2f_data = {
+ /* tab[i] = uint(2^(i/N)) - (i << 52-BITS)
+ used for computing 2^(k/N) for an int |k| < 150 N as
+ double(tab[k%N] + (k << 52-BITS)) */
+ .tab =
+ {
+ 0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51,
+ 0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1,
+ 0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d,
+ 0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585,
+ 0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13,
+ 0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d,
+ 0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069,
+ 0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,
+ },
+ .shift_scaled = 0x1.8p+52 / N,
+ .poly =
+ {
+ 0x1.c6af84b912394p-5,
+ 0x1.ebfce50fac4f3p-3,
+ 0x1.62e42ff0c52d6p-1,
+ },
+ .shift = 0x1.8p+52,
+ .invln2_scaled = 0x1.71547652b82fep+0 * N,
+ .poly_scaled =
+ {
+ 0x1.c6af84b912394p-5 / N / N / N,
+ 0x1.ebfce50fac4f3p-3 / N / N,
+ 0x1.62e42ff0c52d6p-1 / N,
+ },
+};
diff --git a/tools/src/math/exp2f_data.h b/tools/src/math/exp2f_data.h
new file mode 100644
index 0000000..a88c6ce
--- /dev/null
+++ b/tools/src/math/exp2f_data.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef _EXP2F_DATA_H
+#define _EXP2F_DATA_H
+
+#include <stdint.h>
+
+/* Shared between expf, exp2f and powf. */
+#define EXP2F_TABLE_BITS 5
+#define EXP2F_POLY_ORDER 3
+extern const struct exp2f_data {
+ uint64_t tab[1 << EXP2F_TABLE_BITS];
+ double shift_scaled;
+ double poly[EXP2F_POLY_ORDER];
+ double shift;
+ double invln2_scaled;
+ double poly_scaled[EXP2F_POLY_ORDER];
+} __exp2f_data;
+
+#endif
diff --git a/tools/src/math/expf.c b/tools/src/math/expf.c
new file mode 100644
index 0000000..c9f1b3f
--- /dev/null
+++ b/tools/src/math/expf.c
@@ -0,0 +1,83 @@
+/*
+ * Single-precision e^x function.
+ *
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <math.h>
+#include <stdint.h>
+
+#include "exp2f_data.h"
+#include "libm.h"
+
+#define double_t double
+
+/*
+EXP2F_TABLE_BITS = 5
+EXP2F_POLY_ORDER = 3
+
+ULP error: 0.502 (nearest rounding.)
+Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.)
+Wrong count: 170635 (all nearest rounding wrong results with fma.)
+Non-nearest ULP error: 1 (rounded ULP error)
+*/
+
+#define N (1 << EXP2F_TABLE_BITS)
+#define InvLn2N __exp2f_data.invln2_scaled
+#define T __exp2f_data.tab
+#define C __exp2f_data.poly_scaled
+
+static inline uint32_t top12(float x)
+{
+ return asuint(x) >> 20;
+}
+
+float expf(float x)
+{
+ uint32_t abstop;
+ uint64_t ki, t;
+ double_t kd, xd, z, r, r2, y, s;
+
+ xd = (double_t)x;
+ abstop = top12(x) & 0x7ff;
+ if (predict_false(abstop >= top12(88.0f))) {
+ /* |x| >= 88 or x is nan. */
+ if (asuint(x) == asuint(-INFINITY))
+ return 0.0f;
+ if (abstop >= top12(INFINITY))
+ return x + x;
+ if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
+ return __math_oflowf(0);
+ if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
+ return __math_uflowf(0);
+ }
+
+ /* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k. */
+ z = InvLn2N * xd;
+
+ /* Round and convert z to int, the result is in [-150*N, 128*N] and
+ ideally ties-to-even rule is used, otherwise the magnitude of r
+ can be bigger which gives larger approximation error. */
+#if TOINT_INTRINSICS
+ kd = roundtoint(z);
+ ki = converttoint(z);
+#else
+#define SHIFT __exp2f_data.shift
+ kd = eval_as_double(z + SHIFT);
+ ki = asuint64(kd);
+ kd -= SHIFT;
+#endif
+ r = z - kd;
+
+ /* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
+ t = T[ki % N];
+ t += ki << (52 - EXP2F_TABLE_BITS);
+ s = asdouble(t);
+ z = C[0] * r + C[1];
+ r2 = r * r;
+ y = C[2] * r + 1;
+ y = z * r2 + y;
+ y = y * s;
+ return eval_as_float(y);
+}
diff --git a/tools/src/math/libm.h b/tools/src/math/libm.h
new file mode 100644
index 0000000..1616c74
--- /dev/null
+++ b/tools/src/math/libm.h
@@ -0,0 +1,271 @@
+#ifndef _LIBM_H
+#define _LIBM_H
+
+#include <endian.h>
+#include <float.h>
+#include <math.h>
+#include <stdint.h>
+
+#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
+#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN
+union ldshape {
+ long double f;
+ struct {
+ uint64_t m;
+ uint16_t se;
+ } i;
+};
+#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN
+/* This is the m68k variant of 80-bit long double, and this definition only works
+ * on archs where the alignment requirement of uint64_t is <= 4. */
+union ldshape {
+ long double f;
+ struct {
+ uint16_t se;
+ uint16_t pad;
+ uint64_t m;
+ } i;
+};
+#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN
+union ldshape {
+ long double f;
+ struct {
+ uint64_t lo;
+ uint32_t mid;
+ uint16_t top;
+ uint16_t se;
+ } i;
+ struct {
+ uint64_t lo;
+ uint64_t hi;
+ } i2;
+};
+#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN
+union ldshape {
+ long double f;
+ struct {
+ uint16_t se;
+ uint16_t top;
+ uint32_t mid;
+ uint64_t lo;
+ } i;
+ struct {
+ uint64_t hi;
+ uint64_t lo;
+ } i2;
+};
+#else
+#error Unsupported long double representation
+#endif
+
+/* Support non-nearest rounding mode. */
+#define WANT_ROUNDING 1
+/* Support signaling NaNs. */
+#define WANT_SNAN 0
+
+#if WANT_SNAN
+#error SNaN is unsupported
+#else
+#define issignalingf_inline(x) 0
+#define issignaling_inline(x) 0
+#endif
+
+#ifndef TOINT_INTRINSICS
+#define TOINT_INTRINSICS 0
+#endif
+
+#if TOINT_INTRINSICS
+/* Round x to nearest int in all rounding modes, ties have to be rounded
+ consistently with converttoint so the results match. If the result
+ would be outside of [-2^31, 2^31-1] then the semantics is unspecified. */
+static double_t roundtoint(double_t);
+
+/* Convert x to nearest int in all rounding modes, ties have to be rounded
+ consistently with roundtoint. If the result is not representible in an
+ int32_t then the semantics is unspecified. */
+static int32_t converttoint(double_t);
+#endif
+
+/* Helps static branch prediction so hot path can be better optimized. */
+#ifdef __GNUC__
+#define predict_true(x) __builtin_expect(!!(x), 1)
+#define predict_false(x) __builtin_expect(x, 0)
+#else
+#define predict_true(x) (x)
+#define predict_false(x) (x)
+#endif
+
+/* Evaluate an expression as the specified type. With standard excess
+ precision handling a type cast or assignment is enough (with
+ -ffloat-store an assignment is required; in old compilers argument
+ passing and return statements may not drop excess precision). */
+
+static inline float eval_as_float(float x)
+{
+ float y = x;
+ return y;
+}
+
+static inline double eval_as_double(double x)
+{
+ double y = x;
+ return y;
+}
+
+/* fp_barrier returns its input, but limits code transformations
+ as if it had a side-effect (e.g. observable io) and returned
+ an arbitrary value. */
+
+#ifndef fp_barrierf
+#define fp_barrierf fp_barrierf
+static inline float fp_barrierf(float x)
+{
+ volatile float y = x;
+ return y;
+}
+#endif
+
+#ifndef fp_barrier
+#define fp_barrier fp_barrier
+static inline double fp_barrier(double x)
+{
+ volatile double y = x;
+ return y;
+}
+#endif
+
+#ifndef fp_barrierl
+#define fp_barrierl fp_barrierl
+static inline long double fp_barrierl(long double x)
+{
+ volatile long double y = x;
+ return y;
+}
+#endif
+
+/* fp_force_eval ensures that the input value is computed when that's
+ otherwise unused. To prevent the constant folding of the input
+ expression, an additional fp_barrier may be needed or a compilation
+ mode that does so (e.g. -frounding-math in gcc). Then it can be
+ used to evaluate an expression for its fenv side-effects only. */
+
+#ifndef fp_force_evalf
+#define fp_force_evalf fp_force_evalf
+static inline void fp_force_evalf(float x)
+{
+ volatile float y;
+ y = x;
+ (void)y;
+}
+#endif
+
+#ifndef fp_force_eval
+#define fp_force_eval fp_force_eval
+static inline void fp_force_eval(double x)
+{
+ volatile double y;
+ y = x;
+ (void)y;
+}
+#endif
+
+#ifndef fp_force_evall
+#define fp_force_evall fp_force_evall
+static inline void fp_force_evall(long double x)
+{
+ volatile long double y;
+ y = x;
+ (void)y;
+}
+#endif
+
+#define FORCE_EVAL(x) \
+ do { \
+ if (sizeof(x) == sizeof(float)) { \
+ fp_force_evalf(x); \
+ } else if (sizeof(x) == sizeof(double)) { \
+ fp_force_eval(x); \
+ } else { \
+ fp_force_evall(x); \
+ } \
+ } while (0)
+
+#define asuint(f) \
+ ((union { \
+ float _f; \
+ uint32_t _i; \
+ }){f}) \
+ ._i
+#define asfloat(i) \
+ ((union { \
+ uint32_t _i; \
+ float _f; \
+ }){i}) \
+ ._f
+#define asuint64(f) \
+ ((union { \
+ double _f; \
+ uint64_t _i; \
+ }){f}) \
+ ._i
+#define asdouble(i) \
+ ((union { \
+ uint64_t _i; \
+ double _f; \
+ }){i}) \
+ ._f
+
+#define EXTRACT_WORDS(hi, lo, d) \
+ do { \
+ uint64_t __u = asuint64(d); \
+ (hi) = __u >> 32; \
+ (lo) = (uint32_t)__u; \
+ } while (0)
+
+#define GET_HIGH_WORD(hi, d) \
+ do { \
+ (hi) = asuint64(d) >> 32; \
+ } while (0)
+
+#define GET_LOW_WORD(lo, d) \
+ do { \
+ (lo) = (uint32_t)asuint64(d); \
+ } while (0)
+
+#define INSERT_WORDS(d, hi, lo) \
+ do { \
+ (d) = asdouble(((uint64_t)(hi) << 32) | (uint32_t)(lo)); \
+ } while (0)
+
+#define SET_HIGH_WORD(d, hi) INSERT_WORDS(d, hi, (uint32_t)asuint64(d))
+
+#define SET_LOW_WORD(d, lo) INSERT_WORDS(d, asuint64(d) >> 32, lo)
+
+#define GET_FLOAT_WORD(w, d) \
+ do { \
+ (w) = asuint(d); \
+ } while (0)
+
+#define SET_FLOAT_WORD(d, w) \
+ do { \
+ (d) = asfloat(w); \
+ } while (0)
+
+/* error handling functions */
+
+static inline float __math_xflowf(uint32_t sign, float y)
+{
+ return eval_as_float(fp_barrierf(sign ? -y : y) * y);
+}
+
+static inline float __math_oflowf(uint32_t sign)
+{
+ return __math_xflowf(sign, 0x1p97f);
+}
+
+static inline float __math_uflowf(uint32_t sign)
+{
+ return __math_xflowf(sign, 0x1p-95f);
+}
+
+#endif
diff --git a/tools/src/mcc.c b/tools/src/mcc.c
new file mode 100644
index 0000000..d687d73
--- /dev/null
+++ b/tools/src/mcc.c
@@ -0,0 +1,271 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "mcc.h"
+#include "adt.h"
+#include "hv.h"
+#include "memory.h"
+#include "string.h"
+#include "utils.h"
+
+static bool mcc_initialized = false;
+
+#define MAX_MCC_INSTANCES 16
+
+#define T8103_PLANES 4
+#define T8103_PLANE_STRIDE 0x40000
+#define T8103_DCS_STRIDE 0x40000
+
+#define T6000_PLANES 4
+#define T6000_PLANE_OFFSET 0
+#define T6000_PLANE_STRIDE 0x40000
+#define T6000_GLOBAL_OFFSET 0x100000
+#define T6000_DCS_OFFSET 0x200000
+#define T6000_DCS_STRIDE 0x100000
+#define T6000_DCS_COUNT 4
+
+#define PLANE_TZ_START(i) (0x6a0 + (i) * 0x10)
+#define PLANE_TZ_END(i) (0x6a4 + (i) * 0x10)
+#define PLANE_TZ_ENABLE(i) (0x6a8 + (i) * 0x10)
+#define PLANE_TZ_REGS 4
+
+#define PLANE_CACHE_ENABLE 0x1c00
+#define PLANE_CACHE_STATUS 0x1c04
+
+#define T8103_CACHE_STATUS_DATA_COUNT GENMASK(14, 10)
+#define T8103_CACHE_STATUS_TAG_COUNT GENMASK(9, 5)
+
+#define T6000_CACHE_STATUS_DATA_COUNT GENMASK(13, 9)
+#define T6000_CACHE_STATUS_TAG_COUNT GENMASK(8, 4)
+
+#define T6000_CACHE_WAYS 12
+#define T6000_CACHE_STATUS_MASK (T6000_CACHE_STATUS_DATA_COUNT | T6000_CACHE_STATUS_TAG_COUNT)
+#define T6000_CACHE_STATUS_VAL \
+ (FIELD_PREP(T6000_CACHE_STATUS_DATA_COUNT, T6000_CACHE_WAYS) | \
+ FIELD_PREP(T6000_CACHE_STATUS_TAG_COUNT, T6000_CACHE_WAYS))
+
+#define T8103_CACHE_WAYS 16
+#define T8103_CACHE_STATUS_MASK (T8103_CACHE_STATUS_DATA_COUNT | T8103_CACHE_STATUS_TAG_COUNT)
+#define T8103_CACHE_STATUS_VAL \
+ (FIELD_PREP(T8103_CACHE_STATUS_DATA_COUNT, T8103_CACHE_WAYS) | \
+ FIELD_PREP(T8103_CACHE_STATUS_TAG_COUNT, T8103_CACHE_WAYS))
+
+#define CACHE_ENABLE_TIMEOUT 10000
+
+#define T8103_DCC_DRAMCFG0 0xdc4
+#define T8103_DCC_DRAMCFG1 0xdbc
+#define T8103_DCC_DRAMCFG0_DEFAULT 0x813057f
+#define T8103_DCC_DRAMCFG1_DEFAULT 0x1800180
+#define T8103_DCC_DRAMCFG0_FAST 0x133
+#define T8103_DCC_DRAMCFG1_FAST 0x55555340
+
+#define T6000_DCC_DRAMCFG 0x13cc
+#define T6000_DCC_DRAMCFG_DEFAULT 0x55551555
+#define T6000_DCC_DRAMCFG_FAST 0xffff0000
+
+size_t mcc_carveout_count;
+struct mcc_carveout mcc_carveouts[PLANE_TZ_REGS + 1];
+
+struct mcc_regs {
+ u64 plane_base;
+ u64 plane_stride;
+ int plane_count;
+
+ u64 global_base;
+
+ u64 dcs_base;
+ u64 dcs_stride;
+ int dcs_count;
+
+ int cache_ways;
+ u32 cache_status_mask;
+ u32 cache_status_val;
+};
+
+static int mcc_count;
+static struct mcc_regs mcc_regs[MAX_MCC_INSTANCES];
+
+static u32 plane_read32(int mcc, int plane, u64 offset)
+{
+ return read32(mcc_regs[mcc].plane_base + plane * mcc_regs[mcc].plane_stride + offset);
+}
+
+static void plane_write32(int mcc, int plane, u64 offset, u32 value)
+{
+ write32(mcc_regs[mcc].plane_base + plane * mcc_regs[mcc].plane_stride + offset, value);
+}
+
+static int plane_poll32(int mcc, int plane, u64 offset, u32 mask, u32 target, u32 timeout)
+{
+ return poll32(mcc_regs[mcc].plane_base + plane * mcc_regs[mcc].plane_stride + offset, mask,
+ target, timeout);
+}
+
+static void mcc_enable_cache(void)
+{
+ if (!mcc_initialized)
+ return;
+
+ for (int mcc = 0; mcc < mcc_count; mcc++) {
+ for (int plane = 0; plane < mcc_regs[mcc].plane_count; plane++) {
+ plane_write32(mcc, plane, PLANE_CACHE_ENABLE, mcc_regs[mcc].cache_ways);
+ if (plane_poll32(mcc, plane, PLANE_CACHE_STATUS, mcc_regs[mcc].cache_status_mask,
+ mcc_regs[mcc].cache_status_val, CACHE_ENABLE_TIMEOUT))
+ printf("MCC: timeout while enabling cache for MCC %d plane %d: 0x%x\n", mcc, plane,
+ plane_read32(mcc, plane, PLANE_CACHE_STATUS));
+ }
+ }
+}
+
+int mcc_unmap_carveouts(void)
+{
+ if (!mcc_initialized)
+ return -1;
+
+ mcc_carveout_count = 0;
+ memset(mcc_carveouts, 0, sizeof mcc_carveouts);
+ // All MCCs and planes should have identical configs
+ for (int i = 0; i < PLANE_TZ_REGS; i++) {
+ uint64_t start = plane_read32(0, 0, PLANE_TZ_START(i));
+ uint64_t end = plane_read32(0, 0, PLANE_TZ_END(i));
+ bool enabled = plane_read32(0, 0, PLANE_TZ_ENABLE(i));
+
+ if (enabled) {
+ if (!start || start == end) {
+ printf("MMU: TZ%d region has bad bounds 0x%lx..0x%lx (iBoot bug?)\n", i, start,
+ end);
+ continue;
+ }
+
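+ // The TZ bound registers appear to hold 4K page numbers relative to
+ // the DRAM base; convert them to absolute byte addresses (the end
+ // bound is inclusive, hence the + 1).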
+ start = start << 12;
+ end = (end + 1) << 12;
+ start |= ram_base;
+ end |= ram_base;
+ printf("MMU: Unmapping TZ%d region at 0x%lx..0x%lx\n", i, start, end);
+ mmu_rm_mapping(start, end - start);
+ mmu_rm_mapping(start | REGION_RWX_EL0, end - start);
+ mmu_rm_mapping(start | REGION_RW_EL0, end - start);
+ mmu_rm_mapping(start | REGION_RX_EL1, end - start);
+ mcc_carveouts[mcc_carveout_count].base = start;
+ mcc_carveouts[mcc_carveout_count].size = end - start;
+ mcc_carveout_count++;
+ }
+ }
+
+ return 0;
+}
+
+int mcc_init_t8103(int node, int *path)
+{
+ printf("MCC: Initializing T8103 MCC...\n");
+
+ mcc_count = 1;
+ mcc_regs[0].plane_stride = T8103_PLANE_STRIDE;
+ mcc_regs[0].plane_count = T8103_PLANES;
+ mcc_regs[0].dcs_stride = T8103_DCS_STRIDE;
+
+ if (adt_get_reg(adt, path, "reg", 0, &mcc_regs[0].global_base, NULL)) {
+ printf("MCC: Failed to get reg property 0!\n");
+ return -1;
+ }
+
+ if (adt_get_reg(adt, path, "reg", 1, &mcc_regs[0].plane_base, NULL)) {
+ printf("MCC: Failed to get reg property 1!\n");
+ return -1;
+ }
+
+ if (adt_get_reg(adt, path, "reg", 2, &mcc_regs[0].dcs_base, NULL)) {
+ printf("MCC: Failed to get reg property 2!\n");
+ return -1;
+ }
+
+ u32 val;
+ if (ADT_GETPROP(adt, node, "dcs_num_channels", &val) < 0) {
+ printf("MCC: Failed to get dcs_num_channels property!\n");
+ return -1;
+ }
+
+ mcc_regs[0].dcs_count = val;
+ mcc_regs[0].cache_ways = T8103_CACHE_WAYS;
+ mcc_regs[0].cache_status_mask = T8103_CACHE_STATUS_MASK;
+ mcc_regs[0].cache_status_val = T8103_CACHE_STATUS_VAL;
+
+ mcc_enable_cache();
+
+ printf("MCC: Initialized T8103 MCC (%d channels)\n", val);
+
+ mcc_initialized = true;
+
+ return 0;
+}
+
+int mcc_init_t6000(int node, int *path)
+{
+ u32 reg_len;
+
+ if (!adt_getprop(adt, node, "reg", &reg_len)) {
+ printf("MCC: Failed to get reg property!\n");
+ return -1;
+ }
+
+ mcc_count = reg_len / 16;
+
+ printf("MCC: Initializing T6000 MCCs (%d instances)...\n", mcc_count);
+
+ if (mcc_count > MAX_MCC_INSTANCES) {
+ printf("MCC: Too many instances, increase MAX_MCC_INSTANCES!\n");
+ mcc_count = MAX_MCC_INSTANCES;
+ }
+
+ for (int i = 0; i < mcc_count; i++) {
+ u64 base;
+ if (adt_get_reg(adt, path, "reg", 0, &base, NULL)) {
+ printf("MCC: Failed to get reg index %d!\n", i);
+ return -1;
+ }
+
+ mcc_regs[i].plane_base = base + T6000_PLANE_OFFSET;
+ mcc_regs[i].plane_stride = T6000_PLANE_STRIDE;
+ mcc_regs[i].plane_count = T6000_PLANES;
+
+ mcc_regs[i].global_base = base + T6000_GLOBAL_OFFSET;
+
+ mcc_regs[i].dcs_base = base + T6000_DCS_OFFSET;
+ mcc_regs[i].dcs_stride = T6000_DCS_STRIDE;
+ mcc_regs[i].dcs_count = T6000_DCS_COUNT;
+
+ mcc_regs[i].cache_ways = T6000_CACHE_WAYS;
+ mcc_regs[i].cache_status_mask = T6000_CACHE_STATUS_MASK;
+ mcc_regs[i].cache_status_val = T6000_CACHE_STATUS_VAL;
+ }
+
+ mcc_enable_cache();
+
+ printf("MCC: Initialized T6000 MCCs (%d instances, %d planes, %d channels)\n", mcc_count,
+ mcc_regs[0].plane_count, mcc_regs[0].dcs_count);
+
+ mcc_initialized = true;
+
+ return 0;
+}
+
+int mcc_init(void)
+{
+ int path[8];
+ int node = adt_path_offset_trace(adt, "/arm-io/mcc", path);
+
+ if (node < 0) {
+ printf("MCC: MCC node not found!\n");
+ return -1;
+ }
+
+ if (adt_is_compatible(adt, node, "mcc,t8103")) {
+ return mcc_init_t8103(node, path);
+ } else if (adt_is_compatible(adt, node, "mcc,t8112")) {
+ return mcc_init_t8103(node, path);
+ } else if (adt_is_compatible(adt, node, "mcc,t6000")) {
+ return mcc_init_t6000(node, path);
+ } else {
+ printf("MCC: Unsupported version\n");
+ return -1;
+ }
+}
diff --git a/tools/src/mcc.h b/tools/src/mcc.h
new file mode 100644
index 0000000..b059d47
--- /dev/null
+++ b/tools/src/mcc.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef MCC_H
+#define MCC_H
+
+#include "types.h"
+
+struct mcc_carveout {
+ u64 base;
+ u64 size;
+};
+
+extern size_t mcc_carveout_count;
+extern struct mcc_carveout mcc_carveouts[];
+
+int mcc_init(void);
+int mcc_unmap_carveouts(void);
+
+#endif
diff --git a/tools/src/memory.c b/tools/src/memory.c
new file mode 100644
index 0000000..aec6782
--- /dev/null
+++ b/tools/src/memory.c
@@ -0,0 +1,566 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "memory.h"
+#include "adt.h"
+#include "assert.h"
+#include "cpu_regs.h"
+#include "fb.h"
+#include "gxf.h"
+#include "malloc.h"
+#include "mcc.h"
+#include "smp.h"
+#include "string.h"
+#include "utils.h"
+#include "xnuboot.h"
+
+#define PAGE_SIZE 0x4000
+#define CACHE_LINE_SIZE 64
+
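+// Expands to a function `func` that applies the cache maintenance
+// instruction `op` (e.g. "dc civac") to every cache line in
+// [addr, addr + length).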
+#define CACHE_RANGE_OP(func, op) \
+ void func(void *addr, size_t length) \
+ { \
+ u64 p = (u64)addr; \
+ u64 end = p + length; \
+ while (p < end) { \
+ cacheop(op, p); \
+ p += CACHE_LINE_SIZE; \
+ } \
+ }
+
+CACHE_RANGE_OP(ic_ivau_range, "ic ivau")
+CACHE_RANGE_OP(dc_ivac_range, "dc ivac")
+CACHE_RANGE_OP(dc_zva_range, "dc zva")
+CACHE_RANGE_OP(dc_cvac_range, "dc cvac")
+CACHE_RANGE_OP(dc_cvau_range, "dc cvau")
+CACHE_RANGE_OP(dc_civac_range, "dc civac")
+
+extern u8 _stack_top[];
+
+uint64_t ram_base = 0;
+
+static inline u64 read_sctlr(void)
+{
+ sysop("isb");
+ return mrs(SCTLR_EL1);
+}
+
+static inline void write_sctlr(u64 val)
+{
+ msr(SCTLR_EL1, val);
+ sysop("isb");
+}
+
+#define VADDR_L3_INDEX_BITS 11
+#define VADDR_L2_INDEX_BITS 11
+// We treat two concatenated L1 page tables as one
+#define VADDR_L1_INDEX_BITS 12
+
+#define VADDR_L3_OFFSET_BITS 14
+#define VADDR_L2_OFFSET_BITS 25
+#define VADDR_L1_OFFSET_BITS 36
+
+#define VADDR_L1_ALIGN_MASK GENMASK(VADDR_L1_OFFSET_BITS - 1, VADDR_L2_OFFSET_BITS)
+#define VADDR_L2_ALIGN_MASK GENMASK(VADDR_L2_OFFSET_BITS - 1, VADDR_L3_OFFSET_BITS)
+#define PTE_TARGET_MASK GENMASK(49, VADDR_L3_OFFSET_BITS)
+
+#define ENTRIES_PER_L1_TABLE BIT(VADDR_L1_INDEX_BITS)
+#define ENTRIES_PER_L2_TABLE BIT(VADDR_L2_INDEX_BITS)
+#define ENTRIES_PER_L3_TABLE BIT(VADDR_L3_INDEX_BITS)
+
+#define IS_PTE(pte) ((pte) && pte & PTE_VALID)
+
+#define L1_IS_TABLE(pte) (IS_PTE(pte) && FIELD_GET(PTE_TYPE, pte) == PTE_TABLE)
+#define L1_IS_BLOCK(pte) (IS_PTE(pte) && FIELD_GET(PTE_TYPE, pte) == PTE_BLOCK)
+#define L2_IS_TABLE(pte) (IS_PTE(pte) && FIELD_GET(PTE_TYPE, pte) == PTE_TABLE)
+#define L2_IS_BLOCK(pte) (IS_PTE(pte) && FIELD_GET(PTE_TYPE, pte) == PTE_BLOCK)
+#define L3_IS_BLOCK(pte) (IS_PTE(pte) && FIELD_GET(PTE_TYPE, pte) == PTE_PAGE)
+
+/*
+ * We use 16KB pages which results in the following virtual address space:
+ *
+ * [L0 index] [L1 index] [L2 index] [L3 index] [page offset]
+ * 1 bit 11 bits 11 bits 11 bits 14 bits
+ *
+ * To simplify things we treat the L1 page table as a concatenated table,
+ * which results in the following layout:
+ *
+ * [L1 index] [L2 index] [L3 index] [page offset]
+ * 12 bits 11 bits 11 bits 14 bits
+ *
+ * We initialize one double-size L1 table which covers the entire virtual memory space,
+ * point to the two halves in the single L0 table and then create L2/L3 tables on demand.
+ */
+
+/*
+ * SPRR mappings interpret these bits as a 4-bit index as follows
+ * [AP1][AP0][PXN][UXN]
+ */
+#define SPRR_INDEX(perm) \
+ (((PTE_AP_RO & (perm)) ? 0b1000 : 0) | ((PTE_AP_EL0 & (perm)) ? 0b0100 : 0) | \
+ ((PTE_UXN & (perm)) ? 0b0010 : 0) | ((PTE_PXN & (perm)) ? 0b0001 : 0))
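+
+/*
+ * For example, a read-only mapping with UXN set and PXN clear selects index
+ * 0b1010; SPRR_PERM() below assigns each such index a 4-bit EL/GL
+ * permission value.
+ */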
+
+enum SPRR_val_t {
+ EL0_GL0,
+ ELrx_GL0,
+ ELr_GL0,
+ ELrw_GL0,
+ EL0_GLrx,
+ ELrx_GLrx,
+ ELr_GLrx,
+ EL0_GLrx_ALT,
+ EL0_GLr,
+ ELx_GLr,
+ ELr_GLr,
+ ELrw_GLr,
+ EL0_GLrw,
+ ELrx_GLrw,
+ ELr_GLrw,
+ ELrw_GLrw,
+};
+
+/*
+ * With SPRR enabled, RWX mappings get downgraded to RW.
+ */
+
+#define SPRR_PERM(ap, val) (((u64)val) << (4 * SPRR_INDEX(ap)))
+
+#define SPRR_DEFAULT_PERM_EL1 \
+ SPRR_PERM(PERM_RO_EL0, ELrw_GLrw) | SPRR_PERM(PERM_RW_EL0, ELrw_GLrw) | \
+ SPRR_PERM(PERM_RX_EL0, ELrx_GLrx) | SPRR_PERM(PERM_RWX_EL0, ELrw_GLrw) | \
+ SPRR_PERM(PERM_RO, ELr_GLr) | SPRR_PERM(PERM_RW, ELrw_GLrw) | \
+ SPRR_PERM(PERM_RX, ELrx_GLrx) | SPRR_PERM(PERM_RWX, ELrw_GLrw)
+
+#define SPRR_DEFAULT_PERM_EL0 \
+ SPRR_PERM(PERM_RO_EL0, ELr_GLr) | SPRR_PERM(PERM_RW_EL0, ELrw_GLrw) | \
+ SPRR_PERM(PERM_RX_EL0, ELrx_GLrx) | SPRR_PERM(PERM_RWX_EL0, ELrx_GLrx) | \
+ SPRR_PERM(PERM_RO, ELr_GLr) | SPRR_PERM(PERM_RW, ELrw_GLrw) | \
+ SPRR_PERM(PERM_RX, ELrx_GLrx) | SPRR_PERM(PERM_RWX, ELrw_GLrw)
+
+/*
+ * AArch64 allows configuring attribute sets for up to eight different memory
+ * types. We need normal memory and two types of device memory (nGnRnE and
+ * nGnRE) in m1n1.
+ * The indexes here are selected arbitrarily: A page table entry
+ * contains a field to select one of these which will then be used
+ * to select the corresponding memory access flags from MAIR.
+ */
+
+#define MAIR_SHIFT_NORMAL (MAIR_IDX_NORMAL * 8)
+#define MAIR_SHIFT_NORMAL_NC (MAIR_IDX_NORMAL_NC * 8)
+#define MAIR_SHIFT_DEVICE_nGnRnE (MAIR_IDX_DEVICE_nGnRnE * 8)
+#define MAIR_SHIFT_DEVICE_nGnRE (MAIR_IDX_DEVICE_nGnRE * 8)
+#define MAIR_SHIFT_DEVICE_nGRE (MAIR_IDX_DEVICE_nGRE * 8)
+#define MAIR_SHIFT_DEVICE_GRE (MAIR_IDX_DEVICE_GRE * 8)
+
+/*
+ * https://developer.arm.com/documentation/ddi0500/e/system-control/aarch64-register-descriptions/memory-attribute-indirection-register--el1
+ *
+ * MAIR_ATTR_NORMAL_DEFAULT sets Normal Memory, Outer Write-back non-transient,
+ * Inner Write-back non-transient, R=1, W=1
+ * MAIR_ATTR_DEVICE_nGnRnE sets Device-nGnRnE memory
+ * MAIR_ATTR_DEVICE_nGnRE sets Device-nGnRE memory
+ */
+#define MAIR_ATTR_NORMAL_DEFAULT 0xffUL
+#define MAIR_ATTR_NORMAL_NC 0x44UL
+#define MAIR_ATTR_DEVICE_nGnRnE 0x00UL
+#define MAIR_ATTR_DEVICE_nGnRE 0x04UL
+#define MAIR_ATTR_DEVICE_nGRE 0x08UL
+#define MAIR_ATTR_DEVICE_GRE 0x0cUL
+
+static u64 *mmu_pt_L0;
+static u64 *mmu_pt_L1;
+
+static u64 *mmu_pt_get_l2(u64 from)
+{
+ u64 l1idx = from >> VADDR_L1_OFFSET_BITS;
+ assert(l1idx < ENTRIES_PER_L1_TABLE);
+ u64 l1d = mmu_pt_L1[l1idx];
+
+ if (L1_IS_TABLE(l1d))
+ return (u64 *)(l1d & PTE_TARGET_MASK);
+
+ u64 *l2 = (u64 *)memalign(PAGE_SIZE, ENTRIES_PER_L2_TABLE * sizeof(u64));
+ assert(!IS_PTE(l1d));
+ memset64(l2, 0, ENTRIES_PER_L2_TABLE * sizeof(u64));
+
+ l1d = ((u64)l2) | FIELD_PREP(PTE_TYPE, PTE_TABLE) | PTE_VALID;
+ mmu_pt_L1[l1idx] = l1d;
+ return l2;
+}
+
+static void mmu_pt_map_l2(u64 from, u64 to, u64 size)
+{
+ assert((from & MASK(VADDR_L2_OFFSET_BITS)) == 0);
+ assert((to & PTE_TARGET_MASK & MASK(VADDR_L2_OFFSET_BITS)) == 0);
+ assert((size & MASK(VADDR_L2_OFFSET_BITS)) == 0);
+
+ to |= FIELD_PREP(PTE_TYPE, PTE_BLOCK);
+
+ for (; size; size -= BIT(VADDR_L2_OFFSET_BITS)) {
+ u64 idx = (from >> VADDR_L2_OFFSET_BITS) & MASK(VADDR_L2_INDEX_BITS);
+ u64 *l2 = mmu_pt_get_l2(from);
+
+ if (L2_IS_TABLE(l2[idx]))
+ free((void *)(l2[idx] & PTE_TARGET_MASK));
+
+ l2[idx] = to;
+ from += BIT(VADDR_L2_OFFSET_BITS);
+ to += BIT(VADDR_L2_OFFSET_BITS);
+ }
+}
+
+static u64 *mmu_pt_get_l3(u64 from)
+{
+ u64 *l2 = mmu_pt_get_l2(from);
+ u64 l2idx = (from >> VADDR_L2_OFFSET_BITS) & MASK(VADDR_L2_INDEX_BITS);
+ assert(l2idx < ENTRIES_PER_L2_TABLE);
+ u64 l2d = l2[l2idx];
+
+ if (L2_IS_TABLE(l2d))
+ return (u64 *)(l2d & PTE_TARGET_MASK);
+
+ u64 *l3 = (u64 *)memalign(PAGE_SIZE, ENTRIES_PER_L3_TABLE * sizeof(u64));
+ if (IS_PTE(l2d)) {
+ u64 l3d = l2d;
+ l3d &= ~PTE_TYPE;
+ l3d |= FIELD_PREP(PTE_TYPE, PTE_PAGE);
+ for (u64 idx = 0; idx < ENTRIES_PER_L3_TABLE; idx++, l3d += BIT(VADDR_L3_OFFSET_BITS))
+ l3[idx] = l3d;
+ } else {
+ memset64(l3, 0, ENTRIES_PER_L3_TABLE * sizeof(u64));
+ }
+
+ l2d = ((u64)l3) | FIELD_PREP(PTE_TYPE, PTE_TABLE) | PTE_VALID;
+ l2[l2idx] = l2d;
+ return l3;
+}
+
+static void mmu_pt_map_l3(u64 from, u64 to, u64 size)
+{
+ assert((from & MASK(VADDR_L3_OFFSET_BITS)) == 0);
+ assert((to & PTE_TARGET_MASK & MASK(VADDR_L3_OFFSET_BITS)) == 0);
+ assert((size & MASK(VADDR_L3_OFFSET_BITS)) == 0);
+
+ to |= FIELD_PREP(PTE_TYPE, PTE_PAGE);
+
+ for (; size; size -= BIT(VADDR_L3_OFFSET_BITS)) {
+ u64 idx = (from >> VADDR_L3_OFFSET_BITS) & MASK(VADDR_L3_INDEX_BITS);
+ u64 *l3 = mmu_pt_get_l3(from);
+
+ l3[idx] = to;
+ from += BIT(VADDR_L3_OFFSET_BITS);
+ to += BIT(VADDR_L3_OFFSET_BITS);
+ }
+}
+
+int mmu_map(u64 from, u64 to, u64 size)
+{
+ u64 chunk;
+ if (from & MASK(VADDR_L3_OFFSET_BITS) || size & MASK(VADDR_L3_OFFSET_BITS))
+ return -1;
+
+ // L3 mappings to boundary
+ u64 boundary = ALIGN_UP(from, MASK(VADDR_L2_OFFSET_BITS));
+ // CPU CTRR doesn't like L2 mappings crossing CTRR boundaries!
+ // Map everything below the m1n1 base as L3
+ if (boundary >= ram_base && boundary < (u64)_base)
+ boundary = ALIGN_UP((u64)_base, MASK(VADDR_L2_OFFSET_BITS));
+
+ chunk = min(size, boundary - from);
+ if (chunk) {
+ mmu_pt_map_l3(from, to, chunk);
+ from += chunk;
+ to += chunk;
+ size -= chunk;
+ }
+
+ // L2 mappings
+ chunk = ALIGN_DOWN(size, MASK(VADDR_L2_OFFSET_BITS));
+ if (chunk && (to & VADDR_L2_ALIGN_MASK) == 0) {
+ mmu_pt_map_l2(from, to, chunk);
+ from += chunk;
+ to += chunk;
+ size -= chunk;
+ }
+
+ // L3 mappings to end
+ if (size) {
+ mmu_pt_map_l3(from, to, size);
+ }
+
+ return 0;
+}
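+
+/*
+ * Illustrative walk-through (assuming the 16K granule configured in
+ * mmu_configure, where one L2 block covers 32 MiB): a range starting off a
+ * block boundary is mapped with L3 pages up to the next 32 MiB boundary,
+ * then with as many 32 MiB L2 blocks as fit (if the output address is
+ * equally aligned), then with L3 pages for the remaining tail.
+ */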
+
+static u64 mmu_make_table_pte(u64 *addr)
+{
+ u64 pte = FIELD_PREP(PTE_TYPE, PTE_TABLE) | PTE_VALID;
+ pte |= (uintptr_t)addr;
+ pte |= PTE_ACCESS;
+ return pte;
+}
+
+static void mmu_init_pagetables(void)
+{
+ mmu_pt_L0 = memalign(PAGE_SIZE, sizeof(u64) * 2);
+ mmu_pt_L1 = memalign(PAGE_SIZE, sizeof(u64) * ENTRIES_PER_L1_TABLE);
+
+ memset64(mmu_pt_L0, 0, sizeof(u64) * 2);
+ memset64(mmu_pt_L1, 0, sizeof(u64) * ENTRIES_PER_L1_TABLE);
+
+ mmu_pt_L0[0] = mmu_make_table_pte(&mmu_pt_L1[0]);
+ mmu_pt_L0[1] = mmu_make_table_pte(&mmu_pt_L1[ENTRIES_PER_L1_TABLE >> 1]);
+}
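+
+/*
+ * Note (assuming the 48-bit VAs and 16K granule set up in mmu_configure):
+ * the top-level L0 table has just two entries, each covering 2^47 bytes,
+ * which is why both halves of the flat L1 table are linked in here and why
+ * mmu_pt_get_l2() can treat L1 as a single flat array.
+ */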
+
+void mmu_add_mapping(u64 from, u64 to, size_t size, u8 attribute_index, u64 perms)
+{
+ if (mmu_map(from,
+ to | PTE_MAIR_IDX(attribute_index) | PTE_ACCESS | PTE_VALID | PTE_SH_OS | perms,
+ size) < 0)
+ panic("Failed to add MMU mapping 0x%lx -> 0x%lx (0x%lx)\n", from, to, size);
+
+ sysop("dsb ishst");
+ sysop("tlbi vmalle1is");
+ sysop("dsb ish");
+ sysop("isb");
+}
+
+void mmu_rm_mapping(u64 from, size_t size)
+{
+ if (mmu_map(from, 0, size) < 0)
+ panic("Failed to rm MMU mapping at 0x%lx (0x%lx)\n", from, size);
+}
+
+static void mmu_map_mmio(void)
+{
+ int node = adt_path_offset(adt, "/arm-io");
+ if (node < 0) {
+ printf("MMU: ARM-IO node not found!\n");
+ return;
+ }
+ u32 ranges_len;
+ const u32 *ranges = adt_getprop(adt, node, "ranges", &ranges_len);
+ if (!ranges) {
+ printf("MMU: Failed to get ranges property!\n");
+ return;
+ }
+ // Assume all cell counts are 2 (64bit)
+ int range_cnt = ranges_len / 24;
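+    // Each 24-byte entry is <bus addr (2 cells), parent addr (2 cells),
+    // size (2 cells)>; cells 2-5 below read out the parent (CPU-visible)
+    // address and size (an assumption mirroring the FDT "ranges" layout).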
+ while (range_cnt--) {
+ u64 bus = ranges[2] | ((u64)ranges[3] << 32);
+ u64 size = ranges[4] | ((u64)ranges[5] << 32);
+
+ mmu_add_mapping(bus, bus, size, MAIR_IDX_DEVICE_nGnRnE, PERM_RW_EL0);
+
+ ranges += 6;
+ }
+}
+
+static void mmu_remap_ranges(void)
+{
+ int node = adt_path_offset(adt, "/defaults");
+ if (node < 0) {
+ printf("MMU: defaults node not found!\n");
+ return;
+ }
+ u32 ranges_len;
+ const u32 *ranges = adt_getprop(adt, node, "pmap-io-ranges", &ranges_len);
+ if (!ranges) {
+ printf("MMU: Failed to get pmap-io-ranges property!\n");
+ return;
+ }
+ int range_cnt = ranges_len / 24;
+ while (range_cnt--) {
+ u64 addr = ranges[0] | ((u64)ranges[1] << 32);
+ u64 size = ranges[2] | ((u64)ranges[3] << 32);
+ u32 flags = ranges[4];
+
+ // TODO: is this the right logic?
+ if ((flags >> 28) == 8) {
+ printf("MMU: Adding Device-nGnRE mapping at 0x%lx (0x%lx)\n", addr, size);
+ mmu_add_mapping(addr, addr, size, MAIR_IDX_DEVICE_nGnRE, PERM_RW_EL0);
+ } else if (flags == 0x60004016) {
+ printf("MMU: Adding Normal-NC mapping at 0x%lx (0x%lx)\n", addr, size);
+ mmu_add_mapping(addr, addr, size, MAIR_IDX_NORMAL_NC, PERM_RW_EL0);
+ }
+
+ ranges += 6;
+ }
+}
+
+void mmu_map_framebuffer(u64 addr, size_t size)
+{
+ printf("MMU: Adding Normal-NC mapping at 0x%lx (0x%zx) for framebuffer\n", addr, size);
+ dc_civac_range((void *)addr, size);
+ mmu_add_mapping(addr, addr, size, MAIR_IDX_NORMAL_NC, PERM_RW_EL0);
+}
+
+static void mmu_add_default_mappings(void)
+{
+ ram_base = ALIGN_DOWN(cur_boot_args.phys_base, BIT(32));
+ uint64_t ram_size = cur_boot_args.mem_size + cur_boot_args.phys_base - ram_base;
+ ram_size = ALIGN_DOWN(ram_size, 0x4000);
+
+ printf("MMU: RAM base: 0x%lx\n", ram_base);
+ printf("MMU: Top of normal RAM: 0x%lx\n", ram_base + ram_size);
+
+ mmu_map_mmio();
+
+ /*
+ * Create identity mapping for RAM from 0x08_0000_0000
+ * With SPRR enabled, this becomes RW.
+ * This range includes all real RAM, including carveouts
+ */
+ mmu_add_mapping(ram_base, ram_base, cur_boot_args.mem_size_actual, MAIR_IDX_NORMAL, PERM_RWX);
+
+ /* Unmap carveout regions */
+ mcc_unmap_carveouts();
+
+ /*
+ * Remap m1n1 executable code as RX.
+ */
+ mmu_add_mapping((u64)_base, (u64)_base, (u64)_rodata_end - (u64)_base, MAIR_IDX_NORMAL,
+ PERM_RX_EL0);
+
+ /*
+ * Make guard page at the end of the main stack
+ */
+ mmu_rm_mapping((u64)_stack_top, PAGE_SIZE);
+
+ /*
+     * Create mapping for RAM at ram_base | REGION_RWX_EL0,
+     * readable/writable/executable by EL0 (but not executable by EL1).
+     * With SPRR enabled, this becomes RX_EL0.
+ */
+ mmu_add_mapping(ram_base | REGION_RWX_EL0, ram_base, ram_size, MAIR_IDX_NORMAL, PERM_RWX_EL0);
+ /*
+     * Create mapping for RAM at ram_base | REGION_RW_EL0,
+     * readable/writable by EL0 (not executable).
+ * With SPRR enabled, this becomes RW_EL0.
+ */
+ mmu_add_mapping(ram_base | REGION_RW_EL0, ram_base, ram_size, MAIR_IDX_NORMAL, PERM_RW_EL0);
+ /*
+     * Create mapping for RAM at ram_base | REGION_RX_EL1,
+     * readable/executable by EL1.
+     * This allows executing from dynamic regions in EL1.
+ */
+ mmu_add_mapping(ram_base | REGION_RX_EL1, ram_base, ram_size, MAIR_IDX_NORMAL, PERM_RX_EL0);
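+    /*
+     * Example (sketch): code written into RAM through the identity mapping
+     * above can then be executed at EL1 via (pa | REGION_RX_EL1), since with
+     * SPRR enabled the identity mapping itself is downgraded to RW.
+     */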
+
+ /*
+     * Create four separate full mappings of MMIO space, with different access types
+ */
+ mmu_add_mapping(0xc000000000, 0x0000000000, 0x0800000000, MAIR_IDX_DEVICE_GRE, PERM_RW_EL0);
+ mmu_add_mapping(0xd000000000, 0x0000000000, 0x0800000000, MAIR_IDX_DEVICE_nGRE, PERM_RW_EL0);
+ mmu_add_mapping(0xe000000000, 0x0000000000, 0x0800000000, MAIR_IDX_DEVICE_nGnRnE, PERM_RW_EL0);
+ mmu_add_mapping(0xf000000000, 0x0000000000, 0x0800000000, MAIR_IDX_DEVICE_nGnRE, PERM_RW_EL0);
+
+ /*
+ * Handle pmap-ranges
+ */
+ mmu_remap_ranges();
+}
+
+static void mmu_configure(void)
+{
+ msr(MAIR_EL1, (MAIR_ATTR_NORMAL_DEFAULT << MAIR_SHIFT_NORMAL) |
+ (MAIR_ATTR_DEVICE_nGnRnE << MAIR_SHIFT_DEVICE_nGnRnE) |
+ (MAIR_ATTR_DEVICE_nGnRE << MAIR_SHIFT_DEVICE_nGnRE) |
+ (MAIR_ATTR_NORMAL_NC << MAIR_SHIFT_NORMAL_NC));
+ msr(TCR_EL1, FIELD_PREP(TCR_IPS, TCR_IPS_4TB) | FIELD_PREP(TCR_TG1, TCR_TG1_16K) |
+ FIELD_PREP(TCR_SH1, TCR_SH1_IS) | FIELD_PREP(TCR_ORGN1, TCR_ORGN1_WBWA) |
+ FIELD_PREP(TCR_IRGN1, TCR_IRGN1_WBWA) | FIELD_PREP(TCR_T1SZ, TCR_T1SZ_48BIT) |
+ FIELD_PREP(TCR_TG0, TCR_TG0_16K) | FIELD_PREP(TCR_SH0, TCR_SH0_IS) |
+ FIELD_PREP(TCR_ORGN0, TCR_ORGN0_WBWA) | FIELD_PREP(TCR_IRGN0, TCR_IRGN0_WBWA) |
+ FIELD_PREP(TCR_T0SZ, TCR_T0SZ_48BIT));
+
+ msr(TTBR0_EL1, (uintptr_t)mmu_pt_L0);
+ msr(TTBR1_EL1, (uintptr_t)mmu_pt_L0);
+
+ // Armv8-A Address Translation, 100940_0101_en, page 28
+ sysop("dsb ishst");
+ sysop("tlbi vmalle1is");
+ sysop("dsb ish");
+ sysop("isb");
+}
+
+static void mmu_init_sprr(void)
+{
+ msr_sync(SYS_IMP_APL_SPRR_CONFIG_EL1, 1);
+ msr_sync(SYS_IMP_APL_SPRR_PERM_EL0, SPRR_DEFAULT_PERM_EL0);
+ msr_sync(SYS_IMP_APL_SPRR_PERM_EL1, SPRR_DEFAULT_PERM_EL1);
+ msr_sync(SYS_IMP_APL_SPRR_CONFIG_EL1, 0);
+}
+
+void mmu_init(void)
+{
+ printf("MMU: Initializing...\n");
+
+ if (read_sctlr() & SCTLR_M) {
+ printf("MMU: already intialized.\n");
+ return;
+ }
+
+ mmu_init_pagetables();
+ mmu_add_default_mappings();
+ mmu_configure();
+ mmu_init_sprr();
+
+ // Enable EL0 memory access by EL1
+ msr(PAN, 0);
+
+ // RES1 bits
+ u64 sctlr = SCTLR_LSMAOE | SCTLR_nTLSMD | SCTLR_TSCXT | SCTLR_ITD;
+ // Configure translation
+ sctlr |= SCTLR_I | SCTLR_C | SCTLR_M | SCTLR_SPAN;
+
+ printf("MMU: SCTLR_EL1: %lx -> %lx\n", mrs(SCTLR_EL1), sctlr);
+ write_sctlr(sctlr);
+ printf("MMU: running with MMU and caches enabled!\n");
+}
+
+static void mmu_secondary_setup(void)
+{
+ mmu_configure();
+ mmu_init_sprr();
+
+ // Enable EL0 memory access by EL1
+ msr(PAN, 0);
+
+ // RES1 bits
+ u64 sctlr = SCTLR_LSMAOE | SCTLR_nTLSMD | SCTLR_TSCXT | SCTLR_ITD;
+ // Configure translation
+ sctlr |= SCTLR_I | SCTLR_C | SCTLR_M | SCTLR_SPAN;
+ write_sctlr(sctlr);
+}
+
+void mmu_init_secondary(int cpu)
+{
+ smp_call4(cpu, mmu_secondary_setup, 0, 0, 0, 0);
+ smp_wait(cpu);
+}
+
+void mmu_shutdown(void)
+{
+ fb_console_reserve_lines(3);
+ printf("MMU: shutting down...\n");
+ write_sctlr(read_sctlr() & ~(SCTLR_I | SCTLR_C | SCTLR_M));
+ printf("MMU: shutdown successful, clearing caches\n");
+ dcsw_op_all(DCSW_OP_DCCISW);
+}
+
+u64 mmu_disable(void)
+{
+ u64 sctlr_old = read_sctlr();
+ if (!(sctlr_old & SCTLR_M))
+ return sctlr_old;
+
+ write_sctlr(sctlr_old & ~(SCTLR_I | SCTLR_C | SCTLR_M));
+ dcsw_op_all(DCSW_OP_DCCISW);
+
+ return sctlr_old;
+}
+
+void mmu_restore(u64 state)
+{
+ write_sctlr(state);
+}
diff --git a/tools/src/memory.h b/tools/src/memory.h
new file mode 100644
index 0000000..247a5d3
--- /dev/null
+++ b/tools/src/memory.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef MEMORY_H
+#define MEMORY_H
+
+#include "cpu_regs.h"
+#include "types.h"
+
+#define REGION_RWX_EL0 0x80000000000
+#define REGION_RW_EL0 0xa0000000000
+#define REGION_RX_EL1 0xc0000000000
+
+/*
+ * https://armv8-ref.codingbelief.com/en/chapter_d4/d43_2_armv8_translation_table_level_3_descriptor_formats.html
+ * PTE_TYPE:PTE_BLOCK indicates that the page table entry (PTE) points to a physical memory block
+ * PTE_TYPE:PTE_TABLE indicates that the PTE points to another PTE
+ * PTE_TYPE:PTE_PAGE indicates that the PTE points to a single page
+ * PTE_FLAG_ACCESS is required to allow access to the memory region
+ * PTE_MAIR_IDX sets the MAIR index to be used for this PTE
+ */
+#define PTE_VALID BIT(0)
+#define PTE_TYPE BIT(1)
+#define PTE_BLOCK 0
+#define PTE_TABLE 1
+#define PTE_PAGE 1
+#define PTE_ACCESS BIT(10)
+#define PTE_MAIR_IDX(i) (((i) & 7) << 2)
+#define PTE_PXN BIT(53)
+#define PTE_UXN BIT(54)
+#define PTE_AP_RO BIT(7)
+#define PTE_AP_EL0 BIT(6)
+#define PTE_SH_NS (0b00 << 8)
+#define PTE_SH_OS (0b10 << 8)
+#define PTE_SH_IS (0b11 << 8)
+
+#define PERM_RO_EL0  (PTE_AP_EL0 | PTE_AP_RO | PTE_PXN | PTE_UXN)
+#define PERM_RW_EL0  (PTE_AP_EL0 | PTE_PXN | PTE_UXN)
+#define PERM_RX_EL0  (PTE_AP_EL0 | PTE_AP_RO)
+#define PERM_RWX_EL0 PTE_AP_EL0
+
+#define PERM_RO  (PTE_AP_RO | PTE_PXN | PTE_UXN)
+#define PERM_RW  (PTE_PXN | PTE_UXN)
+#define PERM_RX  (PTE_AP_RO | PTE_UXN)
+#define PERM_RWX 0
+
+#define MAIR_IDX_NORMAL 0
+#define MAIR_IDX_NORMAL_NC 1
+#define MAIR_IDX_DEVICE_nGnRnE 2
+#define MAIR_IDX_DEVICE_nGnRE 3
+#define MAIR_IDX_DEVICE_nGRE 4
+#define MAIR_IDX_DEVICE_GRE 5
+
+#ifndef __ASSEMBLER__
+
+#include "utils.h"
+
+extern uint64_t ram_base;
+
+void ic_ivau_range(void *addr, size_t length);
+void dc_ivac_range(void *addr, size_t length);
+void dc_zva_range(void *addr, size_t length);
+void dc_cvac_range(void *addr, size_t length);
+void dc_cvau_range(void *addr, size_t length);
+void dc_civac_range(void *addr, size_t length);
+
+#define DCSW_OP_DCISW 0x0
+#define DCSW_OP_DCCISW 0x1
+#define DCSW_OP_DCCSW 0x2
+void dcsw_op_all(u64 op_type);
+
+void mmu_init(void);
+void mmu_init_secondary(int cpu);
+void mmu_shutdown(void);
+void mmu_add_mapping(u64 from, u64 to, size_t size, u8 attribute_index, u64 perms);
+void mmu_rm_mapping(u64 from, size_t size);
+void mmu_map_framebuffer(u64 addr, size_t size);
+
+u64 mmu_disable(void);
+void mmu_restore(u64 state);
+
+static inline bool mmu_active(void)
+{
+ return mrs(SCTLR_EL1) & SCTLR_M;
+}
+
+#endif
+
+#endif
diff --git a/tools/src/memory_asm.S b/tools/src/memory_asm.S
new file mode 100644
index 0000000..2c2c778
--- /dev/null
+++ b/tools/src/memory_asm.S
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2013-2020, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#define LOC_SHIFT 24
+#define CLIDR_FIELD_WIDTH 3
+#define LEVEL_SHIFT 1
+
+.macro func, name
+.globl \name
+.type \name, @function
+\name:
+.endm
+
+ .globl dcsw_op_all
+
+/*
+ * This macro can be used for implementing various data cache operations `op`
+ */
+.macro do_dcache_maintenance_by_mva op
+ /* Exit early if size is zero */
+ cbz x1, exit_loop_\op
+ dcache_line_size x2, x3
+ add x1, x0, x1
+ sub x3, x2, #1
+ bic x0, x0, x3
+loop_\op:
+ dc \op, x0
+ add x0, x0, x2
+ cmp x0, x1
+ b.lo loop_\op
+ dsb sy
+exit_loop_\op:
+ ret
+.endm
+
+ /* ---------------------------------------------------------------
+ * Data cache operations by set/way to the level specified
+ *
+ * The main function, do_dcsw_op requires:
+ * x0: The operation type (0-2), as defined in arch.h
+ * x3: The last cache level to operate on
+ * x9: clidr_el1
+ * x10: The cache level to begin operation from
+ * and will carry out the operation on each data cache from level 0
+ * to the level in x3 in sequence
+ *
+ * The dcsw_op macro sets up the x3 and x9 parameters based on
+ * clidr_el1 cache information before invoking the main function
+ * ---------------------------------------------------------------
+ */
+
+ .macro dcsw_op shift, fw, ls
+ mrs x9, clidr_el1
+ ubfx x3, x9, \shift, \fw
+ lsl x3, x3, \ls
+ mov x10, xzr
+ b do_dcsw_op
+ .endm
+
+func do_dcsw_op
+ cbz x3, exit
+ adr x14, dcsw_loop_table // compute inner loop address
+ add x14, x14, x0, lsl #5 // inner loop is 8x32-bit instructions
+ mov x0, x9
+ mov w8, #1
+loop1:
+ add x2, x10, x10, lsr #1 // work out 3x current cache level
+ lsr x1, x0, x2 // extract cache type bits from clidr
+ and x1, x1, #7 // mask the bits for current cache only
+ cmp x1, #2 // see what cache we have at this level
+ b.lo level_done // nothing to do if no cache or icache
+
+ msr csselr_el1, x10 // select current cache level in csselr
+    isb // isb to sync the new csselr & ccsidr
+ mrs x1, ccsidr_el1 // read the new ccsidr
+ and x2, x1, #7 // extract the length of the cache lines
+ add x2, x2, #4 // add 4 (line length offset)
+ ubfx x4, x1, #3, #10 // maximum way number
+ clz w5, w4 // bit position of way size increment
+ lsl w9, w4, w5 // w9 = aligned max way number
+ lsl w16, w8, w5 // w16 = way number loop decrement
+ orr w9, w10, w9 // w9 = combine way and cache number
+ ubfx w6, w1, #13, #15 // w6 = max set number
+ lsl w17, w8, w2 // w17 = set number loop decrement
+ dsb sy // barrier before we start this level
+ br x14 // jump to DC operation specific loop
+
+ .macro dcsw_loop _op
+loop2_\_op:
+ lsl w7, w6, w2 // w7 = aligned max set number
+
+loop3_\_op:
+ orr w11, w9, w7 // combine cache, way and set number
+ dc \_op, x11
+ subs w7, w7, w17 // decrement set number
+ b.hs loop3_\_op
+
+ subs x9, x9, x16 // decrement way number
+ b.hs loop2_\_op
+
+ b level_done
+ .endm
+
+level_done:
+ add x10, x10, #2 // increment cache number
+ cmp x3, x10
+ b.hi loop1
+ msr csselr_el1, xzr // select cache level 0 in csselr
+ dsb sy // barrier to complete final cache operation
+ isb
+exit:
+ ret
+
+dcsw_loop_table:
+ dcsw_loop isw
+ dcsw_loop cisw
+ dcsw_loop csw
+
+
+func dcsw_op_all
+ dcsw_op #LOC_SHIFT, #CLIDR_FIELD_WIDTH, #LEVEL_SHIFT
+
+ /* ---------------------------------------------------------------
+ * Helper macro for data cache operations by set/way for the
+ * level specified
+ * ---------------------------------------------------------------
+ */
+ .macro dcsw_op_level level
+ mrs x9, clidr_el1
+ mov x3, \level
+ sub x10, x3, #2
+ b do_dcsw_op
+ .endm
+
+ /* ---------------------------------------------------------------
+ * Data cache operations by set/way for level 1 cache
+ *
+ * The main function, do_dcsw_op requires:
+ * x0: The operation type (0-2), as defined in arch.h
+ * ---------------------------------------------------------------
+ */
+func dcsw_op_level1
+ dcsw_op_level #(1 << LEVEL_SHIFT)
+
+ /* ---------------------------------------------------------------
+ * Data cache operations by set/way for level 2 cache
+ *
+ * The main function, do_dcsw_op requires:
+ * x0: The operation type (0-2), as defined in arch.h
+ * ---------------------------------------------------------------
+ */
+func dcsw_op_level2
+ dcsw_op_level #(2 << LEVEL_SHIFT)
+
+ /* ---------------------------------------------------------------
+ * Data cache operations by set/way for level 3 cache
+ *
+ * The main function, do_dcsw_op requires:
+ * x0: The operation type (0-2), as defined in arch.h
+ * ---------------------------------------------------------------
+ */
+func dcsw_op_level3
+ dcsw_op_level #(3 << LEVEL_SHIFT)
diff --git a/tools/src/minilzlib/dictbuf.c b/tools/src/minilzlib/dictbuf.c
new file mode 100644
index 0000000..02875dc
--- /dev/null
+++ b/tools/src/minilzlib/dictbuf.c
@@ -0,0 +1,155 @@
+/*++
+
+Copyright (c) Alex Ionescu. All rights reserved.
+
+Module Name:
+
+ dictbuf.c
+
+Abstract:
+
+ This module implements the management of the LZMA "history buffer" which is
+ often called the "dictionary". Routines for writing into the history buffer
+ as well as for reading back from it are implemented, as well as mechanisms
+ for repeating previous symbols forward into the dictionary. This forms the
+ basis for LZMA match distance-length pairs that are found and decompressed.
+ Note that for simplicity's sake, the dictionary is stored directly in the
+ output buffer, such that no "flushing" or copying is needed back and forth.
+
+Author:
+
+ Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
+
+Environment:
+
+ Windows & Linux, user mode and kernel mode.
+
+--*/
+
+#include "minlzlib.h"
+
+//
+// State used for the history buffer (dictionary)
+//
+typedef struct _DICTIONARY_STATE
+{
+ //
+ // Buffer, start position, current position, and offset limit in the buffer
+ //
+ uint8_t* Buffer;
+ uint32_t BufferSize;
+ uint32_t Start;
+ uint32_t Offset;
+ uint32_t Limit;
+} DICTIONARY_STATE, *PDICTIONARY_STATE;
+DICTIONARY_STATE Dictionary;
+
+void
+DtInitialize (
+ uint8_t* HistoryBuffer,
+ uint32_t Size
+ )
+{
+ //
+ // Initialize the buffer and reset the position
+ //
+ Dictionary.Buffer = HistoryBuffer;
+ Dictionary.Offset = 0;
+ Dictionary.BufferSize = Size;
+}
+
+bool
+DtSetLimit (
+ uint32_t Limit
+ )
+{
+ //
+ // Make sure that the passed in dictionary limit fits within the size, and
+ // then set this as the new limit. Save the starting point (current offset)
+ //
+ if ((Dictionary.Offset + Limit) > Dictionary.BufferSize)
+ {
+ return false;
+ }
+ Dictionary.Limit = Dictionary.Offset + Limit;
+ Dictionary.Start = Dictionary.Offset;
+ return true;
+}
+
+bool
+DtIsComplete (
+ uint32_t* BytesProcessed
+ )
+{
+ //
+ // Return bytes processed and if the dictionary has been fully written to
+ //
+ *BytesProcessed = Dictionary.Offset - Dictionary.Start;
+ return (Dictionary.Offset == Dictionary.Limit);
+}
+
+bool
+DtCanWrite (
+ uint32_t* Position
+ )
+{
+ //
+ // Return our position and make sure it's not beyond the uncompressed size
+ //
+ *Position = Dictionary.Offset;
+ return (Dictionary.Offset < Dictionary.Limit);
+}
+
+uint8_t
+DtGetSymbol (
+ uint32_t Distance
+ )
+{
+ //
+ // If the dictionary is still empty, just return 0, otherwise, return the
+ // symbol that is Distance bytes backward.
+ //
+ if (Distance > Dictionary.Offset)
+ {
+ return 0;
+ }
+ return Dictionary.Buffer[Dictionary.Offset - Distance];
+}
+
+void
+DtPutSymbol (
+ uint8_t Symbol
+ )
+{
+ //
+ // Write the symbol and advance our position
+ //
+ Dictionary.Buffer[Dictionary.Offset++] = Symbol;
+}
+
+bool
+DtRepeatSymbol (
+ uint32_t Length,
+ uint32_t Distance
+ )
+{
+ //
+ // Make sure we never get asked to write past the end of the dictionary. We
+ // should also not allow the distance to go beyond the current offset since
+ // DtGetSymbol will return 0 thinking the dictionary is empty.
+ //
+ if (((Length + Dictionary.Offset) > Dictionary.Limit) ||
+ (Distance > Dictionary.Offset))
+ {
+ return false;
+ }
+
+ //
+ // Now rewrite the stream of past symbols forward into the dictionary.
+ //
+ do
+ {
+ DtPutSymbol(DtGetSymbol(Distance));
+ } while (--Length > 0);
+ return true;
+}
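+
+//
+// Note that because DtPutSymbol advances the offset before the next
+// DtGetSymbol call, overlapping matches behave as in classic LZ77: for
+// example, DtRepeatSymbol(5, 1) replicates the most recent byte five times.
+//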
diff --git a/tools/src/minilzlib/inputbuf.c b/tools/src/minilzlib/inputbuf.c
new file mode 100644
index 0000000..67d652c
--- /dev/null
+++ b/tools/src/minilzlib/inputbuf.c
@@ -0,0 +1,144 @@
+/*++
+
+Copyright (c) Alex Ionescu. All rights reserved.
+
+Module Name:
+
+ inputbuf.c
+
+Abstract:
+
+ This module implements helper functions for managing the input buffer that
+    contains arithmetic-coded LZ77 match distance-length pairs and raw literals.
+    Both seeking (such that an external reader can refer to multiple bytes) and
+ reading (capturing) an individual byte are supported. Support for aligning
+ input data to 4 bytes (which is a requirement for XZ-encoded files) is also
+ implemented.
+
+Author:
+
+ Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
+
+Environment:
+
+ Windows & Linux, user mode and kernel mode.
+
+--*/
+
+#include "minlzlib.h"
+
+//
+// Input Buffer State
+//
+typedef struct _BUFFER_STATE
+{
+ //
+ // Start of the buffer, current offset, current packet end, and total input size
+ //
+ uint8_t* Buffer;
+ uint32_t Offset;
+ uint32_t SoftLimit;
+ uint32_t Size;
+} BUFFER_STATE, * PBUFFER_STATE;
+BUFFER_STATE In;
+
+bool
+BfAlign (
+ void
+ )
+{
+ uint8_t padByte;
+ //
+ // Keep reading until we reach 32-bit alignment. All bytes must be zero.
+ //
+ while (In.Offset & 3)
+ {
+ if (!BfRead(&padByte) || (padByte != 0))
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool
+BfSetSoftLimit (
+ uint32_t Remaining
+ )
+{
+ if ((In.Size - In.Offset) < Remaining)
+ {
+ return false;
+ }
+ In.SoftLimit = In.Offset + Remaining;
+ return true;
+}
+
+void
+BfResetSoftLimit (
+ void
+ )
+{
+ In.SoftLimit = In.Size;
+}
+
+bool
+BfSeek (
+ uint32_t Length,
+ uint8_t** Bytes
+ )
+{
+ //
+ // Make sure the input buffer has enough space to seek the desired size, if
+ // it does, return the current position and then seek past the desired size
+ //
+ if ((In.Offset + Length) > In.SoftLimit)
+ {
+ *Bytes = 0;
+ return false;
+ }
+ *Bytes = &In.Buffer[In.Offset];
+ In.Offset += Length;
+ return true;
+}
+
+uint32_t
+BfTell (
+ void
+ )
+{
+ return In.Offset;
+}
+
+bool
+BfRead (
+ uint8_t* Byte
+ )
+{
+ uint8_t* pByte;
+ //
+ // Seek past the byte and read it
+ //
+ if (!BfSeek(sizeof(*Byte), &pByte))
+ {
+ *Byte = 0;
+ return false;
+ }
+ *Byte = *pByte;
+ return true;
+}
+
+void
+BfInitialize (
+ uint8_t* InputBuffer,
+ uint32_t InputSize
+ )
+{
+ //
+ // Save all the data in the context buffer state
+ //
+ In.Buffer = InputBuffer;
+ In.Size = InputSize;
+ In.SoftLimit = InputSize;
+ In.Offset = 0;
+}
diff --git a/tools/src/minilzlib/lzma2dec.c b/tools/src/minilzlib/lzma2dec.c
new file mode 100644
index 0000000..7a15513
--- /dev/null
+++ b/tools/src/minilzlib/lzma2dec.c
@@ -0,0 +1,228 @@
+/*++
+
+Copyright (c) Alex Ionescu. All rights reserved.
+
+Module Name:
+
+ lzma2dec.c
+
+Abstract:
+
+ This module implements the LZMA2 decoding logic responsible for parsing the
+ LZMA2 Control Byte, the Information Bytes (Compressed & Uncompressed Stream
+ Size), and the Property Byte during the initial Dictionary Reset. Note that
+ this module only implements support for a single such reset (i.e.: archives
+ in "solid" mode).
+
+Author:
+
+ Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
+
+Environment:
+
+ Windows & Linux, user mode and kernel mode.
+
+--*/
+
+#include "minlzlib.h"
+#include "lzma2dec.h"
+
+bool
+Lz2DecodeChunk (
+ uint32_t* BytesProcessed,
+ uint32_t RawSize,
+ uint16_t CompressedSize
+ )
+{
+ uint32_t bytesProcessed;
+
+ //
+ // Go and decode this chunk, sequence by sequence
+ //
+ if (!LzDecode())
+ {
+ return false;
+ }
+
+ //
+ // In a correctly formatted stream, the last arithmetic-coded sequence must
+ // be zero once we finished with the last chunk. Make sure the stream ended
+ // exactly where we expected it to.
+ //
+ if (!RcIsComplete(&bytesProcessed) || (bytesProcessed != CompressedSize))
+ {
+ return false;
+ }
+
+ //
+ // The entire output stream must have been written to, and the dictionary
+ // must be full now.
+ //
+ if (!DtIsComplete(&bytesProcessed) || (bytesProcessed != RawSize))
+ {
+ return false;
+ }
+ *BytesProcessed += bytesProcessed;
+ return true;
+}
+
+bool
+Lz2DecodeStream (
+ uint32_t* BytesProcessed,
+ bool GetSizeOnly
+ )
+{
+ uint8_t* inBytes;
+ LZMA2_CONTROL_BYTE controlByte;
+ uint8_t propertyByte;
+ uint32_t rawSize;
+ uint16_t compressedSize;
+
+ //
+ // Read the first control byte
+ //
+ *BytesProcessed = 0;
+ while (BfRead(&controlByte.Value))
+ {
+ //
+ // When the LZMA2 control byte is 0, the entire stream is decoded. This
+ // is the only success path out of this function.
+ //
+ if (controlByte.Value == 0)
+ {
+ return true;
+ }
+
+ //
+ // Read the appropriate number of info bytes based on the stream type.
+ //
+ if (!BfSeek((controlByte.u.Common.IsLzma == 1 ) ? 4 : 2, &inBytes))
+ {
+ break;
+ }
+
+ //
+ // For LZMA streams calculate both the uncompressed and compressed size
+ // from the info bytes. Uncompressed streams only have the former.
+ //
+ if (controlByte.u.Common.IsLzma == 1)
+ {
+ rawSize = controlByte.u.Lzma.RawSize << 16;
+ compressedSize = inBytes[2] << 8;
+ compressedSize += inBytes[3] + 1;
+ }
+ else
+ {
+ rawSize = 0;
+ compressedSize = 0;
+ }
+
+ //
+ // Make sure that the output buffer that was supplied is big enough to
+ // fit the uncompressed chunk, unless we're just calculating the size.
+ //
+ rawSize += inBytes[0] << 8;
+ rawSize += inBytes[1] + 1;
+ if (!GetSizeOnly && !DtSetLimit(rawSize))
+ {
+ break;
+ }
+
+ //
+ // Check if the full LZMA state needs to be reset, which must happen at
+ // the start of stream. Also check for a property reset, which occurs
+ // when an LZMA stream follows an uncompressed stream. Separately,
+ // check for a state reset without a property byte (happens rarely,
+ // but does happen in a few compressed streams).
+ //
+ if ((controlByte.u.Lzma.ResetState == Lzma2FullReset) ||
+ (controlByte.u.Lzma.ResetState == Lzma2PropertyReset))
+ {
+ //
+ // Read the LZMA properties and then initialize the decoder.
+ //
+ if (!BfRead(&propertyByte) || !LzInitialize(propertyByte))
+ {
+ break;
+ }
+ }
+ else if (controlByte.u.Lzma.ResetState == Lzma2SimpleReset)
+ {
+ LzResetState();
+ }
+ //
+ // else controlByte.u.Lzma.ResetState == Lzma2NoReset, since a two-bit
+ // field only has four possible values
+ //
+
+ //
+ // Don't do any decompression if the caller only wants to know the size
+ //
+ if (GetSizeOnly)
+ {
+ *BytesProcessed += rawSize;
+ BfSeek((controlByte.u.Common.IsLzma == 1) ? compressedSize : rawSize,
+ &inBytes);
+ continue;
+ }
+ else if (controlByte.u.Common.IsLzma == 0)
+ {
+ //
+ // Seek to the requested size in the input buffer
+ //
+ if (!BfSeek(rawSize, &inBytes))
+ {
+ return false;
+ }
+
+ //
+ // Copy the data into the dictionary as-is
+ //
+ for (uint32_t i = 0; i < rawSize; i++)
+ {
+ DtPutSymbol(inBytes[i]);
+ }
+
+ //
+ // Update bytes and keep going to the next chunk
+ //
+ *BytesProcessed += rawSize;
+ continue;
+ }
+
+ //
+ // Record how many bytes are left in this sequence as our SoftLimit for
+ // the other operations. This allows us to omit most range checking
+ // logic in rangedec.c. This soft limit lasts until reset below.
+ //
+ if (!BfSetSoftLimit(compressedSize))
+ {
+ break;
+ }
+
+ //
+ // Read the initial range and code bytes to initialize the arithmetic
+ // coding decoder, and let it know how much input data exists. We've
+ // already validated that this much space exists in the input buffer.
+ //
+ if (!RcInitialize(&compressedSize))
+ {
+ break;
+ }
+
+ //
+ // Start decoding the LZMA sequences in this chunk
+ //
+ if (!Lz2DecodeChunk(BytesProcessed, rawSize, compressedSize))
+ {
+ break;
+ }
+
+ //
+ // Having decoded that chunk, reset our soft limit (to the full
+ // input stream) so we can read the next chunk.
+ //
+ BfResetSoftLimit();
+ }
+ return false;
+}
diff --git a/tools/src/minilzlib/lzma2dec.h b/tools/src/minilzlib/lzma2dec.h
new file mode 100644
index 0000000..0b31440
--- /dev/null
+++ b/tools/src/minilzlib/lzma2dec.h
@@ -0,0 +1,91 @@
+/*++
+
+Copyright (c) Alex Ionescu. All rights reserved.
+
+Module Name:
+
+ lzma2dec.h
+
+Abstract:
+
+ This header file contains C-style data structures and enumerations that map
+ back to the LZMA2 standard. This includes the encoding of the LZMA2 Control
+ Byte and the possible LZMA2 Reset States.
+
+Author:
+
+ Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
+
+Environment:
+
+ Windows & Linux, user mode and kernel mode.
+
+--*/
+
+#pragma once
+
+//
+// The most complex LZMA sequence possible is a "match" sequence where the
+// length is > 127 bytes, and the distance is > 127 bytes. This type of
+// sequence starts with {1,1} for "match", followed by {1,1,nnnnnnnn} for
+// "8-bit encoded length", followed by {1,1,1,1,1,1} to select the distance
+// slot (63). That's 18 bits so far, which all come from arithmetic-coded
+// bit trees with various probabilities. The next 26 bits are going to be
+// fixed-probability, meaning that the bit tree is mathematically hardcoded
+// at 50%. Finally, there are the last 4 "align" distance bits which also
+// come from an arithmetic-coded bit tree, bringing the total such bits to
+// 22.
+//
+// Each time we have to "normalize" the arithmetic coder, it consumes an
+// additional byte. Normalization is done whenever we consume more than 8
+// of the high bits of the coder's range (i.e.: below 2^24), so exactly
+// every 8 direct bits (which always halve the range due to their 50%).
+// The other bits can have arbitrary probabilities, but in the worst case
+// we need to normalize the range every n bits. As such, this is a total of
+// 20 worst-case normalizations per LZMA sequence. Finally, we do one last
+// normalization at the end of LzDecode, to make sure that the decoder is
+// always in a normalized state. This means that a compressed chunk should
+// be at least 21 bytes if we want to guarantee that LzDecode can never
+// read past the current input stream, and avoid range checking.
+//
+#define LZMA_MAX_SEQUENCE_SIZE 21
+
+//
+// This describes the different ways an LZMA2 control byte can request a reset
+//
+typedef enum _LZMA2_COMPRESSED_RESET_STATE
+{
+ Lzma2NoReset = 0,
+ Lzma2SimpleReset = 1,
+ Lzma2PropertyReset = 2,
+ Lzma2FullReset = 3
+} LZMA2_COMPRESSED_RESET_STATE;
+
+//
+// This describes how an LZMA2 control byte can be parsed
+//
+typedef union _LZMA2_CONTROL_BYTE
+{
+ union
+ {
+ struct
+ {
+ uint8_t ResetState : 2;
+ uint8_t Reserved : 5;
+ uint8_t IsLzma : 1;
+ } Raw;
+ struct
+ {
+ uint8_t RawSize : 5;
+ uint8_t ResetState : 2;
+ uint8_t IsLzma : 1;
+ } Lzma;
+ struct
+ {
+ uint8_t : 7;
+ uint8_t IsLzma : 1;
+ } Common;
+ } u;
+ uint8_t Value;
+} LZMA2_CONTROL_BYTE;
+static_assert(sizeof(LZMA2_CONTROL_BYTE) == 1, "Invalid control byte size");
diff --git a/tools/src/minilzlib/lzmadec.c b/tools/src/minilzlib/lzmadec.c
new file mode 100644
index 0000000..1a3c420
--- /dev/null
+++ b/tools/src/minilzlib/lzmadec.c
@@ -0,0 +1,627 @@
+/*++
+
+Copyright (c) Alex Ionescu. All rights reserved.
+
+Module Name:
+
+ lzmadec.c
+
+Abstract:
+
+ This module implements the LZMA Decoding Logic responsible for decoding the
+    three possible types of LZMA "packets": matches, repetitions (short & long),
+ and literals. The probability model for each type of packet is also stored
+ in this file, along with the management of the previously seen packet types
+ (which is tracked as the "sequence").
+
+Author:
+
+ Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
+
+Environment:
+
+ Windows & Linux, user mode and kernel mode.
+
+--*/
+
+#include "minlzlib.h"
+#include "lzmadec.h"
+
+//
+// Probability Bit Model for Lengths in Rep and in Match sequences
+//
+typedef struct _LENGTH_DECODER_STATE
+{
+ //
+    // Bit Model for choosing the type of length encoding
+ //
+ uint16_t Choice;
+ uint16_t Choice2;
+ //
+ // Bit Model for each of the length encodings
+ //
+ uint16_t Low[LZMA_POSITION_COUNT][LZMA_MAX_LOW_LENGTH];
+ uint16_t Mid[LZMA_POSITION_COUNT][LZMA_MAX_MID_LENGTH];
+ uint16_t High[LZMA_MAX_HIGH_LENGTH];
+} LENGTH_DECODER_STATE, * PLENGTH_DECODER_STATE;
+
+//
+// State used for LZMA decoding
+//
+typedef struct _DECODER_STATE
+{
+ //
+ // Current type of sequence last decoded
+ //
+ LZMA_SEQUENCE_STATE Sequence;
+ //
+ // History of last 4 decoded distances
+ //
+ uint32_t Rep0;
+ uint32_t Rep1;
+ uint32_t Rep2;
+ uint32_t Rep3;
+ //
+ // Pending length to repeat from dictionary
+ //
+ uint32_t Len;
+ //
+ // Probability Bit Models for all sequence types
+ //
+ union
+ {
+ struct
+ {
+ //
+ // Literal model
+ //
+ uint16_t Literal[LZMA_LITERAL_CODERS][LZMA_LC_MODEL_SIZE];
+ //
+ // Last-used-distance based models
+ //
+ uint16_t Rep[LzmaMaxState];
+ uint16_t Rep0[LzmaMaxState];
+ uint16_t Rep0Long[LzmaMaxState][LZMA_POSITION_COUNT];
+ uint16_t Rep1[LzmaMaxState];
+ uint16_t Rep2[LzmaMaxState];
+ LENGTH_DECODER_STATE RepLen;
+ //
+ // Explicit distance match based models
+ //
+ uint16_t Match[LzmaMaxState][LZMA_POSITION_COUNT];
+ uint16_t DistSlot[LZMA_FIRST_CONTEXT_DISTANCE_SLOT][LZMA_DISTANCE_SLOTS];
+ uint16_t Dist[(1 << 7) - LZMA_FIRST_FIXED_DISTANCE_SLOT];
+ uint16_t Align[LZMA_DISTANCE_ALIGN_SLOTS];
+ LENGTH_DECODER_STATE MatchLen;
+ } BitModel;
+ uint16_t RawProbabilities[LZMA_BIT_MODEL_SLOTS];
+ } u;
+} DECODER_STATE, *PDECODER_STATE;
+DECODER_STATE Decoder;
+
+//
+// LZMA decoding uses 3 "properties" which determine how the probability
+// bit model will be laid out. These store the number of bits that are used
+// to pick the correct Literal Coder ("lc"), the number of Position bits to
+// select the Literal coder ("lp"), and the number of Position Bits used to
+// select various lengths ("pb"). In LZMA2, these properties are encoded in
+// a single byte with the formula: ((pb * 45) + lp * 9) + lc).
+//
+// We only support the default {lc = 3, lp = 0, pb = 2} properties, which
+// are what the main encoders out there use. This means that a total of 2
+// bits will be used for arithmetic-coded bit trees that are dependent on
+// the current position, and that a total of 3 bits will be used when we
+// pick the arithmetic-coded bit tree used for literal coding. The 0 means
+// this selection will _not_ be dependent on the position in the buffer.
+//
+const uint8_t k_LzSupportedProperties =
+ (LZMA_PB * 45) + (LZMA_LP * 9) + (LZMA_LC);
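+
+//
+// Worked example: with the defaults above this is (2 * 45) + (0 * 9) + 3 =
+// 93 (0x5d), the only LZMA2 property byte LzInitialize will accept.
+//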
+
+void
+LzSetLiteral (
+ PLZMA_SEQUENCE_STATE State
+ )
+{
+ if (*State <= LzmaLitShortrepLitLitState)
+ {
+ //
+ // States 0-3 represent packets with at least 2 back-to-back literals,
+ // so another literal now takes us to state 0 (3 back-to-back literals)
+ //
+ *State = LzmaLitLitLitState;
+ }
+ else if (*State <= LzmaLitShortrepState)
+ {
+ //
+ // States 4-6 represent packets with a literal at the end, so seeing
+ // another literal now takes us to 2 back-to-back literals, which are
+ // state packets 1-3.
+ //
+ // States 7-9 represent packets with a literal at the start, followed
+ // by a match/rep/shortrep. Seeing another literal now drops this first
+ // literal and takes us to having a literal at the end, which are state
+ // packets 4-6 that we just described in the paragraph above.
+ //
+ *State = (LZMA_SEQUENCE_STATE)(*State - 3);
+ }
+ else
+ {
+ //
+ // Finally, state 10 and 11 represent cases without a single literal in
+ // the last 2 sequence packets, so seeing a literal now takes us to a
+ // "literal at the end" state, either following a match or a rep.
+ //
+ *State = (LZMA_SEQUENCE_STATE)(*State - 6);
+ }
+}
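+
+//
+// For example, seeing another literal in LzmaMatchLitState (4) moves us to
+// LzmaMatchLitLitState (1): the packet history "match, literal" becomes
+// "match, literal, literal".
+//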
+
+bool
+LzIsLiteral (
+ LZMA_SEQUENCE_STATE State
+ )
+{
+ //
+ // States 0-6 describe literal packet sequences
+ //
+ return State < LzmaMaxLitState;
+}
+
+void
+LzSetMatch (
+ PLZMA_SEQUENCE_STATE State
+ )
+{
+ //
+ // Move to the appropriate "match" state based on current literal state
+ //
+ *State = LzIsLiteral(*State) ? LzmaLitMatchState : LzmaNonlitMatchState;
+}
+
+void
+LzSetLongRep (
+ PLZMA_SEQUENCE_STATE State
+ )
+{
+ //
+ // Move to the appropriate "long rep" state based on current literal state
+ //
+ *State = LzIsLiteral(*State) ? LzmaLitRepState : LzmaNonlitRepState;
+}
+
+void
+LzSetShortRep (
+ PLZMA_SEQUENCE_STATE State
+ )
+{
+ //
+ // Move to the appropriate "short rep" state based on current literal state
+ //
+ *State = LzIsLiteral(*State) ? LzmaLitShortrepState : LzmaNonlitRepState;
+}
+
+uint16_t*
+LzGetLiteralSlot (
+ void
+ )
+{
+ uint8_t symbol;
+
+ //
+ // To pick the correct literal coder arithmetic-coded bit tree, LZMA uses
+ // the "lc" parameter to choose the number of high bits from the previous
+ // symbol (in the normal case, 3). It then combines that with the "lp"
+ // parameter to choose the number of low bits from the current position in
+ // the dictionary. However, since "lp" is normally 0, we can omit this.
+ //
+ symbol = DtGetSymbol(1);
+ return Decoder.u.BitModel.Literal[symbol >> (8 - LZMA_LC)];
+}
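+
+//
+// For example (with lc = 3): if the previously decoded byte was 0xe3, its
+// top three bits select literal coder 0xe3 >> 5 = 7.
+//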
+
+uint16_t*
+LzGetDistSlot (
+ void
+ )
+{
+ uint8_t slotIndex;
+
+ //
+ // There are 4 different arithmetic-coded bit trees which are used to pick
+ // the correct "distance slot" when doing match distance decoding. Each of
+ // them is used based on the length of the symbol that is being repeated.
+ // For lengths of 2, 3, 4 bytes, a dedicated set of distance slots is used.
+ // For lengths of 5 bytes or above, a shared set of distance slots is used.
+ //
+ if (Decoder.Len < (LZMA_FIRST_CONTEXT_DISTANCE_SLOT + LZMA_MIN_LENGTH))
+ {
+ slotIndex = (uint8_t)(Decoder.Len - LZMA_MIN_LENGTH);
+ }
+ else
+ {
+ slotIndex = LZMA_FIRST_CONTEXT_DISTANCE_SLOT - 1;
+ }
+ return Decoder.u.BitModel.DistSlot[slotIndex];
+}
+
+void
+LzDecodeLiteral (
+ void
+ )
+{
+ uint16_t* probArray;
+ uint8_t symbol, matchByte;
+
+ //
+ // First, choose the correct arithmetic-coded bit tree (which is based on
+ // the last symbol we just decoded), then see if we last decoded a literal.
+ //
+ // If so, simply get the symbol from the bit tree as normal. However, if
+ // we didn't last see a literal, we need to read the "match byte" that is
+ // "n" bytes away from the last decoded match. We previously stored this in
+ // rep0.
+ //
+ // Based on this match byte, we'll then use 2 other potential bit trees,
+ // see LzDecodeMatched for more information.
+ //
+ probArray = LzGetLiteralSlot();
+ if (LzIsLiteral(Decoder.Sequence))
+ {
+
+ symbol = RcGetBitTree(probArray, (1 << 8));
+ }
+ else
+ {
+ matchByte = DtGetSymbol(Decoder.Rep0 + 1);
+ symbol = RcDecodeMatchedBitTree(probArray, matchByte);
+ }
+
+ //
+ // Write the symbol and indicate that the last sequence was a literal
+ //
+ DtPutSymbol(symbol);
+ LzSetLiteral(&Decoder.Sequence);
+}
+
+void
+LzDecodeLen (
+ PLENGTH_DECODER_STATE LenState,
+ uint8_t PosBit
+ )
+{
+ uint16_t* probArray;
+ uint16_t limit;
+
+ //
+    // Lengths of 2 and higher are encoded in 3 possible types of arithmetic-
+    // coded bit trees, depending on the size of the length.
+    //
+    // Lengths 2-9 are encoded in trees called "Low" using 3 bits of data.
+    // Lengths 10-17 are encoded in trees called "Mid" using 3 bits of data.
+    // Lengths 18-273 are encoded in a tree called "High" using 8 bits of data.
+    //
+    // The appropriate "Low" or "Mid" tree is selected based on the bottom 2
+    // position bits (0-3) (in the LZMA standard, this is based on the "pb"
+    // setting), while the "High" tree is shared for all positions.
+ //
+ // Two arithmetic-coded bit trees, called "Choice" and "Choice2" tell us
+ // the type of Length, so we can choose the right tree. {0, n} tells us
+ // to use the Low trees, while {1, 0} tells us to use the Mid trees. Lastly
+ // {1, 1} tells us to use the High tree.
+ //
+ Decoder.Len = LZMA_MIN_LENGTH;
+ if (RcIsBitSet(&LenState->Choice))
+ {
+ if (RcIsBitSet(&LenState->Choice2))
+ {
+ probArray = LenState->High;
+ limit = LZMA_MAX_HIGH_LENGTH;
+ Decoder.Len += LZMA_MAX_LOW_LENGTH + LZMA_MAX_MID_LENGTH;
+ }
+ else
+ {
+ probArray = LenState->Mid[PosBit];
+ limit = LZMA_MAX_MID_LENGTH;
+ Decoder.Len += LZMA_MAX_LOW_LENGTH;
+ }
+ }
+ else
+ {
+ probArray = LenState->Low[PosBit];
+ limit = LZMA_MAX_LOW_LENGTH;
+ }
+ Decoder.Len += RcGetBitTree(probArray, limit);
+}
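+
+//
+// Worked example: the bit sequence {1, 0} selects the "Mid" tree, so a tree
+// value of 5 decodes as Len = 2 (minimum) + 8 (low lengths) + 5 = 15.
+//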
+
+void
+LzDecodeMatch (
+ uint8_t PosBit
+ )
+{
+ uint16_t* probArray;
+ uint8_t distSlot, distBits;
+
+ //
+ // Decode the length component of the "match" sequence. Then, since we're
+ // about to decode a new distance, update our history by one level.
+ //
+ LzDecodeLen(&Decoder.u.BitModel.MatchLen, PosBit);
+ Decoder.Rep3 = Decoder.Rep2;
+ Decoder.Rep2 = Decoder.Rep1;
+ Decoder.Rep1 = Decoder.Rep0;
+
+ //
+ // Read the first 6 bits, which make up the "distance slot"
+ //
+ probArray = LzGetDistSlot();
+ distSlot = RcGetBitTree(probArray, LZMA_DISTANCE_SLOTS);
+ if (distSlot < LZMA_FIRST_CONTEXT_DISTANCE_SLOT)
+ {
+ //
+ // Slots 0-3 directly encode the distance as a literal number
+ //
+ Decoder.Rep0 = distSlot;
+ }
+ else
+ {
+ //
+ // For slots 4-13, figure out how many "context encoded bits" are used
+ // to encode this distance. The math works out such that slots 4-5 use
+ // 1 bit, 6-7 use 2 bits, 8-9 use 3 bits, and so on and so forth until
+ // slots 12-13 which use 5 bits.
+ //
+ // This gives us anywhere from 1-5 bits, plus the two upper bits which
+ // can either be 0b10 or 0b11 (based on the bottom bit of the distance
+ // slot). Thus, with the context encoded bits, we can represent lengths
+ // anywhere from 0b10[0] to 0b11[11111] (i.e.: 4-127).
+ //
+ // For slots 14-63, we use "fixed 50% probability bits" which are also
+ // called "direct bits". The formula below also tells us how many such
+ // direct bits to use in this scenario. In other words, distBits can
+ // either be the number of "context encoded bits" for slots 4-13, or it
+        // can be the number of "direct bits" for slots 14-63. This gives
+        // us a range of 2 to 26 bits, which are then used as middle bits.
+ // Finally, the last 4 bits are called the "align" bits. The smallest
+ // possible number we can encode is now going to be 0b10[00][0000] and
+ // the highest is 0b11[1111111111111111111111111][1111], in other words
+ // 128 to (2^31)-1.
+ //
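+        // Worked example: distSlot = 9 gives distBits = (9 >> 1) - 1 = 3 and
+        // a base of (0b10 | 1) << 3 = 24; the three reverse-tree bits below
+        // then yield a distance in the range 24-31.
+        //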
+ distBits = (distSlot >> 1) - 1;
+ Decoder.Rep0 = (0b10 | (distSlot & 1)) << distBits;
+
+ //
+ // Slots 4-13 have their own arithmetic-coded reverse bit trees. Slots
+ // 14-63 encode the middle "direct bits" with fixed 50% probability and
+ // the bottom 4 "align bits" with a shared arithmetic-coded reverse bit
+ // tree.
+ //
+ if (distSlot < LZMA_FIRST_FIXED_DISTANCE_SLOT)
+ {
+ probArray = &Decoder.u.BitModel.Dist[Decoder.Rep0 - distSlot];
+ }
+ else
+ {
+ Decoder.Rep0 |= RcGetFixed(distBits - LZMA_DISTANCE_ALIGN_BITS) <<
+ LZMA_DISTANCE_ALIGN_BITS;
+ distBits = LZMA_DISTANCE_ALIGN_BITS;
+ probArray = Decoder.u.BitModel.Align;
+ }
+ Decoder.Rep0 |= RcGetReverseBitTree(probArray, distBits);
+ }
+
+ //
+ // Indicate that the last sequence was a "match"
+ //
+ LzSetMatch(&Decoder.Sequence);
+}
+
+void
+LzDecodeRepLen (
+ uint8_t PosBit,
+ bool IsLongRep
+ )
+{
+ //
+ // Decode the length byte and indicate the last sequence was a "rep".
+ // If this is a short rep, then the length is always hard-coded to 1.
+ //
+ if (IsLongRep)
+ {
+ LzDecodeLen(&Decoder.u.BitModel.RepLen, PosBit);
+ LzSetLongRep(&Decoder.Sequence);
+ }
+ else
+ {
+ Decoder.Len = 1;
+ LzSetShortRep(&Decoder.Sequence);
+ }
+}
+
+void
+LzDecodeRep0(
+ uint8_t PosBit
+ )
+{
+ uint8_t bit;
+
+ //
+ // This could be a "short rep" with a length of 1, or a "long rep0" with
+ // a length that we have to decode. The next bit tells us this, using the
+ // arithmetic-coded bit trees stored in "Rep0Long", with 1 tree for each
+ // position bit (0-3).
+ //
+ bit = RcIsBitSet(&Decoder.u.BitModel.Rep0Long[Decoder.Sequence][PosBit]);
+ LzDecodeRepLen(PosBit, bit);
+}
+
+void
+LzDecodeLongRep (
+ uint8_t PosBit
+ )
+{
+ uint32_t newRep;
+
+ //
+ // Read the next 2 bits to figure out which of the recently used distances
+ // we should use for this match. The following three states are possible :
+ //
+ // {0,n} - "Long rep1", where the length is stored in an arithmetic-coded
+ // bit tree, and the distance is the 2nd most recently used distance (Rep1)
+ //
+ // {1,0} - "Long rep2", where the length is stored in an arithmetic-coded
+ // bit tree, and the distance is the 3rd most recently used distance (Rep2)
+ //
+ // {1,1} - "Long rep3", where the length is stored in an arithmetic-coded
+ // bit tree, and the distance is the 4th most recently used distance (Rep3)
+ //
+ // Once we have the right one, we must slide down each previously recently
+ // used distance, so that the distance we're now using (Rep1, Rep2 or Rep3)
+ // becomes "Rep0" again.
+ //
+ if (RcIsBitSet(&Decoder.u.BitModel.Rep1[Decoder.Sequence]))
+ {
+ if (RcIsBitSet(&Decoder.u.BitModel.Rep2[Decoder.Sequence]))
+ {
+ newRep = Decoder.Rep3;
+ Decoder.Rep3 = Decoder.Rep2;
+ }
+ else
+ {
+ newRep = Decoder.Rep2;
+ }
+ Decoder.Rep2 = Decoder.Rep1;
+ }
+ else
+ {
+ newRep = Decoder.Rep1;
+ }
+ Decoder.Rep1 = Decoder.Rep0;
+ Decoder.Rep0 = newRep;
+ LzDecodeRepLen(PosBit, true);
+}
+
+void
+LzDecodeRep (
+ uint8_t PosBit
+ )
+{
+ //
+ // We know this is an LZ77 distance-length pair where the distance is based
+ // on a history of up to 4 previously used distance (Rep0-3). To know which
+ // distance to use, the following 5 bit positions are possible (keeping in
+ // mind that we've already decoded the first 2 bits {1,1} in LzDecode which
+ // got us here in the first place):
+ //
+ // {0,0} - "Short rep", where the length is always 1 and distance is always
+ // the most recently used distance (Rep0).
+ //
+ // {0,1} - "Long rep0", where the length is stored in an arithmetic-coded
+ // bit tree, and the distance is the most recently used distance (Rep0).
+ //
+ // Because both of these possibilities just use Rep0, LzDecodeRep0 handles
+ // these two cases. Otherwise, we use LzDecodeLongRep to read up to two
+ // additional bits to figure out which recently used distance (1, 2, or 3)
+ // to use.
+ //
+ if (RcIsBitSet(&Decoder.u.BitModel.Rep0[Decoder.Sequence]))
+ {
+ LzDecodeLongRep(PosBit);
+ }
+ else
+ {
+ LzDecodeRep0(PosBit);
+ }
+}
+
+bool
+LzDecode (
+ void
+ )
+{
+ uint32_t position;
+ uint8_t posBit;
+
+ //
+ // Get the current position in dictionary, making sure we have input bytes.
+ // Once we run out of bytes, normalize the last arithmetic coded byte and
+ // ensure there's no pending lengths that we haven't yet repeated.
+ //
+ while (DtCanWrite(&position) && RcCanRead())
+ {
+ //
+ // An LZMA packet begins here, which can have 3 possible initial bit
+ // sequences that correspond to the type of encoding that was chosen
+ // to represent the next stream of symbols.
+ //
+ // {0, n} represents a "literal", which LzDecodeLiteral decodes.
+ // Literals are a single byte encoded with arithmetic-coded bit trees
+ //
+ // {1, 0} represents a "match", which LzDecodeMatch decodes.
+ // Matches are typical LZ77 sequences with explicit length and distance
+ //
+ // {1, 1} represents a "rep", which LzDecodeRep decodes.
+ // Reps are LZ77 sequences where the distance is encoded as a reference
+ // to a previously used distance (up to 4 -- called "Rep0-3").
+ //
+        // Once we've decoded either the "match" or the "rep", we now have the
+ // distance in "Rep0" (the most recently used distance) and the length
+ // in "Len", so we will use DtRepeatSymbol to go back in the dictionary
+ // buffer "Rep0" bytes and repeat that character "Len" times.
+ //
+ posBit = position & (LZMA_POSITION_COUNT - 1);
+ if (RcIsBitSet(&Decoder.u.BitModel.Match[Decoder.Sequence][posBit]))
+ {
+ if (RcIsBitSet(&Decoder.u.BitModel.Rep[Decoder.Sequence]))
+ {
+ LzDecodeRep(posBit);
+ }
+ else
+ {
+ LzDecodeMatch(posBit);
+ }
+
+ if (!DtRepeatSymbol(Decoder.Len, Decoder.Rep0 + 1))
+ {
+ return false;
+ }
+ Decoder.Len = 0;
+ }
+ else
+ {
+ LzDecodeLiteral();
+ }
+ }
+ RcNormalize();
+ return (Decoder.Len == 0);
+}
+
+void
+LzResetState (
+ void
+ )
+{
+ //
+ // Initialize decoder to default state in case we're called more than once.
+ // The LZMA "Bit Model" is an adaptive arithmetic-coded probability-based
+ // bit tree which encodes either a "0" or a "1".
+ //
+ Decoder.Sequence = LzmaLitLitLitState;
+ Decoder.Rep0 = Decoder.Rep1 = Decoder.Rep2 = Decoder.Rep3 = 0;
+ static_assert((LZMA_BIT_MODEL_SLOTS * 2) == sizeof(Decoder.u.BitModel),
+ "Invalid size");
+ for (int i = 0; i < LZMA_BIT_MODEL_SLOTS; i++)
+ {
+ RcSetDefaultProbability(&Decoder.u.RawProbabilities[i]);
+ }
+}
+
+bool
+LzInitialize (
+ uint8_t Properties
+ )
+{
+ if (Properties != k_LzSupportedProperties)
+ {
+ return false;
+ }
+ LzResetState();
+ return true;
+}
diff --git a/tools/src/minilzlib/lzmadec.h b/tools/src/minilzlib/lzmadec.h
new file mode 100644
index 0000000..652165d
--- /dev/null
+++ b/tools/src/minilzlib/lzmadec.h
@@ -0,0 +1,114 @@
+/*++
+
+Copyright (c) Alex Ionescu. All rights reserved.
+
+Module Name:
+
+ lzmadec.h
+
+Abstract:
+
+ This header file contains C-style definitions, constants, and enumerations
+ that map back to the LZMA Standard, specifically the probability model that
+ is used for encoding probabilities.
+
+Author:
+
+ Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
+
+Environment:
+
+ Windows & Linux, user mode and kernel mode.
+
+--*/
+
+#pragma once
+
+//
+// Literals can be 0-255 and are encoded in 3 different types of slots based on
+// the previous literal decoded and the "match byte" used.
+//
+#define LZMA_LITERALS 256
+#define LZMA_LC_TYPES 3
+#define LZMA_LC_MODEL_SIZE (LZMA_LC_TYPES * LZMA_LITERALS)
+
+//
+// These are the hardcoded LZMA properties we support for position and coders
+//
+#define LZMA_LC 3
+#define LZMA_PB 2
+#define LZMA_LP 0
+#define LZMA_LITERAL_CODERS (1 << LZMA_LC)
+#define LZMA_POSITION_COUNT (1 << LZMA_PB)
+
+//
+// Lengths are described in three different ways using "low", "mid", and "high"
+// bit trees. The first two trees encode 3 bits, the last encodes 8. We never
+// encode a length less than 2 bytes, since that's wasteful.
+//
+#define LZMA_MAX_LOW_LENGTH (1 << 3)
+#define LZMA_MAX_MID_LENGTH (1 << 3)
+#define LZMA_MAX_HIGH_LENGTH (1 << 8)
+#define LZMA_MIN_LENGTH 2
+
+//
+// Distances can be encoded in different ways, based on the distance slot.
+// Lengths of 2, 3, 4 bytes are directly encoded with their own slot. Lengths
+// of 5 and over share a slot, which is then further subdivided into 3 ways
+// of encoding them, which are described in the source.
+//
+#define LZMA_DISTANCE_SLOTS 64
+#define LZMA_FIRST_CONTEXT_DISTANCE_SLOT 4
+#define LZMA_FIRST_FIXED_DISTANCE_SLOT 14
+#define LZMA_DISTANCE_ALIGN_BITS 4
+#define LZMA_DISTANCE_ALIGN_SLOTS (1 << LZMA_DISTANCE_ALIGN_BITS)
+
+//
+// Total number of probabilities that we need to store
+//
+#define LZMA_BIT_MODEL_SLOTS (1174 + \
+ (LZMA_LITERAL_CODERS * \
+ LZMA_LC_MODEL_SIZE))
+
+//
+// The LZMA probability bit model is typically based on the last LZMA sequences
+// that were decoded. There are 11 such possibilities that are tracked.
+//
+typedef enum _LZMA_SEQUENCE_STATE
+{
+ //
+ // State where we last saw three literals
+ //
+ LzmaLitLitLitState,
+ //
+    // States where we last saw two literals preceded by a non-literal
+ //
+ LzmaMatchLitLitState,
+ LzmaRepLitLitState,
+ LzmaLitShortrepLitLitState,
+ //
+    // States where we last saw one literal preceded by a non-literal
+ //
+ LzmaMatchLitState,
+ LzmaRepLitState,
+ LzmaLitShortrepLitState,
+ //
+ // Separator between states where we last saw at least one literal
+ //
+ LzmaMaxLitState,
+ //
+    // States where we last saw a non-literal preceded by a literal
+ //
+ LzmaLitMatchState = 7,
+ LzmaLitRepState,
+ LzmaLitShortrepState,
+ //
+ // States where we last saw two non-literals
+ //
+ LzmaNonlitMatchState,
+ LzmaNonlitRepState,
+ //
+ // Separator for number of total states
+ //
+ LzmaMaxState
+} LZMA_SEQUENCE_STATE, * PLZMA_SEQUENCE_STATE;
diff --git a/tools/src/minilzlib/minlzlib.h b/tools/src/minilzlib/minlzlib.h
new file mode 100644
index 0000000..c5276ae
--- /dev/null
+++ b/tools/src/minilzlib/minlzlib.h
@@ -0,0 +1,88 @@
+/*++
+
+Copyright (c) Alex Ionescu. All rights reserved.
+
+Module Name:
+
+ minlzlib.h
+
+Abstract:
+
+ This header file is the main include for the minlz library. It contains the
+    internal function definitions for the history & input buffers, the LZMA and
+ LZMA2 decoders, and the arithmetic (de)coder.
+
+Author:
+
+ Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
+
+Environment:
+
+ Windows & Linux, user mode and kernel mode.
+
+--*/
+
+#pragma once
+
+//
+// C Standard Headers
+//
+#include <stddef.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <assert.h>
+
+//
+// Input Buffer Management
+//
+bool BfRead(uint8_t* Byte);
+bool BfSeek(uint32_t Length, uint8_t** Bytes);
+uint32_t BfTell(void);
+bool BfAlign(void);
+void BfInitialize(uint8_t* InputBuffer, uint32_t InputSize);
+bool BfSetSoftLimit(uint32_t Remaining);
+void BfResetSoftLimit(void);
+
+//
+// Dictionary (History Buffer) Management
+//
+bool DtRepeatSymbol(uint32_t Length, uint32_t Distance);
+void DtInitialize(uint8_t* HistoryBuffer, uint32_t Position);
+bool DtSetLimit(uint32_t Limit);
+void DtPutSymbol(uint8_t Symbol);
+uint8_t DtGetSymbol(uint32_t Distance);
+bool DtCanWrite(uint32_t* Position);
+bool DtIsComplete(uint32_t* BytesProcessed);
+
+//
+// Range Decoder
+//
+uint8_t RcGetBitTree(uint16_t* BitModel, uint16_t Limit);
+uint8_t RcGetReverseBitTree(uint16_t* BitModel, uint8_t HighestBit);
+uint8_t RcDecodeMatchedBitTree(uint16_t* BitModel, uint8_t MatchByte);
+uint32_t RcGetFixed(uint8_t HighestBit);
+bool RcInitialize(uint16_t* ChunkSize);
+uint8_t RcIsBitSet(uint16_t* Probability);
+void RcNormalize(void);
+bool RcCanRead(void);
+bool RcIsComplete(uint32_t* Offset);
+void RcSetDefaultProbability(uint16_t* Probability);
+
+//
+// LZMA Decoder
+//
+bool LzDecode(void);
+bool LzInitialize(uint8_t Properties);
+void LzResetState(void);
+
+//
+// LZMA2 Decoder
+//
+bool Lz2DecodeStream(uint32_t* BytesProcessed, bool GetSizeOnly);
+#ifdef MINLZ_INTEGRITY_CHECKS
+//
+// Checksum Management
+//
+uint32_t OsComputeCrc32(uint32_t Initial, const uint8_t* Data, uint32_t Length);
+#define Crc32(Buffer, Length) OsComputeCrc32(0, (const uint8_t*)Buffer, Length)
+#endif
diff --git a/tools/src/minilzlib/minlzma.h b/tools/src/minilzlib/minlzma.h
new file mode 100644
index 0000000..f7ca4bd
--- /dev/null
+++ b/tools/src/minilzlib/minlzma.h
@@ -0,0 +1,33 @@
+#pragma once
+
+#include <stdbool.h>
+
+/*!
+ * @brief Decompresses an XZ stream from InputBuffer into OutputBuffer.
+ *
+ * @detail The XZ stream must contain a single block with an LZMA2 filter
+ *         and no BCJ2 filters, using default LZMA properties, and using
+ * either CRC32 or None as the checksum type.
+ *
+ * @param[in] InputBuffer - A fully formed buffer containing the XZ stream.
+ * @param[in,out] InputSize - The size of the input buffer. On output, the size
+ * consumed from the input buffer.
+ * @param[in] OutputBuffer - A fully allocated buffer to receive the output.
+ * Callers can pass in NULL if they do not intend to decompress,
+ * in combination with setting OutputSize to 0, in order to query
+ * the final expected size of the decompressed buffer.
+ * @param[in,out] OutputSize - On input, the size of the buffer. On output, the
+ * size of the decompressed result.
+ *
+ * @return true - The input buffer was fully decompressed in OutputBuffer,
+ *         or no decompression was requested and the size of the decompressed
+ *         buffer was returned in OutputSize.
+ * false - A failure occurred during the decompression process.
+ */
+bool
+XzDecode (
+ uint8_t* InputBuffer,
+ uint32_t* InputSize,
+ uint8_t* OutputBuffer,
+ uint32_t* OutputSize
+ );
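+
+/*
+ * Usage sketch (illustrative; xzData/xzSize are placeholder names): query the
+ * decompressed size with a NULL output buffer first, then decode for real.
+ *
+ *     uint32_t inSize = xzSize, outSize = 0;
+ *     if (XzDecode(xzData, &inSize, NULL, &outSize)) {
+ *         uint8_t *out = malloc(outSize);
+ *         inSize = xzSize;
+ *         if (out)
+ *             XzDecode(xzData, &inSize, out, &outSize);
+ *     }
+ */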
diff --git a/tools/src/minilzlib/rangedec.c b/tools/src/minilzlib/rangedec.c
new file mode 100644
index 0000000..6a9f84f
--- /dev/null
+++ b/tools/src/minilzlib/rangedec.c
@@ -0,0 +1,395 @@
+/*++
+
+Copyright (c) Alex Ionescu. All rights reserved.
+
+Module Name:
+
+ rangedec.c
+
+Abstract:
+
+ This module implements the Range Decoder, which is how LZMA describes the
+ arithmetic coder that it uses to represent the binary representation of the
+ LZ77 match length-distance pairs after the initial compression pass. At the
+ implementation level, this coder works with an alphabet of only 2 symbols:
+ the bit "0", and the bit "1", so there are only ever two probability ranges
+ that need to be checked each pass. In LZMA, a probability of 100% encodes a
+ "0", while 0% encodes a "1". Initially, all probabilities are assumed to be
+ 50%. Probabilities are stored using 11 bits (2048 == 100%), and thus use 16
+ bits of storage. Finally, the range decoder is adaptive, meaning that each
+ time a bit is decoded, the probabilities are updated: each 0 increases the
+ probability of another 0, and each 1 decreases it. The algorithm adapts the
+ probabilities using an exponential moving average with a shift ratio of 5.
+
+Author:
+
+ Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
+
+Environment:
+
+ Windows & Linux, user mode and kernel mode.
+
+--*/
+
+#include "minlzlib.h"
+
+//
+// The range decoder uses 11 probability bits, where 2048 is 100% chance of a 0
+//
+#define LZMA_RC_PROBABILITY_BITS 11
+#define LZMA_RC_MAX_PROBABILITY (1 << LZMA_RC_PROBABILITY_BITS)
+const uint16_t k_LzmaRcHalfProbability = LZMA_RC_MAX_PROBABILITY / 2;
+
+//
+// The range decoder uses an exponential moving average of the last probability
+// hit (match or miss) with an adaptation rate of 5 bits (which falls in the
+// middle of the 11 bits used to encode a probability).
+//
+#define LZMA_RC_ADAPTATION_RATE_SHIFT 5
+
+//
+// The range decoder has enough precision for the range only as long as the top
+// 8 bits are still set. Once it falls below, it needs a renormalization step.
+//
+#define LZMA_RC_MIN_RANGE (1 << 24)
+
+//
+// The range decoder must be initialized with 5 bytes, the first of which is
+// ignored
+//
+#define LZMA_RC_INIT_BYTES 5
+
+//
+// State used for the binary adaptive arithmetic coder (LZMA Range Decoder)
+//
+typedef struct _RANGE_DECODER_STATE
+{
+ //
+ // Start and end location of the current stream's range encoder buffer
+ //
+ uint8_t* Start;
+ uint8_t* Limit;
+ //
+ // Current probability range and 32-bit arithmetic encoded sequence code
+ //
+ uint32_t Range;
+ uint32_t Code;
+} RANGE_DECODER_STATE, *PRANGE_DECODER_STATE;
+RANGE_DECODER_STATE RcState;
+
+bool
+RcInitialize (
+ uint16_t* ChunkSize
+ )
+{
+ uint8_t i, rcByte;
+ uint8_t* chunkEnd;
+
+ //
+ // Make sure that the input buffer has enough space for the requirements of
+ // the range encoder. We (temporarily) seek forward to validate this.
+ //
+ if (!BfSeek(*ChunkSize, &chunkEnd))
+ {
+ return false;
+ }
+ BfSeek(-*ChunkSize, &chunkEnd);
+
+ //
+ // The initial probability range is set to its highest value, after which
+ // the next 5 bytes are used to initialize the initial code. Note that the
+ // first byte outputted by the encoder is always going to be zero, so it is
+ // ignored here.
+ //
+ RcState.Range = (uint32_t)-1;
+ RcState.Code = 0;
+ for (i = 0; i < LZMA_RC_INIT_BYTES; i++)
+ {
+ BfRead(&rcByte);
+ RcState.Code = (RcState.Code << 8) | rcByte;
+ }
+
+ //
+ // Store our current location in the buffer now, and how far we can go on
+ // reading. Then decrease the total chunk size by the count of init bytes,
+ // so that the caller can check, once done (RcIsComplete), if the code has
+ // become 0 exactly when the compressed chunk size has been fully consumed
+ // by the decoder.
+ //
+ BfSeek(0, &RcState.Start);
+ RcState.Limit = RcState.Start + *ChunkSize;
+ *ChunkSize -= LZMA_RC_INIT_BYTES;
+ return true;
+}
+
+bool
+RcCanRead (
+ void
+ )
+{
+ uint8_t* pos;
+ //
+ // We can keep reading symbols as long as we haven't reached the end of the
+ // input buffer yet.
+ //
+ BfSeek(0, &pos);
+ return pos <= RcState.Limit;
+}
+
+bool
+RcIsComplete (
+ uint32_t* BytesProcessed
+ )
+{
+ uint8_t* pos;
+ //
+ // When the last symbol has been decoded, the last code should be zero as
+ // there is nothing left to describe. Return the offset in the buffer where
+ // this occurred (which should be equal to the compressed size).
+ //
+ BfSeek(0, &pos);
+ *BytesProcessed = (uint32_t)(pos - RcState.Start);
+ return (RcState.Code == 0);
+}
+
+void
+RcNormalize (
+ void
+ )
+{
+ uint8_t rcByte;
+ //
+ // Whenever we drop below 24 bits, there is no longer enough precision in
+    // the probability range to avoid a "stuck" state where we cannot tell
+ // apart the two branches (above/below the probability range) because the
+ // two options appear identical with the number of precision bits that we
+ // have. In this case, shift the state by a byte (8 bits) and read another.
+ //
+ if (RcState.Range < LZMA_RC_MIN_RANGE)
+ {
+ RcState.Range <<= 8;
+ RcState.Code <<= 8;
+ BfRead(&rcByte);
+ RcState.Code |= rcByte;
+ }
+}
+
+void
+RcAdapt (
+ bool Miss,
+ uint16_t* Probability
+ )
+{
+ //
+    // In the canonical range encoders out there (including this one, used by
+    // LZMA), we want the probability to adapt (change) as we read more or less
+ // bits that match our expectation. In order to quickly adapt to change,
+ // use an exponential moving average. The standard way of doing this is to
+ // use an integer based adaptation with a shift that's somewhere between
+ // {1, bits-1}. Since LZMA uses 11 bits for its model, 5 is a nice number
+ // that lands exactly between 1 and 10.
+ //
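+    // For example, with the shift of 5 and a probability currently at 1024
+    // (50%), a miss drops it to 1024 - (1024 >> 5) = 992, while a hit raises
+    // it to 1024 + ((2048 - 1024) >> 5) = 1056.
+    //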
+ if (Miss)
+ {
+ *Probability -= *Probability >> LZMA_RC_ADAPTATION_RATE_SHIFT;
+ }
+ else
+ {
+ *Probability += (LZMA_RC_MAX_PROBABILITY - *Probability) >>
+ LZMA_RC_ADAPTATION_RATE_SHIFT;
+ }
+}
+
+uint8_t
+RcIsBitSet (
+ uint16_t* Probability
+ )
+{
+ uint32_t bound;
+ uint8_t bit;
+
+ //
+ // Always begin by making sure the range has been normalized for precision
+ //
+ RcNormalize();
+
+ //
+    // Check if the current arithmetic code is described by the next calculated
+ // proportionally-divided probability range. Recall that the probabilities
+ // encode the chance of the symbol (bit) being a 0 -- not a 1!
+ //
+ // Therefore, if the next chunk of the code lies outside of this new range,
+ // we are still on the path to our 0. Otherwise, if the code is now part of
+ // the newly defined range (inclusive), then we produce a 1 and limit the
+ // range to produce a new range and code for the next decoding pass.
+ //
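+    // As a worked example: with Range = 0xFFFFFFFF and a 50% probability of
+    // 1024, bound = (0xFFFFFFFF >> 11) * 1024 = 0x7FFFFC00, i.e. (almost
+    // exactly) half of the range, as expected.
+    //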
+ bound = (RcState.Range >> LZMA_RC_PROBABILITY_BITS) * *Probability;
+ if (RcState.Code < bound)
+ {
+ RcState.Range = bound;
+ bit = 0;
+ }
+ else
+ {
+ RcState.Range -= bound;
+ RcState.Code -= bound;
+ bit = 1;
+ }
+
+ //
+    // Always finish by adapting the probability based on the bit value
+ //
+ RcAdapt(bit, Probability);
+ return bit;
+}
+
+uint8_t
+RcIsFixedBitSet(
+ void
+ )
+{
+ uint8_t bit;
+
+ //
+ // This is a specialized version of RcIsBitSet with two differences:
+ //
+ // First, there is no adaptive probability -- it is hardcoded to 50%.
+ //
+    // Second, because there are 11 bits per probability, and 50% is 1 << 10,
+    // "(Range >> LZMA_RC_PROBABILITY_BITS) * Probability" reduces to
+    // "Range >> 1". As such, we can just shift by 1 (halving the range).
+ //
+ RcNormalize();
+ RcState.Range >>= 1;
+ if (RcState.Code < RcState.Range)
+ {
+ bit = 0;
+ }
+ else
+ {
+ RcState.Code -= RcState.Range;
+ bit = 1;
+ }
+ return bit;
+}
+
+uint8_t
+RcGetBitTree (
+ uint16_t* BitModel,
+ uint16_t Limit
+ )
+{
+ uint16_t symbol;
+
+ //
+ // Context probability bit trees always begin at index 1. Iterate over each
+ // decoded bit and just keep shifting it in place, until we reach the total
+ // expected number of bits, which should never be over 8 (limit is 0x100).
+ //
+ // Once decoded, always subtract the limit back from the symbol since we go
+ // one bit "past" the limit in the loop, as a side effect of the tree being
+ // off-by-one.
+ //
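+    // For a byte-sized tree (Limit == 0x100), this decodes 8 bits: symbol
+    // starts at 1 and ends up in [0x100, 0x1FF], so subtracting the limit
+    // yields the decoded byte in [0x00, 0xFF].
+    //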
+ for (symbol = 1; symbol < Limit; )
+ {
+ symbol = (symbol << 1) | RcIsBitSet(&BitModel[symbol]);
+ }
+ return (symbol - Limit) & 0xFF;
+}
+
+uint8_t
+RcGetReverseBitTree (
+ uint16_t* BitModel,
+ uint8_t HighestBit
+ )
+{
+ uint16_t symbol;
+ uint8_t i, bit, result;
+
+ //
+ // This is the same logic as in RcGetBitTree, but with the bits actually
+ // encoded in reverse order. We keep track of the probability index as the
+ // "symbol" just like RcGetBitTree, but actually decode the result in the
+ // opposite order.
+ //
+ for (i = 0, symbol = 1, result = 0; i < HighestBit; i++)
+ {
+ bit = RcIsBitSet(&BitModel[symbol]);
+ symbol = (symbol << 1) | bit;
+ result |= bit << i;
+ }
+ return result;
+}
+
+uint8_t
+RcDecodeMatchedBitTree (
+ uint16_t* BitModel,
+ uint8_t MatchByte
+ )
+{
+ uint16_t symbol, bytePos, matchBit;
+ uint8_t bit;
+
+ //
+ // Parse each bit in the "match byte" (see LzDecodeLiteral), which we call
+ // a "match bit".
+ //
+ // Then, treat this as a special bit tree decoding where two possible trees
+ // are used: one for when the "match bit" is set, and a separate one for
+ // when the "match bit" is not set. Since each tree can encode up to 256
+ // symbols, each one has 0x100 slots.
+ //
+ // Finally, we have the original bit tree which we'll revert back to once
+ // the match bits are no longer in play, which we parse for the remainder
+ // of the symbol.
+ //
+ for (bytePos = MatchByte, symbol = 1; symbol < 0x100; bytePos <<= 1)
+ {
+ matchBit = (bytePos >> 7) & 1;
+
+ bit = RcIsBitSet(&BitModel[symbol + (0x100 * (matchBit + 1))]);
+ symbol = (symbol << 1) | bit;
+
+ if (matchBit != bit)
+ {
+ while (symbol < 0x100)
+ {
+ symbol = (symbol << 1) | RcIsBitSet(&BitModel[symbol]);
+ }
+ break;
+ }
+ }
+ return symbol & 0xFF;
+}
+
+uint32_t
+RcGetFixed (
+ uint8_t HighestBit
+ )
+{
+ uint32_t symbol;
+
+ //
+ // Fixed probability bit trees always begin at index 0. Iterate over each
+ // decoded bit and just keep shifting it in place, until we reach the total
+ // expected number of bits (typically never higher than 26 -- the maximum
+ // number of "direct bits" that the distance of a "match" can encode).
+ //
+ symbol = 0;
+ do
+ {
+ symbol = (symbol << 1) | RcIsFixedBitSet();
+ } while (--HighestBit > 0);
+ return symbol;
+}
+
+void
+RcSetDefaultProbability (
+ uint16_t* Probability
+ )
+{
+ //
+ // By default, we initialize the probabilities to 0.5 (50% chance).
+ //
+ *Probability = k_LzmaRcHalfProbability;
+}
diff --git a/tools/src/minilzlib/xzstream.c b/tools/src/minilzlib/xzstream.c
new file mode 100644
index 0000000..dd5078c
--- /dev/null
+++ b/tools/src/minilzlib/xzstream.c
@@ -0,0 +1,547 @@
+/*++
+
+Copyright (c) Alex Ionescu. All rights reserved.
+
+Module Name:
+
+ xzstream.c
+
+Abstract:
+
+ This module implements the XZ stream format decoding, including support for
+ parsing the stream header and block header, and then handing off the block
+ decoding to the LZMA2 decoder. Finally, if "meta checking" is enabled, then
+ the index and stream footer are also parsed and validated. Optionally, each
+ of these component structures can be checked against its CRC32 checksum, if
+ "integrity checking" has been enabled. Note that this library only supports
+ single-stream, single-block XZ files that have CRC32 (or None) set as their
+ block checking algorithm. Finally, no BCJ filters are supported, and files
+ with a compressed/uncompressed size metadata indicator are not handled.
+
+Author:
+
+ Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
+
+Environment:
+
+ Windows & Linux, user mode and kernel mode.
+
+--*/
+
+#define MINLZ_META_CHECKS
+
+#include "minlzlib.h"
+#include "xzstream.h"
+#include "../utils.h"
+
+//
+// XzDecodeBlockHeader can return "I successfully found a block",
+// "I failed/bad block header", or "there was no block header".
+// Though minlzlib explicitly only claims to handle files with a
+// single block, it needs to also handle files with no blocks at all.
+// (Produced by "xz" when compressing an empty input file)
+//
+typedef enum _XZ_DECODE_BLOCK_HEADER_RESULT {
+ XzBlockHeaderFail = 0,
+ XzBlockHeaderSuccess = 1,
+ XzBlockHeaderNoBlock = 2
+} XZ_DECODE_BLOCK_HEADER_RESULT;
+
+const uint8_t k_XzLzma2FilterIdentifier = 0x21;
+
+#ifdef _WIN32
+void __security_check_cookie(_In_ uintptr_t _StackCookie) { (void)(_StackCookie); }
+#endif
+
+#ifdef MINLZ_META_CHECKS
+//
+// XZ Stream Container State
+//
+typedef struct _CONTAINER_STATE
+{
+ //
+ // Size of the XZ header and the index, used to validate against footer
+ //
+ uint32_t HeaderSize;
+ uint32_t IndexSize;
+ //
+ // Size of the compressed block and its checksum
+ //
+ uint32_t UncompressedBlockSize;
+ uint32_t UnpaddedBlockSize;
+ uint32_t ChecksumSize;
+} CONTAINER_STATE, * PCONTAINER_STATE;
+CONTAINER_STATE Container;
+#endif
+
+#ifdef MINLZ_META_CHECKS
+bool
+XzDecodeVli (
+ vli_type* Vli
+ )
+{
+ uint8_t vliByte;
+ uint32_t bitPos;
+
+ //
+ // Read the initial VLI byte (might be the value itself)
+ //
+ if (!BfRead(&vliByte))
+ {
+ return false;
+ }
+ *Vli = vliByte & 0x7F;
+
+ //
+ // Check if this was a complex VLI (and we have space for it)
+ //
+ bitPos = 7;
+ while ((vliByte & 0x80) != 0)
+ {
+ //
+ // Read the next byte
+ //
+ if (!BfRead(&vliByte))
+ {
+ return false;
+ }
+
+ //
+ // Make sure we're not decoding an invalid VLI
+ //
+ if ((bitPos == (7 * VLI_BYTES_MAX)) || (vliByte == 0))
+ {
+ return false;
+ }
+
+ //
+ // Decode it and move to the next 7 bits
+ //
+ *Vli |= (vli_type)((vliByte & 0x7F) << bitPos);
+ bitPos += 7;
+ }
+ return true;
+}
+
+bool
+XzDecodeIndex (
+ void
+ )
+{
+ uint32_t vli;
+ uint8_t* indexStart;
+ uint8_t* indexEnd;
+ uint32_t* pCrc32;
+ uint8_t indexByte;
+
+ //
+ // Remember where the index started so we can compute its size
+ //
+ BfSeek(0, &indexStart);
+
+ //
+ // The index always starts out with an empty byte
+ //
+ if (!BfRead(&indexByte) || (indexByte != 0))
+ {
+ return false;
+ }
+
+ //
+ // Then the count of blocks, which we expect to be 1
+ //
+ if (!XzDecodeVli(&vli) || (vli != 1))
+ {
+ return false;
+ }
+
+ //
+ // Then the unpadded block size, which should match
+ //
+ if (!XzDecodeVli(&vli) || (Container.UnpaddedBlockSize != vli))
+ {
+ return false;
+ }
+
+ //
+ // Then the uncompressed block size, which should match
+ //
+ if (!XzDecodeVli(&vli) || (Container.UncompressedBlockSize != vli))
+ {
+ return false;
+ }
+
+ //
+ // Then we pad to the next multiple of 4
+ //
+ if (!BfAlign())
+ {
+ return false;
+ }
+
+ //
+ // Store the index size with padding to validate the footer later
+ //
+ BfSeek(0, &indexEnd);
+ Container.IndexSize = (uint32_t)(indexEnd - indexStart);
+
+ //
+ // Read the CRC32, which is not part of the index size
+ //
+ if (!BfSeek(sizeof(*pCrc32), (uint8_t**)&pCrc32))
+ {
+ return false;
+ }
+#ifdef MINLZ_INTEGRITY_CHECKS
+ //
+ // Make sure the index is not corrupt
+ //
+ if (Crc32(indexStart, Container.IndexSize) != *pCrc32)
+ {
+ return false;
+ }
+#endif
+ return true;
+}
+
+bool
+XzDecodeStreamFooter (
+ void
+ )
+{
+ PXZ_STREAM_FOOTER streamFooter;
+
+ //
+ // Seek past the footer, making sure we have space in the input stream
+ //
+ if (!BfSeek(sizeof(*streamFooter), (uint8_t**)&streamFooter))
+ {
+ return false;
+ }
+
+ //
+ // Validate the footer magic
+ //
+ if (streamFooter->Magic != 'ZY')
+ {
+ return false;
+ }
+
+ //
+ // Validate no flags other than checksum type are set
+ //
+ if ((streamFooter->u.Flags != 0) &&
+ ((streamFooter->u.s.CheckType != XzCheckTypeCrc32) &&
+ (streamFooter->u.s.CheckType != XzCheckTypeCrc64) &&
+ (streamFooter->u.s.CheckType != XzCheckTypeSha2) &&
+ (streamFooter->u.s.CheckType != XzCheckTypeNone)))
+ {
+ return false;
+ }
+
+ //
+ // Validate if the footer accurately describes the size of the index
+ //
+ if (Container.IndexSize != (streamFooter->BackwardSize * 4))
+ {
+ return false;
+ }
+#ifdef MINLZ_INTEGRITY_CHECKS
+ //
+ // Compute the footer's CRC32 and make sure it's not corrupted
+ //
+ if (Crc32(&streamFooter->BackwardSize,
+ sizeof(streamFooter->BackwardSize) +
+ sizeof(streamFooter->u.Flags)) !=
+ streamFooter->Crc32)
+ {
+ return false;
+ }
+#endif
+ return true;
+}
+#endif
+
+bool
+XzDecodeBlock (
+ uint8_t* OutputBuffer,
+ uint32_t* BlockSize
+ )
+{
+#ifdef MINLZ_META_CHECKS
+ uint8_t *inputStart, *inputEnd;
+#endif
+ //
+ // Decode the LZMA2 stream. If full integrity checking is enabled, also
+ // save the offset before and after decoding, so we can save the block
+ // sizes and compare them against the footer and index after decoding.
+ //
+#ifdef MINLZ_META_CHECKS
+ BfSeek(0, &inputStart);
+#endif
+ if (!Lz2DecodeStream(BlockSize, OutputBuffer == NULL))
+ {
+ return false;
+ }
+#ifdef MINLZ_META_CHECKS
+ BfSeek(0, &inputEnd);
+ Container.UnpaddedBlockSize = Container.HeaderSize +
+ (uint32_t)(inputEnd - inputStart);
+ Container.UncompressedBlockSize = *BlockSize;
+#endif
+ //
+ // After the block data, we need to pad to 32-bit alignment
+ //
+ if (!BfAlign())
+ {
+ return false;
+ }
+#if defined(MINLZ_INTEGRITY_CHECKS) || defined(MINLZ_META_CHECKS)
+ //
+    // Finally, move past the size of the checksum, if any, then compare it
+    // with the actual CRC32 of the block, if integrity checks are enabled. If
+ // meta checks are enabled, update the block size so the index checking can
+ // validate it.
+ //
+ if (!BfSeek(Container.ChecksumSize, &inputEnd))
+ {
+ return false;
+ }
+#endif
+ (void)(OutputBuffer);
+#ifdef MINLZ_INTEGRITY_CHECKS
+ if ((OutputBuffer != NULL) &&
+ (Crc32(OutputBuffer, *BlockSize) != *(uint32_t*)inputEnd))
+ {
+ return false;
+ }
+#endif
+#ifdef MINLZ_META_CHECKS
+ Container.UnpaddedBlockSize += Container.ChecksumSize;
+#endif
+ return true;
+}
+
+bool
+XzDecodeStreamHeader (
+ void
+ )
+{
+ PXZ_STREAM_HEADER streamHeader;
+
+ //
+ // Seek past the header, making sure we have space in the input stream
+ //
+ if (!BfSeek(sizeof(*streamHeader), (uint8_t**)&streamHeader))
+ {
+ return false;
+ }
+#ifdef MINLZ_META_CHECKS
+ //
+ // Validate the header magic
+ //
+ if ((*(uint32_t*)&streamHeader->Magic[1] != 'ZXz7') ||
+ (streamHeader->Magic[0] != 0xFD) ||
+ (streamHeader->Magic[5] != 0x00))
+ {
+ return false;
+ }
+
+ //
+ // Validate no flags other than checksum type are set
+ //
+ if ((streamHeader->u.Flags != 0) &&
+ ((streamHeader->u.s.CheckType != XzCheckTypeCrc32) &&
+ (streamHeader->u.s.CheckType != XzCheckTypeCrc64) &&
+ (streamHeader->u.s.CheckType != XzCheckTypeSha2) &&
+ (streamHeader->u.s.CheckType != XzCheckTypeNone)))
+ {
+ return false;
+ }
+
+ //
+ // Remember that a checksum might come at the end of the block later
+ //
+    if (streamHeader->u.s.CheckType == 0)
+    {
+        Container.ChecksumSize = 0;
+    }
+    else
+    {
+        Container.ChecksumSize = 4 << ((streamHeader->u.s.CheckType - 1) / 3);
+    }
+
+#endif
+#ifdef MINLZ_INTEGRITY_CHECKS
+ //
+ // Compute the header's CRC32 and make sure it's not corrupted
+ //
+ if (Crc32(&streamHeader->u.Flags, sizeof(streamHeader->u.Flags)) !=
+ streamHeader->Crc32)
+ {
+ return false;
+ }
+#endif
+ return true;
+}
+
+XZ_DECODE_BLOCK_HEADER_RESULT
+XzDecodeBlockHeader (
+ void
+ )
+{
+ PXZ_BLOCK_HEADER blockHeader;
+#ifdef MINLZ_META_CHECKS
+ uint32_t size;
+#endif
+ //
+ // Seek past the header, making sure we have space in the input stream
+ //
+ if (!BfSeek(sizeof(*blockHeader), (uint8_t**)&blockHeader))
+ {
+ return XzBlockHeaderFail;
+ }
+ if (blockHeader->Size == 0)
+ {
+ //
+ // That's no block! That's an index!
+ //
+ BfSeek((uint32_t)(-(uint16_t)sizeof(*blockHeader)),
+ (uint8_t**)&blockHeader);
+ return XzBlockHeaderNoBlock;
+ }
+#ifdef MINLZ_META_CHECKS
+ //
+ // Validate that the size of the header is what we expect
+ //
+ Container.HeaderSize = (blockHeader->Size + 1) * 4;
+ if (Container.HeaderSize != sizeof(*blockHeader))
+ {
+ return XzBlockHeaderFail;
+ }
+
+ //
+ // Validate that no additional flags or filters are enabled
+ //
+ if (blockHeader->u.Flags != 0)
+ {
+ return XzBlockHeaderFail;
+ }
+
+ //
+ // Validate that the only filter is the LZMA2 filter
+ //
+ if (blockHeader->LzmaFlags.Id != k_XzLzma2FilterIdentifier)
+ {
+ return XzBlockHeaderFail;
+ }
+
+ //
+ // With the expected number of property bytes
+ //
+ if (blockHeader->LzmaFlags.Size
+ != sizeof(blockHeader->LzmaFlags.u.Properties))
+ {
+ return XzBlockHeaderFail;
+ }
+
+ //
+    // The only property is the dictionary size; make sure it is valid.
+ //
+ // We don't actually need to store or compare the size with anything since
+ // the library expects the caller to always put in a buffer that's large
+ // enough to contain the full uncompressed file (or calling it in "get size
+ // only" mode to get this information).
+ //
+ // This output buffer can thus be smaller than the size of the dictionary
+ // which is absolutely OK as long as that's actually the size of the output
+ // file. If callers pass in a buffer size that's too small, decoding will
+ // fail at later stages anyway, and that's incorrect use of minlzlib.
+ //
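+    // (In the XZ format, encoded values 0-39 map to a dictionary size of
+    // (2 | (n & 1)) << (n / 2 + 11) bytes -- 4 KiB up to 3 GiB -- while the
+    // special value 40 means 4 GiB - 1 and is rejected by the check below.)
+    //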
+ size = blockHeader->LzmaFlags.u.s.DictionarySize;
+ if (size > 39)
+ {
+ return XzBlockHeaderFail;
+ }
+#ifdef MINLZ_INTEGRITY_CHECKS
+ //
+ // Compute the header's CRC32 and make sure it's not corrupted
+ //
+ if (Crc32(blockHeader,
+ Container.HeaderSize - sizeof(blockHeader->Crc32)) !=
+ blockHeader->Crc32)
+ {
+ return XzBlockHeaderFail;
+ }
+#endif
+#endif
+ return XzBlockHeaderSuccess;
+}
+
+bool
+XzDecode (
+ uint8_t* InputBuffer,
+ uint32_t* InputSize,
+ uint8_t* OutputBuffer,
+ uint32_t* OutputSize
+ )
+{
+
+ //
+ // Initialize the input buffer descriptor and history buffer (dictionary)
+ //
+ BfInitialize(InputBuffer, *InputSize ? *InputSize : UINT32_MAX);
+ DtInitialize(OutputBuffer, *OutputSize);
+
+ //
+    // Decode the stream header and check it for validity
+ //
+ if (!XzDecodeStreamHeader())
+ {
+ printf("header decode failed\n");
+ return false;
+ }
+
+ //
+    // Decode the block header and check it for validity
+ //
+ switch (XzDecodeBlockHeader())
+ {
+ case XzBlockHeaderFail:
+ printf("block header failed\n");
+ return false;
+ case XzBlockHeaderNoBlock:
+ *OutputSize = 0;
+ break;
+ case XzBlockHeaderSuccess:
+ //
+ // Decode the actual block
+ //
+ if (!XzDecodeBlock(OutputBuffer, OutputSize))
+ {
+ printf("block decode failed\n");
+ return false;
+ }
+ break;
+ }
+
+#ifdef MINLZ_META_CHECKS
+ //
+ // Decode the index for validity checks
+ //
+ if (!XzDecodeIndex())
+ {
+ return false;
+ }
+
+ //
+ // And finally decode the footer as a final set of checks
+ //
+ if (!XzDecodeStreamFooter())
+ {
+ return false;
+ }
+
+ if (!*InputSize)
+ *InputSize = BfTell();
+#endif
+ return true;
+}
diff --git a/tools/src/minilzlib/xzstream.h b/tools/src/minilzlib/xzstream.h
new file mode 100644
index 0000000..f227879
--- /dev/null
+++ b/tools/src/minilzlib/xzstream.h
@@ -0,0 +1,123 @@
+/*++
+
+Copyright (c) Alex Ionescu. All rights reserved.
+
+Module Name:
+
+ xzstream.h
+
+Abstract:
+
+ This header file contains C-style data structures and enumerations that map
+ back to the XZ stream and file format standard, including for the decoding
+ of Variable Length Integers (VLI). This includes definitions for the stream
+ header, block header, index and stream footer, and associated check types.
+
+Author:
+
+ Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
+
+Environment:
+
+ Windows & Linux, user mode and kernel mode.
+
+--*/
+
+#pragma once
+
+//
+// XZ streams encode certain numbers as "variable length integers", with 7 bits
+// for the data, and a high bit to encode that another byte must be consumed.
+//
+typedef uint32_t vli_type;
+#define VLI_BYTES_MAX (sizeof(vli_type) * 8 / 7)
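+//
+// For example, the byte sequence { 0x80, 0x01 } decodes as follows: the first
+// byte contributes its low 7 bits (0) and sets the continuation bit, and the
+// second byte contributes 1 << 7, so the decoded value is 128.
+//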
+
+//
+// These are the possible supported types for integrity checking in an XZ file
+//
+typedef enum _XZ_CHECK_TYPES
+{
+ XzCheckTypeNone = 0,
+ XzCheckTypeCrc32 = 1,
+ XzCheckTypeCrc64 = 4,
+ XzCheckTypeSha2 = 10
+} XZ_CHECK_TYPES;
+
+//
+// This describes the first 12 bytes of any XZ container file / stream
+//
+typedef struct _XZ_STREAM_HEADER
+{
+ uint8_t Magic[6];
+ union
+ {
+ struct
+ {
+ uint8_t ReservedFlags;
+ uint8_t CheckType : 4;
+ uint8_t ReservedType : 4;
+ } s;
+ uint16_t Flags;
+ } u;
+ uint32_t Crc32;
+} XZ_STREAM_HEADER, * PXZ_STREAM_HEADER;
+static_assert(sizeof(XZ_STREAM_HEADER) == 12, "Invalid Stream Header Size");
+
+//
+// This describes the last 12 bytes of any XZ container file / stream
+//
+typedef struct _XZ_STREAM_FOOTER
+{
+ uint32_t Crc32;
+ uint32_t BackwardSize;
+ union
+ {
+ struct
+ {
+ uint8_t ReservedFlags;
+ uint8_t CheckType : 4;
+ uint8_t ReservedType : 4;
+ } s;
+ uint16_t Flags;
+ } u;
+ uint16_t Magic;
+} XZ_STREAM_FOOTER, * PXZ_STREAM_FOOTER;
+static_assert(sizeof(XZ_STREAM_FOOTER) == 12, "Invalid Stream Footer Size");
+
+//
+// This describes the beginning of a compressed payload stored in an XZ stream,
+// with hardcoded expectations for an LZMA2-compressed payload that has 0 extra
+// filters (such as BCJ2).
+//
+typedef struct _XZ_BLOCK_HEADER
+{
+ uint8_t Size;
+ union
+ {
+ struct
+ {
+ uint8_t FilterCount : 2;
+ uint8_t Reserved : 4;
+ uint8_t HasCompressedSize : 1;
+ uint8_t HasUncompressedSize : 1;
+ } s;
+ uint8_t Flags;
+ } u;
+ struct
+ {
+ uint8_t Id;
+ uint8_t Size;
+ union
+ {
+ struct
+ {
+ uint8_t DictionarySize : 6;
+ uint8_t Reserved : 2;
+ } s;
+ uint8_t Properties;
+ } u;
+ } LzmaFlags;
+ uint8_t Padding[3];
+ uint32_t Crc32;
+} XZ_BLOCK_HEADER, * PXZ_BLOCK_HEADER;
+static_assert(sizeof(XZ_BLOCK_HEADER) == 12, "Invalid Block Header Size");
diff --git a/tools/src/nvme.c b/tools/src/nvme.c
new file mode 100644
index 0000000..e6741eb
--- /dev/null
+++ b/tools/src/nvme.c
@@ -0,0 +1,505 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "adt.h"
+#include "assert.h"
+#include "malloc.h"
+#include "nvme.h"
+#include "pmgr.h"
+#include "rtkit.h"
+#include "sart.h"
+#include "string.h"
+#include "utils.h"
+
+#define NVME_TIMEOUT 1000000
+#define NVME_ENABLE_TIMEOUT 5000000
+#define NVME_SHUTDOWN_TIMEOUT 5000000
+#define NVME_QUEUE_SIZE 64
+
+#define NVME_CC 0x14
+#define NVME_CC_SHN GENMASK(15, 14)
+#define NVME_CC_SHN_NONE 0
+#define NVME_CC_SHN_NORMAL 1
+#define NVME_CC_SHN_ABRUPT 2
+#define NVME_CC_EN BIT(0)
+
+#define NVME_CSTS 0x1c
+#define NVME_CSTS_SHST GENMASK(3, 2)
+#define NVME_CSTS_SHST_NORMAL 0
+#define NVME_CSTS_SHST_BUSY 1
+#define NVME_CSTS_SHST_DONE 2
+#define NVME_CSTS_RDY BIT(0)
+
+#define NVME_AQA 0x24
+#define NVME_ASQ 0x28
+#define NVME_ACQ 0x30
+
+#define NVME_DB_ACQ 0x1004
+#define NVME_DB_IOCQ 0x100c
+
+#define NVME_BOOT_STATUS 0x1300
+#define NVME_BOOT_STATUS_OK 0xde71ce55
+
+#define NVME_LINEAR_SQ_CTRL 0x24908
+#define NVME_LINEAR_SQ_CTRL_EN BIT(0)
+
+#define NVME_UNKNOWN_CTRL 0x24008
+#define NVME_UNKNOWN_CTRL_PRP_NULL_CHECK BIT(11)
+
+#define NVME_MAX_PEND_CMDS_CTRL 0x1210
+#define NVME_DB_LINEAR_ASQ 0x2490c
+#define NVME_DB_LINEAR_IOSQ 0x24910
+
+#define NVMMU_NUM 0x28100
+#define NVMMU_ASQ_BASE 0x28108
+#define NVMMU_IOSQ_BASE 0x28110
+#define NVMMU_TCB_INVAL 0x28118
+#define NVMMU_TCB_STAT 0x29120
+
+#define NVME_ADMIN_CMD_DELETE_SQ 0x00
+#define NVME_ADMIN_CMD_CREATE_SQ 0x01
+#define NVME_ADMIN_CMD_DELETE_CQ 0x04
+#define NVME_ADMIN_CMD_CREATE_CQ 0x05
+#define NVME_QUEUE_CONTIGUOUS BIT(0)
+
+#define NVME_CMD_FLUSH 0x00
+#define NVME_CMD_WRITE 0x01
+#define NVME_CMD_READ 0x02
+
+struct nvme_command {
+ u8 opcode;
+ u8 flags;
+ u8 tag;
+ u8 rsvd; // normal NVMe has tag as u16
+ u32 nsid;
+ u32 cdw2;
+ u32 cdw3;
+ u64 metadata;
+ u64 prp1;
+ u64 prp2;
+ u32 cdw10;
+ u32 cdw11;
+ u32 cdw12;
+ u32 cdw13;
+ u32 cdw14;
+ u32 cdw15;
+};
+
+struct nvme_completion {
+ u64 result;
+ u32 rsvd; // normal NVMe has the sq_head and sq_id here
+ u16 tag;
+ u16 status;
+};
+
+struct apple_nvmmu_tcb {
+ u8 opcode;
+ u8 dma_flags;
+ u8 slot_id;
+ u8 unk0;
+ u32 len;
+ u64 unk1[2];
+ u64 prp1;
+ u64 prp2;
+ u64 unk2[2];
+ u8 aes_iv[8];
+ u8 _aes_unk[64];
+};
+
+struct nvme_queue {
+ struct apple_nvmmu_tcb *tcbs;
+ struct nvme_command *cmds;
+ struct nvme_completion *cqes;
+
+ u8 cq_head;
+ u8 cq_phase;
+
+ bool adminq;
+};
+
+static_assert(sizeof(struct nvme_command) == 64, "invalid nvme_command size");
+static_assert(sizeof(struct nvme_completion) == 16, "invalid nvme_completion size");
+static_assert(sizeof(struct apple_nvmmu_tcb) == 128, "invalid apple_nvmmu_tcb size");
+
+static bool nvme_initialized = false;
+static u8 nvme_die;
+
+static asc_dev_t *nvme_asc = NULL;
+static rtkit_dev_t *nvme_rtkit = NULL;
+static sart_dev_t *nvme_sart = NULL;
+
+static u64 nvme_base;
+
+static struct nvme_queue adminq, ioq;
+
+static bool alloc_queue(struct nvme_queue *q)
+{
+ memset(q, 0, sizeof(*q));
+
+ q->tcbs = memalign(SZ_16K, NVME_QUEUE_SIZE * sizeof(*q->tcbs));
+ if (!q->tcbs)
+ return false;
+
+ q->cmds = memalign(SZ_16K, NVME_QUEUE_SIZE * sizeof(*q->cmds));
+ if (!q->cmds)
+ goto free_tcbs;
+
+ q->cqes = memalign(SZ_16K, NVME_QUEUE_SIZE * sizeof(*q->cqes));
+ if (!q->cqes)
+ goto free_cmds;
+
+ memset(q->tcbs, 0, NVME_QUEUE_SIZE * sizeof(*q->tcbs));
+ memset(q->cmds, 0, NVME_QUEUE_SIZE * sizeof(*q->cmds));
+ memset(q->cqes, 0, NVME_QUEUE_SIZE * sizeof(*q->cqes));
+ q->cq_head = 0;
+ q->cq_phase = 1;
+ return true;
+
+free_cmds:
+ free(q->cmds);
+free_tcbs:
+ free(q->tcbs);
+ return false;
+}
+
+static void free_queue(struct nvme_queue *q)
+{
+ free(q->cmds);
+ free(q->tcbs);
+ free(q->cqes);
+}
+
+static void nvme_poll_syslog(void)
+{
+ struct rtkit_message msg;
+ rtkit_recv(nvme_rtkit, &msg);
+}
+
+static bool nvme_ctrl_disable(void)
+{
+ u64 timeout = timeout_calculate(NVME_TIMEOUT);
+
+ clear32(nvme_base + NVME_CC, NVME_CC_EN);
+ while (read32(nvme_base + NVME_CSTS) & NVME_CSTS_RDY && !timeout_expired(timeout))
+ nvme_poll_syslog();
+
+ return !(read32(nvme_base + NVME_CSTS) & NVME_CSTS_RDY);
+}
+
+static bool nvme_ctrl_enable(void)
+{
+ u64 timeout = timeout_calculate(NVME_ENABLE_TIMEOUT);
+
+ mask32(nvme_base + NVME_CC, NVME_CC_SHN, NVME_CC_EN);
+ while (!(read32(nvme_base + NVME_CSTS) & NVME_CSTS_RDY) && !timeout_expired(timeout))
+ nvme_poll_syslog();
+
+ return read32(nvme_base + NVME_CSTS) & NVME_CSTS_RDY;
+}
+
+static bool nvme_ctrl_shutdown(void)
+{
+ u64 timeout = timeout_calculate(NVME_SHUTDOWN_TIMEOUT);
+
+ mask32(nvme_base + NVME_CC, NVME_CC_SHN, FIELD_PREP(NVME_CC_SHN, NVME_CC_SHN_NORMAL));
+ while (FIELD_GET(NVME_CSTS_SHST, read32(nvme_base + NVME_CSTS)) != NVME_CSTS_SHST_DONE &&
+ !timeout_expired(timeout))
+ nvme_poll_syslog();
+
+ return FIELD_GET(NVME_CSTS_SHST, read32(nvme_base + NVME_CSTS)) == NVME_CSTS_SHST_DONE;
+}
+
+static bool nvme_exec_command(struct nvme_queue *q, struct nvme_command *cmd, u64 *result)
+{
+ bool found = false;
+ u64 timeout;
+ u8 tag = 0;
+ struct nvme_command *queue_cmd = &q->cmds[tag];
+ struct apple_nvmmu_tcb *tcb = &q->tcbs[tag];
+
+ memcpy(queue_cmd, cmd, sizeof(*cmd));
+ queue_cmd->tag = tag;
+
+ memset(tcb, 0, sizeof(*tcb));
+ tcb->opcode = queue_cmd->opcode;
+ tcb->dma_flags = 3; // always allow read+write to the PRP pages
+ tcb->slot_id = tag;
+ tcb->len = queue_cmd->cdw12;
+ tcb->prp1 = queue_cmd->prp1;
+ tcb->prp2 = queue_cmd->prp2;
+
+ /* make sure ANS2 can see the command and tcb before triggering it */
+ dma_wmb();
+
+ nvme_poll_syslog();
+ if (q->adminq)
+ write32(nvme_base + NVME_DB_LINEAR_ASQ, tag);
+ else
+ write32(nvme_base + NVME_DB_LINEAR_IOSQ, tag);
+ nvme_poll_syslog();
+
+ timeout = timeout_calculate(NVME_TIMEOUT);
+ struct nvme_completion cqe;
+ while (!timeout_expired(timeout)) {
+ nvme_poll_syslog();
+
+ /* we need a DMA read barrier here since the CQ will be updated using DMA */
+ dma_rmb();
+ memcpy(&cqe, &q->cqes[q->cq_head], sizeof(cqe));
+ if ((cqe.status & 1) != q->cq_phase)
+ continue;
+
+ if (cqe.tag == tag) {
+ found = true;
+ if (result)
+ *result = cqe.result;
+ } else {
+ printf("nvme: invalid tag in CQ: expected %d but got %d\n", tag, cqe.tag);
+ }
+
+ write32(nvme_base + NVMMU_TCB_INVAL, cqe.tag);
+ if (read32(nvme_base + NVMMU_TCB_STAT))
+ printf("nvme: NVMMU invalidation for tag %d failed\n", cqe.tag);
+
+ /* increment head and switch phase once the end of the queue has been reached */
+ q->cq_head += 1;
+ if (q->cq_head == NVME_QUEUE_SIZE) {
+ q->cq_head = 0;
+ q->cq_phase ^= 1;
+ }
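+        /*
+         * Example: with NVME_QUEUE_SIZE == 64, after consuming entry 63 the
+         * head wraps back to 0 and the expected phase bit flips, so stale
+         * completions from the previous lap (still carrying the old phase
+         * bit) are not mistaken for new ones.
+         */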
+
+ if (q->adminq)
+ write32(nvme_base + NVME_DB_ACQ, q->cq_head);
+ else
+ write32(nvme_base + NVME_DB_IOCQ, q->cq_head);
+ break;
+ }
+
+ if (!found) {
+ printf("nvme: could not find command completion in CQ\n");
+ return false;
+ }
+
+ cqe.status >>= 1;
+ if (cqe.status) {
+ printf("nvme: command failed with status %d\n", cqe.status);
+ return false;
+ }
+
+ return true;
+}
+
+bool nvme_init(void)
+{
+ if (nvme_initialized) {
+ printf("nvme: already initialized\n");
+ return true;
+ }
+
+ int adt_path[8];
+ int node = adt_path_offset_trace(adt, "/arm-io/ans", adt_path);
+ if (node < 0) {
+ printf("nvme: Error getting NVMe node /arm-io/ans\n");
+        return false;
+ }
+
+ u32 cg;
+ if (ADT_GETPROP(adt, node, "clock-gates", &cg) < 0) {
+ printf("nvme: Error getting NVMe clock-gates\n");
+        return false;
+ }
+ nvme_die = FIELD_GET(PMGR_DIE_ID, cg);
+ printf("nvme: ANS is on die %d\n", nvme_die);
+
+ if (adt_get_reg(adt, adt_path, "reg", 3, &nvme_base, NULL) < 0) {
+ printf("nvme: Error getting NVMe base address.\n");
+        return false;
+ }
+
+ if (!alloc_queue(&adminq)) {
+ printf("nvme: Error allocating admin queue\n");
+        return false;
+ }
+ if (!alloc_queue(&ioq)) {
+ printf("nvme: Error allocating admin queue\n");
+ goto out_adminq;
+ }
+
+ ioq.adminq = false;
+ adminq.adminq = true;
+
+ nvme_asc = asc_init("/arm-io/ans");
+ if (!nvme_asc)
+ goto out_ioq;
+
+ nvme_sart = sart_init("/arm-io/sart-ans");
+ if (!nvme_sart)
+ goto out_asc;
+
+ nvme_rtkit = rtkit_init("nvme", nvme_asc, NULL, NULL, nvme_sart);
+ if (!nvme_rtkit)
+ goto out_sart;
+
+ if (!rtkit_boot(nvme_rtkit))
+ goto out_rtkit;
+
+ if (poll32(nvme_base + NVME_BOOT_STATUS, 0xffffffff, NVME_BOOT_STATUS_OK, USEC_PER_SEC) < 0) {
+ printf("nvme: ANS did not boot correctly.\n");
+ goto out_shutdown;
+ }
+
+ /* setup controller and NVMMU for linear submission queue */
+ set32(nvme_base + NVME_LINEAR_SQ_CTRL, NVME_LINEAR_SQ_CTRL_EN);
+    clear32(nvme_base + NVME_UNKNOWN_CTRL, NVME_UNKNOWN_CTRL_PRP_NULL_CHECK);
+ write32(nvme_base + NVME_MAX_PEND_CMDS_CTRL,
+ ((NVME_QUEUE_SIZE - 1) << 16) | (NVME_QUEUE_SIZE - 1));
+ write32(nvme_base + NVMMU_NUM, NVME_QUEUE_SIZE - 1);
+ write64_lo_hi(nvme_base + NVMMU_ASQ_BASE, (u64)adminq.tcbs);
+ write64_lo_hi(nvme_base + NVMMU_IOSQ_BASE, (u64)ioq.tcbs);
+
+ /* setup admin queue */
+ if (!nvme_ctrl_disable()) {
+ printf("nvme: timeout while waiting for CSTS.RDY to clear\n");
+ goto out_shutdown;
+ }
+ write64_lo_hi(nvme_base + NVME_ASQ, (u64)adminq.cmds);
+ write64_lo_hi(nvme_base + NVME_ACQ, (u64)adminq.cqes);
+ write32(nvme_base + NVME_AQA, ((NVME_QUEUE_SIZE - 1) << 16) | (NVME_QUEUE_SIZE - 1));
+ if (!nvme_ctrl_enable()) {
+ printf("nvme: timeout while waiting for CSTS.RDY to be set\n");
+ goto out_disable_ctrl;
+ }
+
+ /* setup IO queue */
+ struct nvme_command cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = NVME_ADMIN_CMD_CREATE_CQ;
+ cmd.prp1 = (u64)ioq.cqes;
+ cmd.cdw10 = 1; // cq id
+ cmd.cdw10 |= (NVME_QUEUE_SIZE - 1) << 16;
+ cmd.cdw11 = NVME_QUEUE_CONTIGUOUS;
+ if (!nvme_exec_command(&adminq, &cmd, NULL)) {
+ printf("nvme: create cq command failed\n");
+ goto out_disable_ctrl;
+ }
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = NVME_ADMIN_CMD_CREATE_SQ;
+ cmd.prp1 = (u64)ioq.cmds;
+ cmd.cdw10 = 1; // sq id
+ cmd.cdw10 |= (NVME_QUEUE_SIZE - 1) << 16;
+ cmd.cdw11 = NVME_QUEUE_CONTIGUOUS;
+ cmd.cdw11 |= 1 << 16; // cq id for this sq
+ if (!nvme_exec_command(&adminq, &cmd, NULL)) {
+ printf("nvme: create sq command failed\n");
+ goto out_delete_cq;
+ }
+
+ nvme_initialized = true;
+ printf("nvme: initialized at 0x%lx\n", nvme_base);
+ return true;
+
+out_delete_cq:
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = NVME_ADMIN_CMD_DELETE_CQ;
+ cmd.cdw10 = 1; // cq id
+ if (!nvme_exec_command(&adminq, &cmd, NULL))
+ printf("nvme: delete cq command failed\n");
+out_disable_ctrl:
+ nvme_ctrl_shutdown();
+ nvme_ctrl_disable();
+ nvme_poll_syslog();
+out_shutdown:
+ rtkit_sleep(nvme_rtkit);
+ // Some machines call this ANS, some ANS2...
+ pmgr_reset(nvme_die, "ANS");
+ pmgr_reset(nvme_die, "ANS2");
+out_rtkit:
+ rtkit_free(nvme_rtkit);
+out_sart:
+ sart_free(nvme_sart);
+out_asc:
+ asc_free(nvme_asc);
+out_ioq:
+ free_queue(&ioq);
+out_adminq:
+ free_queue(&adminq);
+ return false;
+}
+
+void nvme_shutdown(void)
+{
+ if (!nvme_initialized) {
+ printf("nvme: trying to shut down but not initialized\n");
+ return;
+ }
+
+ struct nvme_command cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = NVME_ADMIN_CMD_DELETE_SQ;
+ cmd.cdw10 = 1; // sq id
+ if (!nvme_exec_command(&adminq, &cmd, NULL))
+ printf("nvme: delete sq command failed\n");
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = NVME_ADMIN_CMD_DELETE_CQ;
+ cmd.cdw10 = 1; // cq id
+ if (!nvme_exec_command(&adminq, &cmd, NULL))
+ printf("nvme: delete cq command failed\n");
+
+ if (!nvme_ctrl_shutdown())
+ printf("nvme: timeout while waiting for controller shutdown\n");
+ if (!nvme_ctrl_disable())
+ printf("nvme: timeout while waiting for CSTS.RDY to clear\n");
+
+ rtkit_sleep(nvme_rtkit);
+ // Some machines call this ANS, some ANS2...
+ pmgr_reset(nvme_die, "ANS");
+ pmgr_reset(nvme_die, "ANS2");
+ rtkit_free(nvme_rtkit);
+ sart_free(nvme_sart);
+ asc_free(nvme_asc);
+ free_queue(&ioq);
+ free_queue(&adminq);
+ nvme_initialized = false;
+
+ printf("nvme: shutdown done\n");
+}
+
+bool nvme_flush(u32 nsid)
+{
+ struct nvme_command cmd;
+
+ if (!nvme_initialized)
+ return false;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = NVME_CMD_FLUSH;
+ cmd.nsid = nsid;
+
+ return nvme_exec_command(&ioq, &cmd, NULL);
+}
+
+bool nvme_read(u32 nsid, u64 lba, void *buffer)
+{
+ struct nvme_command cmd;
+ u64 buffer_addr = (u64)buffer;
+
+ if (!nvme_initialized)
+ return false;
+
+ /* no need for 16K alignment here since the NVME page size is 4k */
+ if (buffer_addr & (SZ_4K - 1))
+ return false;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = NVME_CMD_READ;
+ cmd.nsid = nsid;
+ cmd.prp1 = (u64)buffer_addr;
+ cmd.cdw10 = lba;
+ cmd.cdw11 = lba >> 32;
+ cmd.cdw12 = 1; // 4096 bytes
+
+ return nvme_exec_command(&ioq, &cmd, NULL);
+}
diff --git a/tools/src/nvme.h b/tools/src/nvme.h
new file mode 100644
index 0000000..8989a60
--- /dev/null
+++ b/tools/src/nvme.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef NVME_H
+#define NVME_H
+
+#include "types.h"
+
+bool nvme_init(void);
+void nvme_shutdown(void);
+
+bool nvme_flush(u32 nsid);
+bool nvme_read(u32 nsid, u64 lba, void *buffer);
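+
+/*
+ * Illustrative usage sketch (the buffer name, namespace, and LBA are
+ * assumptions; memalign and SZ_4K come from the surrounding codebase):
+ * read LBA 0 of namespace 1 into a 4K-aligned buffer.
+ *
+ *     u8 *buf = memalign(SZ_4K, SZ_4K);
+ *     if (nvme_init() && nvme_read(1, 0, buf)) {
+ *         // buf now holds one 4096-byte logical block
+ *     }
+ */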
+
+#endif
diff --git a/tools/src/payload.c b/tools/src/payload.c
new file mode 100644
index 0000000..69c9129
--- /dev/null
+++ b/tools/src/payload.c
@@ -0,0 +1,281 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "payload.h"
+#include "adt.h"
+#include "assert.h"
+#include "chainload.h"
+#include "display.h"
+#include "heapblock.h"
+#include "kboot.h"
+#include "smp.h"
+#include "utils.h"
+
+#include "libfdt/libfdt.h"
+#include "minilzlib/minlzma.h"
+#include "tinf/tinf.h"
+
+// Kernels must be 2MB aligned
+#define KERNEL_ALIGN (2 << 20)
+
+static const u8 gz_magic[] = {0x1f, 0x8b};
+static const u8 xz_magic[] = {0xfd, '7', 'z', 'X', 'Z', 0x00};
+static const u8 fdt_magic[] = {0xd0, 0x0d, 0xfe, 0xed};
+static const u8 kernel_magic[] = {'A', 'R', 'M', 0x64}; // at 0x38
+static const u8 cpio_magic[] = {'0', '7', '0', '7', '0'}; // '1' or '2' next
+static const u8 img4_magic[] = {0x16, 0x04, 'I', 'M', 'G', '4'}; // IA5String 'IMG4'
+static const u8 sig_magic[] = {'m', '1', 'n', '1', '_', 's', 'i', 'g'};
+static const u8 empty[] = {0, 0, 0, 0};
+
+static char expect_compatible[256];
+static struct kernel_header *kernel = NULL;
+static void *fdt = NULL;
+static char *chainload_spec = NULL;
+
+static void *load_one_payload(void *start, size_t size);
+
+static void finalize_uncompression(void *dest, size_t dest_len)
+{
+ // Actually reserve the space. malloc is safe after this, but...
+ assert(dest == heapblock_alloc_aligned(dest_len, KERNEL_ALIGN));
+
+ void *end = ((u8 *)dest) + dest_len;
+ void *next = load_one_payload(dest, dest_len);
+ assert(!next || next >= dest);
+
+    // If the payload needs padding, we need to reserve more, so it had better
+    // not have used malloc either.
+ if (next > end) {
+ // Explicitly *un*aligned or it'll fail this assert, since 64b alignment is the default
+ assert(end == heapblock_alloc_aligned((u8 *)next - (u8 *)end, 1));
+ }
+}
+
+static void *decompress_gz(void *p, size_t size)
+{
+ unsigned int source_len = size, dest_len = 1 << 30; // 1 GiB should be enough hopefully
+
+ // Start at the end of the heap area, no allocation yet. The following code must not use
+ // malloc or heapblock, until finalize_uncompression is called.
+ void *dest = heapblock_alloc_aligned(0, KERNEL_ALIGN);
+
+ printf("Uncompressing... ");
+ int ret = tinf_gzip_uncompress(dest, &dest_len, p, &source_len);
+
+ if (ret != TINF_OK) {
+ printf("Error %d\n", ret);
+ return NULL;
+ }
+
+ printf("%d bytes uncompressed to %d bytes\n", source_len, dest_len);
+
+ finalize_uncompression(dest, dest_len);
+
+ return ((u8 *)p) + source_len;
+}
+
+static void *decompress_xz(void *p, size_t size)
+{
+ uint32_t source_len = size, dest_len = 1 << 30; // 1 GiB should be enough hopefully
+
+ // Start at the end of the heap area, no allocation yet. The following code must not use
+ // malloc or heapblock, until finalize_uncompression is called.
+ void *dest = heapblock_alloc_aligned(0, KERNEL_ALIGN);
+
+ printf("Uncompressing... ");
+ int ret = XzDecode(p, &source_len, dest, &dest_len);
+
+ if (!ret) {
+ printf("XZ decode failed\n");
+ return NULL;
+ }
+
+ printf("%d bytes uncompressed to %d bytes\n", source_len, dest_len);
+
+ finalize_uncompression(dest, dest_len);
+
+ return ((u8 *)p) + source_len;
+}
+
+static void *load_fdt(void *p, size_t size)
+{
+ if (fdt_node_check_compatible(p, 0, expect_compatible) == 0) {
+ printf("Found a devicetree for %s at %p\n", expect_compatible, p);
+ fdt = p;
+ }
+ assert(!size || size == fdt_totalsize(p));
+ return ((u8 *)p) + fdt_totalsize(p);
+}
+
+static void *load_cpio(void *p, size_t size)
+{
+ if (!size) {
+ // We could handle this, but who uses uncompressed initramfs?
+ printf("Uncompressed cpio archives not supported\n");
+ return NULL;
+ }
+
+ kboot_set_initrd(p, size);
+ return ((u8 *)p) + size;
+}
+
+static void *load_kernel(void *p, size_t size)
+{
+ kernel = p;
+
+ assert(size <= kernel->image_size);
+
+ // If this is an in-line kernel, it's probably not aligned, so we need to make a copy
+ if (((u64)kernel) & (KERNEL_ALIGN - 1)) {
+ void *new_addr = heapblock_alloc_aligned(kernel->image_size, KERNEL_ALIGN);
+ memcpy(new_addr, kernel, size ? size : kernel->image_size);
+ kernel = new_addr;
+ }
+
+ /*
+ * Kernel blobs unfortunately do not have an accurate file size header, so
+ * this will fail for in-line payloads. However, conversely, this is required for
+ * compressed payloads, in order to allocate padding that the kernel needs, which will be
+ * beyond the end of the compressed data. So if we know the input size, tell the caller
+ * about the true image size; otherwise don't.
+ */
+ if (size) {
+ return ((u8 *)p) + kernel->image_size;
+ } else {
+ return NULL;
+ }
+}
+
+#define MAX_VAR_NAME 64
+#define MAX_VAR_SIZE 1024
+
+#define IS_VAR(x) !strncmp((char *)*p, x, strlen(x))
+
+#define MAX_CHOSEN_VARS 16
+
+static size_t chosen_cnt = 0;
+static char *chosen[MAX_CHOSEN_VARS];
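+
+/*
+ * Each variable is a plain "name=value\n" string found in the payload area.
+ * As a hypothetical example, a payload line of
+ *
+ *     chosen.bootargs=debug
+ *
+ * is collected here and later applied to /chosen in the devicetree: see
+ * payload_run, which passes the part after the "chosen." prefix to
+ * kboot_set_chosen.
+ */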
+
+static bool check_var(u8 **p)
+{
+ char *val = memchr(*p, '=', strnlen((char *)*p, MAX_VAR_NAME + 1));
+ if (!val)
+ return false;
+
+ val++;
+
+ char *end = memchr(val, '\n', strnlen(val, MAX_VAR_SIZE + 1));
+ if (!end)
+ return false;
+
+ *end = 0;
+ printf("Found a variable at %p: %s\n", *p, (char *)*p);
+
+ if (IS_VAR("chosen.")) {
+ if (chosen_cnt >= MAX_CHOSEN_VARS)
+ printf("Too many chosen vars, ignoring %s\n", *p);
+ else
+ chosen[chosen_cnt++] = (char *)*p;
+ } else if (IS_VAR("chainload=")) {
+ chainload_spec = val;
+ } else if (IS_VAR("display=")) {
+ display_configure(val);
+ } else {
+ printf("Unknown variable %s\n", *p);
+ }
+
+ *p = (u8 *)(end + 1);
+ return true;
+}
+
+static void *load_one_payload(void *start, size_t size)
+{
+ u8 *p = start;
+
+ if (!start)
+ return NULL;
+
+ if (!memcmp(p, gz_magic, sizeof gz_magic)) {
+ printf("Found a gzip compressed payload at %p\n", p);
+ return decompress_gz(p, size);
+ } else if (!memcmp(p, xz_magic, sizeof xz_magic)) {
+ printf("Found an XZ compressed payload at %p\n", p);
+ return decompress_xz(p, size);
+ } else if (!memcmp(p, fdt_magic, sizeof fdt_magic)) {
+ return load_fdt(p, size);
+ } else if (!memcmp(p, cpio_magic, sizeof cpio_magic)) {
+ printf("Found a cpio initramfs at %p\n", p);
+ return load_cpio(p, size);
+ } else if (!memcmp(p + 0x38, kernel_magic, sizeof kernel_magic)) {
+ printf("Found a kernel at %p\n", p);
+ return load_kernel(p, size);
+ } else if (!memcmp(p, sig_magic, sizeof sig_magic)) {
+ u32 size;
+ memcpy(&size, p + 8, 4);
+
+ printf("Found a m1n1 signature at %p, skipping 0x%x bytes\n", p, size);
+ return p + size;
+ } else if (check_var(&p)) {
+ return p;
+ } else if (!memcmp(p, empty, sizeof empty) ||
+ !memcmp(p + 0x05, img4_magic, sizeof img4_magic)) { // SEPFW after m1n1
+ printf("No more payloads at %p\n", p);
+ return NULL;
+ } else {
+ printf("Unknown payload at %p (magic: %02x%02x%02x%02x)\n", p, p[0], p[1], p[2], p[3]);
+ return NULL;
+ }
+}
+
+int payload_run(void)
+{
+ const char *target = adt_getprop(adt, 0, "target-type", NULL);
+ if (target) {
+ strcpy(expect_compatible, "apple,");
+ char *p = expect_compatible + strlen(expect_compatible);
+ while (*target && p != expect_compatible + sizeof(expect_compatible) - 1) {
+ *p++ = tolower(*target++);
+ }
+ *p = 0;
+ printf("Devicetree compatible value: %s\n", expect_compatible);
+ } else {
+ printf("Cannot find target type! %p %p\n", target, adt);
+ return -1;
+ }
+
+ chosen_cnt = 0;
+
+ void *p = _payload_start;
+
+ while (p)
+ p = load_one_payload(p, 0);
+
+ if (chainload_spec) {
+ return chainload_load(chainload_spec, chosen, chosen_cnt);
+ }
+
+ if (kernel && fdt) {
+ smp_start_secondaries();
+
+ for (size_t i = 0; i < chosen_cnt; i++) {
+ char *val = memchr(chosen[i], '=', MAX_VAR_NAME + 1);
+
+ assert(val);
+ val[0] = 0; // Terminate var name
+ if (kboot_set_chosen(chosen[i] + 7, val + 1) < 0)
+ printf("Failed to kboot set %s='%s'\n", chosen[i], val);
+ }
+
+ if (kboot_prepare_dt(fdt)) {
+ printf("Failed to prepare FDT!\n");
+ return -1;
+ }
+
+ return kboot_boot(kernel);
+ } else if (kernel && !fdt) {
+ printf("ERROR: Kernel found but no devicetree for %s available.\n", expect_compatible);
+ } else if (!kernel && fdt) {
+ printf("ERROR: Devicetree found but no kernel.\n");
+ }
+
+ return -1;
+}
diff --git a/tools/src/payload.h b/tools/src/payload.h
new file mode 100644
index 0000000..8e6aa72
--- /dev/null
+++ b/tools/src/payload.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef __PAYLOAD_H__
+#define __PAYLOAD_H__
+
+int payload_run(void);
+
+#endif
diff --git a/tools/src/pcie.c b/tools/src/pcie.c
new file mode 100644
index 0000000..39d6a23
--- /dev/null
+++ b/tools/src/pcie.c
@@ -0,0 +1,388 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "adt.h"
+#include "pcie.h"
+#include "pmgr.h"
+#include "tunables.h"
+#include "utils.h"
+
+/*
+ * The ADT uses 18 register sets:
+ *
+ * 0: 90000000 00000006 10000000 00000000 ECAM
+ * 1: 80000000 00000006 00040000 00000000 RC
+ * 2: 80080000 00000006 00090000 00000000 PHY
+ * 3: 800c0000 00000006 00020000 00000000 PHY IP
+ * 4: 8c000000 00000006 00004000 00000000 AXI
+ * 5: 3d2bc000 00000000 00001000 00000000 fuses
+ * 6: 81000000 00000006 00008000 00000000 port 0 config
+ * 7: 81010000 00000006 00001000 00000000 port 0 LTSSM debug
+ * 8: 80084000 00000006 00004000 00000000 port 0 PHY
+ * 9: 800c8000 00000006 00016610 00000000 port 0 PHY IP
+ <macOS 12.0 RC and later add a per-port Intr2AXI reg here>
+ * 10: 82000000 00000006 00008000 00000000 port 1 config
+ * 11: 82010000 00000006 00001000 00000000 port 1 LTSSM debug
+ * 12: 80088000 00000006 00004000 00000000 port 1 PHY
+ * 13: 800d0000 00000006 00006000 00000000 port 1 PHY IP
+ <...>
+ * 14: 83000000 00000006 00008000 00000000 port 2 config
+ * 15: 83010000 00000006 00001000 00000000 port 2 LTSSM debug
+ * 16: 8008c000 00000006 00004000 00000000 port 2 PHY
+ * 17: 800d8000 00000006 00006000 00000000 port 2 PHY IP
+ <...>
+ */
+
+/* PHY registers */
+
+#define APCIE_PHY_CTRL 0x000
+#define APCIE_PHY_CTRL_CLK0REQ BIT(0)
+#define APCIE_PHY_CTRL_CLK1REQ BIT(1)
+#define APCIE_PHY_CTRL_CLK0ACK BIT(2)
+#define APCIE_PHY_CTRL_CLK1ACK BIT(3)
+#define APCIE_PHY_CTRL_RESET BIT(7)
+
+#define APCIE_PHYIF_CTRL 0x024
+#define APCIE_PHYIF_CTRL_RUN BIT(0)
+
+/* Port registers */
+
+#define APCIE_PORT_LINKSTS 0x208
+#define APCIE_PORT_LINKSTS_BUSY BIT(2)
+
+#define APCIE_PORT_APPCLK 0x800
+#define APCIE_PORT_APPCLK_EN BIT(0)
+
+#define APCIE_PORT_STATUS 0x804
+#define APCIE_PORT_STATUS_RUN BIT(0)
+
+#define APCIE_PORT_RESET 0x814
+#define APCIE_PORT_RESET_DIS BIT(0)
+
+/* PCIe capability registers */
+#define PCIE_CAP_BASE 0x70
+#define PCIE_LNKCAP 0x0c
+#define PCIE_LNKCAP_SLS GENMASK(3, 0)
+#define PCIE_LNKCAP2 0x2c
+#define PCIE_LNKCAP2_SLS GENMASK(6, 1)
+#define PCIE_LNKCTL2 0x30
+#define PCIE_LNKCTL2_TLS GENMASK(3, 0)
+
+/* DesignWare PCIe Core registers */
+
+#define DWC_DBI_RO_WR 0x8bc
+#define DWC_DBI_RO_WR_EN BIT(0)
+
+#define DWC_DBI_LINK_WIDTH_SPEED_CONTROL 0x80c
+#define DWC_DBI_SPEED_CHANGE BIT(17)
+
+struct fuse_bits {
+ u16 src_reg;
+ u16 tgt_reg;
+ u8 src_bit;
+ u8 tgt_bit;
+ u8 width;
+};
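+
+/*
+ * Each entry copies a "width"-bit field from fuse register "src_reg"
+ * (starting at "src_bit") into PHY IP register "tgt_reg" at "tgt_bit"; see
+ * the apply loop in pcie_init(). E.g. the t8103 entry {0x0084, 0x6238, 4, 0, 6}
+ * moves bits 9:4 of fuse word 0x84 into bits 5:0 of register 0x6238.
+ */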
+
+const struct fuse_bits pcie_fuse_bits_t8103[] = {
+ {0x0084, 0x6238, 4, 0, 6}, {0x0084, 0x6220, 10, 14, 3}, {0x0084, 0x62a4, 13, 17, 2},
+ {0x0418, 0x522c, 27, 9, 2}, {0x0418, 0x522c, 13, 12, 3}, {0x0418, 0x5220, 18, 14, 3},
+ {0x0418, 0x52a4, 21, 17, 2}, {0x0418, 0x522c, 23, 16, 5}, {0x0418, 0x5278, 23, 20, 3},
+ {0x0418, 0x5018, 31, 2, 1}, {0x041c, 0x1204, 0, 2, 5}, {},
+};
+
+const struct fuse_bits pcie_fuse_bits_t6000[] = {
+ {0x004c, 0x1004, 3, 2, 5}, {0x0048, 0x522c, 26, 16, 5}, {0x0048, 0x522c, 29, 9, 2},
+ {0x0048, 0x522c, 26, 12, 3}, {0x0048, 0x522c, 26, 16, 5}, {0x0048, 0x52a4, 24, 17, 2},
+ {0x004c, 0x5018, 2, 3, 1}, {0x0048, 0x50a4, 14, 17, 2}, {0x0048, 0x62a4, 14, 17, 2},
+ {0x0048, 0x6220, 8, 14, 3}, {0x0048, 0x6238, 2, 0, 6}, {},
+};
+
+/* clang-format off */
+const struct fuse_bits pcie_fuse_bits_t8112[] = {
+ {0x0490, 0x6238, 0, 0, 6}, {0x0490, 0x6220, 6, 14, 3}, {0x0490, 0x62a4, 12, 17, 2},
+ {0x0490, 0x5018, 14, 2, 1}, {0x0490, 0x5220, 15, 14, 3}, {0x0490, 0x52a4, 18, 17, 2},
+ {0x0490, 0x5278, 20, 20, 3}, {0x0490, 0x522c, 23, 12, 3}, {0x0490, 0x522c, 26, 9, 2},
+ {0x0490, 0x522c, 28, 16, 4}, {0x0494, 0x522c, 0, 20, 1}, {0x0494, 0x1204, 5, 2, 5},
+ {},
+};
+/* clang-format on */
+
+static bool pcie_initialized = false;
+static u64 rc_base;
+static u64 phy_base;
+static u64 phy_ip_base;
+static u64 fuse_base;
+static u32 port_count;
+static u64 port_base[8];
+
+#define SHARED_REG_COUNT 6
+
+int pcie_init(void)
+{
+ const char *path = "/arm-io/apcie";
+ int adt_path[8];
+ int adt_offset;
+ const struct fuse_bits *fuse_bits;
+
+ if (pcie_initialized)
+ return 0;
+
+ adt_offset = adt_path_offset_trace(adt, path, adt_path);
+ if (adt_offset < 0) {
+ printf("pcie: Error getting node %s\n", path);
+ return -1;
+ }
+
+ if (adt_is_compatible(adt, adt_offset, "apcie,t8103")) {
+ fuse_bits = pcie_fuse_bits_t8103;
+ printf("pcie: Initializing t8103 PCIe controller\n");
+ } else if (adt_is_compatible(adt, adt_offset, "apcie,t6000")) {
+ fuse_bits = pcie_fuse_bits_t6000;
+ printf("pcie: Initializing t6000 PCIe controller\n");
+ } else if (adt_is_compatible(adt, adt_offset, "apcie,t8112")) {
+ fuse_bits = pcie_fuse_bits_t8112;
+ printf("pcie: Initializing t8112 PCIe controller\n");
+ } else {
+ printf("pcie: Unsupported compatible\n");
+ return -1;
+ }
+
+ if (ADT_GETPROP(adt, adt_offset, "#ports", &port_count) < 0) {
+ printf("pcie: Error getting port count for %s\n", path);
+ return -1;
+ }
+
+ u64 config_base;
+ if (adt_get_reg(adt, adt_path, "reg", 0, &config_base, NULL)) {
+ printf("pcie: Error getting reg with index %d for %s\n", 0, path);
+ return -1;
+ }
+
+ if (adt_get_reg(adt, adt_path, "reg", 1, &rc_base, NULL)) {
+ printf("pcie: Error getting reg with index %d for %s\n", 1, path);
+ return -1;
+ }
+
+ if (adt_get_reg(adt, adt_path, "reg", 2, &phy_base, NULL)) {
+ printf("pcie: Error getting reg with index %d for %s\n", 2, path);
+ return -1;
+ }
+
+ if (adt_get_reg(adt, adt_path, "reg", 3, &phy_ip_base, NULL)) {
+ printf("pcie: Error getting reg with index %d for %s\n", 3, path);
+ return -1;
+ }
+
+ if (adt_get_reg(adt, adt_path, "reg", 5, &fuse_base, NULL)) {
+ printf("pcie: Error getting reg with index %d for %s\n", 5, path);
+ return -1;
+ }
+
+ u32 reg_len;
+ if (!adt_getprop(adt, adt_offset, "reg", &reg_len)) {
+ printf("pcie: Error getting reg length for %s\n", path);
+ return -1;
+ }
+
+ int port_regs = (reg_len / 16) - SHARED_REG_COUNT;
+
+ if (port_regs % port_count) {
+ printf("pcie: %d port registers do not evenly divide into %d ports\n", port_regs,
+ port_count);
+ return -1;
+ }
+
+ int port_reg_cnt = port_regs / port_count;
+ printf("pcie: ADT uses %d reg entries per port\n", port_reg_cnt);
+
+ if (pmgr_adt_power_enable(path)) {
+ printf("pcie: Error enabling power for %s\n", path);
+ return -1;
+ }
+
+ if (tunables_apply_local(path, "apcie-axi2af-tunables", 4)) {
+ printf("pcie: Error applying %s for %s\n", "apcie-axi2af-tunables", path);
+ return -1;
+ }
+
+ /* ??? */
+ write32(rc_base + 0x4, 0);
+
+ if (tunables_apply_local(path, "apcie-common-tunables", 1)) {
+ printf("pcie: Error applying %s for %s\n", "apcie-common-tunables", path);
+ return -1;
+ }
+
+ /*
+ * Initialize PHY.
+ */
+
+ if (tunables_apply_local(path, "apcie-phy-tunables", 2)) {
+ printf("pcie: Error applying %s for %s\n", "apcie-phy-tunables", path);
+ return -1;
+ }
+
+ set32(phy_base + APCIE_PHY_CTRL, APCIE_PHY_CTRL_CLK0REQ);
+ if (poll32(phy_base + APCIE_PHY_CTRL, APCIE_PHY_CTRL_CLK0ACK, APCIE_PHY_CTRL_CLK0ACK, 50000)) {
+ printf("pcie: Timeout enabling PHY CLK0\n");
+ return -1;
+ }
+
+ set32(phy_base + APCIE_PHY_CTRL, APCIE_PHY_CTRL_CLK1REQ);
+ if (poll32(phy_base + APCIE_PHY_CTRL, APCIE_PHY_CTRL_CLK1ACK, APCIE_PHY_CTRL_CLK1ACK, 50000)) {
+ printf("pcie: Timeout enabling PHY CLK1\n");
+ return -1;
+ }
+
+ clear32(phy_base + APCIE_PHY_CTRL, APCIE_PHY_CTRL_RESET);
+ udelay(1);
+
+ /* ??? */
+ set32(rc_base + APCIE_PHYIF_CTRL, APCIE_PHYIF_CTRL_RUN);
+ udelay(1);
+
+ /* Apply "fuses". */
+ for (int i = 0; fuse_bits[i].width; i++) {
+ u32 fuse;
+ fuse = (read32(fuse_base + fuse_bits[i].src_reg) >> fuse_bits[i].src_bit);
+ fuse &= (1 << fuse_bits[i].width) - 1;
+ mask32(phy_ip_base + fuse_bits[i].tgt_reg,
+ ((1 << fuse_bits[i].width) - 1) << fuse_bits[i].tgt_bit,
+ fuse << fuse_bits[i].tgt_bit);
+ }
+
+ if (tunables_apply_local(path, "apcie-phy-ip-pll-tunables", 3)) {
+ printf("pcie: Error applying %s for %s\n", "apcie-phy-ip-pll-tunables", path);
+ return -1;
+ }
+ if (tunables_apply_local(path, "apcie-phy-ip-auspma-tunables", 3)) {
+ printf("pcie: Error applying %s for %s\n", "apcie-phy-ip-auspma-tunables", path);
+ return -1;
+ }
+
+ for (u32 port = 0; port < port_count; port++) {
+ char bridge[64];
+ int bridge_offset;
+
+ /*
+ * Initialize RC port.
+ */
+
+ snprintf(bridge, sizeof(bridge), "/arm-io/apcie/pci-bridge%d", port);
+
+ if ((bridge_offset = adt_path_offset(adt, bridge)) < 0)
+ continue;
+
+ printf("pcie: Initializing port %d\n", port);
+
+ if (adt_get_reg(adt, adt_path, "reg", port * port_reg_cnt + SHARED_REG_COUNT,
+ &port_base[port], NULL)) {
+ printf("pcie: Error getting reg with index %d for %s\n",
+ port * port_reg_cnt + SHARED_REG_COUNT, path);
+ return -1;
+ }
+
+ if (tunables_apply_local_addr(bridge, "apcie-config-tunables", port_base[port])) {
+ printf("pcie: Error applying %s for %s\n", "apcie-config-tunables", bridge);
+ return -1;
+ }
+
+ set32(port_base[port] + APCIE_PORT_APPCLK, APCIE_PORT_APPCLK_EN);
+
+ /* PERSTN */
+ set32(port_base[port] + APCIE_PORT_RESET, APCIE_PORT_RESET_DIS);
+
+ if (poll32(port_base[port] + APCIE_PORT_STATUS, APCIE_PORT_STATUS_RUN,
+ APCIE_PORT_STATUS_RUN, 250000)) {
+ printf("pcie: Port failed to come up on %s\n", bridge);
+ return -1;
+ }
+
+ if (poll32(port_base[port] + APCIE_PORT_LINKSTS, APCIE_PORT_LINKSTS_BUSY, 0, 250000)) {
+ printf("pcie: Port failed to become idle on %s\n", bridge);
+ return -1;
+ }
+
+ /* Make Designware PCIe Core registers writable. */
+ set32(config_base + DWC_DBI_RO_WR, DWC_DBI_RO_WR_EN);
+
+ if (tunables_apply_local_addr(bridge, "pcie-rc-tunables", config_base)) {
+ printf("pcie: Error applying %s for %s\n", "pcie-rc-tunables", bridge);
+ return -1;
+ }
+ if (tunables_apply_local_addr(bridge, "pcie-rc-gen3-shadow-tunables", config_base)) {
+ printf("pcie: Error applying %s for %s\n", "pcie-rc-gen3-shadow-tunables", bridge);
+ return -1;
+ }
+ if (tunables_apply_local_addr(bridge, "pcie-rc-gen4-shadow-tunables", config_base)) {
+ printf("pcie: Error applying %s for %s\n", "pcie-rc-gen4-shadow-tunables", bridge);
+ return -1;
+ }
+
+ u32 max_speed;
+ if (ADT_GETPROP(adt, bridge_offset, "maximum-link-speed", &max_speed) >= 0) {
+ /* Apple changed how they announce the link speed for the 10Gb NIC in
+ * macOS 12.3 at the latest. The "lan-10gb" subnode now has a
+ * "target-link-speed" property, and "maximum-link-speed" remains at 1.
+ */
+ int lan_10gb = adt_subnode_offset(adt, bridge_offset, "lan-10gb");
+ if (lan_10gb > 0) {
+ int target_speed;
+ if (ADT_GETPROP(adt, lan_10gb, "target-link-speed", &target_speed) >= 0) {
+ if (target_speed > 0)
+ max_speed = target_speed;
+ }
+ }
+
+ printf("pcie: Port %d max speed = %d\n", port, max_speed);
+
+ if (max_speed == 0) {
+ printf("pcie: Invalid max-speed\n");
+ return -1;
+ }
+
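+ /* Advertise the chosen speed in LNKCAP (maximum link speed), LNKCAP2
+ * (bitmask of all supported speeds up to it) and LNKCTL2 (target link
+ * speed used for training). */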
+ mask32(config_base + PCIE_CAP_BASE + PCIE_LNKCAP, PCIE_LNKCAP_SLS,
+ FIELD_PREP(PCIE_LNKCAP_SLS, max_speed));
+
+ mask32(config_base + PCIE_CAP_BASE + PCIE_LNKCAP2, PCIE_LNKCAP2_SLS,
+ FIELD_PREP(PCIE_LNKCAP2_SLS, (1 << max_speed) - 1));
+
+ mask16(config_base + PCIE_CAP_BASE + PCIE_LNKCTL2, PCIE_LNKCTL2_TLS,
+ FIELD_PREP(PCIE_LNKCTL2_TLS, max_speed));
+ }
+
+ set32(config_base + DWC_DBI_LINK_WIDTH_SPEED_CONTROL, DWC_DBI_SPEED_CHANGE);
+
+ /* Make Designware PCIe Core registers readonly. */
+ clear32(config_base + DWC_DBI_RO_WR, DWC_DBI_RO_WR_EN);
+
+ /* Move to the next PCIe device on this bus. */
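+ /* (Config space follows the ECAM layout, which encodes the device
+ * number in address bits 19:15, hence the 1 << 15 stride.) */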
+ config_base += (1 << 15);
+ }
+
+ pcie_initialized = true;
+ printf("pcie: Initialized.\n");
+
+ return 0;
+}
+
+int pcie_shutdown(void)
+{
+ if (!pcie_initialized)
+ return 0;
+
+ for (u32 port = 0; port < port_count; port++) {
+ clear32(port_base[port] + APCIE_PORT_RESET, APCIE_PORT_RESET_DIS);
+ clear32(port_base[port] + APCIE_PORT_APPCLK, APCIE_PORT_APPCLK_EN);
+ }
+
+ clear32(phy_base + APCIE_PHY_CTRL, APCIE_PHY_CTRL_RESET);
+ clear32(phy_base + APCIE_PHY_CTRL, APCIE_PHY_CTRL_CLK1REQ);
+ clear32(phy_base + APCIE_PHY_CTRL, APCIE_PHY_CTRL_CLK0REQ);
+
+ pcie_initialized = false;
+ printf("pcie: Shutdown.\n");
+
+ return 0;
+}
diff --git a/tools/src/pcie.h b/tools/src/pcie.h
new file mode 100644
index 0000000..e33d59d
--- /dev/null
+++ b/tools/src/pcie.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef PCIE_H
+#define PCIE_H
+
+int pcie_init(void);
+int pcie_shutdown(void);
+
+#endif
diff --git a/tools/src/pmgr.c b/tools/src/pmgr.c
new file mode 100644
index 0000000..0ae3888
--- /dev/null
+++ b/tools/src/pmgr.c
@@ -0,0 +1,358 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "pmgr.h"
+#include "adt.h"
+#include "string.h"
+#include "types.h"
+#include "utils.h"
+
+#define PMGR_RESET BIT(31)
+#define PMGR_AUTO_ENABLE BIT(28)
+#define PMGR_PS_AUTO GENMASK(27, 24)
+#define PMGR_PARENT_OFF BIT(11)
+#define PMGR_DEV_DISABLE BIT(10)
+#define PMGR_WAS_CLKGATED BIT(9)
+#define PMGR_WAS_PWRGATED BIT(8)
+#define PMGR_PS_ACTUAL GENMASK(7, 4)
+#define PMGR_PS_TARGET GENMASK(3, 0)
+
+#define PMGR_PS_ACTIVE 0xf
+#define PMGR_PS_CLKGATE 0x4
+#define PMGR_PS_PWRGATE 0x0
+
+#define PMGR_POLL_TIMEOUT 10000
+
+#define PMGR_FLAG_VIRTUAL 0x10
+
+struct pmgr_device {
+ u32 flags;
+ u16 parent[2];
+ u8 unk1[2];
+ u8 addr_offset;
+ u8 psreg_idx;
+ u8 unk2[14];
+ u16 id;
+ u8 unk3[4];
+ const char name[0x10];
+} PACKED;
+
+static int pmgr_initialized = 0;
+
+static int pmgr_path[8];
+static int pmgr_offset;
+static int pmgr_dies;
+
+static const u32 *pmgr_ps_regs = NULL;
+static u32 pmgr_ps_regs_len = 0;
+
+static const struct pmgr_device *pmgr_devices = NULL;
+static u32 pmgr_devices_len = 0;
+
+static uintptr_t pmgr_get_psreg(u8 idx)
+{
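+ /* Each "ps-regs" entry is three u32s (12 bytes): a "reg" index, an
+ * offset into that MMIO region, and a third field that is unused here. */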
+ if (idx * 12 >= pmgr_ps_regs_len) {
+ printf("pmgr: Index %d is out of bounds for ps-regs\n", idx);
+ return 0;
+ }
+
+ u32 reg_idx = pmgr_ps_regs[3 * idx];
+ u32 reg_offset = pmgr_ps_regs[3 * idx + 1];
+
+ u64 pmgr_reg;
+ if (adt_get_reg(adt, pmgr_path, "reg", reg_idx, &pmgr_reg, NULL) < 0) {
+ printf("pmgr: Error getting /arm-io/pmgr regs\n");
+ return 0;
+ }
+
+ return pmgr_reg + reg_offset;
+}
+
+static int pmgr_set_mode(uintptr_t addr, u8 target_mode)
+{
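+ /* Request the new power state, then wait for the hardware to reflect it
+ * in PMGR_PS_ACTUAL. */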
+ mask32(addr, PMGR_PS_TARGET, FIELD_PREP(PMGR_PS_TARGET, target_mode));
+ if (poll32(addr, PMGR_PS_ACTUAL, FIELD_PREP(PMGR_PS_ACTUAL, target_mode), PMGR_POLL_TIMEOUT) <
+ 0) {
+ printf("pmgr: timeout while trying to set mode %x for device at 0x%lx: %x\n", target_mode,
+ addr, read32(addr));
+ return -1;
+ }
+
+ return 0;
+}
+
+static int pmgr_find_device(u16 id, const struct pmgr_device **device)
+{
+ for (size_t i = 0; i < pmgr_devices_len; ++i) {
+ const struct pmgr_device *i_device = &pmgr_devices[i];
+ if (i_device->id != id)
+ continue;
+
+ *device = i_device;
+ return 0;
+ }
+
+ return -1;
+}
+
+static uintptr_t pmgr_device_get_addr(u8 die, const struct pmgr_device *device)
+{
+ uintptr_t addr = pmgr_get_psreg(device->psreg_idx);
+ if (addr == 0)
+ return 0;
+
+ addr += PMGR_DIE_OFFSET * die;
+
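+ /* Per-device power state registers are spaced 8 bytes apart. */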
+ addr += (device->addr_offset << 3);
+ return addr;
+}
+
+static int pmgr_set_mode_recursive(u8 die, u16 id, u8 target_mode, bool recurse)
+{
+ if (!pmgr_initialized) {
+ printf("pmgr: pmgr_set_mode_recursive() called before successful pmgr_init()\n");
+ return -1;
+ }
+
+ if (id == 0)
+ return -1;
+
+ const struct pmgr_device *device;
+
+ if (pmgr_find_device(id, &device))
+ return -1;
+
+ if (!(device->flags & PMGR_FLAG_VIRTUAL)) {
+ uintptr_t addr = pmgr_device_get_addr(die, device);
+ if (!addr)
+ return -1;
+ if (pmgr_set_mode(addr, target_mode))
+ return -1;
+ }
+ if (!recurse)
+ return 0;
+
+ for (int i = 0; i < 2; i++) {
+ if (device->parent[i]) {
+ u16 parent = FIELD_GET(PMGR_DEVICE_ID, device->parent[i]);
+ int ret = pmgr_set_mode_recursive(die, parent, target_mode, true);
+ if (ret < 0)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+int pmgr_power_enable(u32 id)
+{
+ u16 device = FIELD_GET(PMGR_DEVICE_ID, id);
+ u8 die = FIELD_GET(PMGR_DIE_ID, id);
+ return pmgr_set_mode_recursive(die, device, PMGR_PS_ACTIVE, true);
+}
+
+int pmgr_power_disable(u32 id)
+{
+ u16 device = FIELD_GET(PMGR_DEVICE_ID, id);
+ u8 die = FIELD_GET(PMGR_DIE_ID, id);
+ return pmgr_set_mode_recursive(die, device, PMGR_PS_PWRGATE, false);
+}
+
+static int pmgr_adt_find_devices(const char *path, const u32 **devices, u32 *n_devices)
+{
+ int node_offset = adt_path_offset(adt, path);
+ if (node_offset < 0) {
+ printf("pmgr: Error getting node %s\n", path);
+ return -1;
+ }
+
+ *devices = adt_getprop(adt, node_offset, "clock-gates", n_devices);
+ if (*devices == NULL || *n_devices == 0) {
+ printf("pmgr: Error getting %s clock-gates.\n", path);
+ return -1;
+ }
+
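+ /* "clock-gates" is an array of u32 device IDs; convert the byte length
+ * to an element count. */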
+ *n_devices /= 4;
+
+ return 0;
+}
+
+static int pmgr_adt_devices_set_mode(const char *path, u8 target_mode, int recurse)
+{
+ const u32 *devices;
+ u32 n_devices;
+ int ret = 0;
+
+ if (pmgr_adt_find_devices(path, &devices, &n_devices) < 0)
+ return -1;
+
+ for (u32 i = 0; i < n_devices; ++i) {
+ u16 device = FIELD_GET(PMGR_DEVICE_ID, devices[i]);
+ u8 die = FIELD_GET(PMGR_DIE_ID, devices[i]);
+ if (pmgr_set_mode_recursive(die, device, target_mode, recurse))
+ ret = -1;
+ }
+
+ return ret;
+}
+
+int pmgr_adt_power_enable(const char *path)
+{
+ return pmgr_adt_devices_set_mode(path, PMGR_PS_ACTIVE, true);
+}
+
+int pmgr_adt_power_disable(const char *path)
+{
+ return pmgr_adt_devices_set_mode(path, PMGR_PS_PWRGATE, false);
+}
+
+static int pmgr_reset_device(int die, const struct pmgr_device *dev)
+{
+ if (die < 0 || die > 16) {
+ printf("pmgr: invalid die id %d for device %s\n", die, dev->name);
+ return -1;
+ }
+
+ uintptr_t addr = pmgr_device_get_addr(die, dev);
+
+ u32 reg = read32(addr);
+ if (FIELD_GET(PMGR_PS_ACTUAL, reg) != PMGR_PS_ACTIVE) {
+ printf("pmgr: will not reset disabled device %d.%s\n", die, dev->name);
+ return -1;
+ }
+
+ printf("pmgr: resetting device %d.%s\n", die, dev->name);
+
+ set32(addr, PMGR_DEV_DISABLE);
+ set32(addr, PMGR_RESET);
+ udelay(10);
+ clear32(addr, PMGR_RESET);
+ clear32(addr, PMGR_DEV_DISABLE);
+
+ return 0;
+}
+
+int pmgr_adt_reset(const char *path)
+{
+ const u32 *devices;
+ u32 n_devices;
+ int ret = 0;
+
+ if (pmgr_adt_find_devices(path, &devices, &n_devices) < 0)
+ return -1;
+
+ for (u32 i = 0; i < n_devices; ++i) {
+ const struct pmgr_device *device;
+ u16 id = FIELD_GET(PMGR_DEVICE_ID, devices[i]);
+ u8 die = FIELD_GET(PMGR_DIE_ID, devices[i]);
+
+ if (pmgr_find_device(id, &device)) {
+ ret = -1;
+ continue;
+ }
+
+ if (pmgr_reset_device(die, device))
+ ret = -1;
+ }
+
+ return ret;
+}
+
+int pmgr_reset(int die, const char *name)
+{
+ const struct pmgr_device *dev = NULL;
+
+ for (unsigned int i = 0; i < pmgr_devices_len; ++i) {
+ if (strncmp(pmgr_devices[i].name, name, 0x10) == 0) {
+ dev = &pmgr_devices[i];
+ break;
+ }
+ }
+
+ if (!dev)
+ return -1;
+
+ return pmgr_reset_device(die, dev);
+}
+
+int pmgr_init(void)
+{
+ int node = adt_path_offset(adt, "/arm-io");
+ if (node < 0) {
+ printf("pmgr: Error getting /arm-io node\n");
+ return -1;
+ }
+ if (ADT_GETPROP(adt, node, "die-count", &pmgr_dies) < 0)
+ pmgr_dies = 1;
+
+ pmgr_offset = adt_path_offset_trace(adt, "/arm-io/pmgr", pmgr_path);
+ if (pmgr_offset < 0) {
+ printf("pmgr: Error getting /arm-io/pmgr node\n");
+ return -1;
+ }
+
+ pmgr_ps_regs = adt_getprop(adt, pmgr_offset, "ps-regs", &pmgr_ps_regs_len);
+ if (pmgr_ps_regs == NULL || pmgr_ps_regs_len == 0) {
+ printf("pmgr: Error getting /arm-io/pmgr ps-regs\n.");
+ return -1;
+ }
+
+ pmgr_devices = adt_getprop(adt, pmgr_offset, "devices", &pmgr_devices_len);
+ if (pmgr_devices == NULL || pmgr_devices_len == 0) {
+ printf("pmgr: Error getting /arm-io/pmgr devices.\n");
+ return -1;
+ }
+
+ pmgr_devices_len /= sizeof(*pmgr_devices);
+ pmgr_initialized = 1;
+
+ printf("pmgr: Cleaning up device states...\n");
+
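+ /* Devices may have been left active (or set to auto-enable) while their
+ * parents are powered down; enable those parents so the power hierarchy
+ * is consistent. */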
+ for (u8 die = 0; die < pmgr_dies; ++die) {
+ for (size_t i = 0; i < pmgr_devices_len; ++i) {
+ const struct pmgr_device *device = &pmgr_devices[i];
+
+ if ((device->flags & PMGR_FLAG_VIRTUAL))
+ continue;
+
+ uintptr_t addr = pmgr_device_get_addr(die, device);
+ if (!addr)
+ continue;
+
+ u32 reg = read32(addr);
+
+ if (reg & PMGR_AUTO_ENABLE || FIELD_GET(PMGR_PS_TARGET, reg) == PMGR_PS_ACTIVE) {
+ for (int j = 0; j < 2; j++) {
+ if (device->parent[j]) {
+ const struct pmgr_device *pdevice;
+ if (pmgr_find_device(device->parent[j], &pdevice)) {
+ printf("pmgr: Failed to find parent #%d for %s\n", device->parent[j],
+ device->name);
+ continue;
+ }
+
+ if ((pdevice->flags & PMGR_FLAG_VIRTUAL))
+ continue;
+
+ addr = pmgr_device_get_addr(die, pdevice);
+ if (!addr)
+ continue;
+
+ reg = read32(addr);
+
+ if (!(reg & PMGR_AUTO_ENABLE) &&
+ FIELD_GET(PMGR_PS_TARGET, reg) != PMGR_PS_ACTIVE) {
+ printf("pmgr: Enabling %d.%s, parent of active device %s\n", die,
+ pdevice->name, device->name);
+ pmgr_set_mode(addr, PMGR_PS_ACTIVE);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ printf("pmgr: initialized, %d devices on %u dies found.\n", pmgr_devices_len, pmgr_dies);
+
+ return 0;
+}
diff --git a/tools/src/pmgr.h b/tools/src/pmgr.h
new file mode 100644
index 0000000..5dcc939
--- /dev/null
+++ b/tools/src/pmgr.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef PMGR_H
+#define PMGR_H
+
+#include "types.h"
+
+#define PMGR_DIE_OFFSET 0x2000000000
+
+#define PMGR_DEVICE_ID GENMASK(15, 0)
+#define PMGR_DIE_ID GENMASK(31, 28)
+
+int pmgr_init(void);
+
+int pmgr_power_enable(u32 id);
+int pmgr_power_disable(u32 id);
+
+int pmgr_adt_power_enable(const char *path);
+int pmgr_adt_power_disable(const char *path);
+int pmgr_adt_reset(const char *path);
+
+int pmgr_reset(int die, const char *name);
+
+#endif
diff --git a/tools/src/proxy.c b/tools/src/proxy.c
new file mode 100644
index 0000000..3925d7e
--- /dev/null
+++ b/tools/src/proxy.c
@@ -0,0 +1,575 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "proxy.h"
+#include "dapf.h"
+#include "dart.h"
+#include "display.h"
+#include "exception.h"
+#include "fb.h"
+#include "gxf.h"
+#include "heapblock.h"
+#include "hv.h"
+#include "iodev.h"
+#include "kboot.h"
+#include "malloc.h"
+#include "mcc.h"
+#include "memory.h"
+#include "nvme.h"
+#include "pcie.h"
+#include "pmgr.h"
+#include "smp.h"
+#include "string.h"
+#include "tunables.h"
+#include "types.h"
+#include "uart.h"
+#include "uartproxy.h"
+#include "usb.h"
+#include "utils.h"
+#include "xnuboot.h"
+
+#include "minilzlib/minlzma.h"
+#include "tinf/tinf.h"
+
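+/*
+ * Handle a single request from the host and fill in the reply. A nonzero
+ * return value tells the caller to leave the proxy loop (P_EXIT, P_VECTOR
+ * and a successful P_KBOOT_BOOT).
+ */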
+int proxy_process(ProxyRequest *request, ProxyReply *reply)
+{
+ enum exc_guard_t guard_save = exc_guard;
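+ /* The memory accessor opcodes below set exc_guard so that a faulting
+ * access is skipped or marked instead of crashing the proxy; the previous
+ * guard is restored before returning. */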
+
+ reply->opcode = request->opcode;
+ reply->status = S_OK;
+ reply->retval = 0;
+ switch (request->opcode) {
+ case P_NOP:
+ break;
+ case P_EXIT:
+ if (request->args[0])
+ return request->args[0];
+ return 1;
+ case P_CALL: {
+ generic_func *f = (generic_func *)request->args[0];
+ reply->retval = f(request->args[1], request->args[2], request->args[3],
+ request->args[4], request->args[5]);
+ break;
+ }
+ case P_GET_BOOTARGS:
+ reply->retval = boot_args_addr;
+ break;
+ case P_GET_BASE:
+ reply->retval = (u64)_base;
+ break;
+ case P_SET_BAUD: {
+ int cnt = request->args[1];
+ printf("Changing baud rate to %lu...\n", request->args[0]);
+ uart_setbaud(request->args[0]);
+ while (cnt--) {
+ uart_putbyte(request->args[2]);
+ uart_putbyte(request->args[2] >> 8);
+ uart_putbyte(request->args[2] >> 16);
+ uart_putbyte(request->args[2] >> 24);
+ }
+ break;
+ }
+ case P_UDELAY:
+ udelay(request->args[0]);
+ break;
+ case P_SET_EXC_GUARD:
+ exc_count = 0;
+ guard_save = request->args[0];
+ break;
+ case P_GET_EXC_COUNT:
+ reply->retval = exc_count;
+ exc_count = 0;
+ break;
+ case P_EL0_CALL:
+ reply->retval = el0_call((void *)request->args[0], request->args[1], request->args[2],
+ request->args[3], request->args[4]);
+ break;
+ case P_EL1_CALL:
+ reply->retval = el1_call((void *)request->args[0], request->args[1], request->args[2],
+ request->args[3], request->args[4]);
+ break;
+ case P_VECTOR:
+ // forcefully restore tps6598x IRQs
+ usb_hpm_restore_irqs(1);
+ iodev_console_flush();
+ next_stage.entry = (generic_func *)request->args[0];
+ memcpy(next_stage.args, &request->args[1], 5 * sizeof(u64));
+ next_stage.restore_logo = true;
+ return 1;
+ case P_GL1_CALL:
+ reply->retval = gl1_call((void *)request->args[0], request->args[1], request->args[2],
+ request->args[3], request->args[4]);
+ break;
+ case P_GL2_CALL:
+ reply->retval = gl2_call((void *)request->args[0], request->args[1], request->args[2],
+ request->args[3], request->args[4]);
+ break;
+ case P_GET_SIMD_STATE:
+ get_simd_state((void *)request->args[0]);
+ break;
+ case P_PUT_SIMD_STATE:
+ put_simd_state((void *)request->args[0]);
+ break;
+ case P_REBOOT:
+ reboot();
+ break;
+
+ case P_WRITE64:
+ exc_guard = GUARD_SKIP;
+ write64(request->args[0], request->args[1]);
+ break;
+ case P_WRITE32:
+ exc_guard = GUARD_SKIP;
+ write32(request->args[0], request->args[1]);
+ break;
+ case P_WRITE16:
+ exc_guard = GUARD_SKIP;
+ write16(request->args[0], request->args[1]);
+ break;
+ case P_WRITE8:
+ exc_guard = GUARD_SKIP;
+ write8(request->args[0], request->args[1]);
+ break;
+
+ case P_READ64:
+ exc_guard = GUARD_MARK;
+ reply->retval = read64(request->args[0]);
+ break;
+ case P_READ32:
+ exc_guard = GUARD_MARK;
+ reply->retval = read32(request->args[0]);
+ break;
+ case P_READ16:
+ exc_guard = GUARD_MARK;
+ reply->retval = read16(request->args[0]);
+ break;
+ case P_READ8:
+ exc_guard = GUARD_MARK;
+ reply->retval = read8(request->args[0]);
+ break;
+
+ case P_SET64:
+ exc_guard = GUARD_MARK;
+ reply->retval = set64(request->args[0], request->args[1]);
+ break;
+ case P_SET32:
+ exc_guard = GUARD_MARK;
+ reply->retval = set32(request->args[0], request->args[1]);
+ break;
+ case P_SET16:
+ exc_guard = GUARD_MARK;
+ reply->retval = set16(request->args[0], request->args[1]);
+ break;
+ case P_SET8:
+ exc_guard = GUARD_MARK;
+ reply->retval = set8(request->args[0], request->args[1]);
+ break;
+
+ case P_CLEAR64:
+ exc_guard = GUARD_MARK;
+ reply->retval = clear64(request->args[0], request->args[1]);
+ break;
+ case P_CLEAR32:
+ exc_guard = GUARD_MARK;
+ reply->retval = clear32(request->args[0], request->args[1]);
+ break;
+ case P_CLEAR16:
+ exc_guard = GUARD_MARK;
+ reply->retval = clear16(request->args[0], request->args[1]);
+ break;
+ case P_CLEAR8:
+ exc_guard = GUARD_MARK;
+ reply->retval = clear8(request->args[0], request->args[1]);
+ break;
+
+ case P_MASK64:
+ exc_guard = GUARD_MARK;
+ reply->retval = mask64(request->args[0], request->args[1], request->args[2]);
+ break;
+ case P_MASK32:
+ exc_guard = GUARD_MARK;
+ reply->retval = mask32(request->args[0], request->args[1], request->args[2]);
+ break;
+ case P_MASK16:
+ exc_guard = GUARD_MARK;
+ reply->retval = mask16(request->args[0], request->args[1], request->args[2]);
+ break;
+ case P_MASK8:
+ exc_guard = GUARD_MARK;
+ reply->retval = mask8(request->args[0], request->args[1], request->args[2]);
+ break;
+
+ case P_WRITEREAD64:
+ exc_guard = GUARD_MARK;
+ reply->retval = writeread64(request->args[0], request->args[1]);
+ break;
+ case P_WRITEREAD32:
+ exc_guard = GUARD_MARK;
+ reply->retval = writeread32(request->args[0], request->args[1]);
+ break;
+ case P_WRITEREAD16:
+ exc_guard = GUARD_MARK;
+ reply->retval = writeread16(request->args[0], request->args[1]);
+ break;
+ case P_WRITEREAD8:
+ exc_guard = GUARD_MARK;
+ reply->retval = writeread8(request->args[0], request->args[1]);
+ break;
+
+ case P_MEMCPY64:
+ exc_guard = GUARD_RETURN;
+ memcpy64((void *)request->args[0], (void *)request->args[1], request->args[2]);
+ break;
+ case P_MEMCPY32:
+ exc_guard = GUARD_RETURN;
+ memcpy32((void *)request->args[0], (void *)request->args[1], request->args[2]);
+ break;
+ case P_MEMCPY16:
+ exc_guard = GUARD_RETURN;
+ memcpy16((void *)request->args[0], (void *)request->args[1], request->args[2]);
+ break;
+ case P_MEMCPY8:
+ exc_guard = GUARD_RETURN;
+ memcpy8((void *)request->args[0], (void *)request->args[1], request->args[2]);
+ break;
+
+ case P_MEMSET64:
+ exc_guard = GUARD_RETURN;
+ memset64((void *)request->args[0], request->args[1], request->args[2]);
+ break;
+ case P_MEMSET32:
+ exc_guard = GUARD_RETURN;
+ memset32((void *)request->args[0], request->args[1], request->args[2]);
+ break;
+ case P_MEMSET16:
+ exc_guard = GUARD_RETURN;
+ memset16((void *)request->args[0], request->args[1], request->args[2]);
+ break;
+ case P_MEMSET8:
+ exc_guard = GUARD_RETURN;
+ memset8((void *)request->args[0], request->args[1], request->args[2]);
+ break;
+
+ case P_IC_IALLUIS:
+ ic_ialluis();
+ break;
+ case P_IC_IALLU:
+ ic_iallu();
+ break;
+ case P_IC_IVAU:
+ ic_ivau_range((void *)request->args[0], request->args[1]);
+ break;
+ case P_DC_IVAC:
+ dc_ivac_range((void *)request->args[0], request->args[1]);
+ break;
+ case P_DC_ISW:
+ dc_isw((void *)request->args[0]);
+ break;
+ case P_DC_CSW:
+ dc_csw((void *)request->args[0]);
+ break;
+ case P_DC_CISW:
+ dc_cisw((void *)request->args[0]);
+ break;
+ case P_DC_ZVA:
+ dc_zva_range((void *)request->args[0], request->args[1]);
+ break;
+ case P_DC_CVAC:
+ dc_cvac_range((void *)request->args[0], request->args[1]);
+ break;
+ case P_DC_CVAU:
+ dc_cvau_range((void *)request->args[0], request->args[1]);
+ break;
+ case P_DC_CIVAC:
+ dc_civac_range((void *)request->args[0], request->args[1]);
+ break;
+ case P_MMU_SHUTDOWN:
+ mmu_shutdown();
+ break;
+ case P_MMU_INIT:
+ mmu_init();
+ break;
+ case P_MMU_DISABLE:
+ reply->retval = mmu_disable();
+ break;
+ case P_MMU_RESTORE:
+ mmu_restore(request->args[0]);
+ break;
+ case P_MMU_INIT_SECONDARY:
+ mmu_init_secondary(request->args[0]);
+ break;
+
+ case P_XZDEC: {
+ uint32_t destlen, srclen;
+ destlen = request->args[3];
+ srclen = request->args[1];
+ if (XzDecode((void *)request->args[0], &srclen, (void *)request->args[2], &destlen))
+ reply->retval = destlen;
+ else
+ reply->retval = ~0L;
+ break;
+ }
+ case P_GZDEC: {
+ unsigned int destlen, srclen;
+ destlen = request->args[3];
+ srclen = request->args[1];
+ size_t ret = tinf_gzip_uncompress((void *)request->args[2], &destlen,
+ (void *)request->args[0], &srclen);
+ if (ret != TINF_OK)
+ reply->retval = ret;
+ else
+ reply->retval = destlen;
+ break;
+ }
+
+ case P_SMP_START_SECONDARIES:
+ smp_start_secondaries();
+ break;
+ case P_SMP_CALL:
+ smp_call4(request->args[0], (void *)request->args[1], request->args[2],
+ request->args[3], request->args[4], request->args[5]);
+ break;
+ case P_SMP_CALL_SYNC:
+ smp_call4(request->args[0], (void *)request->args[1], request->args[2],
+ request->args[3], request->args[4], request->args[5]);
+ reply->retval = smp_wait(request->args[0]);
+ break;
+ case P_SMP_WAIT:
+ reply->retval = smp_wait(request->args[0]);
+ break;
+ case P_SMP_SET_WFE_MODE:
+ smp_set_wfe_mode(request->args[0]);
+ break;
+
+ case P_HEAPBLOCK_ALLOC:
+ reply->retval = (u64)heapblock_alloc(request->args[0]);
+ break;
+ case P_MALLOC:
+ reply->retval = (u64)malloc(request->args[0]);
+ break;
+ case P_MEMALIGN:
+ reply->retval = (u64)memalign(request->args[0], request->args[1]);
+ break;
+ case P_FREE:
+ free((void *)request->args[0]);
+ break;
+
+ case P_KBOOT_BOOT:
+ if (kboot_boot((void *)request->args[0]) == 0)
+ return 1;
+ break;
+ case P_KBOOT_SET_CHOSEN:
+ reply->retval = kboot_set_chosen((void *)request->args[0], (void *)request->args[1]);
+ break;
+ case P_KBOOT_SET_INITRD:
+ kboot_set_initrd((void *)request->args[0], request->args[1]);
+ break;
+ case P_KBOOT_PREPARE_DT:
+ reply->retval = kboot_prepare_dt((void *)request->args[0]);
+ break;
+
+ case P_PMGR_POWER_ENABLE:
+ reply->retval = pmgr_power_enable(request->args[0]);
+ break;
+ case P_PMGR_POWER_DISABLE:
+ reply->retval = pmgr_power_disable(request->args[0]);
+ break;
+ case P_PMGR_ADT_POWER_ENABLE:
+ reply->retval = pmgr_adt_power_enable((const char *)request->args[0]);
+ break;
+ case P_PMGR_ADT_POWER_DISABLE:
+ reply->retval = pmgr_adt_power_disable((const char *)request->args[0]);
+ break;
+ case P_PMGR_RESET:
+ reply->retval = pmgr_reset(request->args[0], (const char *)request->args[1]);
+ break;
+
+ case P_IODEV_SET_USAGE:
+ iodev_set_usage(request->args[0], request->args[1]);
+ break;
+ case P_IODEV_CAN_READ:
+ reply->retval = iodev_can_read(request->args[0]);
+ break;
+ case P_IODEV_CAN_WRITE:
+ reply->retval = iodev_can_write(request->args[0]);
+ break;
+ case P_IODEV_READ:
+ reply->retval =
+ iodev_read(request->args[0], (void *)request->args[1], request->args[2]);
+ break;
+ case P_IODEV_WRITE:
+ reply->retval =
+ iodev_write(request->args[0], (void *)request->args[1], request->args[2]);
+ break;
+ case P_IODEV_WHOAMI:
+ reply->retval = uartproxy_iodev;
+ break;
+
+ case P_USB_IODEV_VUART_SETUP:
+ usb_iodev_vuart_setup(request->args[0]);
+ break;
+
+ case P_TUNABLES_APPLY_GLOBAL:
+ reply->retval = tunables_apply_global((const char *)request->args[0],
+ (const char *)request->args[1]);
+ break;
+ case P_TUNABLES_APPLY_LOCAL:
+ reply->retval = tunables_apply_local((const char *)request->args[0],
+ (const char *)request->args[1], request->args[2]);
+ break;
+ case P_TUNABLES_APPLY_LOCAL_ADDR:
+ reply->retval = tunables_apply_local_addr(
+ (const char *)request->args[0], (const char *)request->args[1], request->args[2]);
+ break;
+
+ case P_DART_INIT:
+ reply->retval = (u64)dart_init(request->args[0], request->args[1], request->args[2],
+ request->args[3]);
+ break;
+ case P_DART_SHUTDOWN:
+ dart_shutdown((dart_dev_t *)request->args[0]);
+ break;
+ case P_DART_MAP:
+ reply->retval = dart_map((dart_dev_t *)request->args[0], request->args[1],
+ (void *)request->args[2], request->args[3]);
+ break;
+ case P_DART_UNMAP:
+ dart_unmap((dart_dev_t *)request->args[0], request->args[1], request->args[2]);
+ break;
+
+ case P_HV_INIT:
+ hv_init();
+ break;
+ case P_HV_MAP:
+ hv_map(request->args[0], request->args[1], request->args[2], request->args[3]);
+ break;
+ case P_HV_START:
+ hv_start((void *)request->args[0], &request->args[1]);
+ break;
+ case P_HV_TRANSLATE:
+ reply->retval = hv_translate(request->args[0], request->args[1], request->args[2],
+ (void *)request->args[3]);
+ break;
+ case P_HV_PT_WALK:
+ reply->retval = hv_pt_walk(request->args[0]);
+ break;
+ case P_HV_MAP_VUART:
+ hv_map_vuart(request->args[0], request->args[1], request->args[2]);
+ break;
+ case P_HV_MAP_VIRTIO:
+ hv_map_virtio(request->args[0], (void *)request->args[1]);
+ break;
+ case P_VIRTIO_PUT_BUFFER:
+ virtio_put_buffer(request->args[0], request->args[1], request->args[2],
+ request->args[3]);
+ break;
+ case P_HV_TRACE_IRQ:
+ reply->retval = hv_trace_irq(request->args[0], request->args[1], request->args[2],
+ request->args[3]);
+ break;
+ case P_HV_WDT_START:
+ hv_wdt_start(request->args[0]);
+ break;
+ case P_HV_START_SECONDARY:
+ hv_start_secondary(request->args[0], (void *)request->args[1], &request->args[2]);
+ break;
+ case P_HV_SWITCH_CPU:
+ reply->retval = hv_switch_cpu(request->args[0]);
+ break;
+ case P_HV_SET_TIME_STEALING:
+ hv_set_time_stealing(request->args[0], request->args[1]);
+ break;
+ case P_HV_PIN_CPU:
+ hv_pin_cpu(request->args[0]);
+ break;
+ case P_HV_WRITE_HCR:
+ hv_write_hcr(request->args[0]);
+ break;
+
+ case P_FB_INIT:
+ fb_init(request->args[0]);
+ break;
+ case P_FB_SHUTDOWN:
+ fb_shutdown(request->args[0]);
+ break;
+ case P_FB_BLIT:
+ // HACK: Running out of args, stash pix fmt in high bits of stride...
+ fb_blit(request->args[0], request->args[1], request->args[2], request->args[3],
+ (void *)request->args[4], (u32)request->args[5], request->args[5] >> 32);
+ break;
+ case P_FB_UNBLIT:
+ fb_unblit(request->args[0], request->args[1], request->args[2], request->args[3],
+ (void *)request->args[4], request->args[5]);
+ break;
+ case P_FB_FILL:
+ fb_fill(request->args[0], request->args[1], request->args[2], request->args[3],
+ int2rgb(request->args[4]));
+ break;
+ case P_FB_CLEAR:
+ fb_clear(int2rgb(request->args[0]));
+ break;
+ case P_FB_DISPLAY_LOGO:
+ fb_display_logo();
+ break;
+ case P_FB_RESTORE_LOGO:
+ fb_restore_logo();
+ break;
+ case P_FB_IMPROVE_LOGO:
+ fb_improve_logo();
+ break;
+
+ case P_PCIE_INIT:
+ pcie_init();
+ break;
+ case P_PCIE_SHUTDOWN:
+ pcie_shutdown();
+ break;
+
+ case P_NVME_INIT:
+ reply->retval = nvme_init();
+ break;
+ case P_NVME_SHUTDOWN:
+ nvme_shutdown();
+ break;
+ case P_NVME_READ:
+ reply->retval = nvme_read(request->args[0], request->args[1], (void *)request->args[2]);
+ break;
+ case P_NVME_FLUSH:
+ reply->retval = nvme_flush(request->args[0]);
+ break;
+
+ case P_MCC_GET_CARVEOUTS:
+ reply->retval = (u64)mcc_carveouts;
+ break;
+
+ case P_DISPLAY_INIT:
+ reply->retval = display_init();
+ break;
+ case P_DISPLAY_CONFIGURE:
+ reply->retval = display_configure((char *)request->args[0]);
+ break;
+ case P_DISPLAY_SHUTDOWN:
+ display_shutdown(request->args[0]);
+ break;
+ case P_DISPLAY_START_DCP:
+ display_start_dcp();
+ break;
+ case P_DISPLAY_IS_EXTERNAL:
+ reply->retval = display_is_external;
+ break;
+
+ case P_DAPF_INIT_ALL:
+ reply->retval = dapf_init_all();
+ break;
+ case P_DAPF_INIT:
+ reply->retval = dapf_init((const char *)request->args[0]);
+ break;
+
+ default:
+ reply->status = S_BADCMD;
+ break;
+ }
+ sysop("dsb sy");
+ sysop("isb");
+ exc_guard = guard_save;
+ return 0;
+}
diff --git a/tools/src/proxy.h b/tools/src/proxy.h
new file mode 100644
index 0000000..27a3f8e
--- /dev/null
+++ b/tools/src/proxy.h
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef __PROXY_H__
+#define __PROXY_H__
+
+#include "types.h"
+
+typedef enum {
+ P_NOP = 0x000, // System functions
+ P_EXIT,
+ P_CALL,
+ P_GET_BOOTARGS,
+ P_GET_BASE,
+ P_SET_BAUD,
+ P_UDELAY,
+ P_SET_EXC_GUARD,
+ P_GET_EXC_COUNT,
+ P_EL0_CALL,
+ P_EL1_CALL,
+ P_VECTOR,
+ P_GL1_CALL,
+ P_GL2_CALL,
+ P_GET_SIMD_STATE,
+ P_PUT_SIMD_STATE,
+ P_REBOOT,
+
+ P_WRITE64 = 0x100, // Generic register functions
+ P_WRITE32,
+ P_WRITE16,
+ P_WRITE8,
+ P_READ64,
+ P_READ32,
+ P_READ16,
+ P_READ8,
+ P_SET64,
+ P_SET32,
+ P_SET16,
+ P_SET8,
+ P_CLEAR64,
+ P_CLEAR32,
+ P_CLEAR16,
+ P_CLEAR8,
+ P_MASK64,
+ P_MASK32,
+ P_MASK16,
+ P_MASK8,
+ P_WRITEREAD64,
+ P_WRITEREAD32,
+ P_WRITEREAD16,
+ P_WRITEREAD8,
+
+ P_MEMCPY64 = 0x200, // Memory block transfer functions
+ P_MEMCPY32,
+ P_MEMCPY16,
+ P_MEMCPY8,
+ P_MEMSET64,
+ P_MEMSET32,
+ P_MEMSET16,
+ P_MEMSET8,
+
+ P_IC_IALLUIS = 0x300, // Cache and memory ops
+ P_IC_IALLU,
+ P_IC_IVAU,
+ P_DC_IVAC,
+ P_DC_ISW,
+ P_DC_CSW,
+ P_DC_CISW,
+ P_DC_ZVA,
+ P_DC_CVAC,
+ P_DC_CVAU,
+ P_DC_CIVAC,
+ P_MMU_SHUTDOWN,
+ P_MMU_INIT,
+ P_MMU_DISABLE,
+ P_MMU_RESTORE,
+ P_MMU_INIT_SECONDARY,
+
+ P_XZDEC = 0x400, // Decompression and data processing ops
+ P_GZDEC,
+
+ P_SMP_START_SECONDARIES = 0x500, // SMP and system management ops
+ P_SMP_CALL,
+ P_SMP_CALL_SYNC,
+ P_SMP_WAIT,
+ P_SMP_SET_WFE_MODE,
+
+ P_HEAPBLOCK_ALLOC = 0x600, // Heap and memory management ops
+ P_MALLOC,
+ P_MEMALIGN,
+ P_FREE,
+
+ P_KBOOT_BOOT = 0x700, // Kernel boot ops
+ P_KBOOT_SET_CHOSEN,
+ P_KBOOT_SET_INITRD,
+ P_KBOOT_PREPARE_DT,
+
+ P_PMGR_POWER_ENABLE = 0x800, // power/clock management ops
+ P_PMGR_POWER_DISABLE,
+ P_PMGR_ADT_POWER_ENABLE,
+ P_PMGR_ADT_POWER_DISABLE,
+ P_PMGR_RESET,
+
+ P_IODEV_SET_USAGE = 0x900,
+ P_IODEV_CAN_READ,
+ P_IODEV_CAN_WRITE,
+ P_IODEV_READ,
+ P_IODEV_WRITE,
+ P_IODEV_WHOAMI,
+ P_USB_IODEV_VUART_SETUP,
+
+ P_TUNABLES_APPLY_GLOBAL = 0xa00,
+ P_TUNABLES_APPLY_LOCAL,
+ P_TUNABLES_APPLY_LOCAL_ADDR,
+
+ P_DART_INIT = 0xb00,
+ P_DART_SHUTDOWN,
+ P_DART_MAP,
+ P_DART_UNMAP,
+
+ P_HV_INIT = 0xc00,
+ P_HV_MAP,
+ P_HV_START,
+ P_HV_TRANSLATE,
+ P_HV_PT_WALK,
+ P_HV_MAP_VUART,
+ P_HV_TRACE_IRQ,
+ P_HV_WDT_START,
+ P_HV_START_SECONDARY,
+ P_HV_SWITCH_CPU,
+ P_HV_SET_TIME_STEALING,
+ P_HV_PIN_CPU,
+ P_HV_WRITE_HCR,
+ P_HV_MAP_VIRTIO,
+ P_VIRTIO_PUT_BUFFER,
+
+ P_FB_INIT = 0xd00,
+ P_FB_SHUTDOWN,
+ P_FB_BLIT,
+ P_FB_UNBLIT,
+ P_FB_FILL,
+ P_FB_CLEAR,
+ P_FB_DISPLAY_LOGO,
+ P_FB_RESTORE_LOGO,
+ P_FB_IMPROVE_LOGO,
+
+ P_PCIE_INIT = 0xe00,
+ P_PCIE_SHUTDOWN,
+
+ P_NVME_INIT = 0xf00,
+ P_NVME_SHUTDOWN,
+ P_NVME_READ,
+ P_NVME_FLUSH,
+
+ P_MCC_GET_CARVEOUTS = 0x1000,
+
+ P_DISPLAY_INIT = 0x1100,
+ P_DISPLAY_CONFIGURE,
+ P_DISPLAY_SHUTDOWN,
+ P_DISPLAY_START_DCP,
+ P_DISPLAY_IS_EXTERNAL,
+
+ P_DAPF_INIT_ALL = 0x1200,
+ P_DAPF_INIT,
+
+} ProxyOp;
+
+#define S_OK 0
+#define S_BADCMD -1
+
+typedef struct {
+ u64 opcode;
+ u64 args[6];
+} ProxyRequest;
+
+typedef struct {
+ u64 opcode;
+ s64 status;
+ u64 retval;
+} ProxyReply;
+
+int proxy_process(ProxyRequest *request, ProxyReply *reply);
+
+#endif
diff --git a/tools/src/ringbuffer.c b/tools/src/ringbuffer.c
new file mode 100644
index 0000000..36b89d9
--- /dev/null
+++ b/tools/src/ringbuffer.c
@@ -0,0 +1,81 @@
+#include "ringbuffer.h"
+#include "malloc.h"
+#include "types.h"
+
+ringbuffer_t *ringbuffer_alloc(size_t len)
+{
+ ringbuffer_t *bfr = malloc(sizeof(*bfr));
+ if (!bfr)
+ return NULL;
+
+ bfr->buffer = malloc(len);
+ if (!bfr->buffer) {
+ free(bfr);
+ return NULL;
+ }
+
+ bfr->read = 0;
+ bfr->write = 0;
+ bfr->len = len;
+
+ return bfr;
+}
+
+void ringbuffer_free(ringbuffer_t *bfr)
+{
+ if (bfr)
+ free(bfr->buffer);
+ free(bfr);
+}
+
+size_t ringbuffer_read(u8 *target, size_t len, ringbuffer_t *bfr)
+{
+ size_t read;
+
+ for (read = 0; read < len; ++read) {
+ if (bfr->read == bfr->write)
+ break;
+
+ *target = bfr->buffer[bfr->read];
+ target++;
+
+ bfr->read++;
+ bfr->read %= bfr->len;
+ }
+
+ return read;
+}
+
+size_t ringbuffer_write(const u8 *src, size_t len, ringbuffer_t *bfr)
+{
+ size_t written;
+
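+ /* One slot is always kept empty so that read == write unambiguously
+ * means "buffer empty"; the buffer thus holds at most len - 1 bytes. */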
+ for (written = 0; written < len; ++written) {
+ if (((bfr->write + 1) % bfr->len) == bfr->read)
+ break;
+
+ bfr->buffer[bfr->write] = *src;
+ src++;
+
+ bfr->write++;
+ bfr->write %= bfr->len;
+ }
+
+ return written;
+}
+
+size_t ringbuffer_get_used(ringbuffer_t *bfr)
+{
+ size_t read = bfr->read;
+ size_t write = bfr->write;
+
+ if (write < read)
+ write += bfr->len;
+
+ return write - read;
+}
+
+size_t ringbuffer_get_free(ringbuffer_t *bfr)
+{
+ return bfr->len - ringbuffer_get_used(bfr);
+}
diff --git a/tools/src/ringbuffer.h b/tools/src/ringbuffer.h
new file mode 100644
index 0000000..553ae76
--- /dev/null
+++ b/tools/src/ringbuffer.h
@@ -0,0 +1,22 @@
+#ifndef RINGBUFFER_H
+#define RINGBUFFER_H
+
+#include "types.h"
+
+typedef struct {
+ u8 *buffer;
+ size_t len;
+ size_t read;
+ size_t write;
+} ringbuffer_t;
+
+ringbuffer_t *ringbuffer_alloc(size_t len);
+void ringbuffer_free(ringbuffer_t *bfr);
+
+size_t ringbuffer_read(u8 *target, size_t len, ringbuffer_t *bfr);
+size_t ringbuffer_write(const u8 *src, size_t len, ringbuffer_t *bfr);
+
+size_t ringbuffer_get_used(ringbuffer_t *bfr);
+size_t ringbuffer_get_free(ringbuffer_t *bfr);
+
+#endif
diff --git a/tools/src/rtkit.c b/tools/src/rtkit.c
new file mode 100644
index 0000000..db80258
--- /dev/null
+++ b/tools/src/rtkit.c
@@ -0,0 +1,710 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "../config.h"
+
+#include "rtkit.h"
+#include "adt.h"
+#include "asc.h"
+#include "dart.h"
+#include "iova.h"
+#include "malloc.h"
+#include "sart.h"
+#include "string.h"
+#include "types.h"
+#include "utils.h"
+
+#define rtkit_printf(...) \
+ do { \
+ debug_printf("rtkit(%s): ", rtk->name); \
+ debug_printf(__VA_ARGS__); \
+ } while (0)
+
+#define RTKIT_EP_MGMT 0
+#define RTKIT_EP_CRASHLOG 1
+#define RTKIT_EP_SYSLOG 2
+#define RTKIT_EP_DEBUG 3
+#define RTKIT_EP_IOREPORT 4
+#define RTKIT_EP_OSLOG 8
+
+#define MGMT_TYPE GENMASK(59, 52)
+
+#define MGMT_PWR_STATE GENMASK(15, 0)
+
+#define MSG_BUFFER_REQUEST 1
+#define MSG_BUFFER_REQUEST_SIZE GENMASK(51, 44)
+#define MSG_BUFFER_REQUEST_IOVA GENMASK(41, 0)
+
+#define MSG_SYSLOG_INIT 8
+#define MSG_SYSLOG_INIT_ENTRYSIZE GENMASK(39, 24)
+#define MSG_SYSLOG_INIT_COUNT GENMASK(15, 0)
+#define MSG_SYSLOG_LOG 5
+#define MSG_SYSLOG_LOG_INDEX GENMASK(7, 0)
+
+#define MSG_OSLOG_INIT 0x10
+#define MSG_OSLOG_ACK 0x30
+
+#define MGMT_MSG_HELLO 1
+#define MGMT_MSG_HELLO_ACK 2
+#define MGMT_MSG_HELLO_MINVER GENMASK(15, 0)
+#define MGMT_MSG_HELLO_MAXVER GENMASK(31, 16)
+
+#define MGMT_MSG_IOP_PWR_STATE 6
+#define MGMT_MSG_IOP_PWR_STATE_ACK 7
+
+#define MGMT_MSG_EPMAP 8
+#define MGMT_MSG_EPMAP_DONE BIT(51)
+#define MGMT_MSG_EPMAP_BASE GENMASK(34, 32)
+#define MGMT_MSG_EPMAP_BITMAP GENMASK(31, 0)
+
+#define MGMT_MSG_EPMAP_REPLY 8
+#define MGMT_MSG_EPMAP_REPLY_DONE BIT(51)
+#define MGMT_MSG_EPMAP_REPLY_MORE BIT(0)
+
+#define MGMT_MSG_AP_PWR_STATE 0xb
+#define MGMT_MSG_AP_PWR_STATE_ACK 0xb
+
+#define MGMT_MSG_START_EP 5
+#define MGMT_MSG_START_EP_IDX GENMASK(39, 32)
+#define MGMT_MSG_START_EP_FLAG BIT(1)
+
+#define RTKIT_MIN_VERSION 11
+#define RTKIT_MAX_VERSION 12
+
+#define IOVA_MASK GENMASK(35, 0)
+
+enum rtkit_power_state {
+ RTKIT_POWER_OFF = 0x00,
+ RTKIT_POWER_SLEEP = 0x01,
+ RTKIT_POWER_QUIESCED = 0x10,
+ RTKIT_POWER_ON = 0x20,
+ RTKIT_POWER_INIT = 0x220,
+};
+
+struct rtkit_dev {
+ char *name;
+
+ asc_dev_t *asc;
+ dart_dev_t *dart;
+ iova_domain_t *dart_iovad;
+ sart_dev_t *sart;
+
+ u64 dva_base;
+
+ enum rtkit_power_state iop_power;
+ enum rtkit_power_state ap_power;
+
+ struct rtkit_buffer syslog_bfr;
+ struct rtkit_buffer crashlog_bfr;
+ struct rtkit_buffer ioreport_bfr;
+
+ u32 syslog_cnt, syslog_size;
+
+ bool crashed;
+};
+
+struct syslog_log {
+ u32 hdr;
+ u32 unk;
+ char context[24];
+ char msg[];
+};
+
+struct crashlog_hdr {
+ u32 type;
+ u32 ver;
+ u32 total_size;
+ u32 flags;
+ u8 _padding[16];
+};
+
+struct crashlog_entry {
+ u32 type;
+ u32 _padding;
+ u32 flags;
+ u32 len;
+ u8 payload[];
+};
+
+rtkit_dev_t *rtkit_init(const char *name, asc_dev_t *asc, dart_dev_t *dart,
+ iova_domain_t *dart_iovad, sart_dev_t *sart)
+{
+ if (dart && sart) {
+ printf("rtkit: Cannot use both SART and DART simultaneously\n");
+ return NULL;
+ }
+
+ if (dart && !dart_iovad) {
+ printf("rtkit: if DART is used iovad is already required\n");
+ return NULL;
+ }
+
+ rtkit_dev_t *rtk = malloc(sizeof(*rtk));
+ if (!rtk)
+ return NULL;
+ memset(rtk, 0, sizeof(*rtk));
+
+ size_t name_len = strlen(name);
+ rtk->name = malloc(name_len + 1);
+ if (!rtk->name)
+ goto out_free_rtk;
+ strcpy(rtk->name, name);
+
+ rtk->asc = asc;
+ rtk->dart = dart;
+ rtk->dart_iovad = dart_iovad;
+ rtk->sart = sart;
+ rtk->iop_power = RTKIT_POWER_OFF;
+ rtk->ap_power = RTKIT_POWER_OFF;
+ rtk->dva_base = 0;
+
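+ /* Some IOPs expect device addresses with a fixed offset, published in the
+ * ADT as "asc-dram-mask"; it is OR'ed into every DVA handed to the IOP. */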
+ int iop_node = asc_get_iop_node(asc);
+ ADT_GETPROP(adt, iop_node, "asc-dram-mask", &rtk->dva_base);
+
+ return rtk;
+
+out_free_rtk:
+ free(rtk);
+ return NULL;
+}
+
+void rtkit_free(rtkit_dev_t *rtk)
+{
+ rtkit_free_buffer(rtk, &rtk->syslog_bfr);
+ rtkit_free_buffer(rtk, &rtk->crashlog_bfr);
+ rtkit_free_buffer(rtk, &rtk->ioreport_bfr);
+ free(rtk->name);
+ free(rtk);
+}
+
+bool rtkit_send(rtkit_dev_t *rtk, const struct rtkit_message *msg)
+{
+ struct asc_message asc_msg;
+
+ asc_msg.msg0 = msg->msg;
+ asc_msg.msg1 = msg->ep;
+
+ return asc_send(rtk->asc, &asc_msg);
+}
+
+bool rtkit_map(rtkit_dev_t *rtk, void *phys, size_t sz, u64 *dva)
+{
+ sz = ALIGN_UP(sz, 16384);
+
+ if (rtk->sart) {
+ if (!sart_add_allowed_region(rtk->sart, phys, sz)) {
+ rtkit_printf("sart_add_allowed_region failed (%p, 0x%lx)\n", phys, sz);
+ return false;
+ }
+ *dva = (u64)phys;
+ return true;
+ } else if (rtk->dart) {
+ u64 iova = iova_alloc(rtk->dart_iovad, sz);
+ if (!iova) {
+ rtkit_printf("failed to alloc iova (size 0x%lx)\n", sz);
+ return false;
+ }
+
+ if (dart_map(rtk->dart, iova, phys, sz) < 0) {
+ rtkit_printf("failed to DART map %p -> 0x%lx (0x%lx)\n", phys, iova, sz);
+ iova_free(rtk->dart_iovad, iova, sz);
+ return false;
+ }
+
+ *dva = iova | rtk->dva_base;
+ return true;
+ } else {
+ rtkit_printf("TODO: implement no IOMMU buffers\n");
+ return false;
+ }
+}
+
+bool rtkit_unmap(rtkit_dev_t *rtk, u64 dva, size_t sz)
+{
+ if (rtk->sart) {
+ if (!sart_remove_allowed_region(rtk->sart, (void *)dva, sz))
+ rtkit_printf("sart_remove_allowed_region failed (0x%lx, 0x%lx)\n", dva, sz);
+ return true;
+ } else if (rtk->dart) {
+ dva &= ~rtk->dva_base;
+ dart_unmap(rtk->dart, dva & IOVA_MASK, sz);
+ iova_free(rtk->dart_iovad, dva & IOVA_MASK, sz);
+ return true;
+ } else {
+ rtkit_printf("TODO: implement no IOMMU buffers\n");
+ return false;
+ }
+}
+
+bool rtkit_alloc_buffer(rtkit_dev_t *rtk, struct rtkit_buffer *bfr, size_t sz)
+{
+ bfr->bfr = memalign(SZ_16K, sz);
+ if (!bfr->bfr) {
+ rtkit_printf("unable to allocate %zu buffer\n", sz);
+ return false;
+ }
+
+ sz = ALIGN_UP(sz, 16384);
+
+ bfr->sz = sz;
+ if (!rtkit_map(rtk, bfr->bfr, sz, &bfr->dva))
+ goto error;
+
+ return true;
+
+error:
+ free(bfr->bfr);
+ bfr->bfr = NULL;
+ return false;
+}
+
+bool rtkit_free_buffer(rtkit_dev_t *rtk, struct rtkit_buffer *bfr)
+{
+ if (!bfr->bfr || !is_heap(bfr->bfr))
+ return true;
+
+ if (!rtkit_unmap(rtk, bfr->dva, bfr->sz))
+ return false;
+
+ free(bfr->bfr);
+
+ return true;
+}
+
+static bool rtkit_handle_buffer_request(rtkit_dev_t *rtk, struct rtkit_message *msg,
+ struct rtkit_buffer *bfr)
+{
+ size_t n_4kpages = FIELD_GET(MSG_BUFFER_REQUEST_SIZE, msg->msg);
+ size_t sz = n_4kpages << 12;
+ u64 addr = FIELD_GET(MSG_BUFFER_REQUEST_IOVA, msg->msg);
+
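+ /* If the IOP supplied an address, the buffer already exists on its side
+ * and only needs to be translated; otherwise allocate one and reply with
+ * its DVA. */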
+ if (addr) {
+ bfr->dva = addr & ~rtk->dva_base;
+ bfr->sz = sz;
+ bfr->bfr = dart_translate(rtk->dart, bfr->dva & IOVA_MASK);
+ if (!bfr->bfr) {
+ rtkit_printf("failed to translate pre-allocated buffer (ep 0x%x, buf 0x%lx)\n", msg->ep,
+ addr);
+ return false;
+ } else {
+ rtkit_printf("pre-allocated buffer (ep 0x%x, dva 0x%lx, phys %p)\n", msg->ep, addr,
+ bfr->bfr);
+ }
+ return true;
+
+ } else {
+ if (!rtkit_alloc_buffer(rtk, bfr, sz)) {
+ rtkit_printf("unable to allocate buffer\n");
+ return false;
+ }
+ }
+
+ struct asc_message reply;
+ reply.msg1 = msg->ep;
+ reply.msg0 = FIELD_PREP(MGMT_TYPE, MSG_BUFFER_REQUEST);
+ reply.msg0 |= FIELD_PREP(MSG_BUFFER_REQUEST_SIZE, n_4kpages);
+ if (!addr)
+ reply.msg0 |= FIELD_PREP(MSG_BUFFER_REQUEST_IOVA, bfr->dva | rtk->dva_base);
+
+ if (!asc_send(rtk->asc, &reply)) {
+ rtkit_printf("unable to send buffer reply\n");
+ rtkit_free_buffer(rtk, bfr);
+ goto error;
+ }
+
+ return true;
+
+error:
+ return false;
+}
+
+static void rtkit_crashed(rtkit_dev_t *rtk)
+{
+ struct crashlog_hdr *hdr = rtk->crashlog_bfr.bfr;
+ rtk->crashed = true;
+
+ rtkit_printf("IOP crashed!\n");
+
+ if (hdr->type != 'CLHE') {
+ rtkit_printf("bad crashlog header 0x%x @ %p\n", hdr->type, hdr);
+ return;
+ }
+
+ struct crashlog_entry *p = (void *)(hdr + 1);
+
+ rtkit_printf("== CRASH INFO ==\n");
+ while (p->type != 'CLHE') {
+ switch (p->type) {
+ case 'Cstr':
+ rtkit_printf(" Message %d: %s\n", p->payload[0], &p->payload[4]);
+ break;
+ default:
+ rtkit_printf(" 0x%x\n", p->type);
+ break;
+ }
+ p = ((void *)p) + p->len;
+ }
+}
+
+int rtkit_recv(rtkit_dev_t *rtk, struct rtkit_message *msg)
+{
+ struct asc_message asc_msg;
+ bool ok = true;
+
+ if (rtk->crashed)
+ return -1;
+
+ while (asc_recv(rtk->asc, &asc_msg)) {
+ if (asc_msg.msg1 >= 0x100) {
+ rtkit_printf("WARNING: received message for invalid endpoint %x >= 0x100\n",
+ asc_msg.msg1);
+ continue;
+ }
+
+ msg->msg = asc_msg.msg0;
+ msg->ep = (u8)asc_msg.msg1;
+
+ /* if this is an app message we can just forward it to the caller */
+ if (msg->ep >= 0x20)
+ return 1;
+
+ u32 msgtype = FIELD_GET(MGMT_TYPE, msg->msg);
+ switch (msg->ep) {
+ case RTKIT_EP_MGMT:
+ switch (msgtype) {
+ case MGMT_MSG_IOP_PWR_STATE_ACK:
+ rtk->iop_power = FIELD_GET(MGMT_PWR_STATE, msg->msg);
+ break;
+ case MGMT_MSG_AP_PWR_STATE_ACK:
+ rtk->ap_power = FIELD_GET(MGMT_PWR_STATE, msg->msg);
+ break;
+ default:
+ rtkit_printf("unknown management message %x\n", msgtype);
+ }
+ break;
+ case RTKIT_EP_SYSLOG:
+ switch (msgtype) {
+ case MSG_BUFFER_REQUEST:
+ ok = ok && rtkit_handle_buffer_request(rtk, msg, &rtk->syslog_bfr);
+ break;
+ case MSG_SYSLOG_INIT:
+ rtk->syslog_cnt = FIELD_GET(MSG_SYSLOG_INIT_COUNT, msg->msg);
+ rtk->syslog_size = FIELD_GET(MSG_SYSLOG_INIT_ENTRYSIZE, msg->msg);
+ break;
+ case MSG_SYSLOG_LOG:
+#ifdef RTKIT_SYSLOG
+ {
+ u64 index = FIELD_GET(MSG_SYSLOG_LOG_INDEX, msg->msg);
+ u64 stride = rtk->syslog_size + sizeof(struct syslog_log);
+ struct syslog_log *log = rtk->syslog_bfr.bfr + stride * index;
+ rtkit_printf("syslog: [%s]%s", log->context, log->msg);
+ if (log->msg[strlen(log->msg) - 1] != '\n')
+ printf("\n");
+ }
+#endif
+ if (!asc_send(rtk->asc, &asc_msg))
+ rtkit_printf("failed to ack syslog\n");
+ break;
+ default:
+ rtkit_printf("unknown syslog message %x\n", msgtype);
+ }
+ break;
+ case RTKIT_EP_CRASHLOG:
+ switch (msgtype) {
+ case MSG_BUFFER_REQUEST:
+ if (!rtk->crashlog_bfr.bfr) {
+ ok = ok && rtkit_handle_buffer_request(rtk, msg, &rtk->crashlog_bfr);
+ } else {
+ rtkit_crashed(rtk);
+ return -1;
+ }
+ break;
+ default:
+ rtkit_printf("unknown crashlog message %x\n", msgtype);
+ }
+ break;
+ case RTKIT_EP_IOREPORT:
+ switch (msgtype) {
+ case MSG_BUFFER_REQUEST:
+ ok = ok && rtkit_handle_buffer_request(rtk, msg, &rtk->ioreport_bfr);
+ break;
+ /* unknown but must be ACKed */
+ case 0x8:
+ case 0xc:
+ if (!rtkit_send(rtk, msg))
+ rtkit_printf("unable to ACK unknown ioreport message\n");
+ break;
+ default:
+ rtkit_printf("unknown ioreport message %x\n", msgtype);
+ }
+ break;
+ case RTKIT_EP_OSLOG:
+ switch (msgtype) {
+ case MSG_OSLOG_INIT:
+ msg->msg = FIELD_PREP(MGMT_TYPE, MSG_OSLOG_ACK);
+ if (!rtkit_send(rtk, msg))
+ rtkit_printf("unable to ACK oslog init message\n");
+ break;
+ default:
+ rtkit_printf("unknown oslog message %x\n", msgtype);
+ }
+ break;
+ default:
+ rtkit_printf("message to unknown system endpoint 0x%02x: %lx\n", msg->ep, msg->msg);
+ }
+
+ if (!ok) {
+ rtkit_printf("failed to handle system message 0x%02x: %lx\n", msg->ep, msg->msg);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+bool rtkit_start_ep(rtkit_dev_t *rtk, u8 ep)
+{
+ struct asc_message msg;
+
+ msg.msg0 = FIELD_PREP(MGMT_TYPE, MGMT_MSG_START_EP);
+ msg.msg0 |= MGMT_MSG_START_EP_FLAG;
+ msg.msg0 |= FIELD_PREP(MGMT_MSG_START_EP_IDX, ep);
+ msg.msg1 = RTKIT_EP_MGMT;
+
+ if (!asc_send(rtk->asc, &msg)) {
+ rtkit_printf("unable to start endpoint 0x%02x\n", ep);
+ return false;
+ }
+
+ return true;
+}
+
+bool rtkit_boot(rtkit_dev_t *rtk)
+{
+ struct asc_message msg;
+
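+ /* Boot handshake: wake the IOP, wait for its HELLO advertising a protocol
+ * version range, ACK with the version we pick, then collect the endpoint
+ * map and start the system endpoints. */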
+ /* boot the IOP if it isn't already */
+ asc_cpu_start(rtk->asc);
+ /* can be sent unconditionally to wake up a possibly sleeping IOP */
+ msg.msg0 = FIELD_PREP(MGMT_TYPE, MGMT_MSG_IOP_PWR_STATE) |
+ FIELD_PREP(MGMT_PWR_STATE, RTKIT_POWER_INIT);
+ msg.msg1 = RTKIT_EP_MGMT;
+ if (!asc_send(rtk->asc, &msg)) {
+ rtkit_printf("unable to send wakeup message\n");
+ return false;
+ }
+
+ if (!asc_recv_timeout(rtk->asc, &msg, USEC_PER_SEC)) {
+ rtkit_printf("did not receive HELLO\n");
+ return false;
+ }
+
+ if (msg.msg1 != RTKIT_EP_MGMT) {
+ rtkit_printf("expected HELLO but got message for EP 0x%x", msg.msg1);
+ return false;
+ }
+
+ u32 msgtype;
+ msgtype = FIELD_GET(MGMT_TYPE, msg.msg0);
+ if (msgtype != MGMT_MSG_HELLO) {
+ rtkit_printf("expected HELLO but got message with type 0x%02x", msgtype);
+
+ return false;
+ }
+
+ u32 min_ver, max_ver, want_ver;
+ min_ver = FIELD_GET(MGMT_MSG_HELLO_MINVER, msg.msg0);
+ max_ver = FIELD_GET(MGMT_MSG_HELLO_MAXVER, msg.msg0);
+ want_ver = min(RTKIT_MAX_VERSION, max_ver);
+
+ if (min_ver > RTKIT_MAX_VERSION || max_ver < RTKIT_MIN_VERSION) {
+ rtkit_printf("supported versions [%d,%d] must overlap versions [%d,%d]\n",
+ RTKIT_MIN_VERSION, RTKIT_MAX_VERSION, min_ver, max_ver);
+ return false;
+ }
+
+ rtkit_printf("booting with version %d\n", want_ver);
+
+ msg.msg0 = FIELD_PREP(MGMT_TYPE, MGMT_MSG_HELLO_ACK);
+ msg.msg0 |= FIELD_PREP(MGMT_MSG_HELLO_MINVER, want_ver);
+ msg.msg0 |= FIELD_PREP(MGMT_MSG_HELLO_MAXVER, want_ver);
+ msg.msg1 = RTKIT_EP_MGMT;
+ if (!asc_send(rtk->asc, &msg)) {
+ rtkit_printf("couldn't send HELLO ack\n");
+ return false;
+ }
+
+ bool has_crashlog = false;
+ bool has_debug = false;
+ bool has_ioreport = false;
+ bool has_syslog = false;
+ bool has_oslog = false;
+ bool got_epmap = false;
+ while (!got_epmap) {
+ if (!asc_recv_timeout(rtk->asc, &msg, USEC_PER_SEC)) {
+ rtkit_printf("couldn't receive message while waiting for endpoint map\n");
+ return false;
+ }
+
+ if (msg.msg1 != RTKIT_EP_MGMT) {
+ rtkit_printf("expected management message while waiting for endpoint map but got "
+ "message for endpoint 0x%x\n",
+ msg.msg1);
+ return false;
+ }
+
+ msgtype = FIELD_GET(MGMT_TYPE, msg.msg0);
+ if (msgtype != MGMT_MSG_EPMAP) {
+ rtkit_printf("expected endpoint map message but got 0x%x instead\n", msgtype);
+ return false;
+ }
+
+ u32 bitmap = FIELD_GET(MGMT_MSG_EPMAP_BITMAP, msg.msg0);
+ u32 base = FIELD_GET(MGMT_MSG_EPMAP_BASE, msg.msg0);
+ for (unsigned int i = 0; i < 32; i++) {
+ if (bitmap & (1U << i)) {
+ u8 ep_idx = 32 * base + i;
+
+ if (ep_idx >= 0x20)
+ continue;
+ switch (ep_idx) {
+ case RTKIT_EP_CRASHLOG:
+ has_crashlog = true;
+ break;
+ case RTKIT_EP_DEBUG:
+ has_debug = true;
+ break;
+ case RTKIT_EP_IOREPORT:
+ has_ioreport = true;
+ break;
+ case RTKIT_EP_SYSLOG:
+ has_syslog = true;
+ break;
+ case RTKIT_EP_OSLOG:
+ has_oslog = true;
+ break;
+ case RTKIT_EP_MGMT:
+ break;
+ default:
+ rtkit_printf("unknown system endpoint 0x%02x\n", ep_idx);
+ }
+ }
+ }
+
+ if (msg.msg0 & MGMT_MSG_EPMAP_DONE)
+ got_epmap = true;
+
+ msg.msg0 = FIELD_PREP(MGMT_TYPE, MGMT_MSG_EPMAP_REPLY);
+ msg.msg0 |= FIELD_PREP(MGMT_MSG_EPMAP_BASE, base);
+ if (got_epmap)
+ msg.msg0 |= MGMT_MSG_EPMAP_REPLY_DONE;
+ else
+ msg.msg0 |= MGMT_MSG_EPMAP_REPLY_MORE;
+
+ msg.msg1 = RTKIT_EP_MGMT;
+
+ if (!asc_send(rtk->asc, &msg)) {
+ rtkit_printf("couldn't reply to endpoint map\n");
+ return false;
+ }
+ }
+
+ /* start all required system endpoints */
+ if (has_debug && !rtkit_start_ep(rtk, RTKIT_EP_DEBUG))
+ return false;
+ if (has_crashlog && !rtkit_start_ep(rtk, RTKIT_EP_CRASHLOG))
+ return false;
+ if (has_syslog && !rtkit_start_ep(rtk, RTKIT_EP_SYSLOG))
+ return false;
+ if (has_ioreport && !rtkit_start_ep(rtk, RTKIT_EP_IOREPORT))
+ return false;
+ if (has_oslog && !rtkit_start_ep(rtk, RTKIT_EP_OSLOG))
+ return false;
+
+ while (rtk->iop_power != RTKIT_POWER_ON) {
+ struct rtkit_message rtk_msg;
+ int ret = rtkit_recv(rtk, &rtk_msg);
+ if (ret == 1)
+ rtkit_printf("unexpected message to non-system endpoint 0x%02x during boot: %lx\n",
+ rtk_msg.ep, rtk_msg.msg);
+ else if (ret < 0)
+ return false;
+ }
+
+ /* this enables syslog */
+ msg.msg0 =
+ FIELD_PREP(MGMT_TYPE, MGMT_MSG_AP_PWR_STATE) | FIELD_PREP(MGMT_PWR_STATE, RTKIT_POWER_ON);
+ msg.msg1 = RTKIT_EP_MGMT;
+ if (!asc_send(rtk->asc, &msg)) {
+ rtkit_printf("unable to send AP power message\n");
+ return false;
+ }
+
+ return true;
+}
+
+static bool rtkit_switch_power_state(rtkit_dev_t *rtk, enum rtkit_power_state target)
+{
+ struct asc_message msg;
+
+ if (rtk->crashed)
+ return false;
+
+ /* AP power should always go to QUIESCED, otherwise rebooting doesn't work */
+ msg.msg0 = FIELD_PREP(MGMT_TYPE, MGMT_MSG_AP_PWR_STATE) |
+ FIELD_PREP(MGMT_PWR_STATE, RTKIT_POWER_QUIESCED);
+ msg.msg1 = RTKIT_EP_MGMT;
+ if (!asc_send(rtk->asc, &msg)) {
+ rtkit_printf("unable to send shutdown message\n");
+ return false;
+ }
+
+ while (rtk->ap_power != RTKIT_POWER_QUIESCED) {
+ struct rtkit_message rtk_msg;
+ int ret = rtkit_recv(rtk, &rtk_msg);
+
+ if (ret > 0) {
+ rtkit_printf("unexpected message to non-system endpoint 0x%02x during shutdown: %lx\n",
+ rtk_msg.ep, rtk_msg.msg);
+ continue;
+ } else if (ret < 0) {
+ rtkit_printf("IOP died during shutdown\n");
+ return false;
+ }
+ }
+
+ msg.msg0 = FIELD_PREP(MGMT_TYPE, MGMT_MSG_IOP_PWR_STATE) | FIELD_PREP(MGMT_PWR_STATE, target);
+ if (!asc_send(rtk->asc, &msg)) {
+ rtkit_printf("unable to send shutdown message\n");
+ return false;
+ }
+
+ while (rtk->iop_power != target) {
+ struct rtkit_message rtk_msg;
+ int ret = rtkit_recv(rtk, &rtk_msg);
+
+ if (ret > 0) {
+ rtkit_printf("unexpected message to non-system endpoint 0x%02x during shutdown: %lx\n",
+ rtk_msg.ep, rtk_msg.msg);
+ continue;
+ } else if (ret < 0) {
+ rtkit_printf("IOP died during shutdown\n");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool rtkit_quiesce(rtkit_dev_t *rtk)
+{
+ return rtkit_switch_power_state(rtk, RTKIT_POWER_QUIESCED);
+}
+
+bool rtkit_sleep(rtkit_dev_t *rtk)
+{
+ if (!rtkit_switch_power_state(rtk, RTKIT_POWER_SLEEP))
+ return false;
+
+ asc_cpu_stop(rtk->asc);
+ return true;
+}
diff --git a/tools/src/rtkit.h b/tools/src/rtkit.h
new file mode 100644
index 0000000..9d87ee1
--- /dev/null
+++ b/tools/src/rtkit.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef RTKIT_H
+#define RTKIT_H
+
+#include "asc.h"
+#include "dart.h"
+#include "iova.h"
+#include "sart.h"
+#include "types.h"
+
+typedef struct rtkit_dev rtkit_dev_t;
+
+struct rtkit_message {
+ u8 ep;
+ u64 msg;
+};
+
+struct rtkit_buffer {
+ void *bfr;
+ u64 dva;
+ size_t sz;
+};
+
+rtkit_dev_t *rtkit_init(const char *name, asc_dev_t *asc, dart_dev_t *dart,
+ iova_domain_t *dart_iovad, sart_dev_t *sart);
+bool rtkit_quiesce(rtkit_dev_t *rtk);
+bool rtkit_sleep(rtkit_dev_t *rtk);
+void rtkit_free(rtkit_dev_t *rtk);
+
+bool rtkit_start_ep(rtkit_dev_t *rtk, u8 ep);
+bool rtkit_boot(rtkit_dev_t *rtk);
+
+int rtkit_recv(rtkit_dev_t *rtk, struct rtkit_message *msg);
+bool rtkit_send(rtkit_dev_t *rtk, const struct rtkit_message *msg);
+
+bool rtkit_map(rtkit_dev_t *rtk, void *phys, size_t sz, u64 *dva);
+bool rtkit_unmap(rtkit_dev_t *rtk, u64 dva, size_t sz);
+
+bool rtkit_alloc_buffer(rtkit_dev_t *rtk, struct rtkit_buffer *bfr, size_t sz);
+bool rtkit_free_buffer(rtkit_dev_t *rtk, struct rtkit_buffer *bfr);
+
+#endif
diff --git a/tools/src/sart.c b/tools/src/sart.c
new file mode 100644
index 0000000..e0345cd
--- /dev/null
+++ b/tools/src/sart.c
@@ -0,0 +1,219 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "adt.h"
+#include "malloc.h"
+#include "sart.h"
+#include "string.h"
+#include "utils.h"
+
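+/*
+ * The SART is a simple DMA address filter: each entry allows a co-processor
+ * to access one physical memory region, and anything not covered is blocked.
+ */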
+struct sart_dev {
+ uintptr_t base;
+ u32 protected_entries;
+
+ void (*get_entry)(sart_dev_t *sart, int index, u8 *flags, void **paddr, size_t *size);
+ bool (*set_entry)(sart_dev_t *sart, int index, u8 flags, void *paddr, size_t size);
+};
+
+#define APPLE_SART_MAX_ENTRIES 16
+
+/* This is probably a bitfield but the exact meaning of each bit is unknown. */
+#define APPLE_SART_FLAGS_ALLOW 0xff
+
+/* SARTv2 registers */
+#define APPLE_SART2_CONFIG(idx) (0x00 + 4 * (idx))
+#define APPLE_SART2_CONFIG_FLAGS GENMASK(31, 24)
+#define APPLE_SART2_CONFIG_SIZE GENMASK(23, 0)
+#define APPLE_SART2_CONFIG_SIZE_SHIFT 12
+#define APPLE_SART2_CONFIG_SIZE_MAX GENMASK(23, 0)
+
+#define APPLE_SART2_PADDR(idx) (0x40 + 4 * (idx))
+#define APPLE_SART2_PADDR_SHIFT 12
+
+/* SARTv3 registers */
+#define APPLE_SART3_CONFIG(idx) (0x00 + 4 * (idx))
+
+#define APPLE_SART3_PADDR(idx) (0x40 + 4 * (idx))
+#define APPLE_SART3_PADDR_SHIFT 12
+
+#define APPLE_SART3_SIZE(idx) (0x80 + 4 * (idx))
+#define APPLE_SART3_SIZE_SHIFT 12
+#define APPLE_SART3_SIZE_MAX GENMASK(29, 0)
+
+static void sart2_get_entry(sart_dev_t *sart, int index, u8 *flags, void **paddr, size_t *size)
+{
+ u32 cfg = read32(sart->base + APPLE_SART2_CONFIG(index));
+ *flags = FIELD_GET(APPLE_SART2_CONFIG_FLAGS, cfg);
+ *size = (size_t)FIELD_GET(APPLE_SART2_CONFIG_SIZE, cfg) << APPLE_SART2_CONFIG_SIZE_SHIFT;
+ *paddr =
+ (void *)((u64)read32(sart->base + APPLE_SART2_PADDR(index)) << APPLE_SART2_PADDR_SHIFT);
+}
+
+static bool sart2_set_entry(sart_dev_t *sart, int index, u8 flags, void *paddr_, size_t size)
+{
+ u32 cfg;
+ u64 paddr = (u64)paddr_;
+
+ if (size & ((1 << APPLE_SART2_CONFIG_SIZE_SHIFT) - 1))
+ return false;
+ if (paddr & ((1 << APPLE_SART2_PADDR_SHIFT) - 1))
+ return false;
+
+ size >>= APPLE_SART2_CONFIG_SIZE_SHIFT;
+ paddr >>= APPLE_SART2_PADDR_SHIFT;
+
+ if (size > APPLE_SART2_CONFIG_SIZE_MAX)
+ return false;
+
+ cfg = FIELD_PREP(APPLE_SART2_CONFIG_FLAGS, flags);
+ cfg |= FIELD_PREP(APPLE_SART2_CONFIG_SIZE, size);
+
+ write32(sart->base + APPLE_SART2_PADDR(index), paddr);
+ write32(sart->base + APPLE_SART2_CONFIG(index), cfg);
+
+ return true;
+}
+
+static void sart3_get_entry(sart_dev_t *sart, int index, u8 *flags, void **paddr, size_t *size)
+{
+ *flags = read32(sart->base + APPLE_SART3_CONFIG(index));
+ *size = (size_t)read32(sart->base + APPLE_SART3_SIZE(index)) << APPLE_SART3_SIZE_SHIFT;
+ *paddr =
+ (void *)((u64)read32(sart->base + APPLE_SART3_PADDR(index)) << APPLE_SART3_PADDR_SHIFT);
+}
+
+static bool sart3_set_entry(sart_dev_t *sart, int index, u8 flags, void *paddr_, size_t size)
+{
+ u64 paddr = (u64)paddr_;
+ if (size & ((1 << APPLE_SART3_SIZE_SHIFT) - 1))
+ return false;
+ if (paddr & ((1 << APPLE_SART3_PADDR_SHIFT) - 1))
+ return false;
+
+ paddr >>= APPLE_SART3_PADDR_SHIFT;
+ size >>= APPLE_SART3_SIZE_SHIFT;
+
+ if (size > APPLE_SART3_SIZE_MAX)
+ return false;
+
+ write32(sart->base + APPLE_SART3_PADDR(index), paddr);
+ write32(sart->base + APPLE_SART3_SIZE(index), size);
+ write32(sart->base + APPLE_SART3_CONFIG(index), flags);
+
+ return true;
+}
+
+sart_dev_t *sart_init(const char *adt_path)
+{
+ int sart_path[8];
+ int node = adt_path_offset_trace(adt, adt_path, sart_path);
+ if (node < 0) {
+ printf("sart: Error getting SART node %s\n", adt_path);
+ return NULL;
+ }
+
+ u64 base;
+ if (adt_get_reg(adt, sart_path, "reg", 0, &base, NULL) < 0) {
+ printf("sart: Error getting SART %s base address.\n", adt_path);
+ return NULL;
+ }
+
+ const u32 *sart_version = adt_getprop(adt, node, "sart-version", NULL);
+ if (!sart_version) {
+ printf("sart: SART %s has no sart-version property\n", adt_path);
+ return NULL;
+ }
+
+ sart_dev_t *sart = malloc(sizeof(*sart));
+ if (!sart)
+ return NULL;
+
+ memset(sart, 0, sizeof(*sart));
+ sart->base = base;
+
+ switch (*sart_version) {
+ case 2:
+ sart->get_entry = sart2_get_entry;
+ sart->set_entry = sart2_set_entry;
+ break;
+ case 3:
+ sart->get_entry = sart3_get_entry;
+ sart->set_entry = sart3_set_entry;
+ break;
+ default:
+ printf("sart: SART %s has unknown version %d\n", adt_path, *sart_version);
+ free(sart);
+ return NULL;
+ }
+
+ printf("sart: SARTv%d %s at 0x%lx\n", *sart_version, adt_path, base);
+
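+ /* Entries already configured by earlier boot stages are marked protected
+ * and are never reused or cleared. */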
+ sart->protected_entries = 0;
+ for (unsigned int i = 0; i < APPLE_SART_MAX_ENTRIES; ++i) {
+ void *paddr;
+ u8 flags;
+ size_t sz;
+
+ sart->get_entry(sart, i, &flags, &paddr, &sz);
+ if (flags)
+ sart->protected_entries |= 1 << i;
+ }
+
+ return sart;
+}
+
+void sart_free(sart_dev_t *sart)
+{
+ for (unsigned int i = 0; i < APPLE_SART_MAX_ENTRIES; ++i) {
+ if (sart->protected_entries & (1 << i))
+ continue;
+ sart->set_entry(sart, i, 0, NULL, 0);
+ }
+
+ free(sart);
+}
+
+bool sart_add_allowed_region(sart_dev_t *sart, void *paddr, size_t sz)
+{
+ for (unsigned int i = 0; i < APPLE_SART_MAX_ENTRIES; ++i) {
+ void *e_paddr;
+ u8 e_flags;
+ size_t e_sz;
+
+ if (sart->protected_entries & (1 << i))
+ continue;
+
+ sart->get_entry(sart, i, &e_flags, &e_paddr, &e_sz);
+ if (e_flags)
+ continue;
+
+ return sart->set_entry(sart, i, APPLE_SART_FLAGS_ALLOW, paddr, sz);
+ }
+
+ printf("sart: no more free entries\n");
+ return false;
+}
+
+bool sart_remove_allowed_region(sart_dev_t *sart, void *paddr, size_t sz)
+{
+ for (unsigned int i = 0; i < APPLE_SART_MAX_ENTRIES; ++i) {
+ void *e_paddr;
+ u8 e_flags;
+ size_t e_sz;
+
+ if (sart->protected_entries & (1 << i))
+ continue;
+
+ sart->get_entry(sart, i, &e_flags, &e_paddr, &e_sz);
+ if (!e_flags)
+ continue;
+ if (e_paddr != paddr)
+ continue;
+ if (e_sz != sz)
+ continue;
+
+ return sart->set_entry(sart, i, 0, NULL, 0);
+ }
+
+ printf("sart: could not find entry to be removed\n");
+ return false;
+}
diff --git a/tools/src/sart.h b/tools/src/sart.h
new file mode 100644
index 0000000..37828c0
--- /dev/null
+++ b/tools/src/sart.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef SART_H
+#define SART_H
+
+#include "types.h"
+
+typedef struct sart_dev sart_dev_t;
+
+sart_dev_t *sart_init(const char *adt_path);
+void sart_free(sart_dev_t *sart);
+
+bool sart_add_allowed_region(sart_dev_t *sart, void *paddr, size_t sz);
+bool sart_remove_allowed_region(sart_dev_t *sart, void *paddr, size_t sz);
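+
+/*
+ * Typical usage (sketch; "/arm-io/sart-ans" is an example node path):
+ *
+ *   sart_dev_t *sart = sart_init("/arm-io/sart-ans");
+ *   if (sart && sart_add_allowed_region(sart, buf, size)) {
+ *       // ... let the co-processor DMA to/from buf ...
+ *       sart_remove_allowed_region(sart, buf, size);
+ *   }
+ *   sart_free(sart);
+ */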
+
+#endif
diff --git a/tools/src/sep.c b/tools/src/sep.c
new file mode 100644
index 0000000..7a40fef
--- /dev/null
+++ b/tools/src/sep.c
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: MIT */
+
+#include <string.h>
+
+#include "asc.h"
+#include "sep.h"
+#include "types.h"
+#include "utils.h"
+
+#define SEP_MSG_EP GENMASK(7, 0)
+#define SEP_MSG_CMD GENMASK(23, 16)
+#define SEP_MSG_DATA GENMASK(63, 32)
+
+#define SEP_EP_ROM 0xff
+
+#define SEP_MSG_GETRAND 16
+#define SEP_REPLY_GETRAND 116
+
+#define SEP_TIMEOUT 1000
+
+static asc_dev_t *sep_asc = NULL;
+
+int sep_init(void)
+{
+ if (!sep_asc)
+ sep_asc = asc_init("/arm-io/sep");
+ if (!sep_asc)
+ return -1;
+ return 0;
+}
+
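+/*
+ * Each GETRAND round trip yields 32 bits of entropy in SEP_MSG_DATA (the
+ * top half of msg0), so this loops once per 4 bytes requested.
+ */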
+size_t sep_get_random(void *buffer, size_t len)
+{
+ const struct asc_message msg_getrand = {.msg0 = FIELD_PREP(SEP_MSG_EP, SEP_EP_ROM) |
+ FIELD_PREP(SEP_MSG_CMD, SEP_MSG_GETRAND)};
+ int ret;
+ size_t done = 0;
+
+ ret = sep_init();
+ if (ret)
+ return 0;
+
+ while (len) {
+ struct asc_message reply;
+ u32 rng;
+ size_t copy;
+
+ if (!asc_send(sep_asc, &msg_getrand))
+ return done;
+ if (!asc_recv_timeout(sep_asc, &reply, SEP_TIMEOUT))
+ return done;
+ if (FIELD_GET(SEP_MSG_CMD, reply.msg0) != SEP_REPLY_GETRAND) {
+ printf("SEP: unexpected getrand reply: %016lx\n", reply.msg0);
+ return done;
+ }
+
+ rng = FIELD_GET(SEP_MSG_DATA, reply.msg0);
+ copy = sizeof(rng);
+ if (copy > len)
+ copy = len;
+ memcpy(buffer, &rng, copy);
+ done += copy;
+ len -= copy;
+ buffer += copy;
+ }
+
+ return done;
+}
diff --git a/tools/src/sep.h b/tools/src/sep.h
new file mode 100644
index 0000000..8d7d04a
--- /dev/null
+++ b/tools/src/sep.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef SEP_H
+#define SEP_H
+
+#include "asc.h"
+#include "types.h"
+
+int sep_init(void);
+size_t sep_get_random(void *buffer, size_t len);
+
+#endif
diff --git a/tools/src/smp.c b/tools/src/smp.c
new file mode 100644
index 0000000..6ed522d
--- /dev/null
+++ b/tools/src/smp.c
@@ -0,0 +1,296 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "smp.h"
+#include "adt.h"
+#include "cpu_regs.h"
+#include "malloc.h"
+#include "pmgr.h"
+#include "soc.h"
+#include "string.h"
+#include "types.h"
+#include "utils.h"
+
+#define CPU_START_OFF_T8103 0x54000
+#define CPU_START_OFF_T8112 0x34000
+
+#define CPU_REG_CORE GENMASK(7, 0)
+#define CPU_REG_CLUSTER GENMASK(10, 8)
+#define CPU_REG_DIE GENMASK(14, 11)
+
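+/*
+ * Spin-table handshake: a secondary bumps `flag` once when it parks in
+ * smp_secondary_entry() and once more each time it accepts a call. `target`
+ * holds the function to run and is cleared again on completion, so
+ * smp_call4() watches `flag` to see a call picked up and smp_wait() polls
+ * `target` returning to zero.
+ */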
+struct spin_table {
+ u64 mpidr;
+ u64 flag;
+ u64 target;
+ u64 args[4];
+ u64 retval;
+};
+
+void *_reset_stack;
+
+#define DUMMY_STACK_SIZE 0x1000
+u8 dummy_stack[DUMMY_STACK_SIZE];
+
+u8 *secondary_stacks[MAX_CPUS] = {dummy_stack};
+
+static bool wfe_mode = false;
+
+static int target_cpu;
+static struct spin_table spin_table[MAX_CPUS];
+
+extern u8 _vectors_start[0];
+
+void smp_secondary_entry(void)
+{
+ struct spin_table *me = &spin_table[target_cpu];
+
+ if (in_el2())
+ msr(TPIDR_EL2, target_cpu);
+ else
+ msr(TPIDR_EL1, target_cpu);
+
+ printf(" Index: %d (table: %p)\n\n", target_cpu, me);
+
+ me->mpidr = mrs(MPIDR_EL1) & 0xFFFFFF;
+
+ sysop("dmb sy");
+ me->flag = 1;
+ sysop("dmb sy");
+ u64 target;
+
+ while (1) {
+ while (!(target = me->target)) {
+ if (wfe_mode) {
+ sysop("wfe");
+ } else {
+ deep_wfi();
+ msr(SYS_IMP_APL_IPI_SR_EL1, 1);
+ }
+ sysop("isb");
+ }
+ sysop("dmb sy");
+ me->flag++;
+ sysop("dmb sy");
+ me->retval = ((u64(*)(u64 a, u64 b, u64 c, u64 d))target)(me->args[0], me->args[1],
+ me->args[2], me->args[3]);
+ sysop("dmb sy");
+ me->target = 0;
+ sysop("dmb sy");
+ }
+}
+
+static void smp_start_cpu(int index, int die, int cluster, int core, u64 rvbar, u64 cpu_start_base)
+{
+ int i;
+
+ if (index >= MAX_CPUS)
+ return;
+
+ if (spin_table[index].flag)
+ return;
+
+ printf("Starting CPU %d (%d:%d:%d)... ", index, die, cluster, core);
+
+ memset(&spin_table[index], 0, sizeof(struct spin_table));
+
+ target_cpu = index;
+ secondary_stacks[index] = memalign(0x4000, SECONDARY_STACK_SIZE);
+ _reset_stack = secondary_stacks[index] + SECONDARY_STACK_SIZE;
+
+ sysop("dmb sy");
+
+ write64(rvbar, (u64)_vectors_start);
+
+ cpu_start_base += die * PMGR_DIE_OFFSET;
+
+ // Some kind of system level startup/status bit
+ // Without this, IRQs don't work
+ write32(cpu_start_base + 0x4, 1 << (4 * cluster + core));
+
+ // Actually start the core
+ write32(cpu_start_base + 0x8 + 4 * cluster, 1 << core);
+
+ for (i = 0; i < 500; i++) {
+ sysop("dmb ld");
+ if (spin_table[index].flag)
+ break;
+ udelay(1000);
+ }
+
+ if (i >= 500)
+ printf("Failed!\n");
+ else
+ printf(" Started.\n");
+
+ _reset_stack = dummy_stack + DUMMY_STACK_SIZE;
+}
+
+void smp_start_secondaries(void)
+{
+ printf("Starting secondary CPUs...\n");
+
+ int pmgr_path[8];
+ u64 pmgr_reg;
+
+ if (adt_path_offset_trace(adt, "/arm-io/pmgr", pmgr_path) < 0) {
+ printf("Error getting /arm-io/pmgr node\n");
+ return;
+ }
+ if (adt_get_reg(adt, pmgr_path, "reg", 0, &pmgr_reg, NULL) < 0) {
+ printf("Error getting /arm-io/pmgr regs\n");
+ return;
+ }
+
+ int node = adt_path_offset(adt, "/cpus");
+ if (node < 0) {
+ printf("Error getting /cpus node\n");
+ return;
+ }
+
+ int cpu_nodes[MAX_CPUS];
+ u64 cpu_start_off;
+
+ memset(cpu_nodes, 0, sizeof(cpu_nodes));
+
+ switch (chip_id) {
+ case T8103:
+ case T6000:
+ case T6001:
+ case T6002:
+ cpu_start_off = CPU_START_OFF_T8103;
+ break;
+ case T8112:
+ cpu_start_off = CPU_START_OFF_T8112;
+ break;
+ default:
+ printf("CPU start offset is unknown for this SoC!\n");
+ return;
+ }
+
+ ADT_FOREACH_CHILD(adt, node)
+ {
+ u32 cpu_id;
+
+ if (ADT_GETPROP(adt, node, "cpu-id", &cpu_id) < 0)
+ continue;
+ if (cpu_id >= MAX_CPUS) {
+ printf("cpu-id %d exceeds max CPU count %d: increase MAX_CPUS\n", cpu_id, MAX_CPUS);
+ continue;
+ }
+
+ cpu_nodes[cpu_id] = node;
+ }
+
+ for (int i = 1; i < MAX_CPUS; i++) {
+ int node = cpu_nodes[i];
+
+ if (!node)
+ continue;
+
+ u32 reg;
+ u64 cpu_impl_reg[2];
+ if (ADT_GETPROP(adt, node, "reg", &reg) < 0)
+ continue;
+ if (ADT_GETPROP_ARRAY(adt, node, "cpu-impl-reg", cpu_impl_reg) < 0)
+ continue;
+
+ u8 core = FIELD_GET(CPU_REG_CORE, reg);
+ u8 cluster = FIELD_GET(CPU_REG_CLUSTER, reg);
+ u8 die = FIELD_GET(CPU_REG_DIE, reg);
+
+ smp_start_cpu(i, die, cluster, core, cpu_impl_reg[0], pmgr_reg + cpu_start_off);
+ }
+
+ spin_table[0].mpidr = mrs(MPIDR_EL1) & 0xFFFFFF;
+}
+
+void smp_send_ipi(int cpu)
+{
+ if (cpu >= MAX_CPUS)
+ return;
+
+ u64 mpidr = spin_table[cpu].mpidr;
+ msr(SYS_IMP_APL_IPI_RR_GLOBAL_EL1, (mpidr & 0xff) | ((mpidr & 0xff00) << 8));
+}
+
+void smp_call4(int cpu, void *func, u64 arg0, u64 arg1, u64 arg2, u64 arg3)
+{
+ if (cpu >= MAX_CPUS)
+ return;
+
+ struct spin_table *target = &spin_table[cpu];
+
+ if (cpu == 0)
+ return;
+
+ u64 flag = target->flag;
+ target->args[0] = arg0;
+ target->args[1] = arg1;
+ target->args[2] = arg2;
+ target->args[3] = arg3;
+ sysop("dmb sy");
+ target->target = (u64)func;
+ sysop("dsb sy");
+
+ if (wfe_mode)
+ sysop("sev");
+ else
+ smp_send_ipi(cpu);
+
+ while (target->flag == flag)
+ sysop("dmb sy");
+}
+
+u64 smp_wait(int cpu)
+{
+ if (cpu >= MAX_CPUS)
+ return 0;
+
+ struct spin_table *target = &spin_table[cpu];
+
+ while (target->target)
+ sysop("dmb sy");
+
+ return target->retval;
+}
+
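+// Kick every live CPU once so it falls out of its current wait, re-reads
+// wfe_mode, and parks again using the new primitive.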
+void smp_set_wfe_mode(bool new_mode)
+{
+ wfe_mode = new_mode;
+ sysop("dsb sy");
+
+ for (int cpu = 1; cpu < MAX_CPUS; cpu++)
+ if (smp_is_alive(cpu))
+ smp_send_ipi(cpu);
+
+ sysop("sev");
+}
+
+bool smp_is_alive(int cpu)
+{
+ if (cpu >= MAX_CPUS)
+ return false;
+
+ return spin_table[cpu].flag;
+}
+
+uint64_t smp_get_mpidr(int cpu)
+{
+ if (cpu >= MAX_CPUS)
+ return 0;
+
+ return spin_table[cpu].mpidr;
+}
+
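+// Returns the spin-table release word for this CPU: the parked core will
+// jump to whatever entry point is written there, with all four args zeroed
+// (presumably consumed by a next-stage loader using the spin-table method).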
+u64 smp_get_release_addr(int cpu)
+{
+ struct spin_table *target = &spin_table[cpu];
+
+ if (cpu >= MAX_CPUS)
+ return 0;
+
+ target->args[0] = 0;
+ target->args[1] = 0;
+ target->args[2] = 0;
+ target->args[3] = 0;
+ return (u64)&target->target;
+}
diff --git a/tools/src/smp.h b/tools/src/smp.h
new file mode 100644
index 0000000..c802f3e
--- /dev/null
+++ b/tools/src/smp.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef __SMP_H__
+#define __SMP_H__
+
+#include "types.h"
+#include "utils.h"
+
+#define MAX_CPUS 20
+
+#define SECONDARY_STACK_SIZE 0x10000
+extern u8 *secondary_stacks[MAX_CPUS];
+
+void smp_secondary_entry(void);
+
+void smp_start_secondaries(void);
+
+#define smp_call0(i, f) smp_call4(i, f, 0, 0, 0, 0)
+#define smp_call1(i, f, a) smp_call4(i, f, a, 0, 0, 0)
+#define smp_call2(i, f, a, b) smp_call4(i, f, a, b, 0, 0)
+#define smp_call3(i, f, a, b, c) smp_call4(i, f, a, b, c, 0)
+
+void smp_call4(int cpu, void *func, u64 arg0, u64 arg1, u64 arg2, u64 arg3);
+
+u64 smp_wait(int cpu);
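+
+/*
+ * Example (sketch; do_work is a hypothetical u64 (*)(u64) function):
+ *
+ *   smp_call1(2, do_work, arg); // dispatch to CPU 2
+ *   u64 ret = smp_wait(2);      // block until it returns
+ */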
+
+bool smp_is_alive(int cpu);
+uint64_t smp_get_mpidr(int cpu);
+u64 smp_get_release_addr(int cpu);
+void smp_set_wfe_mode(bool new_mode);
+void smp_send_ipi(int cpu);
+
+static inline int smp_id(void)
+{
+ if (in_el2())
+ return mrs(TPIDR_EL2);
+ else
+ return mrs(TPIDR_EL1);
+}
+
+#endif
diff --git a/tools/src/soc.h b/tools/src/soc.h
new file mode 100644
index 0000000..26ddddc
--- /dev/null
+++ b/tools/src/soc.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef __SOC_H__
+#define __SOC_H__
+
+#include "../config.h"
+
+#define T8103 0x8103
+#define T8112 0x8112
+#define T6000 0x6000
+#define T6001 0x6001
+#define T6002 0x6002
+
+#ifdef TARGET
+
+#if TARGET == T8103
+#define EARLY_UART_BASE 0x235200000
+#elif TARGET == T6000 || TARGET == T6001 || TARGET == T6002
+#define EARLY_UART_BASE 0x39b200000
+#elif TARGET == T8112
+#define EARLY_UART_BASE 0x235200000
+#endif
+
+#endif
+#endif
diff --git a/tools/src/start.S b/tools/src/start.S
new file mode 100644
index 0000000..b0051e6
--- /dev/null
+++ b/tools/src/start.S
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "soc.h"
+
+#define UTRSTAT 0x010
+#define UTXH 0x020
+
+.extern _start_c
+.extern _stack_bot
+.extern _v_sp0_sync
+.extern _v_sp0_irq
+.extern _v_sp0_fiq
+.extern _v_sp0_serr
+.extern _reset_stack
+.extern _cpu_reset_c
+.extern wdt_reboot
+
+.section .init, "ax"
+
+.align 11
+.globl _vectors_start
+_vectors_start:
+
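+/*
+ * AArch64 vector table: 16 entries, 128 bytes apart (hence the .align 7
+ * between entries and .align 11 for the 2KB table base). Entry 0 doubles
+ * as the CPU reset entry point, since smp_start_cpu() points RVBAR at
+ * _vectors_start.
+ */
+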
+ mov x9, '0'
+ b cpu_reset
+ .align 7
+ mov x9, '1'
+ b exc_unk
+ .align 7
+ mov x9, '2'
+ b exc_unk
+ .align 7
+ mov x9, '3'
+ b exc_unk
+ .align 7
+ b _v_sp0_sync
+ .align 7
+ b _v_sp0_irq
+ .align 7
+ b _v_sp0_fiq
+ .align 7
+ b _v_sp0_serr
+ .align 7
+ b _v_sp0_sync
+ .align 7
+ b _v_sp0_irq
+ .align 7
+ b _v_sp0_fiq
+ .align 7
+ b _v_sp0_serr
+ .align 7
+ mov x9, 'p'
+ b exc_unk
+ .align 7
+ mov x9, 'q'
+ b exc_unk
+ .align 7
+ mov x9, 'r'
+ b exc_unk
+ .align 7
+ mov x9, 's'
+ b exc_unk
+ .align 7
+
+.globl _start
+.type _start, @function
+_start:
+ mov x19, x0
+
+ mov w0, 'm'
+ bl debug_putc
+
+ adrp x1, _stack_bot
+ mov sp, x1
+
+ mov w0, '1'
+ bl debug_putc
+
+ ldr x2, [sp, #-8]
+
+ mov w0, 'n'
+ bl debug_putc
+
+ adrp x0, _base
+ mov x20, x0
+ adrp x1, _rela_start
+ add x1, x1, :lo12:_rela_start
+ adrp x2, _rela_end
+ add x2, x2, :lo12:_rela_end
+ bl apply_rela
+
+ mov w0, '1'
+ bl debug_putc
+ mov w0, 0xd /* '\r', clang compat */
+ bl debug_putc
+ mov w0, '\n'
+ bl debug_putc
+
+ mov x0, x19
+ mov x1, x20
+ bl _start_c
+ b .
+
+.globl exc_unk
+.type exc_unk, @function
+exc_unk:
+ mov w0, 0xd /* '\r', clang compat */
+ bl debug_putc
+ mov w0, '\n'
+ bl debug_putc
+ mov w0, '!'
+ bl debug_putc
+ mov w0, 'E'
+ bl debug_putc
+ mov w0, 'x'
+ bl debug_putc
+ mov w0, 'C'
+ bl debug_putc
+ mov w0, ':'
+ bl debug_putc
+ mov w0, w9
+ bl debug_putc
+ mov w0, '!'
+ bl debug_putc
+ mov w0, 0xd /* '\r', clang compat */
+ bl debug_putc
+ mov w0, '\n'
+ bl debug_putc
+ b reboot
+
+.globl cpu_reset
+.type cpu_reset, @function
+cpu_reset:
+ mov w0, 'O'
+ bl debug_putc
+
+ adrp x1, _reset_stack
+ add x1, x1, :lo12:_reset_stack
+ ldr x1, [x1]
+ mov sp, x1
+
+ ldr x2, [sp, #-8]
+
+ mov w0, 'K'
+ bl debug_putc
+
+ mov x0, sp
+ bl _cpu_reset_c
+ b .
+
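+/*
+ * Early UART putc: spin on UTRSTAT until the TX buffer is empty, then
+ * write the byte to UTXH (Samsung-style UART register layout).
+ */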
+.globl debug_putc
+.type debug_putc, @function
+debug_putc:
+#ifdef EARLY_UART_BASE
+ ldr x1, =EARLY_UART_BASE
+
+1:
+ ldr w2, [x1, UTRSTAT]
+ tst w2, #2
+ beq 1b
+ str w0, [x1, UTXH]
+#endif
+ ret
+
+.globl reboot
+.type reboot, @function
+reboot:
+ mrs x0, CurrentEL
+ cmp x0, #8
+ beq 1f
+ hvc #0
+1:
+ bl wdt_reboot
+ b .
+
+.pool
diff --git a/tools/src/startup.c b/tools/src/startup.c
new file mode 100644
index 0000000..1052707
--- /dev/null
+++ b/tools/src/startup.c
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "chickens.h"
+#include "exception.h"
+#include "smp.h"
+#include "string.h"
+#include "types.h"
+#include "uart.h"
+#include "utils.h"
+#include "xnuboot.h"
+
+u64 boot_args_addr;
+struct boot_args cur_boot_args;
+void *adt;
+
+struct rela_entry {
+ uint64_t off, type, addend;
+};
+
+void debug_putc(char c);
+void m1n1_main(void);
+
+extern char _bss_start[0];
+extern char _bss_end[0];
+
+#define R_AARCH64_RELATIVE 1027
+
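+/*
+ * R_AARCH64_RELATIVE entries carry no symbol: the word at base + off is
+ * simply rewritten to base + addend, which is all a position-independent
+ * m1n1 needs to run from its load address.
+ */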
+void apply_rela(uint64_t base, struct rela_entry *rela_start, struct rela_entry *rela_end)
+{
+ struct rela_entry *e = rela_start;
+
+ while (e < rela_end) {
+ switch (e->type) {
+ case R_AARCH64_RELATIVE:
+ *(u64 *)(base + e->off) = base + e->addend;
+ break;
+ default:
+ debug_putc('R');
+ debug_putc('!');
+ while (1)
+ ;
+ }
+ e++;
+ }
+}
+
+void dump_boot_args(struct boot_args *ba)
+{
+ printf(" revision: %d\n", ba->revision);
+ printf(" version: %d\n", ba->version);
+ printf(" virt_base: 0x%lx\n", ba->virt_base);
+ printf(" phys_base: 0x%lx\n", ba->phys_base);
+ printf(" mem_size: 0x%lx\n", ba->mem_size);
+ printf(" top_of_kdata: 0x%lx\n", ba->top_of_kernel_data);
+ printf(" video:\n");
+ printf(" base: 0x%lx\n", ba->video.base);
+ printf(" display: 0x%lx\n", ba->video.display);
+ printf(" stride: 0x%lx\n", ba->video.stride);
+ printf(" width: %lu\n", ba->video.width);
+ printf(" height: %lu\n", ba->video.height);
+ printf(" depth: %lubpp\n", ba->video.depth & 0xff);
+ printf(" density: %lu\n", ba->video.depth >> 16);
+ printf(" machine_type: %d\n", ba->machine_type);
+ printf(" devtree: %p\n", ba->devtree);
+ printf(" devtree_size: 0x%x\n", ba->devtree_size);
+ printf(" cmdline: %s\n", ba->cmdline);
+ printf(" boot_flags: 0x%lx\n", ba->boot_flags);
+ printf(" mem_size_act: 0x%lx\n", ba->mem_size_actual);
+}
+
+void _start_c(void *boot_args, void *base)
+{
+ UNUSED(base);
+
+ if (in_el2())
+ msr(TPIDR_EL2, 0);
+ else
+ msr(TPIDR_EL1, 0);
+
+ memset64(_bss_start, 0, _bss_end - _bss_start);
+ boot_args_addr = (u64)boot_args;
+ memcpy(&cur_boot_args, boot_args, sizeof(cur_boot_args));
+
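+    // The devtree pointer in boot_args is relative to iBoot's virtual base;
+    // rebase it to a physical address before use.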
+ adt =
+ (void *)(((u64)cur_boot_args.devtree) - cur_boot_args.virt_base + cur_boot_args.phys_base);
+
+ int ret = uart_init();
+ if (ret < 0) {
+ debug_putc('!');
+ }
+
+ uart_puts("Initializing");
+ printf("CPU init (MIDR: 0x%lx)...\n", mrs(MIDR_EL1));
+ const char *type = init_cpu();
+ printf(" CPU: %s\n\n", type);
+
+ printf("boot_args at %p\n", boot_args);
+
+ dump_boot_args(&cur_boot_args);
+ printf("\n");
+
+ exception_initialize();
+ m1n1_main();
+}
+
+/* Secondary SMP core boot */
+void _cpu_reset_c(void *stack)
+{
+ if (mrs(MPIDR_EL1) & 0xffffff)
+ uart_puts("RVBAR entry on secondary CPU");
+ else
+ uart_puts("RVBAR entry on primary CPU");
+
+ printf("\n Stack base: %p\n", stack);
+ printf(" MPIDR: 0x%lx\n", mrs(MPIDR_EL1));
+ const char *type = init_cpu();
+ printf(" CPU: %s\n", type);
+
+ exception_initialize();
+ smp_secondary_entry();
+}
diff --git a/tools/src/string.c b/tools/src/string.c
new file mode 100644
index 0000000..318d0fc
--- /dev/null
+++ b/tools/src/string.c
@@ -0,0 +1,209 @@
+/* SPDX-License-Identifier: MIT */
+
+#include <stdbool.h>
+
+#include "string.h"
+
+// Routines based on The Public Domain C Library
+
+void *memcpy(void *s1, const void *s2, size_t n)
+{
+ char *dest = (char *)s1;
+ const char *src = (const char *)s2;
+
+ while (n--) {
+ *dest++ = *src++;
+ }
+
+ return s1;
+}
+
+void *memmove(void *s1, const void *s2, size_t n)
+{
+ char *dest = (char *)s1;
+ const char *src = (const char *)s2;
+
+ if (dest <= src) {
+ while (n--) {
+ *dest++ = *src++;
+ }
+ } else {
+ src += n;
+ dest += n;
+
+ while (n--) {
+ *--dest = *--src;
+ }
+ }
+
+ return s1;
+}
+
+int memcmp(const void *s1, const void *s2, size_t n)
+{
+ const unsigned char *p1 = (const unsigned char *)s1;
+ const unsigned char *p2 = (const unsigned char *)s2;
+
+ while (n--) {
+ if (*p1 != *p2) {
+ return *p1 - *p2;
+ }
+
+ ++p1;
+ ++p2;
+ }
+
+ return 0;
+}
+
+void *memset(void *s, int c, size_t n)
+{
+ unsigned char *p = (unsigned char *)s;
+
+ while (n--) {
+ *p++ = (unsigned char)c;
+ }
+
+ return s;
+}
+
+void *memchr(const void *s, int c, size_t n)
+{
+ const unsigned char *p = (const unsigned char *)s;
+
+ while (n--) {
+ if (*p == (unsigned char)c) {
+ return (void *)p;
+ }
+
+ ++p;
+ }
+
+ return NULL;
+}
+
+char *strcpy(char *s1, const char *s2)
+{
+ char *rc = s1;
+
+ while ((*s1++ = *s2++)) {
+ /* EMPTY */
+ }
+
+ return rc;
+}
+
+char *strncpy(char *s1, const char *s2, size_t n)
+{
+ char *rc = s1;
+
+ while (n && (*s1++ = *s2++)) {
+ /* Cannot do "n--" in the conditional as size_t is unsigned and we have
+ to check it again for >0 in the next loop below, so we must not risk
+ underflow.
+ */
+ --n;
+ }
+
+ /* Checking against 1 as we missed the last --n in the loop above. */
+ while (n-- > 1) {
+ *s1++ = '\0';
+ }
+
+ return rc;
+}
+
+int strcmp(const char *s1, const char *s2)
+{
+ while ((*s1) && (*s1 == *s2)) {
+ ++s1;
+ ++s2;
+ }
+
+ return (*(unsigned char *)s1 - *(unsigned char *)s2);
+}
+
+int strncmp(const char *s1, const char *s2, size_t n)
+{
+ while (n && *s1 && (*s1 == *s2)) {
+ ++s1;
+ ++s2;
+ --n;
+ }
+
+ if (n == 0) {
+ return 0;
+ } else {
+ return (*(unsigned char *)s1 - *(unsigned char *)s2);
+ }
+}
+
+size_t strlen(const char *s)
+{
+ size_t rc = 0;
+
+ while (s[rc]) {
+ ++rc;
+ }
+
+ return rc;
+}
+
+size_t strnlen(const char *s, size_t n)
+{
+ size_t rc = 0;
+
+ while (rc < n && s[rc]) {
+ ++rc;
+ }
+
+ return rc;
+}
+
+char *strchr(const char *s, int c)
+{
+ do {
+ if (*s == (char)c) {
+ return (char *)s;
+ }
+ } while (*s++);
+
+ return NULL;
+}
+
+char *strrchr(const char *s, int c)
+{
+ size_t i = 0;
+
+ while (s[i++]) {
+ /* EMPTY */
+ }
+
+ do {
+ if (s[--i] == (char)c) {
+ return (char *)s + i;
+ }
+ } while (i);
+
+ return NULL;
+}
+
+/* Very naive, no attempt to check for errors */
+long atol(const char *s)
+{
+ long val = 0;
+ bool neg = false;
+
+ if (*s == '-') {
+ neg = true;
+ s++;
+ }
+
+ while (*s >= '0' && *s <= '9')
+ val = (val * 10) + (*s++ - '0');
+
+ if (neg)
+ val = -val;
+
+ return val;
+}
diff --git a/tools/src/tinf/adler32.c b/tools/src/tinf/adler32.c
new file mode 100644
index 0000000..5b3c54f
--- /dev/null
+++ b/tools/src/tinf/adler32.c
@@ -0,0 +1,95 @@
+/*
+ * Adler-32 checksum
+ *
+ * Copyright (c) 2003-2019 Joergen Ibsen
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must
+ * not claim that you wrote the original software. If you use this
+ * software in a product, an acknowledgment in the product
+ * documentation would be appreciated but is not required.
+ *
+ * 2. Altered source versions must be plainly marked as such, and must
+ * not be misrepresented as being the original software.
+ *
+ * 3. This notice may not be removed or altered from any source
+ * distribution.
+ */
+
+/*
+ * Adler-32 algorithm taken from the zlib source, which is
+ * Copyright (C) 1995-1998 Jean-loup Gailly and Mark Adler
+ */
+
+#include "tinf.h"
+
+#define A32_BASE 65521
+#define A32_NMAX 5552
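+
+/*
+ * A32_NMAX is the largest n such that 255n(n+1)/2 + (n+1)(A32_BASE-1)
+ * fits in 32 bits, i.e. how many bytes can be summed before s1 and s2
+ * must be reduced modulo A32_BASE.
+ */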
+
+unsigned int tinf_adler32(const void *data, unsigned int length)
+{
+ const unsigned char *buf = (const unsigned char *) data;
+
+ unsigned int s1 = 1;
+ unsigned int s2 = 0;
+
+ while (length > 0) {
+ int k = length < A32_NMAX ? length : A32_NMAX;
+ int i;
+
+ for (i = k / 16; i; --i, buf += 16) {
+ s1 += buf[0];
+ s2 += s1;
+ s1 += buf[1];
+ s2 += s1;
+ s1 += buf[2];
+ s2 += s1;
+ s1 += buf[3];
+ s2 += s1;
+ s1 += buf[4];
+ s2 += s1;
+ s1 += buf[5];
+ s2 += s1;
+ s1 += buf[6];
+ s2 += s1;
+ s1 += buf[7];
+ s2 += s1;
+
+ s1 += buf[8];
+ s2 += s1;
+ s1 += buf[9];
+ s2 += s1;
+ s1 += buf[10];
+ s2 += s1;
+ s1 += buf[11];
+ s2 += s1;
+ s1 += buf[12];
+ s2 += s1;
+ s1 += buf[13];
+ s2 += s1;
+ s1 += buf[14];
+ s2 += s1;
+ s1 += buf[15];
+ s2 += s1;
+ }
+
+ for (i = k % 16; i; --i) {
+ s1 += *buf++;
+ s2 += s1;
+ }
+
+ s1 %= A32_BASE;
+ s2 %= A32_BASE;
+
+ length -= k;
+ }
+
+ return (s2 << 16) | s1;
+}
diff --git a/tools/src/tinf/crc32.c b/tools/src/tinf/crc32.c
new file mode 100644
index 0000000..b83232c
--- /dev/null
+++ b/tools/src/tinf/crc32.c
@@ -0,0 +1,57 @@
+/*
+ * CRC32 checksum
+ *
+ * Copyright (c) 1998-2019 Joergen Ibsen
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must
+ * not claim that you wrote the original software. If you use this
+ * software in a product, an acknowledgment in the product
+ * documentation would be appreciated but is not required.
+ *
+ * 2. Altered source versions must be plainly marked as such, and must
+ * not be misrepresented as being the original software.
+ *
+ * 3. This notice may not be removed or altered from any source
+ * distribution.
+ */
+
+/*
+ * CRC32 algorithm taken from the zlib source, which is
+ * Copyright (C) 1995-1998 Jean-loup Gailly and Mark Adler
+ */
+
+#include "tinf.h"
+
+static const unsigned int tinf_crc32tab[16] = {
+ 0x00000000, 0x1DB71064, 0x3B6E20C8, 0x26D930AC, 0x76DC4190,
+ 0x6B6B51F4, 0x4DB26158, 0x5005713C, 0xEDB88320, 0xF00F9344,
+ 0xD6D6A3E8, 0xCB61B38C, 0x9B64C2B0, 0x86D3D2D4, 0xA00AE278,
+ 0xBDBDF21C
+};
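+
+/*
+ * Nibble-wise CRC-32 (reflected polynomial 0xEDB88320): two lookups per
+ * byte in a 16-entry table instead of one in the usual 256-entry table,
+ * trading a little speed for a much smaller footprint.
+ */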
+
+unsigned int tinf_crc32(const void *data, unsigned int length)
+{
+ const unsigned char *buf = (const unsigned char *) data;
+ unsigned int crc = 0xFFFFFFFF;
+ unsigned int i;
+
+ if (length == 0) {
+ return 0;
+ }
+
+ for (i = 0; i < length; ++i) {
+ crc ^= buf[i];
+ crc = tinf_crc32tab[crc & 0x0F] ^ (crc >> 4);
+ crc = tinf_crc32tab[crc & 0x0F] ^ (crc >> 4);
+ }
+
+ return crc ^ 0xFFFFFFFF;
+}
diff --git a/tools/src/tinf/tinf.h b/tools/src/tinf/tinf.h
new file mode 100644
index 0000000..ab23c83
--- /dev/null
+++ b/tools/src/tinf/tinf.h
@@ -0,0 +1,142 @@
+/*
+ * tinf - tiny inflate library (inflate, gzip, zlib)
+ *
+ * Copyright (c) 2003-2019 Joergen Ibsen
+ *
+ * This version of tinf was modified for use with m1n1.
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must
+ * not claim that you wrote the original software. If you use this
+ * software in a product, an acknowledgment in the product
+ * documentation would be appreciated but is not required.
+ *
+ * 2. Altered source versions must be plainly marked as such, and must
+ * not be misrepresented as being the original software.
+ *
+ * 3. This notice may not be removed or altered from any source
+ * distribution.
+ */
+
+#ifndef TINF_H_INCLUDED
+#define TINF_H_INCLUDED
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define TINF_VER_MAJOR 1 /**< Major version number */
+#define TINF_VER_MINOR 2 /**< Minor version number */
+#define TINF_VER_PATCH 1 /**< Patch version number */
+#define TINF_VER_STRING "1.2.1" /**< Version number as a string */
+
+#ifndef TINFCC
+# ifdef __WATCOMC__
+# define TINFCC __cdecl
+# else
+# define TINFCC
+# endif
+#endif
+
+/**
+ * Status codes returned.
+ *
+ * @see tinf_uncompress, tinf_gzip_uncompress, tinf_zlib_uncompress
+ */
+typedef enum {
+ TINF_OK = 0, /**< Success */
+ TINF_DATA_ERROR = -3, /**< Input error */
+ TINF_BUF_ERROR = -5 /**< Not enough room for output */
+} tinf_error_code;
+
+/**
+ * Initialize global data used by tinf.
+ *
+ * @deprecated No longer required, may be removed in a future version.
+ */
+void TINFCC tinf_init(void);
+
+/**
+ * Decompress `sourceLen` bytes of deflate data from `source` to `dest`.
+ *
+ * The variable `destLen` points to must contain the size of `dest` on entry,
+ * and will be set to the size of the decompressed data on success.
+ *
+ * Reads at most `sourceLen` bytes from `source`.
+ * Writes at most `*destLen` bytes to `dest`.
+ *
+ * @param dest pointer to where to place decompressed data
+ * @param destLen pointer to variable containing size of `dest`
+ * @param source pointer to compressed data
+ * @param sourceLen size of compressed data
+ * @return `TINF_OK` on success, error code on error
+ */
+int TINFCC tinf_uncompress(void *dest, unsigned int *destLen,
+ const void *source, unsigned int *sourceLen);
+
+/**
+ * Decompress `sourceLen` bytes of gzip data from `source` to `dest`.
+ *
+ * The variable `destLen` points to must contain the size of `dest` on entry,
+ * and will be set to the size of the decompressed data on success.
+ *
+ * Reads at most `sourceLen` bytes from `source`.
+ * Writes at most `*destLen` bytes to `dest`.
+ *
+ * @param dest pointer to where to place decompressed data
+ * @param destLen pointer to variable containing size of `dest`
+ * @param source pointer to compressed data
+ * @param sourceLen size of compressed data
+ * @return `TINF_OK` on success, error code on error
+ */
+int TINFCC tinf_gzip_uncompress(void *dest, unsigned int *destLen,
+ const void *source, unsigned int *sourceLen);
+
+/**
+ * Decompress `sourceLen` bytes of zlib data from `source` to `dest`.
+ *
+ * The variable `destLen` points to must contain the size of `dest` on entry,
+ * and will be set to the size of the decompressed data on success.
+ *
+ * Reads at most `sourceLen` bytes from `source`.
+ * Writes at most `*destLen` bytes to `dest`.
+ *
+ * @param dest pointer to where to place decompressed data
+ * @param destLen pointer to variable containing size of `dest`
+ * @param source pointer to compressed data
+ * @param sourceLen size of compressed data
+ * @return `TINF_OK` on success, error code on error
+ */
+int TINFCC tinf_zlib_uncompress(void *dest, unsigned int *destLen,
+ const void *source, unsigned int *sourceLen);
+
+/**
+ * Compute Adler-32 checksum of `length` bytes starting at `data`.
+ *
+ * @param data pointer to data
+ * @param length size of data
+ * @return Adler-32 checksum
+ */
+unsigned int TINFCC tinf_adler32(const void *data, unsigned int length);
+
+/**
+ * Compute CRC32 checksum of `length` bytes starting at `data`.
+ *
+ * @param data pointer to data
+ * @param length size of data
+ * @return CRC32 checksum
+ */
+unsigned int TINFCC tinf_crc32(const void *data, unsigned int length);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* TINF_H_INCLUDED */
diff --git a/tools/src/tinf/tinfgzip.c b/tools/src/tinf/tinfgzip.c
new file mode 100644
index 0000000..ea07cd7
--- /dev/null
+++ b/tools/src/tinf/tinfgzip.c
@@ -0,0 +1,191 @@
+/*
+ * tinfgzip - tiny gzip decompressor
+ *
+ * Copyright (c) 2003-2019 Joergen Ibsen
+ *
+ * This version of tinfgzip was modified for use with m1n1.
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must
+ * not claim that you wrote the original software. If you use this
+ * software in a product, an acknowledgment in the product
+ * documentation would be appreciated but is not required.
+ *
+ * 2. Altered source versions must be plainly marked as such, and must
+ * not be misrepresented as being the original software.
+ *
+ * 3. This notice may not be removed or altered from any source
+ * distribution.
+ */
+
+#include "tinf.h"
+
+typedef enum {
+ FTEXT = 1,
+ FHCRC = 2,
+ FEXTRA = 4,
+ FNAME = 8,
+ FCOMMENT = 16
+} tinf_gzip_flag;
+
+static unsigned int read_le16(const unsigned char *p)
+{
+ return ((unsigned int) p[0])
+ | ((unsigned int) p[1] << 8);
+}
+
+static unsigned int read_le32(const unsigned char *p)
+{
+ return ((unsigned int) p[0])
+ | ((unsigned int) p[1] << 8)
+ | ((unsigned int) p[2] << 16)
+ | ((unsigned int) p[3] << 24);
+}
+
+int tinf_gzip_uncompress(void *dest, unsigned int *destLen,
+ const void *source, unsigned int *sourceLen)
+{
+ const unsigned char *src = (const unsigned char *) source;
+ unsigned char *dst = (unsigned char *) dest;
+ const unsigned char *start;
+ unsigned int dlen, crc32;
+ int res;
+ unsigned char flg;
+ unsigned int sourceDataLen = 0;
+
+ /* -- Check header -- */
+
+ /* Check room for at least 10 byte header and 8 byte trailer */
+ if (*sourceLen && *sourceLen < 18) {
+ return TINF_DATA_ERROR;
+ }
+
+ /* Check id bytes */
+ if (src[0] != 0x1F || src[1] != 0x8B) {
+ return TINF_DATA_ERROR;
+ }
+
+ /* Check method is deflate */
+ if (src[2] != 8) {
+ return TINF_DATA_ERROR;
+ }
+
+ /* Get flag byte */
+ flg = src[3];
+
+ /* Check that reserved bits are zero */
+ if (flg & 0xE0) {
+ return TINF_DATA_ERROR;
+ }
+
+ /* -- Find start of compressed data -- */
+
+ /* Skip base header of 10 bytes */
+ start = src + 10;
+
+ /* Skip extra data if present */
+ if (flg & FEXTRA) {
+ unsigned int xlen = read_le16(start);
+
+ if (*sourceLen && xlen > *sourceLen - 12) {
+ return TINF_DATA_ERROR;
+ }
+
+ start += xlen + 2;
+ }
+
+ /* Skip file name if present */
+ if (flg & FNAME) {
+ do {
+ if (*sourceLen && start - src >= *sourceLen) {
+ return TINF_DATA_ERROR;
+ }
+ } while (*start++);
+ }
+
+ /* Skip file comment if present */
+ if (flg & FCOMMENT) {
+ do {
+ if (*sourceLen && start - src >= *sourceLen) {
+ return TINF_DATA_ERROR;
+ }
+ } while (*start++);
+ }
+
+ /* Check header crc if present */
+ if (flg & FHCRC) {
+ unsigned int hcrc;
+
+ if (*sourceLen && start - src > *sourceLen - 2) {
+ return TINF_DATA_ERROR;
+ }
+
+ hcrc = read_le16(start);
+
+ if (hcrc != (tinf_crc32(src, start - src) & 0x0000FFFF)) {
+ return TINF_DATA_ERROR;
+ }
+
+ start += 2;
+ }
+
+ /* -- Get decompressed length if available -- */
+
+ if (*sourceLen) {
+ dlen = read_le32(&src[*sourceLen - 4]);
+
+ if (dlen > *destLen) {
+ return TINF_BUF_ERROR;
+ }
+ }
+
+ /* -- Check source length if available -- */
+
+ if (*sourceLen) {
+ if ((src + *sourceLen) - start < 8) {
+ return TINF_DATA_ERROR;
+ }
+ sourceDataLen = (src + *sourceLen) - start - 8;
+ }
+
+ /* -- Decompress data -- */
+
+ res = tinf_uncompress(dst, destLen, start, &sourceDataLen);
+
+ if (res != TINF_OK) {
+ return TINF_DATA_ERROR;
+ }
+
+ sourceDataLen += (start - src) + 8;
+
+ if (*sourceLen && *sourceLen != sourceDataLen) {
+ return TINF_DATA_ERROR;
+ }
+
+ *sourceLen = sourceDataLen;
+
+ /* -- Check decompressed length -- */
+
+ dlen = read_le32(&src[*sourceLen - 4]);
+
+ if (*destLen != dlen) {
+ return TINF_DATA_ERROR;
+ }
+
+ /* -- Check CRC32 checksum -- */
+
+ crc32 = read_le32(&src[*sourceLen - 8]);
+
+ if (crc32 != tinf_crc32(dst, dlen)) {
+ return TINF_DATA_ERROR;
+ }
+
+ return TINF_OK;
+}
diff --git a/tools/src/tinf/tinflate.c b/tools/src/tinf/tinflate.c
new file mode 100644
index 0000000..c82526c
--- /dev/null
+++ b/tools/src/tinf/tinflate.c
@@ -0,0 +1,648 @@
+/*
+ * tinflate - tiny inflate
+ *
+ * Copyright (c) 2003-2019 Joergen Ibsen
+ *
+ * This version of tinflate was modified for use with m1n1.
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must
+ * not claim that you wrote the original software. If you use this
+ * software in a product, an acknowledgment in the product
+ * documentation would be appreciated but is not required.
+ *
+ * 2. Altered source versions must be plainly marked as such, and must
+ * not be misrepresented as being the original software.
+ *
+ * 3. This notice may not be removed or altered from any source
+ * distribution.
+ */
+
+#include "tinf.h"
+
+#include <assert.h>
+#include <limits.h>
+
+#if defined(UINT_MAX) && (UINT_MAX) < 0xFFFFFFFFUL
+# error "tinf requires unsigned int to be at least 32-bit"
+#endif
+
+/* -- Internal data structures -- */
+
+struct tinf_tree {
+ unsigned short counts[16]; /* Number of codes with a given length */
+ unsigned short symbols[288]; /* Symbols sorted by code */
+ int max_sym;
+};
+
+struct tinf_data {
+ const unsigned char *source;
+ const unsigned char *source_end;
+ unsigned int tag;
+ int bitcount;
+ int overflow;
+
+ unsigned char *dest_start;
+ unsigned char *dest;
+ unsigned char *dest_end;
+
+ struct tinf_tree ltree; /* Literal/length tree */
+ struct tinf_tree dtree; /* Distance tree */
+};
+
+/* -- Utility functions -- */
+
+static unsigned int read_le16(const unsigned char *p)
+{
+ return ((unsigned int) p[0])
+ | ((unsigned int) p[1] << 8);
+}
+
+/* Build fixed Huffman trees */
+static void tinf_build_fixed_trees(struct tinf_tree *lt, struct tinf_tree *dt)
+{
+ int i;
+
+ /* Build fixed literal/length tree */
+ for (i = 0; i < 16; ++i) {
+ lt->counts[i] = 0;
+ }
+
+ lt->counts[7] = 24;
+ lt->counts[8] = 152;
+ lt->counts[9] = 112;
+
+ for (i = 0; i < 24; ++i) {
+ lt->symbols[i] = 256 + i;
+ }
+ for (i = 0; i < 144; ++i) {
+ lt->symbols[24 + i] = i;
+ }
+ for (i = 0; i < 8; ++i) {
+ lt->symbols[24 + 144 + i] = 280 + i;
+ }
+ for (i = 0; i < 112; ++i) {
+ lt->symbols[24 + 144 + 8 + i] = 144 + i;
+ }
+
+ lt->max_sym = 285;
+
+ /* Build fixed distance tree */
+ for (i = 0; i < 16; ++i) {
+ dt->counts[i] = 0;
+ }
+
+ dt->counts[5] = 32;
+
+ for (i = 0; i < 32; ++i) {
+ dt->symbols[i] = i;
+ }
+
+ dt->max_sym = 29;
+}
+
+/* Given an array of code lengths, build a tree */
+static int tinf_build_tree(struct tinf_tree *t, const unsigned char *lengths,
+ unsigned int num)
+{
+ unsigned short offs[16];
+ unsigned int i, num_codes, available;
+
+ assert(num <= 288);
+
+ for (i = 0; i < 16; ++i) {
+ t->counts[i] = 0;
+ }
+
+ t->max_sym = -1;
+
+ /* Count number of codes for each non-zero length */
+ for (i = 0; i < num; ++i) {
+ assert(lengths[i] <= 15);
+
+ if (lengths[i]) {
+ t->max_sym = i;
+ t->counts[lengths[i]]++;
+ }
+ }
+
+ /* Compute offset table for distribution sort */
+ for (available = 1, num_codes = 0, i = 0; i < 16; ++i) {
+ unsigned int used = t->counts[i];
+
+ /* Check length contains no more codes than available */
+ if (used > available) {
+ return TINF_DATA_ERROR;
+ }
+ available = 2 * (available - used);
+
+ offs[i] = num_codes;
+ num_codes += used;
+ }
+
+ /*
+ * Check all codes were used, or for the special case of only one
+ * code that it has length 1
+ */
+ if ((num_codes > 1 && available > 0)
+ || (num_codes == 1 && t->counts[1] != 1)) {
+ return TINF_DATA_ERROR;
+ }
+
+ /* Fill in symbols sorted by code */
+ for (i = 0; i < num; ++i) {
+ if (lengths[i]) {
+ t->symbols[offs[lengths[i]]++] = i;
+ }
+ }
+
+ /*
+ * For the special case of only one code (which will be 0) add a
+ * code 1 which results in a symbol that is too large
+ */
+ if (num_codes == 1) {
+ t->counts[1] = 2;
+ t->symbols[1] = t->max_sym + 1;
+ }
+
+ return TINF_OK;
+}
+
+/* -- Decode functions -- */
+
+static void tinf_refill(struct tinf_data *d, int num)
+{
+ assert(num >= 0 && num <= 32);
+
+ /* Read bytes until at least num bits available */
+ while (d->bitcount < num) {
+ if (d->source != d->source_end) {
+ d->tag |= (unsigned int) *d->source++ << d->bitcount;
+ }
+ else {
+ d->overflow = 1;
+ }
+ d->bitcount += 8;
+ }
+
+ assert(d->bitcount <= 32);
+}
+
+static unsigned int tinf_getbits_no_refill(struct tinf_data *d, int num)
+{
+ unsigned int bits;
+
+ assert(num >= 0 && num <= d->bitcount);
+
+ /* Get bits from tag */
+ bits = d->tag & ((1UL << num) - 1);
+
+ /* Remove bits from tag */
+ d->tag >>= num;
+ d->bitcount -= num;
+
+ return bits;
+}
+
+/* Get num bits from source stream */
+static unsigned int tinf_getbits(struct tinf_data *d, int num)
+{
+ tinf_refill(d, num);
+ return tinf_getbits_no_refill(d, num);
+}
+
+/* Read a num bit value from stream and add base */
+static unsigned int tinf_getbits_base(struct tinf_data *d, int num, int base)
+{
+ return base + (num ? tinf_getbits(d, num) : 0);
+}
+
+/* Given a data stream and a tree, decode a symbol */
+static int tinf_decode_symbol(struct tinf_data *d, const struct tinf_tree *t)
+{
+ int base = 0, offs = 0;
+ int len;
+
+ /*
+ * Get more bits while code index is above number of codes
+ *
+ * Rather than the actual code, we are computing the position of the
+ * code in the sorted order of codes, which is the index of the
+ * corresponding symbol.
+ *
+ * Conceptually, for each code length (level in the tree), there are
+ * counts[len] leaves on the left and internal nodes on the right.
+ * The index we have decoded so far is base + offs, and if that
+ * falls within the leaves we are done. Otherwise we adjust the range
+ * of offs and add one more bit to it.
+ */
+ for (len = 1; ; ++len) {
+ offs = 2 * offs + tinf_getbits(d, 1);
+
+ assert(len <= 15);
+
+ if (offs < t->counts[len]) {
+ break;
+ }
+
+ base += t->counts[len];
+ offs -= t->counts[len];
+ }
+
+ assert(base + offs >= 0 && base + offs < 288);
+
+ return t->symbols[base + offs];
+}
+
+/* Given a data stream, decode dynamic trees from it */
+static int tinf_decode_trees(struct tinf_data *d, struct tinf_tree *lt,
+ struct tinf_tree *dt)
+{
+ unsigned char lengths[288 + 32];
+
+ /* Special ordering of code length codes */
+ static const unsigned char clcidx[19] = {
+ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5,
+ 11, 4, 12, 3, 13, 2, 14, 1, 15
+ };
+
+ unsigned int hlit, hdist, hclen;
+ unsigned int i, num, length;
+ int res;
+
+ /* Get 5 bits HLIT (257-286) */
+ hlit = tinf_getbits_base(d, 5, 257);
+
+ /* Get 5 bits HDIST (1-32) */
+ hdist = tinf_getbits_base(d, 5, 1);
+
+ /* Get 4 bits HCLEN (4-19) */
+ hclen = tinf_getbits_base(d, 4, 4);
+
+ /*
+ * The RFC limits the range of HLIT to 286, but lists HDIST as range
+ * 1-32, even though distance codes 30 and 31 have no meaning. While
+ * we could allow the full range of HLIT and HDIST to make it possible
+ * to decode the fixed trees with this function, we consider it an
+ * error here.
+ *
+ * See also: https://github.com/madler/zlib/issues/82
+ */
+ if (hlit > 286 || hdist > 30) {
+ return TINF_DATA_ERROR;
+ }
+
+ for (i = 0; i < 19; ++i) {
+ lengths[i] = 0;
+ }
+
+ /* Read code lengths for code length alphabet */
+ for (i = 0; i < hclen; ++i) {
+ /* Get 3 bits code length (0-7) */
+ unsigned int clen = tinf_getbits(d, 3);
+
+ lengths[clcidx[i]] = clen;
+ }
+
+ /* Build code length tree (in literal/length tree to save space) */
+ res = tinf_build_tree(lt, lengths, 19);
+
+ if (res != TINF_OK) {
+ return res;
+ }
+
+ /* Check code length tree is not empty */
+ if (lt->max_sym == -1) {
+ return TINF_DATA_ERROR;
+ }
+
+ /* Decode code lengths for the dynamic trees */
+ for (num = 0; num < hlit + hdist; ) {
+ int sym = tinf_decode_symbol(d, lt);
+
+ if (sym > lt->max_sym) {
+ return TINF_DATA_ERROR;
+ }
+
+ switch (sym) {
+ case 16:
+ /* Copy previous code length 3-6 times (read 2 bits) */
+ if (num == 0) {
+ return TINF_DATA_ERROR;
+ }
+ sym = lengths[num - 1];
+ length = tinf_getbits_base(d, 2, 3);
+ break;
+ case 17:
+ /* Repeat code length 0 for 3-10 times (read 3 bits) */
+ sym = 0;
+ length = tinf_getbits_base(d, 3, 3);
+ break;
+ case 18:
+ /* Repeat code length 0 for 11-138 times (read 7 bits) */
+ sym = 0;
+ length = tinf_getbits_base(d, 7, 11);
+ break;
+ default:
+ /* Values 0-15 represent the actual code lengths */
+ length = 1;
+ break;
+ }
+
+ if (length > hlit + hdist - num) {
+ return TINF_DATA_ERROR;
+ }
+
+ while (length--) {
+ lengths[num++] = sym;
+ }
+ }
+
+ /* Check EOB symbol is present */
+ if (lengths[256] == 0) {
+ return TINF_DATA_ERROR;
+ }
+
+ /* Build dynamic trees */
+ res = tinf_build_tree(lt, lengths, hlit);
+
+ if (res != TINF_OK) {
+ return res;
+ }
+
+ res = tinf_build_tree(dt, lengths + hlit, hdist);
+
+ if (res != TINF_OK) {
+ return res;
+ }
+
+ return TINF_OK;
+}
+
+/* -- Block inflate functions -- */
+
+/* Given a stream and two trees, inflate a block of data */
+static int tinf_inflate_block_data(struct tinf_data *d, struct tinf_tree *lt,
+ struct tinf_tree *dt)
+{
+ /* Extra bits and base tables for length codes */
+ static const unsigned char length_bits[30] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
+ 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
+ 4, 4, 4, 4, 5, 5, 5, 5, 0, 127
+ };
+
+ static const unsigned short length_base[30] = {
+ 3, 4, 5, 6, 7, 8, 9, 10, 11, 13,
+ 15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
+ 67, 83, 99, 115, 131, 163, 195, 227, 258, 0
+ };
+
+ /* Extra bits and base tables for distance codes */
+ static const unsigned char dist_bits[30] = {
+ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
+ 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
+ 9, 9, 10, 10, 11, 11, 12, 12, 13, 13
+ };
+
+ static const unsigned short dist_base[30] = {
+ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25,
+ 33, 49, 65, 97, 129, 193, 257, 385, 513, 769,
+ 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577
+ };
+
+ for (;;) {
+ int sym = tinf_decode_symbol(d, lt);
+
+ /* Check for overflow in bit reader */
+ if (d->overflow) {
+ return TINF_DATA_ERROR;
+ }
+
+ if (sym < 256) {
+ if (d->dest == d->dest_end) {
+ return TINF_BUF_ERROR;
+ }
+ *d->dest++ = sym;
+ }
+ else {
+ int length, dist, offs;
+ int i;
+
+ /* Check for end of block */
+ if (sym == 256) {
+ return TINF_OK;
+ }
+
+ /* Check sym is within range and distance tree is not empty */
+ if (sym > lt->max_sym || sym - 257 > 28 || dt->max_sym == -1) {
+ return TINF_DATA_ERROR;
+ }
+
+ sym -= 257;
+
+ /* Possibly get more bits from length code */
+ length = tinf_getbits_base(d, length_bits[sym],
+ length_base[sym]);
+
+ dist = tinf_decode_symbol(d, dt);
+
+ /* Check dist is within range */
+ if (dist > dt->max_sym || dist > 29) {
+ return TINF_DATA_ERROR;
+ }
+
+ /* Possibly get more bits from distance code */
+ offs = tinf_getbits_base(d, dist_bits[dist],
+ dist_base[dist]);
+
+ if (offs > d->dest - d->dest_start) {
+ return TINF_DATA_ERROR;
+ }
+
+ if (d->dest_end - d->dest < length) {
+ return TINF_BUF_ERROR;
+ }
+
+            /* Copy match byte by byte, as matches may overlap their own output */
+ for (i = 0; i < length; ++i) {
+ d->dest[i] = d->dest[i - offs];
+ }
+
+ d->dest += length;
+ }
+ }
+}
+
+/* Inflate an uncompressed block of data */
+static int tinf_inflate_uncompressed_block(struct tinf_data *d)
+{
+ unsigned int length, invlength;
+
+ if (d->source_end && d->source_end - d->source < 4) {
+ return TINF_DATA_ERROR;
+ }
+
+ /* Get length */
+ length = read_le16(d->source);
+
+ /* Get one's complement of length */
+ invlength = read_le16(d->source + 2);
+
+ /* Check length */
+ if (length != (~invlength & 0x0000FFFF)) {
+ return TINF_DATA_ERROR;
+ }
+
+ d->source += 4;
+
+ if (d->source_end && d->source_end - d->source < length) {
+ return TINF_DATA_ERROR;
+ }
+
+ if (d->dest_end - d->dest < length) {
+ return TINF_BUF_ERROR;
+ }
+
+ /* Copy block */
+ while (length--) {
+ *d->dest++ = *d->source++;
+ }
+
+ /* Make sure we start next block on a byte boundary */
+ d->tag = 0;
+ d->bitcount = 0;
+
+ return TINF_OK;
+}
+
+/* Inflate a block of data compressed with fixed Huffman trees */
+static int tinf_inflate_fixed_block(struct tinf_data *d)
+{
+ /* Build fixed Huffman trees */
+ tinf_build_fixed_trees(&d->ltree, &d->dtree);
+
+ /* Decode block using fixed trees */
+ return tinf_inflate_block_data(d, &d->ltree, &d->dtree);
+}
+
+/* Inflate a block of data compressed with dynamic Huffman trees */
+static int tinf_inflate_dynamic_block(struct tinf_data *d)
+{
+ /* Decode trees from stream */
+ int res = tinf_decode_trees(d, &d->ltree, &d->dtree);
+
+ if (res != TINF_OK) {
+ return res;
+ }
+
+ /* Decode block using decoded trees */
+ return tinf_inflate_block_data(d, &d->ltree, &d->dtree);
+}
+
+/* -- Public functions -- */
+
+/* Initialize global (static) data */
+void tinf_init(void)
+{
+ return;
+}
+
+/* Inflate stream from source to dest */
+int tinf_uncompress(void *dest, unsigned int *destLen,
+ const void *source, unsigned int *sourceLen)
+{
+ struct tinf_data d;
+ int bfinal;
+
+ /* Initialise data */
+ d.source = (const unsigned char *) source;
+ if (sourceLen && *sourceLen)
+ d.source_end = d.source + *sourceLen;
+ else
+ d.source_end = 0;
+ d.tag = 0;
+ d.bitcount = 0;
+ d.overflow = 0;
+
+ d.dest = (unsigned char *) dest;
+ d.dest_start = d.dest;
+ d.dest_end = d.dest + *destLen;
+
+ do {
+ unsigned int btype;
+ int res;
+
+ /* Read final block flag */
+ bfinal = tinf_getbits(&d, 1);
+
+ /* Read block type (2 bits) */
+ btype = tinf_getbits(&d, 2);
+
+ /* Decompress block */
+ switch (btype) {
+ case 0:
+ /* Decompress uncompressed block */
+ res = tinf_inflate_uncompressed_block(&d);
+ break;
+ case 1:
+ /* Decompress block with fixed Huffman trees */
+ res = tinf_inflate_fixed_block(&d);
+ break;
+ case 2:
+ /* Decompress block with dynamic Huffman trees */
+ res = tinf_inflate_dynamic_block(&d);
+ break;
+ default:
+ res = TINF_DATA_ERROR;
+ break;
+ }
+
+ if (res != TINF_OK) {
+ return res;
+ }
+ } while (!bfinal);
+
+ /* Check for overflow in bit reader */
+ if (d.overflow) {
+ return TINF_DATA_ERROR;
+ }
+
+ if (sourceLen) {
+ unsigned int slen = d.source - (const unsigned char *)source;
+ if (!*sourceLen)
+ *sourceLen = slen;
+ else if (*sourceLen != slen)
+ return TINF_DATA_ERROR;
+ }
+
+ *destLen = d.dest - d.dest_start;
+ return TINF_OK;
+}
+
+/* clang -g -O1 -fsanitize=fuzzer,address -DTINF_FUZZING tinflate.c */
+#if defined(TINF_FUZZING)
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+unsigned char depacked[64 * 1024];
+
+extern int
+LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
+{
+ if (size > UINT_MAX / 2) { return 0; }
+ unsigned int destLen = sizeof(depacked);
+ tinf_uncompress(depacked, &destLen, data, size);
+ return 0;
+}
+#endif
diff --git a/tools/src/tinf/tinfzlib.c b/tools/src/tinf/tinfzlib.c
new file mode 100644
index 0000000..6af07b8
--- /dev/null
+++ b/tools/src/tinf/tinfzlib.c
@@ -0,0 +1,99 @@
+/*
+ * tinfzlib - tiny zlib decompressor
+ *
+ * This version of tinfzlib was modified for use with m1n1.
+ *
+ * Copyright (c) 2003-2019 Joergen Ibsen
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must
+ * not claim that you wrote the original software. If you use this
+ * software in a product, an acknowledgment in the product
+ * documentation would be appreciated but is not required.
+ *
+ * 2. Altered source versions must be plainly marked as such, and must
+ * not be misrepresented as being the original software.
+ *
+ * 3. This notice may not be removed or altered from any source
+ * distribution.
+ */
+
+#include "tinf.h"
+
+static unsigned int read_be32(const unsigned char *p)
+{
+ return ((unsigned int) p[0] << 24)
+ | ((unsigned int) p[1] << 16)
+ | ((unsigned int) p[2] << 8)
+ | ((unsigned int) p[3]);
+}
+
+int tinf_zlib_uncompress(void *dest, unsigned int *destLen,
+ const void *source, unsigned int *sourceLen)
+{
+ const unsigned char *src = (const unsigned char *) source;
+ unsigned char *dst = (unsigned char *) dest;
+ unsigned int a32;
+ int res;
+ unsigned char cmf, flg;
+ unsigned int sourceDataLen = sourceLen ? *sourceLen - 6 : 0;
+
+ /* -- Check header -- */
+
+ /* Check room for at least 2 byte header and 4 byte trailer */
+ if (*sourceLen && *sourceLen < 6) {
+ return TINF_DATA_ERROR;
+ }
+
+ /* Get header bytes */
+ cmf = src[0];
+ flg = src[1];
+
+ /* Check checksum */
+ if ((256 * cmf + flg) % 31) {
+ return TINF_DATA_ERROR;
+ }
+
+ /* Check method is deflate */
+ if ((cmf & 0x0F) != 8) {
+ return TINF_DATA_ERROR;
+ }
+
+ /* Check window size is valid */
+ if ((cmf >> 4) > 7) {
+ return TINF_DATA_ERROR;
+ }
+
+ /* Check there is no preset dictionary */
+ if (flg & 0x20) {
+ return TINF_DATA_ERROR;
+ }
+
+ /* -- Decompress data -- */
+
+ res = tinf_uncompress(dst, destLen, src + 2, &sourceDataLen);
+
+ if (res != TINF_OK) {
+ return TINF_DATA_ERROR;
+ }
+
+ /* -- Check Adler-32 checksum -- */
+
+ a32 = read_be32(&src[sourceDataLen + 2]);
+
+ if (a32 != tinf_adler32(dst, *destLen)) {
+ return TINF_DATA_ERROR;
+ }
+
+ if (sourceLen)
+ *sourceLen = sourceDataLen + 6;
+
+ return TINF_OK;
+}
diff --git a/tools/src/tps6598x.c b/tools/src/tps6598x.c
new file mode 100644
index 0000000..fdb5e11
--- /dev/null
+++ b/tools/src/tps6598x.c
@@ -0,0 +1,172 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "tps6598x.h"
+#include "adt.h"
+#include "i2c.h"
+#include "iodev.h"
+#include "malloc.h"
+#include "types.h"
+#include "utils.h"
+
+#define TPS_REG_CMD1 0x08
+#define TPS_REG_DATA1 0x09
+#define TPS_REG_INT_EVENT1 0x14
+#define TPS_REG_INT_MASK1 0x16
+#define TPS_REG_INT_CLEAR1 0x18
+#define TPS_REG_POWER_STATE 0x20
+#define TPS_CMD_INVALID 0x21434d44 // !CMD
+
+struct tps6598x_dev {
+ i2c_dev_t *i2c;
+ u8 addr;
+};
+
+tps6598x_dev_t *tps6598x_init(const char *adt_node, i2c_dev_t *i2c)
+{
+ int adt_offset;
+ adt_offset = adt_path_offset(adt, adt_node);
+ if (adt_offset < 0) {
+ printf("tps6598x: Error getting %s node\n", adt_node);
+ return NULL;
+ }
+
+ const u8 *iic_addr = adt_getprop(adt, adt_offset, "hpm-iic-addr", NULL);
+ if (iic_addr == NULL) {
+ printf("tps6598x: Error getting %s hpm-iic-addr\n.", adt_node);
+ return NULL;
+ }
+
+ tps6598x_dev_t *dev = malloc(sizeof(*dev));
+ if (!dev)
+ return NULL;
+
+ dev->i2c = i2c;
+ dev->addr = *iic_addr;
+ return dev;
+}
+
+void tps6598x_shutdown(tps6598x_dev_t *dev)
+{
+ free(dev);
+}
+
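+/*
+ * TI 4CC command protocol: write the payload (if any) to the DATA1
+ * register, write the four ASCII command bytes to CMD1, then poll CMD1
+ * until the controller clears it; a "!CMD" echo means the command was
+ * rejected.
+ */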
+int tps6598x_command(tps6598x_dev_t *dev, const char *cmd, const u8 *data_in, size_t len_in,
+ u8 *data_out, size_t len_out)
+{
+ if (len_in) {
+ if (i2c_smbus_write(dev->i2c, dev->addr, TPS_REG_DATA1, data_in, len_in) < 0)
+ return -1;
+ }
+
+ if (i2c_smbus_write(dev->i2c, dev->addr, TPS_REG_CMD1, (const u8 *)cmd, 4) < 0)
+ return -1;
+
+ u32 cmd_status;
+ do {
+ if (i2c_smbus_read32(dev->i2c, dev->addr, TPS_REG_CMD1, &cmd_status))
+ return -1;
+ if (cmd_status == TPS_CMD_INVALID)
+ return -1;
+ udelay(100);
+ } while (cmd_status != 0);
+
+ if (len_out) {
+ if (i2c_smbus_read(dev->i2c, dev->addr, TPS_REG_DATA1, data_out, len_out) !=
+ (ssize_t)len_out)
+ return -1;
+ }
+
+ return 0;
+}
+
+int tps6598x_disable_irqs(tps6598x_dev_t *dev, tps6598x_irq_state_t *state)
+{
+ size_t read;
+ int written;
+ static const u8 zeros[CD3218B12_IRQ_WIDTH] = {0x00};
+ static const u8 ones[CD3218B12_IRQ_WIDTH] = {0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF};
+
+    // save IntMask1 so it can be restored later
+ read = i2c_smbus_read(dev->i2c, dev->addr, TPS_REG_INT_MASK1, state->int_mask1,
+ sizeof(state->int_mask1));
+ if (read != CD3218B12_IRQ_WIDTH) {
+ printf("tps6598x: reading TPS_REG_INT_MASK1 failed\n");
+ return -1;
+ }
+ state->valid = 1;
+
+ // mask interrupts and ack all interrupt flags
+    written = i2c_smbus_write(dev->i2c, dev->addr, TPS_REG_INT_CLEAR1, ones, sizeof(ones));
+    if (written != sizeof(ones)) {
+        printf("tps6598x: writing TPS_REG_INT_CLEAR1 failed, written: %d\n", written);
+        return -1;
+    }
+    written = i2c_smbus_write(dev->i2c, dev->addr, TPS_REG_INT_MASK1, zeros, sizeof(zeros));
+    if (written != sizeof(zeros)) {
+        printf("tps6598x: writing TPS_REG_INT_MASK1 failed, written: %d\n", written);
+        return -1;
+    }
+
+#ifdef DEBUG
+ u8 tmp[CD3218B12_IRQ_WIDTH] = {0x00};
+ read = i2c_smbus_read(dev->i2c, dev->addr, TPS_REG_INT_MASK1, tmp, CD3218B12_IRQ_WIDTH);
+ if (read != CD3218B12_IRQ_WIDTH)
+ printf("tps6598x: failed verifcation, can't read TPS_REG_INT_MASK1\n");
+ else {
+ printf("tps6598x: verify: TPS_REG_INT_MASK1 vs. saved IntMask1\n");
+ hexdump(tmp, sizeof(tmp));
+ hexdump(state->int_mask1, sizeof(state->int_mask1));
+ }
+#endif
+ return 0;
+}
+
+int tps6598x_restore_irqs(tps6598x_dev_t *dev, tps6598x_irq_state_t *state)
+{
+ int written;
+
+ written = i2c_smbus_write(dev->i2c, dev->addr, TPS_REG_INT_MASK1, state->int_mask1,
+ sizeof(state->int_mask1));
+ if (written != sizeof(state->int_mask1)) {
+ printf("tps6598x: restoring TPS_REG_INT_MASK1 failed\n");
+ return -1;
+ }
+
+#ifdef DEBUG
+ int read;
+ u8 tmp[CD3218B12_IRQ_WIDTH];
+ read = i2c_smbus_read(dev->i2c, dev->addr, TPS_REG_INT_MASK1, tmp, sizeof(tmp));
+ if (read != sizeof(tmp))
+ printf("tps6598x: failed verifcation, can't read TPS_REG_INT_MASK1\n");
+ else {
+ printf("tps6598x: verify saved IntMask1 vs. TPS_REG_INT_MASK1:\n");
+ hexdump(state->int_mask1, sizeof(state->int_mask1));
+ hexdump(tmp, sizeof(tmp));
+ }
+#endif
+
+ return 0;
+}
+
+int tps6598x_powerup(tps6598x_dev_t *dev)
+{
+ u8 power_state;
+
+ if (i2c_smbus_read8(dev->i2c, dev->addr, TPS_REG_POWER_STATE, &power_state))
+ return -1;
+
+ if (power_state == 0)
+ return 0;
+
+ const u8 data = 0;
+ tps6598x_command(dev, "SSPS", &data, 1, NULL, 0);
+
+ if (i2c_smbus_read8(dev->i2c, dev->addr, TPS_REG_POWER_STATE, &power_state))
+ return -1;
+
+ if (power_state != 0)
+ return -1;
+
+ return 0;
+}
diff --git a/tools/src/tps6598x.h b/tools/src/tps6598x.h
new file mode 100644
index 0000000..9e6d26a
--- /dev/null
+++ b/tools/src/tps6598x.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef TPS6598X_H
+#define TPS6598X_H
+
+#include "i2c.h"
+#include "types.h"
+
+typedef struct tps6598x_dev tps6598x_dev_t;
+
+tps6598x_dev_t *tps6598x_init(const char *adt_path, i2c_dev_t *i2c);
+void tps6598x_shutdown(tps6598x_dev_t *dev);
+
+int tps6598x_command(tps6598x_dev_t *dev, const char *cmd, const u8 *data_in, size_t len_in,
+ u8 *data_out, size_t len_out);
+int tps6598x_powerup(tps6598x_dev_t *dev);
+
+#define CD3218B12_IRQ_WIDTH 9
+
+typedef struct tps6598x_irq_state {
+ u8 int_mask1[CD3218B12_IRQ_WIDTH];
+ bool valid;
+} tps6598x_irq_state_t;
+
+int tps6598x_disable_irqs(tps6598x_dev_t *dev, tps6598x_irq_state_t *state);
+int tps6598x_restore_irqs(tps6598x_dev_t *dev, tps6598x_irq_state_t *state);
+
+#endif
diff --git a/tools/src/tunables.c b/tools/src/tunables.c
new file mode 100644
index 0000000..ced789e
--- /dev/null
+++ b/tools/src/tunables.c
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "adt.h"
+#include "tunables.h"
+#include "types.h"
+#include "utils.h"
+
+struct tunable_info {
+ int node_offset;
+ int node_path[8];
+ const u32 *tunable_raw;
+ u32 tunable_len;
+};
+
+static int tunables_adt_find(const char *path, const char *prop, struct tunable_info *info,
+ u32 item_size)
+{
+ info->node_offset = adt_path_offset_trace(adt, path, info->node_path);
+ if (info->node_offset < 0) {
+ printf("tunable: unable to find ADT node %s.\n", path);
+ return -1;
+ }
+
+ info->tunable_raw = adt_getprop(adt, info->node_offset, prop, &info->tunable_len);
+ if (info->tunable_raw == NULL || info->tunable_len == 0) {
+ printf("tunable: Error getting ADT node %s property %s .\n", path, prop);
+ return -1;
+ }
+
+ if (info->tunable_len % item_size) {
+ printf("tunable: tunable length needs to be a multiply of %d but is %d\n", item_size,
+ info->tunable_len);
+ return -1;
+ }
+
+ info->tunable_len /= item_size;
+
+ return 0;
+}
+
+struct tunable_global {
+ u32 reg_idx;
+ u32 offset;
+ u32 mask;
+ u32 value;
+} PACKED;
+
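+/*
+ * The tunable property is a packed array of the 16-byte entries above: a
+ * "reg" index selecting the MMIO region, an offset into it, and a mask/value
+ * pair applied as a read-modify-write.
+ */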
+int tunables_apply_global(const char *path, const char *prop)
+{
+ struct tunable_info info;
+
+ if (tunables_adt_find(path, prop, &info, sizeof(struct tunable_global)) < 0)
+ return -1;
+
+ const struct tunable_global *tunables = (const struct tunable_global *)info.tunable_raw;
+ for (u32 i = 0; i < info.tunable_len; ++i) {
+ const struct tunable_global *tunable = &tunables[i];
+
+ u64 addr;
+ if (adt_get_reg(adt, info.node_path, "reg", tunable->reg_idx, &addr, NULL) < 0) {
+ printf("tunable: Error getting regs with index %d\n", tunable->reg_idx);
+ return -1;
+ }
+
+ mask32(addr + tunable->offset, tunable->mask, tunable->value);
+ }
+
+ return 0;
+}
+
+struct tunable_local {
+ u32 offset;
+ u32 size;
+ u64 mask;
+ u64 value;
+} PACKED;
+
+int tunables_apply_local_addr(const char *path, const char *prop, uintptr_t base)
+{
+ struct tunable_info info;
+
+ if (tunables_adt_find(path, prop, &info, sizeof(struct tunable_local)) < 0)
+ return -1;
+
+ const struct tunable_local *tunables = (const struct tunable_local *)info.tunable_raw;
+ for (u32 i = 0; i < info.tunable_len; ++i) {
+ const struct tunable_local *tunable = &tunables[i];
+
+ switch (tunable->size) {
+ case 1:
+ mask8(base + tunable->offset, tunable->mask, tunable->value);
+ break;
+ case 2:
+ mask16(base + tunable->offset, tunable->mask, tunable->value);
+ break;
+ case 4:
+ mask32(base + tunable->offset, tunable->mask, tunable->value);
+ break;
+ case 8:
+ mask64(base + tunable->offset, tunable->mask, tunable->value);
+ break;
+ default:
+ printf("tunable: unknown tunable size 0x%08x\n", tunable->size);
+ return -1;
+ }
+ }
+ return 0;
+}
+
+int tunables_apply_local(const char *path, const char *prop, u32 reg_idx)
+{
+ struct tunable_info info;
+
+ if (tunables_adt_find(path, prop, &info, sizeof(struct tunable_local)) < 0)
+ return -1;
+
+ u64 base;
+ if (adt_get_reg(adt, info.node_path, "reg", reg_offset, &base, NULL) < 0) {
+ printf("tunable: Error getting regs\n");
+ return -1;
+ }
+
+ return tunables_apply_local_addr(path, prop, base);
+}
diff --git a/tools/src/tunables.h b/tools/src/tunables.h
new file mode 100644
index 0000000..cf3091a
--- /dev/null
+++ b/tools/src/tunables.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef TUNABLES_H
+#define TUNABLES_H
+
+#include "types.h"
+
+/*
+ * This function applies the tunables usually passed in the node "tunable".
+ * They usually apply to multiple entries from the "reg" node.
+ *
+ * Example usage for the USB DRD node:
+ * tunables_apply_global("/arm-io/usb-drd0", "tunable");
+ */
+int tunables_apply_global(const char *path, const char *prop);
+
+/*
+ * This function applies the tunables specified in device-specific tunable properties.
+ * These only apply to a single MMIO region from the "reg" node which needs to
+ * be specified.
+ *
+ * Example usage for two tunables from the USB DRD DART node:
+ * tunables_apply_local("/arm-io/dart-usb0", "dart-tunables-instance-0", 0);
+ * tunables_apply_local("/arm-io/dart-usb0", "dart-tunables-instance-1", 1);
+ *
+ */
+int tunables_apply_local(const char *path, const char *prop, u32 reg_idx);
+
+/*
+ * This function does the same as tunables_apply_local, except that it takes
+ * the base address to which the tunables are applied instead of extracting
+ * it from the "reg" property.
+ *
+ * Example usage for two tunables for the USB DRD DART node:
+ * tunables_apply_local_addr("/arm-io/dart-usb0", "dart-tunables-instance-0", 0x382f00000);
+ * tunables_apply_local_addr("/arm-io/dart-usb0", "dart-tunables-instance-1", 0x382f80000);
+ */
+int tunables_apply_local_addr(const char *path, const char *prop, uintptr_t base);
+
+int tunables_apply_static(void);
+
+#endif
diff --git a/tools/src/tunables_static.c b/tools/src/tunables_static.c
new file mode 100644
index 0000000..e569e6b
--- /dev/null
+++ b/tools/src/tunables_static.c
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "tunables.h"
+#include "adt.h"
+#include "pmgr.h"
+#include "soc.h"
+#include "types.h"
+#include "utils.h"
+
+/*
+ * These magic tunable sequences are hardcoded in various places in XNU, and are required for
+ * proper operation of various fabric features and other miscellanea. Without them, things tend
+ * to subtly break...
+ */
+
+struct entry {
+ u32 offset;
+ u32 clear;
+ u32 set;
+};
+
+struct entry t8103_agx_tunables[] = {
+ {0x30, 0xffffffff, 0x50014}, {0x34, 0xffffffff, 0xa003c},
+ {0x400, 0x400103ff, 0x40010001}, {0x600, 0x1ffffff, 0x1ffffff},
+ {0x738, 0x1ff01ff, 0x140034}, {0x798, 0x1ff01ff, 0x14003c},
+ {0x800, 0x100, 0x100}, {-1, 0, 0},
+};
+
+// TODO: check masks
+struct entry t600x_agx_tunables[] = {
+ {0x0, 0x1, 0x1},
+ {0x10, 0xfff0000, 0xd0000},
+ {0x14, 0x3, 0x1},
+ {0x18, 0x3, 0x1},
+ {0x1c, 0x3, 0x3},
+ {0x20, 0x3, 0x3},
+ {0x24, 0x3, 0x3},
+ {0x28, 0x3, 0x3},
+ {0x2c, 0x3, 0x3},
+ {0x400, 0x400103ff, 0x40010001},
+ {0x600, 0x1ffffff, 0x1ffffff},
+ {0x800, 0x100, 0x100},
+ {-1, 0, 0},
+};
+
+// TODO: check masks
+struct entry t8112_agx_tunables[] = {
+ {0x0, 0x200, 0x200},
+ {0x34, 0xffffffff, 0x50014},
+ {0x38, 0xffffffff, 0xa003c},
+ {0x400, 0xc00103ff, 0xc0010001},
+ {0x600, 0x1ffffff, 0x1ffffff},
+ {0x738, 0x1ff01ff, 0x14003c},
+ {0x798, 0x1ff01ff, 0x14003c},
+ {0x800, 0x100, 0x100},
+ {-1, 0, 0},
+};
+
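+/* Each table above ends with a {-1, 0, 0} sentinel; the -1 offset wraps to
+ * UINT32_MAX as a u32, which terminates the loop below. */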
+static void tunables_apply(u64 base, struct entry *entry)
+{
+ while (entry->offset != UINT32_MAX) {
+ mask32(base + entry->offset, entry->clear, entry->set);
+ entry++;
+ }
+}
+
+static int power_and_apply(const char *path, u64 base, struct entry *entries)
+{
+ if (pmgr_adt_power_enable(path) < 0) {
+ printf("tunables: Failed to enable power: %s\n", path);
+ return -1;
+ }
+
+ tunables_apply(base, entries);
+
+ if (pmgr_adt_power_disable(path) < 0) {
+ printf("tunables: Failed to disable power: %s\n", path);
+ return -1;
+ }
+
+ return 0;
+}
+
+int tunables_apply_static(void)
+{
+ int ret = 0;
+
+ switch (chip_id) {
+ case T8103:
+ ret |= power_and_apply("/arm-io/sgx", 0x205000000, t8103_agx_tunables);
+ break;
+ case T8112:
+ ret |= power_and_apply("/arm-io/sgx", 0x205000000, t8112_agx_tunables);
+ break;
+ case T6000:
+ case T6001:
+ case T6002:
+ ret |= power_and_apply("/arm-io/sgx", 0x405000000, t600x_agx_tunables);
+ break;
+ default:
+ break;
+ }
+
+ return ret ? -1 : 0;
+}
diff --git a/tools/src/types.h b/tools/src/types.h
new file mode 100644
index 0000000..6fd0789
--- /dev/null
+++ b/tools/src/types.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef TYPES_H
+#define TYPES_H
+
+#ifndef __ASSEMBLER__
+
+#include <limits.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+
+typedef int8_t s8;
+typedef int16_t s16;
+typedef int32_t s32;
+typedef int64_t s64;
+
+typedef u64 uintptr_t;
+typedef s64 ptrdiff_t;
+
+typedef s64 ssize_t;
+
+#endif
+
+#define UNUSED(x) (void)(x)
+#define ALIGNED(x) __attribute__((aligned(x)))
+#define PACKED __attribute__((packed))
+
+#define STACK_ALIGN(type, name, cnt, alignment) \
+ u8 _al__##name[((sizeof(type) * (cnt)) + (alignment) + \
+ (((sizeof(type) * (cnt)) % (alignment)) > 0 \
+ ? ((alignment) - ((sizeof(type) * (cnt)) % (alignment))) \
+ : 0))]; \
+ type *name = \
+ (type *)(((u32)(_al__##name)) + ((alignment) - (((u32)(_al__##name)) & ((alignment)-1))))
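+
+/* Example: STACK_ALIGN(u8, buf, 128, 64) declares an over-sized backing array
+ * and a pointer `buf` to 128 bytes aligned up to a 64-byte boundary within it. */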
+
+#define HAVE_PTRDIFF_T 1
+#define HAVE_UINTPTR_T 1
+#define UPTRDIFF_T uintptr_t
+
+#define SZ_2K (1 << 11)
+#define SZ_4K (1 << 12)
+#define SZ_16K (1 << 14)
+#define SZ_1M (1 << 20)
+#define SZ_32M (1 << 25)
+
+#ifdef __ASSEMBLER__
+
+#define sys_reg(op0, op1, CRn, CRm, op2) s##op0##_##op1##_c##CRn##_c##CRm##_##op2
+
+#else
+
+#define sys_reg(op0, op1, CRn, CRm, op2) , _S, op0, op1, CRn, CRm, op2
+
+#endif
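+
+/* Example: Apple's implementation-defined HID0 register, encoded as
+ * s3_0_c15_c0_0, is expressed as sys_reg(3, 0, 15, 0, 0). */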
+
+#endif
diff --git a/tools/src/uart.c b/tools/src/uart.c
new file mode 100644
index 0000000..67aa0e3
--- /dev/null
+++ b/tools/src/uart.c
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: MIT */
+
+#include <stdarg.h>
+
+#include "adt.h"
+#include "iodev.h"
+#include "types.h"
+#include "uart.h"
+#include "uart_regs.h"
+#include "utils.h"
+#include "vsprintf.h"
+
+#define UART_CLOCK 24000000
+
+static u64 uart_base = 0;
+
+int uart_init(void)
+{
+ int path[8];
+ int node = adt_path_offset_trace(adt, "/arm-io/uart0", path);
+
+ if (node < 0) {
+ printf("!!! UART node not found!\n");
+ return -1;
+ }
+
+ if (adt_get_reg(adt, path, "reg", 0, &uart_base, NULL)) {
+ printf("!!! Failed to get UART reg property!\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+void uart_putbyte(u8 c)
+{
+ if (!uart_base)
+ return;
+
+ while (!(read32(uart_base + UTRSTAT) & UTRSTAT_TXBE))
+ ;
+
+ write32(uart_base + UTXH, c);
+}
+
+u8 uart_getbyte(void)
+{
+ if (!uart_base)
+ return 0;
+
+ while (!(read32(uart_base + UTRSTAT) & UTRSTAT_RXD))
+ ;
+
+ return read32(uart_base + URXH);
+}
+
+void uart_putchar(u8 c)
+{
+ if (c == '\n')
+ uart_putbyte('\r');
+
+ uart_putbyte(c);
+}
+
+u8 uart_getchar(void)
+{
+ return uart_getbyte();
+}
+
+void uart_puts(const char *s)
+{
+ while (*s)
+ uart_putchar(*(s++));
+
+ uart_putchar('\n');
+}
+
+void uart_write(const void *buf, size_t count)
+{
+ const u8 *p = buf;
+
+ while (count--)
+ uart_putbyte(*p++);
+}
+
+size_t uart_read(void *buf, size_t count)
+{
+ u8 *p = buf;
+ size_t recvd = 0;
+
+ while (count--) {
+ *p++ = uart_getbyte();
+ recvd++;
+ }
+
+ return recvd;
+}
+
+void uart_setbaud(int baudrate)
+{
+ if (!uart_base)
+ return;
+
+ uart_flush();
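+ /* Samsung-style baud divisor: UBRDIV = UART_CLOCK / (16 * baud) - 1; the +7
+ * biases the integer division toward the nearest divisor. */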
+ write32(uart_base + UBRDIV, ((UART_CLOCK / baudrate + 7) / 16) - 1);
+}
+
+void uart_flush(void)
+{
+ if (!uart_base)
+ return;
+
+ while (!(read32(uart_base + UTRSTAT) & UTRSTAT_TXE))
+ ;
+}
+
+void uart_clear_irqs(void)
+{
+ if (!uart_base)
+ return;
+
+ write32(uart_base + UTRSTAT, UTRSTAT_TXTHRESH | UTRSTAT_RXTHRESH | UTRSTAT_RXTO);
+}
+
+int uart_printf(const char *fmt, ...)
+{
+ va_list args;
+ char buffer[512];
+ int i;
+
+ va_start(args, fmt);
+ i = vsnprintf(buffer, sizeof(buffer), fmt, args);
+ va_end(args);
+
+ uart_write(buffer, min(i, (int)(sizeof(buffer) - 1)));
+
+ return i;
+}
+
+static bool uart_iodev_can_write(void *opaque)
+{
+ UNUSED(opaque);
+ return true;
+}
+
+static ssize_t uart_iodev_can_read(void *opaque)
+{
+ UNUSED(opaque);
+
+ if (!uart_base)
+ return 0;
+
+ return (read32(uart_base + UTRSTAT) & UTRSTAT_RXD) ? 1 : 0;
+}
+
+static ssize_t uart_iodev_read(void *opaque, void *buf, size_t len)
+{
+ UNUSED(opaque);
+ return uart_read(buf, len);
+}
+
+static ssize_t uart_iodev_write(void *opaque, const void *buf, size_t len)
+{
+ UNUSED(opaque);
+ uart_write(buf, len);
+ return len;
+}
+
+static struct iodev_ops iodev_uart_ops = {
+ .can_read = uart_iodev_can_read,
+ .can_write = uart_iodev_can_write,
+ .read = uart_iodev_read,
+ .write = uart_iodev_write,
+};
+
+struct iodev iodev_uart = {
+ .ops = &iodev_uart_ops,
+ .usage = USAGE_CONSOLE | USAGE_UARTPROXY,
+ .lock = SPINLOCK_INIT,
+};
diff --git a/tools/src/uart.h b/tools/src/uart.h
new file mode 100644
index 0000000..0b03f2d
--- /dev/null
+++ b/tools/src/uart.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef UART_H
+#define UART_H
+
+#include "types.h"
+
+int uart_init(void);
+
+void uart_putbyte(u8 c);
+u8 uart_getbyte(void);
+
+void uart_putchar(u8 c);
+u8 uart_getchar(void);
+
+void uart_write(const void *buf, size_t count);
+size_t uart_read(void *buf, size_t count);
+
+void uart_puts(const char *s);
+
+void uart_setbaud(int baudrate);
+
+void uart_flush(void);
+
+void uart_clear_irqs(void);
+
+int uart_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
+
+#endif
diff --git a/tools/src/uart_regs.h b/tools/src/uart_regs.h
new file mode 100644
index 0000000..bca1fe4
--- /dev/null
+++ b/tools/src/uart_regs.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: MIT */
+
+#define ULCON 0x000
+#define UCON 0x004
+#define UFCON 0x008
+#define UTRSTAT 0x010
+#define UFSTAT 0x018
+#define UTXH 0x020
+#define URXH 0x024
+#define UBRDIV 0x028
+#define UFRACVAL 0x02c
+
+#define UCON_TXTHRESH_ENA BIT(13)
+#define UCON_RXTHRESH_ENA BIT(12)
+#define UCON_RXTO_ENA BIT(9)
+#define UCON_TXMODE GENMASK(3, 2)
+#define UCON_RXMODE GENMASK(1, 0)
+
+#define UCON_MODE_OFF 0
+#define UCON_MODE_IRQ 1
+
+#define UTRSTAT_RXTO BIT(9)
+#define UTRSTAT_TXTHRESH BIT(5)
+#define UTRSTAT_RXTHRESH BIT(4)
+#define UTRSTAT_TXE BIT(2)
+#define UTRSTAT_TXBE BIT(1)
+#define UTRSTAT_RXD BIT(0)
+
+#define UFSTAT_TXFULL BIT(9)
+#define UFSTAT_RXFULL BIT(8)
+#define UFSTAT_TXCNT GENMASK(7, 4)
+#define UFSTAT_RXCNT GENMASK(3, 0)
diff --git a/tools/src/uartproxy.c b/tools/src/uartproxy.c
new file mode 100644
index 0000000..fed9cc5
--- /dev/null
+++ b/tools/src/uartproxy.c
@@ -0,0 +1,317 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "uartproxy.h"
+#include "assert.h"
+#include "exception.h"
+#include "iodev.h"
+#include "proxy.h"
+#include "string.h"
+#include "types.h"
+#include "utils.h"
+
+#define REQ_SIZE 64
+
+typedef struct {
+ u32 _pad;
+ u32 type;
+ union {
+ ProxyRequest prequest;
+ struct {
+ u64 addr;
+ u64 size;
+ u32 dchecksum;
+ } mrequest;
+ u64 features;
+ };
+ u32 checksum;
+} UartRequest;
+
+#define REPLY_SIZE 36
+
+typedef struct {
+ u32 type;
+ s32 status;
+ union {
+ ProxyReply preply;
+ struct {
+ u32 dchecksum;
+ } mreply;
+ struct uartproxy_msg_start start;
+ u64 features;
+ };
+ u32 checksum;
+ u32 _dummy; // Not transferred
+} UartReply;
+
+typedef struct {
+ u32 type;
+ u16 len;
+ u16 event_type;
+} UartEventHdr;
+
+static_assert(sizeof(UartReply) == (REPLY_SIZE + 4), "Invalid UartReply size");
+
+#define REQ_NOP 0x00AA55FF
+#define REQ_PROXY 0x01AA55FF
+#define REQ_MEMREAD 0x02AA55FF
+#define REQ_MEMWRITE 0x03AA55FF
+#define REQ_BOOT 0x04AA55FF
+#define REQ_EVENT 0x05AA55FF
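+
+/*
+ * All request types share the 0xAA55FF low bytes: the host sends FF 55 AA on
+ * the wire followed by the command byte, and the receive loops below shift
+ * incoming bytes into a 32-bit window until that 24-bit magic lines up, at
+ * which point the window holds the complete type word.
+ */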
+
+#define ST_OK 0
+#define ST_BADCMD -1
+#define ST_INVAL -2
+#define ST_XFRERR -3
+#define ST_CSUMERR -4
+
+#define PROXY_FEAT_DISABLE_DATA_CSUMS 0x01
+#define PROXY_FEAT_ALL (PROXY_FEAT_DISABLE_DATA_CSUMS)
+
+static u32 iodev_proxy_buffer[IODEV_MAX];
+
+#define CHECKSUM_INIT 0xDEADBEEF
+#define CHECKSUM_FINAL 0xADDEDBAD
+#define CHECKSUM_SENTINEL 0xD0DECADE
+#define DATA_END_SENTINEL 0xB0CACC10
+
+static bool disable_data_csums = false;
+
+// I just totally pulled this out of my arse
+// Noinline so that this can be bailed out by exc_guard = GUARD_RETURN
+// We assume this function does not use the stack
+static u32 __attribute__((noinline)) checksum_block(void *start, u32 length, u32 init)
+{
+ u32 sum = init;
+ u8 *d = (u8 *)start;
+
+ while (length--) {
+ sum *= 31337;
+ sum += (*d++) ^ 0x5A;
+ }
+ return sum;
+}
+
+static inline u32 checksum_start(void *start, u32 length)
+{
+ return checksum_block(start, length, CHECKSUM_INIT);
+}
+
+static inline u32 checksum_add(void *start, u32 length, u32 sum)
+{
+ return checksum_block(start, length, sum);
+}
+
+static inline u32 checksum_finish(u32 sum)
+{
+ return sum ^ CHECKSUM_FINAL;
+}
+
+static inline u32 checksum(void *start, u32 length)
+{
+ return checksum_finish(checksum_start(start, length));
+}
+
+static u64 data_checksum(void *start, u32 length)
+{
+ if (disable_data_csums) {
+ return CHECKSUM_SENTINEL;
+ }
+
+ return checksum(start, length);
+}
+
+iodev_id_t uartproxy_iodev;
+
+int uartproxy_run(struct uartproxy_msg_start *start)
+{
+ int ret;
+ int running = 1;
+ size_t bytes;
+ u64 checksum_val;
+ u64 enabled_features = 0;
+
+ iodev_id_t iodev = IODEV_MAX;
+
+ UartRequest request;
+ UartReply reply = {REQ_BOOT};
+ if (!start) {
+ // Startup notification only goes out via UART
+ reply.checksum = checksum(&reply, REPLY_SIZE - 4);
+ iodev_write(IODEV_UART, &reply, REPLY_SIZE);
+ } else {
+ // Exceptions / hooks keep the current iodev
+ iodev = uartproxy_iodev;
+ reply.start = *start;
+ reply.checksum = checksum(&reply, REPLY_SIZE - 4);
+ iodev_write(iodev, &reply, REPLY_SIZE);
+ }
+
+ while (running) {
+ if (!start) {
+ // Look for commands from any iodev on startup
+ for (iodev = 0; iodev < IODEV_MAX;) {
+ u8 b;
+ if ((iodev_get_usage(iodev) & USAGE_UARTPROXY)) {
+ iodev_handle_events(iodev);
+ if (iodev_can_read(iodev) && iodev_read(iodev, &b, 1) == 1) {
+ iodev_proxy_buffer[iodev] >>= 8;
+ iodev_proxy_buffer[iodev] |= b << 24;
+ if ((iodev_proxy_buffer[iodev] & 0xffffff) == 0xAA55FF)
+ break;
+ }
+ }
+ iodev++;
+ if (iodev == IODEV_MAX)
+ iodev = 0;
+ }
+ } else {
+ // Stick to the current iodev for exceptions
+ do {
+ u8 b;
+ iodev_handle_events(iodev);
+ if (iodev_read(iodev, &b, 1) != 1) {
+ printf("Proxy: iodev read failed, exiting.\n");
+ return -1;
+ }
+ iodev_proxy_buffer[iodev] >>= 8;
+ iodev_proxy_buffer[iodev] |= b << 24;
+ } while ((iodev_proxy_buffer[iodev] & 0xffffff) != 0xAA55FF);
+ }
+
+ memset(&request, 0, sizeof(request));
+ request.type = iodev_proxy_buffer[iodev];
+ bytes = iodev_read(iodev, (&request.type) + 1, REQ_SIZE - 4);
+ if (bytes != REQ_SIZE - 4)
+ continue;
+
+ if (checksum(&(request.type), REQ_SIZE - 4) != request.checksum) {
+ memset(&reply, 0, sizeof(reply));
+ reply.type = request.type;
+ reply.status = ST_CSUMERR;
+ reply.checksum = checksum(&reply, REPLY_SIZE - 4);
+ iodev_write(iodev, &reply, REPLY_SIZE);
+ continue;
+ }
+
+ memset(&reply, 0, sizeof(reply));
+ reply.type = request.type;
+ reply.status = ST_OK;
+
+ uartproxy_iodev = iodev;
+
+ switch (request.type) {
+ case REQ_NOP:
+ enabled_features = request.features & PROXY_FEAT_ALL;
+ if (iodev == IODEV_UART) {
+ // Don't allow disabling checksums on UART
+ enabled_features &= ~PROXY_FEAT_DISABLE_DATA_CSUMS;
+ }
+
+ disable_data_csums = enabled_features & PROXY_FEAT_DISABLE_DATA_CSUMS;
+ reply.features = enabled_features;
+ break;
+ case REQ_PROXY:
+ ret = proxy_process(&request.prequest, &reply.preply);
+ if (ret != 0)
+ running = 0;
+ if (ret < 0)
+ printf("Proxy req error: %d\n", ret);
+ break;
+ case REQ_MEMREAD:
+ if (request.mrequest.size == 0)
+ break;
+ exc_count = 0;
+ exc_guard = GUARD_RETURN;
+ checksum_val = data_checksum((void *)request.mrequest.addr, request.mrequest.size);
+ exc_guard = GUARD_OFF;
+ if (exc_count)
+ reply.status = ST_XFRERR;
+ reply.mreply.dchecksum = checksum_val;
+ break;
+ case REQ_MEMWRITE:
+ exc_count = 0;
+ exc_guard = GUARD_SKIP;
+ if (request.mrequest.size != 0) {
+ // Probe for exception guard
+ // We can't do the whole buffer easily, because we'd drop UART data
+ write8(request.mrequest.addr, 0);
+ write8(request.mrequest.addr + request.mrequest.size - 1, 0);
+ }
+ exc_guard = GUARD_OFF;
+ if (exc_count) {
+ reply.status = ST_XFRERR;
+ break;
+ }
+ bytes = iodev_read(iodev, (void *)request.mrequest.addr, request.mrequest.size);
+ if (bytes != request.mrequest.size) {
+ reply.status = ST_XFRERR;
+ break;
+ }
+ checksum_val = data_checksum((void *)request.mrequest.addr, request.mrequest.size);
+ reply.mreply.dchecksum = checksum_val;
+ if (reply.mreply.dchecksum != request.mrequest.dchecksum) {
+ reply.status = ST_XFRERR;
+ break;
+ }
+ if (disable_data_csums) {
+ // Check the sentinel that should be present after the data
+ u32 sentinel = 0;
+ bytes = iodev_read(iodev, &sentinel, sizeof(sentinel));
+ if (bytes != sizeof(sentinel) || sentinel != DATA_END_SENTINEL) {
+ reply.status = ST_XFRERR;
+ break;
+ }
+ }
+ break;
+ default:
+ reply.status = ST_BADCMD;
+ break;
+ }
+ sysop("dsb sy");
+ sysop("isb");
+ reply.checksum = checksum(&reply, REPLY_SIZE - 4);
+ iodev_lock(uartproxy_iodev);
+ iodev_queue(iodev, &reply, REPLY_SIZE);
+
+ if ((request.type == REQ_MEMREAD) && (reply.status == ST_OK)) {
+ iodev_queue(iodev, (void *)request.mrequest.addr, request.mrequest.size);
+
+ if (disable_data_csums) {
+ // Since there is no checksum, put a sentinel after the data so the receiver
+ // can check that no packets were lost.
+ u32 sentinel = DATA_END_SENTINEL;
+
+ iodev_queue(iodev, &sentinel, sizeof(sentinel));
+ }
+ }
+
+ iodev_unlock(uartproxy_iodev);
+ // Flush all queued data
+ iodev_write(iodev, NULL, 0);
+ iodev_flush(iodev);
+ }
+
+ return ret;
+}
+
+void uartproxy_send_event(u16 event_type, void *data, u16 length)
+{
+ UartEventHdr hdr;
+ u32 csum;
+
+ hdr.type = REQ_EVENT;
+ hdr.len = length;
+ hdr.event_type = event_type;
+
+ if (disable_data_csums) {
+ csum = CHECKSUM_SENTINEL;
+ } else {
+ csum = checksum_start(&hdr, sizeof(UartEventHdr));
+ csum = checksum_finish(checksum_add(data, length, csum));
+ }
+ iodev_lock(uartproxy_iodev);
+ iodev_queue(uartproxy_iodev, &hdr, sizeof(UartEventHdr));
+ iodev_queue(uartproxy_iodev, data, length);
+ iodev_write(uartproxy_iodev, &csum, sizeof(csum));
+ iodev_unlock(uartproxy_iodev);
+}
diff --git a/tools/src/uartproxy.h b/tools/src/uartproxy.h
new file mode 100644
index 0000000..23ddd67
--- /dev/null
+++ b/tools/src/uartproxy.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef __UARTPROXY_H__
+#define __UARTPROXY_H__
+
+#include "iodev.h"
+
+extern iodev_id_t uartproxy_iodev;
+
+typedef enum _uartproxy_boot_reason_t {
+ START_BOOT,
+ START_EXCEPTION,
+ START_EXCEPTION_LOWER,
+ START_HV,
+} uartproxy_boot_reason_t;
+
+typedef enum _uartproxy_exc_code_t {
+ EXC_SYNC,
+ EXC_IRQ,
+ EXC_FIQ,
+ EXC_SERROR,
+} uartproxy_exc_code_t;
+
+typedef enum _uartproxy_exc_ret_t {
+ EXC_RET_UNHANDLED = 1,
+ EXC_RET_HANDLED = 2,
+ EXC_EXIT_GUEST = 3,
+} uartproxy_exc_ret_t;
+
+typedef enum _uartproxy_event_type_t {
+ EVT_MMIOTRACE = 1,
+ EVT_IRQTRACE = 2,
+} uartproxy_event_type_t;
+
+struct uartproxy_msg_start {
+ u32 reason;
+ u32 code;
+ void *info;
+ void *reserved;
+};
+
+int uartproxy_run(struct uartproxy_msg_start *start);
+void uartproxy_send_event(u16 event_type, void *data, u16 length);
+
+#endif
diff --git a/tools/src/usb.c b/tools/src/usb.c
new file mode 100644
index 0000000..1f516a1
--- /dev/null
+++ b/tools/src/usb.c
@@ -0,0 +1,343 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "usb.h"
+#include "adt.h"
+#include "dart.h"
+#include "i2c.h"
+#include "iodev.h"
+#include "malloc.h"
+#include "pmgr.h"
+#include "tps6598x.h"
+#include "types.h"
+#include "usb_dwc3.h"
+#include "usb_dwc3_regs.h"
+#include "utils.h"
+#include "vsprintf.h"
+
+struct usb_drd_regs {
+ uintptr_t drd_regs;
+ uintptr_t drd_regs_unk3;
+ uintptr_t atc;
+};
+
+#if USB_IODEV_COUNT > 100
+#error "USB_IODEV_COUNT is limited to 100 to prevent overflow in ADT path names"
+#endif
+
+// the length of the format string is used as the buffer size, which
+// limits the USB instance numbers to a reasonable 2 digits
+#define FMT_DART_PATH "/arm-io/dart-usb%u"
+#define FMT_DART_MAPPER_PATH "/arm-io/dart-usb%u/mapper-usb%u"
+#define FMT_ATC_PATH "/arm-io/atc-phy%u"
+#define FMT_DRD_PATH "/arm-io/usb-drd%u"
+#define FMT_HPM_PATH "/arm-io/i2c0/hpmBusManager/hpm%u"
+
+static tps6598x_irq_state_t tps6598x_irq_state[USB_IODEV_COUNT];
+static bool usb_is_initialized = false;
+
+static dart_dev_t *usb_dart_init(u32 idx)
+{
+ int mapper_offset;
+ char path[sizeof(FMT_DART_MAPPER_PATH)];
+
+ snprintf(path, sizeof(path), FMT_DART_MAPPER_PATH, idx, idx);
+ mapper_offset = adt_path_offset(adt, path);
+ if (mapper_offset < 0) {
+ // Device not present
+ return NULL;
+ }
+
+ u32 dart_idx;
+ if (ADT_GETPROP(adt, mapper_offset, "reg", &dart_idx) < 0) {
+ printf("usb: Error getting DART %s device index/\n", path);
+ return NULL;
+ }
+
+ snprintf(path, sizeof(path), FMT_DART_PATH, idx);
+ return dart_init_adt(path, 1, dart_idx, false);
+}
+
+static int usb_drd_get_regs(u32 idx, struct usb_drd_regs *regs)
+{
+ int adt_drd_path[8];
+ int adt_drd_offset;
+ int adt_phy_path[8];
+ int adt_phy_offset;
+ char phy_path[sizeof(FMT_ATC_PATH)];
+ char drd_path[sizeof(FMT_DRD_PATH)];
+
+ snprintf(drd_path, sizeof(drd_path), FMT_DRD_PATH, idx);
+ adt_drd_offset = adt_path_offset_trace(adt, drd_path, adt_drd_path);
+ if (adt_drd_offset < 0) {
+ // Nonexistent device
+ return -1;
+ }
+
+ snprintf(phy_path, sizeof(phy_path), FMT_ATC_PATH, idx);
+ adt_phy_offset = adt_path_offset_trace(adt, phy_path, adt_phy_path);
+ if (adt_phy_offset < 0) {
+ printf("usb: Error getting phy node %s\n", phy_path);
+ return -1;
+ }
+
+ if (adt_get_reg(adt, adt_phy_path, "reg", 0, &regs->atc, NULL) < 0) {
+ printf("usb: Error getting reg with index 0 for %s.\n", phy_path);
+ return -1;
+ }
+ if (adt_get_reg(adt, adt_drd_path, "reg", 0, &regs->drd_regs, NULL) < 0) {
+ printf("usb: Error getting reg with index 0 for %s.\n", drd_path);
+ return -1;
+ }
+ if (adt_get_reg(adt, adt_drd_path, "reg", 3, &regs->drd_regs_unk3, NULL) < 0) {
+ printf("usb: Error getting reg with index 3 for %s.\n", drd_path);
+ return -1;
+ }
+
+ return 0;
+}
+
+int usb_phy_bringup(u32 idx)
+{
+ char path[24];
+
+ if (idx >= USB_IODEV_COUNT)
+ return -1;
+
+ struct usb_drd_regs usb_regs;
+ if (usb_drd_get_regs(idx, &usb_regs) < 0)
+ return -1;
+
+ snprintf(path, sizeof(path), FMT_ATC_PATH, idx);
+ if (pmgr_adt_power_enable(path) < 0)
+ return -1;
+
+ snprintf(path, sizeof(path), FMT_DART_PATH, idx);
+ if (pmgr_adt_power_enable(path) < 0)
+ return -1;
+
+ snprintf(path, sizeof(path), FMT_DRD_PATH, idx);
+ if (pmgr_adt_power_enable(path) < 0)
+ return -1;
+
+ write32(usb_regs.atc + 0x08, 0x01c1000f);
+ write32(usb_regs.atc + 0x04, 0x00000003);
+ write32(usb_regs.atc + 0x04, 0x00000000);
+ write32(usb_regs.atc + 0x1c, 0x008c0813);
+ write32(usb_regs.atc + 0x00, 0x00000002);
+
+ write32(usb_regs.drd_regs_unk3 + 0x0c, 0x00000002);
+ write32(usb_regs.drd_regs_unk3 + 0x0c, 0x00000022);
+ write32(usb_regs.drd_regs_unk3 + 0x1c, 0x00000021);
+ write32(usb_regs.drd_regs_unk3 + 0x20, 0x00009332);
+
+ return 0;
+}
+
+dwc3_dev_t *usb_iodev_bringup(u32 idx)
+{
+ dart_dev_t *usb_dart = usb_dart_init(idx);
+ if (!usb_dart)
+ return NULL;
+
+ struct usb_drd_regs usb_reg;
+ if (usb_drd_get_regs(idx, &usb_reg) < 0)
+ return NULL;
+
+ return usb_dwc3_init(usb_reg.drd_regs, usb_dart);
+}
+
+#define USB_IODEV_WRAPPER(name, pipe) \
+ static ssize_t usb_##name##_can_read(void *dev) \
+ { \
+ return usb_dwc3_can_read(dev, pipe); \
+ } \
+ \
+ static bool usb_##name##_can_write(void *dev) \
+ { \
+ return usb_dwc3_can_write(dev, pipe); \
+ } \
+ \
+ static ssize_t usb_##name##_read(void *dev, void *buf, size_t count) \
+ { \
+ return usb_dwc3_read(dev, pipe, buf, count); \
+ } \
+ \
+ static ssize_t usb_##name##_write(void *dev, const void *buf, size_t count) \
+ { \
+ return usb_dwc3_write(dev, pipe, buf, count); \
+ } \
+ \
+ static ssize_t usb_##name##_queue(void *dev, const void *buf, size_t count) \
+ { \
+ return usb_dwc3_queue(dev, pipe, buf, count); \
+ } \
+ \
+ static void usb_##name##_handle_events(void *dev) \
+ { \
+ usb_dwc3_handle_events(dev); \
+ } \
+ \
+ static void usb_##name##_flush(void *dev) \
+ { \
+ usb_dwc3_flush(dev, pipe); \
+ }
+
+USB_IODEV_WRAPPER(0, CDC_ACM_PIPE_0)
+USB_IODEV_WRAPPER(1, CDC_ACM_PIPE_1)
+
+static struct iodev_ops iodev_usb_ops = {
+ .can_read = usb_0_can_read,
+ .can_write = usb_0_can_write,
+ .read = usb_0_read,
+ .write = usb_0_write,
+ .queue = usb_0_queue,
+ .flush = usb_0_flush,
+ .handle_events = usb_0_handle_events,
+};
+
+static struct iodev_ops iodev_usb_sec_ops = {
+ .can_read = usb_1_can_read,
+ .can_write = usb_1_can_write,
+ .read = usb_1_read,
+ .write = usb_1_write,
+ .queue = usb_1_queue,
+ .flush = usb_1_flush,
+ .handle_events = usb_1_handle_events,
+};
+
+struct iodev iodev_usb_vuart = {
+ .ops = &iodev_usb_sec_ops,
+ .usage = 0,
+ .lock = SPINLOCK_INIT,
+};
+
+static tps6598x_dev_t *hpm_init(i2c_dev_t *i2c, const char *hpm_path)
+{
+ tps6598x_dev_t *tps = tps6598x_init(hpm_path, i2c);
+ if (!tps) {
+ printf("usb: tps6598x_init failed for %s.\n", hpm_path);
+ return NULL;
+ }
+
+ if (tps6598x_powerup(tps) < 0) {
+ printf("usb: tps6598x_powerup failed for %s.\n", hpm_path);
+ tps6598x_shutdown(tps);
+ return NULL;
+ }
+
+ return tps;
+}
+
+void usb_init(void)
+{
+ char hpm_path[sizeof(FMT_HPM_PATH)];
+
+ if (usb_is_initialized)
+ return;
+
+ i2c_dev_t *i2c = i2c_init("/arm-io/i2c0");
+ if (!i2c) {
+ printf("usb: i2c init failed.\n");
+ return;
+ }
+
+ for (u32 idx = 0; idx < USB_IODEV_COUNT; ++idx) {
+ snprintf(hpm_path, sizeof(hpm_path), FMT_HPM_PATH, idx);
+ if (adt_path_offset(adt, hpm_path) < 0)
+ continue; // device not present
+ tps6598x_dev_t *tps = hpm_init(i2c, hpm_path);
+ if (!tps) {
+ printf("usb: failed to init hpm%d\n", idx);
+ continue;
+ }
+
+ if (tps6598x_disable_irqs(tps, &tps6598x_irq_state[idx]))
+ printf("usb: unable to disable IRQ masks for hpm%d\n", idx);
+
+ tps6598x_shutdown(tps);
+ }
+
+ i2c_shutdown(i2c);
+
+ for (int idx = 0; idx < USB_IODEV_COUNT; ++idx)
+ usb_phy_bringup(idx); /* Fails on missing devices, just continue */
+
+ usb_is_initialized = true;
+}
+
+void usb_hpm_restore_irqs(bool force)
+{
+ char hpm_path[sizeof(FMT_HPM_PATH)];
+
+ i2c_dev_t *i2c = i2c_init("/arm-io/i2c0");
+ if (!i2c) {
+ printf("usb: i2c init failed.\n");
+ return;
+ }
+
+ for (u32 idx = 0; idx < USB_IODEV_COUNT; ++idx) {
+ if (iodev_get_usage(IODEV_USB0 + idx) && !force)
+ continue;
+
+ if (tps6598x_irq_state[idx].valid) {
+ snprintf(hpm_path, sizeof(hpm_path), FMT_HPM_PATH, idx);
+ if (adt_path_offset(adt, hpm_path) < 0)
+ continue; // device not present
+ tps6598x_dev_t *tps = hpm_init(i2c, hpm_path);
+ if (!tps)
+ continue;
+
+ if (tps6598x_restore_irqs(tps, &tps6598x_irq_state[idx]))
+ printf("usb: unable to restore IRQ masks for hpm%d\n", idx);
+
+ tps6598x_shutdown(tps);
+ }
+ }
+
+ i2c_shutdown(i2c);
+}
+
+void usb_iodev_init(void)
+{
+ for (int i = 0; i < USB_IODEV_COUNT; i++) {
+ dwc3_dev_t *opaque;
+ struct iodev *usb_iodev;
+
+ opaque = usb_iodev_bringup(i);
+ if (!opaque)
+ continue;
+
+ usb_iodev = memalign(SPINLOCK_ALIGN, sizeof(*usb_iodev));
+ if (!usb_iodev)
+ continue;
+
+ usb_iodev->ops = &iodev_usb_ops;
+ usb_iodev->opaque = opaque;
+ usb_iodev->usage = USAGE_CONSOLE | USAGE_UARTPROXY;
+ spin_init(&usb_iodev->lock);
+
+ iodev_register_device(IODEV_USB0 + i, usb_iodev);
+ printf("USB%d: initialized at %p\n", i, opaque);
+ }
+}
+
+void usb_iodev_shutdown(void)
+{
+ for (int i = 0; i < USB_IODEV_COUNT; i++) {
+ struct iodev *usb_iodev = iodev_unregister_device(IODEV_USB0 + i);
+ if (!usb_iodev)
+ continue;
+
+ printf("USB%d: shutdown\n", i);
+ usb_dwc3_shutdown(usb_iodev->opaque);
+ free(usb_iodev);
+ }
+}
+
+void usb_iodev_vuart_setup(iodev_id_t iodev)
+{
+ if (iodev < IODEV_USB0 || iodev >= IODEV_USB0 + USB_IODEV_COUNT)
+ return;
+
+ iodev_usb_vuart.opaque = iodev_get_opaque(iodev);
+}
diff --git a/tools/src/usb.h b/tools/src/usb.h
new file mode 100644
index 0000000..1ba859a
--- /dev/null
+++ b/tools/src/usb.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef USB_H
+#define USB_H
+
+#include "iodev.h"
+#include "types.h"
+#include "usb_dwc3.h"
+
+int usb_phy_bringup(u32 idx);
+dwc3_dev_t *usb_iodev_bringup(u32 idx);
+
+void usb_init(void);
+void usb_hpm_restore_irqs(bool force);
+void usb_iodev_init(void);
+void usb_iodev_shutdown(void);
+void usb_iodev_vuart_setup(iodev_id_t iodev);
+
+#endif
diff --git a/tools/src/usb_dwc3.c b/tools/src/usb_dwc3.c
new file mode 100644
index 0000000..de05c95
--- /dev/null
+++ b/tools/src/usb_dwc3.c
@@ -0,0 +1,1416 @@
+/* SPDX-License-Identifier: MIT */
+
+/*
+ * Useful references:
+ * - TI KeyStone II Architecture Universal Serial Bus 3.0 (USB 3.0) User's Guide
+ * Literature Number: SPRUHJ7A, https://www.ti.com/lit/ug/spruhj7a/spruhj7a.pdf
+ * - https://www.beyondlogic.org/usbnutshell/usb1.shtml
+ */
+
+#include "../build/build_tag.h"
+
+#include "usb_dwc3.h"
+#include "dart.h"
+#include "malloc.h"
+#include "memory.h"
+#include "ringbuffer.h"
+#include "string.h"
+#include "types.h"
+#include "usb_dwc3_regs.h"
+#include "usb_types.h"
+#include "utils.h"
+
+#define MAX_ENDPOINTS 16
+#define CDC_BUFFER_SIZE SZ_1M
+
+#define usb_debug_printf(fmt, ...) debug_printf("usb-dwc3@%lx: " fmt, dev->regs, ##__VA_ARGS__)
+
+#define STRING_DESCRIPTOR_LANGUAGES 0
+#define STRING_DESCRIPTOR_MANUFACTURER 1
+#define STRING_DESCRIPTOR_PRODUCT 2
+#define STRING_DESCRIPTOR_SERIAL 3
+
+#define CDC_DEVICE_CLASS 0x02
+
+#define CDC_USB_VID 0x1209
+#define CDC_USB_PID 0x316d
+
+#define CDC_INTERFACE_CLASS 0x02
+#define CDC_INTERFACE_CLASS_DATA 0x0a
+#define CDC_INTERFACE_SUBCLASS_ACM 0x02
+#define CDC_INTERFACE_PROTOCOL_NONE 0x00
+#define CDC_INTERFACE_PROTOCOL_AT 0x01
+
+#define DWC3_SCRATCHPAD_SIZE SZ_16K
+#define TRB_BUFFER_SIZE SZ_16K
+#define XFER_BUFFER_SIZE (SZ_16K * MAX_ENDPOINTS * 2)
+#define PAD_BUFFER_SIZE SZ_16K
+
+#define TRBS_PER_EP (TRB_BUFFER_SIZE / (MAX_ENDPOINTS * sizeof(struct dwc3_trb)))
+#define XFER_BUFFER_BYTES_PER_EP (XFER_BUFFER_SIZE / MAX_ENDPOINTS)
+
+#define XFER_SIZE SZ_16K
+
+#define SCRATCHPAD_IOVA 0xbeef0000
+#define EVENT_BUFFER_IOVA 0xdead0000
+#define XFER_BUFFER_IOVA 0xbabe0000
+#define TRB_BUFFER_IOVA 0xf00d0000
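+
+/* arbitrary fixed IOVAs (chosen as recognizable magic values) at which the
+ * corresponding buffers are mapped through the DART */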
+
+/* these map to the control endpoint 0x00/0x80 */
+#define USB_LEP_CTRL_OUT 0
+#define USB_LEP_CTRL_IN 1
+
+/* maps to interrupt endpoint 0x81 */
+#define USB_LEP_CDC_INTR_IN 3
+
+/* these map to physical endpoints 0x02 and 0x82 */
+#define USB_LEP_CDC_BULK_OUT 4
+#define USB_LEP_CDC_BULK_IN 5
+
+/* maps to interrupt endpoint 0x83 */
+#define USB_LEP_CDC_INTR_IN_2 7
+
+/* these map to physical endpoints 0x04 and 0x84 */
+#define USB_LEP_CDC_BULK_OUT_2 8
+#define USB_LEP_CDC_BULK_IN_2 9
+
+/* content doesn't matter at all, this is the setting linux writes by default */
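+/* decodes to 9600 baud (0x2580), 1 stop bit, no parity, 8 data bits */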
+static const u8 cdc_default_line_coding[] = {0x80, 0x25, 0x00, 0x00, 0x00, 0x00, 0x08};
+
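+/*
+ * Control transfers on EP0 run through a SETUP phase, an optional DATA phase
+ * and a STATUS phase; this state machine tracks the current phase and the
+ * direction of the data flow.
+ */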
+enum ep0_state {
+ USB_DWC3_EP0_STATE_IDLE,
+ USB_DWC3_EP0_STATE_SETUP_HANDLE,
+ USB_DWC3_EP0_STATE_DATA_SEND,
+ USB_DWC3_EP0_STATE_DATA_RECV,
+ USB_DWC3_EP0_STATE_DATA_SEND_DONE,
+ USB_DWC3_EP0_STATE_DATA_RECV_DONE,
+ USB_DWC3_EP0_STATE_DATA_RECV_STATUS,
+ USB_DWC3_EP0_STATE_DATA_RECV_STATUS_DONE,
+ USB_DWC3_EP0_STATE_DATA_SEND_STATUS,
+ USB_DWC3_EP0_STATE_DATA_SEND_STATUS_DONE
+};
+
+typedef struct dwc3_dev {
+ /* USB DRD */
+ uintptr_t regs;
+ dart_dev_t *dart;
+
+ enum ep0_state ep0_state;
+ const void *ep0_buffer;
+ u32 ep0_buffer_len;
+ void *ep0_read_buffer;
+ u32 ep0_read_buffer_len;
+
+ void *evtbuffer;
+ u32 evt_buffer_offset;
+
+ void *scratchpad;
+ void *xferbuffer;
+ struct dwc3_trb *trbs;
+
+ struct {
+ bool xfer_in_progress;
+ bool zlp_pending;
+
+ void *xfer_buffer;
+ uintptr_t xfer_buffer_iova;
+
+ struct dwc3_trb *trb;
+ uintptr_t trb_iova;
+ } endpoints[MAX_ENDPOINTS];
+
+ struct {
+ ringbuffer_t *host2device;
+ ringbuffer_t *device2host;
+ u8 ep_intr;
+ u8 ep_in;
+ u8 ep_out;
+ bool ready;
+ /* USB ACM CDC serial */
+ u8 cdc_line_coding[7];
+ } pipe[CDC_ACM_PIPE_MAX];
+
+} dwc3_dev_t;
+
+static const struct usb_string_descriptor str_manufacturer =
+ make_usb_string_descriptor("Asahi Linux");
+static const struct usb_string_descriptor str_product =
+ make_usb_string_descriptor("m1n1 uartproxy " BUILD_TAG);
+static const struct usb_string_descriptor str_serial = make_usb_string_descriptor("P-0");
+
+static const struct usb_string_descriptor_languages str_langs = {
+ .bLength = sizeof(str_langs) + 2,
+ .bDescriptorType = USB_STRING_DESCRIPTOR,
+ .wLANGID = {USB_LANGID_EN_US},
+};
+
+struct cdc_dev_desc {
+ const struct usb_configuration_descriptor configuration;
+ const struct usb_interface_descriptor interface_management;
+ const struct cdc_union_functional_descriptor cdc_union_func;
+ const struct usb_endpoint_descriptor endpoint_notification;
+ const struct usb_interface_descriptor interface_data;
+ const struct usb_endpoint_descriptor endpoint_data_in;
+ const struct usb_endpoint_descriptor endpoint_data_out;
+ const struct usb_interface_descriptor sec_interface_management;
+ const struct cdc_union_functional_descriptor sec_cdc_union_func;
+ const struct usb_endpoint_descriptor sec_endpoint_notification;
+ const struct usb_interface_descriptor sec_interface_data;
+ const struct usb_endpoint_descriptor sec_endpoint_data_in;
+ const struct usb_endpoint_descriptor sec_endpoint_data_out;
+} PACKED;
+
+static const struct usb_device_descriptor usb_cdc_device_descriptor = {
+ .bLength = sizeof(struct usb_device_descriptor),
+ .bDescriptorType = USB_DEVICE_DESCRIPTOR,
+ .bcdUSB = 0x0200,
+ .bDeviceClass = CDC_DEVICE_CLASS,
+ .bDeviceSubClass = 0, // unused
+ .bDeviceProtocol = 0, // unused
+ .bMaxPacketSize0 = 64,
+ .idVendor = CDC_USB_VID,
+ .idProduct = CDC_USB_PID,
+ .bcdDevice = 0x0100,
+ .iManufacturer = STRING_DESCRIPTOR_MANUFACTURER,
+ .iProduct = STRING_DESCRIPTOR_PRODUCT,
+ .iSerialNumber = STRING_DESCRIPTOR_SERIAL,
+ .bNumConfigurations = 1,
+};
+
+static const struct cdc_dev_desc cdc_configuration_descriptor = {
+ .configuration =
+ {
+ .bLength = sizeof(cdc_configuration_descriptor.configuration),
+ .bDescriptorType = USB_CONFIGURATION_DESCRIPTOR,
+ .wTotalLength = sizeof(cdc_configuration_descriptor),
+ .bNumInterfaces = 4,
+ .bConfigurationValue = 1,
+ .iConfiguration = 0,
+ .bmAttributes = USB_CONFIGURATION_ATTRIBUTE_RES1 | USB_CONFIGURATION_SELF_POWERED,
+ .bMaxPower = 250,
+
+ },
+ .interface_management =
+ {
+ .bLength = sizeof(cdc_configuration_descriptor.interface_management),
+ .bDescriptorType = USB_INTERFACE_DESCRIPTOR,
+ .bInterfaceNumber = 0,
+ .bAlternateSetting = 0,
+ .bNumEndpoints = 1,
+ .bInterfaceClass = CDC_INTERFACE_CLASS,
+ .bInterfaceSubClass = CDC_INTERFACE_SUBCLASS_ACM,
+ .bInterfaceProtocol = CDC_INTERFACE_PROTOCOL_NONE,
+ .iInterface = 0,
+
+ },
+ .cdc_union_func =
+ {
+ .bFunctionLength = sizeof(cdc_configuration_descriptor.cdc_union_func),
+ .bDescriptorType = USB_CDC_INTERFACE_FUNCTIONAL_DESCRIPTOR,
+ .bDescriptorSubtype = USB_CDC_UNION_SUBTYPE,
+ .bControlInterface = 0,
+ .bDataInterface = 1,
+ },
+ /*
+ * we never use this endpoint and it always stays idle, but it must be present
+ * in the descriptor to make hosts correctly recognize us as an ACM CDC device.
+ */
+ .endpoint_notification =
+ {
+ .bLength = sizeof(cdc_configuration_descriptor.endpoint_notification),
+ .bDescriptorType = USB_ENDPOINT_DESCRIPTOR,
+ .bEndpointAddress = USB_ENDPOINT_ADDR_IN(1),
+ .bmAttributes = USB_ENDPOINT_ATTR_TYPE_INTERRUPT,
+ .wMaxPacketSize = 64,
+ .bInterval = 10,
+
+ },
+ .interface_data =
+ {
+ .bLength = sizeof(cdc_configuration_descriptor.interface_data),
+ .bDescriptorType = USB_INTERFACE_DESCRIPTOR,
+ .bInterfaceNumber = 1,
+ .bAlternateSetting = 0,
+ .bNumEndpoints = 2,
+ .bInterfaceClass = CDC_INTERFACE_CLASS_DATA,
+ .bInterfaceSubClass = 0, // unused
+ .bInterfaceProtocol = 0, // unused
+ .iInterface = 0,
+ },
+ .endpoint_data_in =
+ {
+ .bLength = sizeof(cdc_configuration_descriptor.endpoint_data_in),
+ .bDescriptorType = USB_ENDPOINT_DESCRIPTOR,
+ .bEndpointAddress = USB_ENDPOINT_ADDR_OUT(2),
+ .bmAttributes = USB_ENDPOINT_ATTR_TYPE_BULK,
+ .wMaxPacketSize = 512,
+ .bInterval = 10,
+ },
+ .endpoint_data_out =
+ {
+ .bLength = sizeof(cdc_configuration_descriptor.endpoint_data_out),
+ .bDescriptorType = USB_ENDPOINT_DESCRIPTOR,
+ .bEndpointAddress = USB_ENDPOINT_ADDR_IN(2),
+ .bmAttributes = USB_ENDPOINT_ATTR_TYPE_BULK,
+ .wMaxPacketSize = 512,
+ .bInterval = 10,
+ },
+
+ /*
+ * CDC ACM interface for virtual uart
+ */
+
+ .sec_interface_management =
+ {
+ .bLength = sizeof(cdc_configuration_descriptor.sec_interface_management),
+ .bDescriptorType = USB_INTERFACE_DESCRIPTOR,
+ .bInterfaceNumber = 2,
+ .bAlternateSetting = 0,
+ .bNumEndpoints = 1,
+ .bInterfaceClass = CDC_INTERFACE_CLASS,
+ .bInterfaceSubClass = CDC_INTERFACE_SUBCLASS_ACM,
+ .bInterfaceProtocol = CDC_INTERFACE_PROTOCOL_NONE,
+ .iInterface = 0,
+
+ },
+ .sec_cdc_union_func =
+ {
+ .bFunctionLength = sizeof(cdc_configuration_descriptor.sec_cdc_union_func),
+ .bDescriptorType = USB_CDC_INTERFACE_FUNCTIONAL_DESCRIPTOR,
+ .bDescriptorSubtype = USB_CDC_UNION_SUBTYPE,
+ .bControlInterface = 2,
+ .bDataInterface = 3,
+ },
+ /*
+ * we never use this endpoint and it always stays idle, but it must be present
+ * in the descriptor to make hosts correctly recognize us as an ACM CDC device.
+ */
+ .sec_endpoint_notification =
+ {
+ .bLength = sizeof(cdc_configuration_descriptor.sec_endpoint_notification),
+ .bDescriptorType = USB_ENDPOINT_DESCRIPTOR,
+ .bEndpointAddress = USB_ENDPOINT_ADDR_IN(3),
+ .bmAttributes = USB_ENDPOINT_ATTR_TYPE_INTERRUPT,
+ .wMaxPacketSize = 64,
+ .bInterval = 10,
+
+ },
+ .sec_interface_data =
+ {
+ .bLength = sizeof(cdc_configuration_descriptor.sec_interface_data),
+ .bDescriptorType = USB_INTERFACE_DESCRIPTOR,
+ .bInterfaceNumber = 3,
+ .bAlternateSetting = 0,
+ .bNumEndpoints = 2,
+ .bInterfaceClass = CDC_INTERFACE_CLASS_DATA,
+ .bInterfaceSubClass = 0, // unused
+ .bInterfaceProtocol = 0, // unused
+ .iInterface = 0,
+ },
+ .sec_endpoint_data_in =
+ {
+ .bLength = sizeof(cdc_configuration_descriptor.sec_endpoint_data_in),
+ .bDescriptorType = USB_ENDPOINT_DESCRIPTOR,
+ .bEndpointAddress = USB_ENDPOINT_ADDR_OUT(4),
+ .bmAttributes = USB_ENDPOINT_ATTR_TYPE_BULK,
+ .wMaxPacketSize = 512,
+ .bInterval = 10,
+ },
+ .sec_endpoint_data_out =
+ {
+ .bLength = sizeof(cdc_configuration_descriptor.sec_endpoint_data_out),
+ .bDescriptorType = USB_ENDPOINT_DESCRIPTOR,
+ .bEndpointAddress = USB_ENDPOINT_ADDR_IN(4),
+ .bmAttributes = USB_ENDPOINT_ATTR_TYPE_BULK,
+ .wMaxPacketSize = 512,
+ .bInterval = 10,
+ },
+};
+
+static const struct usb_device_qualifier_descriptor usb_cdc_device_qualifier_descriptor = {
+ .bLength = sizeof(struct usb_device_qualifier_descriptor),
+ .bDescriptorType = USB_DEVICE_QUALIFIER_DESCRIPTOR,
+ .bcdUSB = 0x0200,
+ .bDeviceClass = CDC_DEVICE_CLASS,
+ .bDeviceSubClass = 0, // unused
+ .bDeviceProtocol = 0, // unused
+ .bMaxPacketSize0 = 64,
+ .bNumConfigurations = 0,
+};
+
+static const char *devt_names[] = {
+ "DisconnEvt", "USBRst", "ConnectDone", "ULStChng", "WkUpEvt", "Reserved", "EOPF",
+ "SOF", "Reserved", "ErrticErr", "CmdCmplt", "EvntOverflow", "VndrDevTstRcved"};
+static const char *depvt_names[] = {
+ "Reserved",
+ "XferComplete",
+ "XferInProgress",
+ "XferNotReady",
+ "RxTxFifoEvt (IN->Underrun, OUT->Overrun)",
+ "Reserved",
+ "StreamEvt",
+ "EPCmdCmplt",
+};
+
+static const char *ep0_state_names[] = {
+ "STATE_IDLE",
+ "STATE_SETUP_HANDLE",
+ "STATE_DATA_SEND",
+ "STATE_DATA_RECV",
+ "STATE_DATA_SEND_DONE",
+ "STATE_DATA_RECV_DONE",
+ "STATE_DATA_RECV_STATUS",
+ "STATE_DATA_RECV_STATUS_DONE",
+ "STATE_DATA_SEND_STATUS",
+ "STATE_DATA_SEND_STATUS_DONE",
+};
+
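+/*
+ * Convert a USB endpoint address (direction in bit 7, number in the low bits)
+ * to a DWC3 physical endpoint number: number * 2, plus 1 for IN endpoints.
+ * For example, 0x81 (EP1 IN) maps to 3 and 0x02 (EP2 OUT) maps to 4.
+ */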
+static u8 ep_to_num(u8 epno)
+{
+ return (epno << 1) | (epno >> 7);
+}
+
+static int usb_dwc3_command(dwc3_dev_t *dev, u32 command, u32 par)
+{
+ write32(dev->regs + DWC3_DGCMDPAR, par);
+ write32(dev->regs + DWC3_DGCMD, command | DWC3_DGCMD_CMDACT);
+
+ if (poll32(dev->regs + DWC3_DGCMD, DWC3_DGCMD_CMDACT, 0, 1000)) {
+ usb_debug_printf("timeout while waiting for DWC3_DGCMD_CMDACT to clear.\n");
+ return -1;
+ }
+
+ return DWC3_DGCMD_STATUS(read32(dev->regs + DWC3_DGCMD));
+}
+
+static int usb_dwc3_ep_command(dwc3_dev_t *dev, u8 ep, u32 command, u32 par0, u32 par1, u32 par2)
+{
+ write32(dev->regs + DWC3_DEPCMDPAR0(ep), par0);
+ write32(dev->regs + DWC3_DEPCMDPAR1(ep), par1);
+ write32(dev->regs + DWC3_DEPCMDPAR2(ep), par2);
+ write32(dev->regs + DWC3_DEPCMD(ep), command | DWC3_DEPCMD_CMDACT);
+
+ if (poll32(dev->regs + DWC3_DEPCMD(ep), DWC3_DEPCMD_CMDACT, 0, 1000)) {
+ usb_debug_printf("timeout while waiting for DWC3_DEPCMD_CMDACT to clear.\n");
+ return -1;
+ }
+
+ return DWC3_DEPCMD_STATUS(read32(dev->regs + DWC3_DEPCMD(ep)));
+}
+
+static int usb_dwc3_ep_configure(dwc3_dev_t *dev, u8 ep, u8 type, u32 max_packet_len)
+{
+ u32 param0, param1;
+
+ param0 = DWC3_DEPCFG_EP_TYPE(type) | DWC3_DEPCFG_MAX_PACKET_SIZE(max_packet_len);
+ if (type != DWC3_DEPCMD_TYPE_CONTROL)
+ param0 |= DWC3_DEPCFG_FIFO_NUMBER(ep);
+
+ param1 =
+ DWC3_DEPCFG_XFER_COMPLETE_EN | DWC3_DEPCFG_XFER_NOT_READY_EN | DWC3_DEPCFG_EP_NUMBER(ep);
+
+ if (usb_dwc3_ep_command(dev, ep, DWC3_DEPCMD_SETEPCONFIG, param0, param1, 0)) {
+ usb_debug_printf("cannot issue DWC3_DEPCMD_SETEPCONFIG for EP %d.\n", ep);
+ return -1;
+ }
+
+ if (usb_dwc3_ep_command(dev, ep, DWC3_DEPCMD_SETTRANSFRESOURCE, 1, 0, 0)) {
+ usb_debug_printf("cannot issue DWC3_DEPCMD_SETTRANSFRESOURCE EP %d.\n", ep);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int usb_dwc3_ep_start_transfer(dwc3_dev_t *dev, u8 ep, uintptr_t trb_iova)
+{
+ if (dev->endpoints[ep].xfer_in_progress) {
+ usb_debug_printf(
+ "Tried to start a transfer for ep 0x%02x while another transfer is ongoing.\n", ep);
+ return -1;
+ }
+
+ dma_wmb();
+ int ret =
+ usb_dwc3_ep_command(dev, ep, DWC3_DEPCMD_STARTTRANSFER, trb_iova >> 32, (u32)trb_iova, 0);
+ if (ret) {
+ usb_debug_printf("cannot issue DWC3_DEPCMD_STARTTRANSFER for EP %d: %d.\n", ep, ret);
+ return ret;
+ }
+
+ dev->endpoints[ep].xfer_in_progress = true;
+ return 0;
+}
+
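+/*
+ * TRBs (transfer request blocks) are the DWC3 DMA descriptors: a 64-bit
+ * buffer pointer split across bpl/bph, a size field and control flags. HWO
+ * hands ownership to the hardware, LST marks the last TRB of a transfer and
+ * ISP_IMI asks for an interrupt on short packets. Each endpoint here reuses
+ * a single statically allocated TRB.
+ */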
+static uintptr_t usb_dwc3_init_trb(dwc3_dev_t *dev, u8 ep, struct dwc3_trb **trb)
+{
+ struct dwc3_trb *next_trb = dev->endpoints[ep].trb;
+
+ if (trb)
+ *trb = next_trb;
+
+ next_trb->ctrl = DWC3_TRB_CTRL_HWO | DWC3_TRB_CTRL_ISP_IMI | DWC3_TRB_CTRL_LST;
+ next_trb->size = DWC3_TRB_SIZE_LENGTH(0);
+ next_trb->bph = 0;
+ next_trb->bpl = dev->endpoints[ep].xfer_buffer_iova;
+
+ return dev->endpoints[ep].trb_iova;
+}
+
+static int usb_dwc3_run_data_trb(dwc3_dev_t *dev, u8 ep, u32 data_len)
+{
+ struct dwc3_trb *trb;
+ uintptr_t trb_iova = usb_dwc3_init_trb(dev, ep, &trb);
+
+ trb->ctrl |= DWC3_TRBCTL_CONTROL_DATA;
+ trb->size = DWC3_TRB_SIZE_LENGTH(data_len);
+
+ return usb_dwc3_ep_start_transfer(dev, ep, trb_iova);
+}
+
+static int usb_dwc3_start_setup_phase(dwc3_dev_t *dev)
+{
+ struct dwc3_trb *trb;
+ uintptr_t trb_iova = usb_dwc3_init_trb(dev, USB_LEP_CTRL_OUT, &trb);
+
+ trb->ctrl |= DWC3_TRBCTL_CONTROL_SETUP;
+ trb->size = DWC3_TRB_SIZE_LENGTH(sizeof(union usb_setup_packet));
+ return usb_dwc3_ep_start_transfer(dev, USB_LEP_CTRL_OUT, trb_iova);
+}
+
+static int usb_dwc3_start_status_phase(dwc3_dev_t *dev, u8 ep)
+{
+ struct dwc3_trb *trb;
+ uintptr_t trb_iova = usb_dwc3_init_trb(dev, ep, &trb);
+
+ trb->ctrl |= DWC3_TRBCTL_CONTROL_STATUS2;
+ trb->size = DWC3_TRB_SIZE_LENGTH(0);
+
+ return usb_dwc3_ep_start_transfer(dev, ep, trb_iova);
+}
+
+static int usb_dwc3_ep0_start_data_send_phase(dwc3_dev_t *dev)
+{
+ if (dev->ep0_buffer_len > XFER_BUFFER_BYTES_PER_EP) {
+ usb_debug_printf("Cannot xfer more than %d bytes but was requested to xfer %d on ep 1\n",
+ XFER_BUFFER_BYTES_PER_EP, dev->ep0_buffer_len);
+ return -1;
+ }
+
+ memset(dev->endpoints[USB_LEP_CTRL_IN].xfer_buffer, 0, 64);
+ memcpy(dev->endpoints[USB_LEP_CTRL_IN].xfer_buffer, dev->ep0_buffer, dev->ep0_buffer_len);
+
+ return usb_dwc3_run_data_trb(dev, USB_LEP_CTRL_IN, dev->ep0_buffer_len);
+}
+
+static int usb_dwc3_ep0_start_data_recv_phase(dwc3_dev_t *dev)
+{
+ if (dev->ep0_buffer_len > XFER_BUFFER_BYTES_PER_EP) {
+ usb_debug_printf("Cannot xfer more than %d bytes but was requested to xfer %d on ep 0\n",
+ XFER_BUFFER_BYTES_PER_EP, dev->ep0_buffer_len);
+ return -1;
+ }
+
+ memset(dev->endpoints[USB_LEP_CTRL_OUT].xfer_buffer, 0, 64);
+
+ return usb_dwc3_run_data_trb(dev, USB_LEP_CTRL_OUT, 64);
+}
+
+static void usb_dwc3_ep_set_stall(dwc3_dev_t *dev, u8 ep, u8 stall)
+{
+ if (stall)
+ usb_dwc3_ep_command(dev, ep, DWC3_DEPCMD_SETSTALL, 0, 0, 0);
+ else
+ usb_dwc3_ep_command(dev, ep, DWC3_DEPCMD_CLEARSTALL, 0, 0, 0);
+}
+
+static void usb_cdc_get_string_descriptor(u32 index, const void **descriptor, u16 *descriptor_len)
+{
+ switch (index) {
+ case STRING_DESCRIPTOR_LANGUAGES:
+ *descriptor = &str_langs;
+ *descriptor_len = str_langs.bLength;
+ break;
+ case STRING_DESCRIPTOR_MANUFACTURER:
+ *descriptor = &str_manufacturer;
+ *descriptor_len = str_manufacturer.bLength;
+ break;
+ case STRING_DESCRIPTOR_PRODUCT:
+ *descriptor = &str_product;
+ *descriptor_len = str_product.bLength;
+ break;
+ case STRING_DESCRIPTOR_SERIAL:
+ *descriptor = &str_serial;
+ *descriptor_len = str_serial.bLength;
+ break;
+ default:
+ *descriptor = NULL;
+ *descriptor_len = 0;
+ }
+}
+
+static int
+usb_dwc3_handle_ep0_get_descriptor(dwc3_dev_t *dev,
+ const struct usb_setup_packet_get_descriptor *get_descriptor)
+{
+ const void *descriptor = NULL;
+ u16 descriptor_len = 0;
+
+ switch (get_descriptor->type) {
+ case USB_DEVICE_DESCRIPTOR:
+ descriptor = &usb_cdc_device_descriptor;
+ descriptor_len = usb_cdc_device_descriptor.bLength;
+ break;
+ case USB_CONFIGURATION_DESCRIPTOR:
+ descriptor = &cdc_configuration_descriptor;
+ descriptor_len = cdc_configuration_descriptor.configuration.wTotalLength;
+ break;
+ case USB_STRING_DESCRIPTOR:
+ usb_cdc_get_string_descriptor(get_descriptor->index, &descriptor, &descriptor_len);
+ break;
+ case USB_DEVICE_QUALIFIER_DESCRIPTOR:
+ descriptor = &usb_cdc_device_qualifier_descriptor;
+ descriptor_len = usb_cdc_device_qualifier_descriptor.bLength;
+ break;
+ default:
+ usb_debug_printf("Unknown descriptor type: %d\n", get_descriptor->type);
+ break;
+ }
+
+ if (descriptor) {
+ dev->ep0_buffer = descriptor;
+ dev->ep0_buffer_len = min(get_descriptor->wLength, descriptor_len);
+ return 0;
+ } else {
+ return -1;
+ }
+}
+
+static void usb_dwc3_ep0_handle_standard_device(dwc3_dev_t *dev,
+ const union usb_setup_packet *setup)
+{
+ switch (setup->raw.bRequest) {
+ case USB_REQUEST_SET_ADDRESS:
+ mask32(dev->regs + DWC3_DCFG, DWC3_DCFG_DEVADDR_MASK,
+ DWC3_DCFG_DEVADDR(setup->set_address.address));
+ dev->ep0_state = USB_DWC3_EP0_STATE_DATA_SEND_STATUS;
+ break;
+
+ case USB_REQUEST_SET_CONFIGURATION:
+ switch (setup->set_configuration.configuration) {
+ case 0:
+ clear32(dev->regs + DWC3_DALEPENA, DWC3_DALEPENA_EP(USB_LEP_CDC_BULK_OUT));
+ clear32(dev->regs + DWC3_DALEPENA, DWC3_DALEPENA_EP(USB_LEP_CDC_BULK_IN));
+ clear32(dev->regs + DWC3_DALEPENA, DWC3_DALEPENA_EP(USB_LEP_CDC_INTR_IN));
+ clear32(dev->regs + DWC3_DALEPENA, DWC3_DALEPENA_EP(USB_LEP_CDC_BULK_OUT_2));
+ clear32(dev->regs + DWC3_DALEPENA, DWC3_DALEPENA_EP(USB_LEP_CDC_BULK_IN_2));
+ clear32(dev->regs + DWC3_DALEPENA, DWC3_DALEPENA_EP(USB_LEP_CDC_INTR_IN_2));
+ dev->ep0_state = USB_DWC3_EP0_STATE_DATA_SEND_STATUS;
+ for (int i = 0; i < CDC_ACM_PIPE_MAX; i++)
+ dev->pipe[i].ready = false;
+ break;
+ case 1:
+ /* we've already configured these endpoints so that we just need to enable them
+ * here */
+ set32(dev->regs + DWC3_DALEPENA, DWC3_DALEPENA_EP(USB_LEP_CDC_BULK_OUT));
+ set32(dev->regs + DWC3_DALEPENA, DWC3_DALEPENA_EP(USB_LEP_CDC_BULK_IN));
+ set32(dev->regs + DWC3_DALEPENA, DWC3_DALEPENA_EP(USB_LEP_CDC_INTR_IN));
+ set32(dev->regs + DWC3_DALEPENA, DWC3_DALEPENA_EP(USB_LEP_CDC_BULK_OUT_2));
+ set32(dev->regs + DWC3_DALEPENA, DWC3_DALEPENA_EP(USB_LEP_CDC_BULK_IN_2));
+ set32(dev->regs + DWC3_DALEPENA, DWC3_DALEPENA_EP(USB_LEP_CDC_INTR_IN_2));
+ dev->ep0_state = USB_DWC3_EP0_STATE_DATA_SEND_STATUS;
+ break;
+ default:
+ usb_dwc3_ep_set_stall(dev, 0, 1);
+ dev->ep0_state = USB_DWC3_EP0_STATE_IDLE;
+ break;
+ }
+ break;
+
+ case USB_REQUEST_GET_DESCRIPTOR:
+ if (usb_dwc3_handle_ep0_get_descriptor(dev, &setup->get_descriptor) < 0) {
+ usb_dwc3_ep_set_stall(dev, 0, 1);
+ dev->ep0_state = USB_DWC3_EP0_STATE_IDLE;
+ } else {
+ dev->ep0_state = USB_DWC3_EP0_STATE_DATA_SEND;
+ }
+ break;
+
+ case USB_REQUEST_GET_STATUS: {
+ static const u16 device_status = 0x0001; // self-powered
+ dev->ep0_buffer = &device_status;
+ dev->ep0_buffer_len = 2;
+ dev->ep0_state = USB_DWC3_EP0_STATE_DATA_SEND;
+ break;
+ }
+
+ default:
+ usb_dwc3_ep_set_stall(dev, 0, 1);
+ dev->ep0_state = USB_DWC3_EP0_STATE_IDLE;
+ usb_debug_printf("unsupported SETUP packet\n");
+ }
+}
+
+static void usb_dwc3_ep0_handle_standard_interface(dwc3_dev_t *dev,
+ const union usb_setup_packet *setup)
+{
+ switch (setup->raw.bRequest) {
+ case USB_REQUEST_GET_STATUS: {
+ static const u16 device_status = 0x0000; // reserved
+ dev->ep0_buffer = &device_status;
+ dev->ep0_buffer_len = 2;
+ dev->ep0_state = USB_DWC3_EP0_STATE_DATA_SEND;
+ break;
+ }
+ default:
+ usb_dwc3_ep_set_stall(dev, 0, 1);
+ dev->ep0_state = USB_DWC3_EP0_STATE_IDLE;
+ usb_debug_printf("unsupported SETUP packet\n");
+ }
+}
+
+static void usb_dwc3_ep0_handle_standard_endpoint(dwc3_dev_t *dev,
+ const union usb_setup_packet *setup)
+{
+ switch (setup->raw.bRequest) {
+ case USB_REQUEST_GET_STATUS: {
+ static const u16 device_status = 0x0000; // reserved
+ dev->ep0_buffer = &device_status;
+ dev->ep0_buffer_len = 2;
+ dev->ep0_state = USB_DWC3_EP0_STATE_DATA_SEND;
+ break;
+ }
+ case USB_REQUEST_CLEAR_FEATURE: {
+ switch (setup->feature.wFeatureSelector) {
+ case USB_FEATURE_ENDPOINT_HALT:
+ usb_debug_printf("Host cleared EP 0x%x stall\n", setup->feature.wEndpoint);
+ usb_dwc3_ep_set_stall(dev, ep_to_num(setup->feature.wEndpoint), 0);
+ usb_dwc3_start_status_phase(dev, USB_LEP_CTRL_IN);
+ dev->ep0_state = USB_DWC3_EP0_STATE_DATA_SEND_STATUS_DONE;
+ break;
+ default:
+ usb_dwc3_ep_set_stall(dev, 0, 1);
+ dev->ep0_state = USB_DWC3_EP0_STATE_IDLE;
+ usb_debug_printf("unsupported CLEAR FEATURE: 0x%x\n",
+ setup->feature.wFeatureSelector);
+ break;
+ }
+ break;
+ }
+ default:
+ usb_dwc3_ep_set_stall(dev, 0, 1);
+ dev->ep0_state = USB_DWC3_EP0_STATE_IDLE;
+ usb_debug_printf("unsupported SETUP packet\n");
+ }
+}
+
+static void usb_dwc3_ep0_handle_standard(dwc3_dev_t *dev, const union usb_setup_packet *setup)
+{
+ switch (setup->raw.bmRequestType & USB_REQUEST_TYPE_RECIPIENT_MASK) {
+ case USB_REQUEST_TYPE_RECIPIENT_DEVICE:
+ usb_dwc3_ep0_handle_standard_device(dev, setup);
+ break;
+
+ case USB_REQUEST_TYPE_RECIPIENT_INTERFACE:
+ usb_dwc3_ep0_handle_standard_interface(dev, setup);
+ break;
+
+ case USB_REQUEST_TYPE_RECIPIENT_ENDPOINT:
+ usb_dwc3_ep0_handle_standard_endpoint(dev, setup);
+ break;
+
+ default:
+ usb_dwc3_ep_set_stall(dev, 0, 1);
+ dev->ep0_state = USB_DWC3_EP0_STATE_IDLE;
+ usb_debug_printf("unimplemented request recipient\n");
+ }
+}
+
+static void usb_dwc3_ep0_handle_class(dwc3_dev_t *dev, const union usb_setup_packet *setup)
+{
+ int pipe = setup->raw.wIndex / 2;
+
+ switch (setup->raw.bRequest) {
+ case USB_REQUEST_CDC_GET_LINE_CODING:
+ dev->ep0_buffer_len = min(setup->raw.wLength, sizeof(dev->pipe[pipe].cdc_line_coding));
+ dev->ep0_buffer = dev->pipe[pipe].cdc_line_coding;
+ dev->ep0_state = USB_DWC3_EP0_STATE_DATA_SEND;
+ break;
+
+ case USB_REQUEST_CDC_SET_CTRL_LINE_STATE:
+ if (setup->raw.wValue & 1) { // DTR
+ dev->pipe[pipe].ready = false;
+ usb_debug_printf("ACM device opened\n");
+ dev->pipe[pipe].ready = true;
+ } else {
+ dev->pipe[pipe].ready = false;
+ usb_debug_printf("ACM device closed\n");
+ }
+ usb_dwc3_start_status_phase(dev, USB_LEP_CTRL_IN);
+ dev->ep0_state = USB_DWC3_EP0_STATE_DATA_SEND_STATUS_DONE;
+ break;
+
+ case USB_REQUEST_CDC_SET_LINE_CODING:
+ dev->ep0_read_buffer = dev->pipe[pipe].cdc_line_coding;
+ dev->ep0_read_buffer_len =
+ min(setup->raw.wLength, sizeof(dev->pipe[pipe].cdc_line_coding));
+ dev->ep0_state = USB_DWC3_EP0_STATE_DATA_RECV;
+ break;
+
+ default:
+ usb_dwc3_ep_set_stall(dev, 0, 1);
+ dev->ep0_state = USB_DWC3_EP0_STATE_IDLE;
+ usb_debug_printf("unsupported SETUP packet\n");
+ }
+}
+
+static void usb_dwc3_ep0_handle_setup(dwc3_dev_t *dev)
+{
+ const union usb_setup_packet *setup = dev->endpoints[0].xfer_buffer;
+
+ switch (setup->raw.bmRequestType & USB_REQUEST_TYPE_MASK) {
+ case USB_REQUEST_TYPE_STANDARD:
+ usb_dwc3_ep0_handle_standard(dev, setup);
+ break;
+ case USB_REQUEST_TYPE_CLASS:
+ usb_dwc3_ep0_handle_class(dev, setup);
+ break;
+ default:
+ usb_debug_printf("unsupported request type\n");
+ usb_dwc3_ep_set_stall(dev, 0, 1);
+ dev->ep0_state = USB_DWC3_EP0_STATE_IDLE;
+ }
+}
+
+static void usb_dwc3_ep0_handle_xfer_done(dwc3_dev_t *dev, const struct dwc3_event_depevt event)
+{
+ switch (dev->ep0_state) {
+ case USB_DWC3_EP0_STATE_SETUP_HANDLE:
+ usb_dwc3_ep0_handle_setup(dev);
+ break;
+
+ case USB_DWC3_EP0_STATE_DATA_RECV_STATUS_DONE:
+ case USB_DWC3_EP0_STATE_DATA_SEND_STATUS_DONE:
+ usb_dwc3_start_setup_phase(dev);
+ dev->ep0_state = USB_DWC3_EP0_STATE_SETUP_HANDLE;
+ break;
+
+ case USB_DWC3_EP0_STATE_DATA_SEND_DONE:
+ dev->ep0_state = USB_DWC3_EP0_STATE_DATA_RECV_STATUS;
+ break;
+
+ case USB_DWC3_EP0_STATE_DATA_RECV_DONE:
+ memcpy(dev->ep0_read_buffer, dev->endpoints[event.endpoint_number].xfer_buffer,
+ dev->ep0_read_buffer_len);
+ dev->ep0_state = USB_DWC3_EP0_STATE_DATA_SEND_STATUS;
+ break;
+
+ case USB_DWC3_EP0_STATE_IDLE:
+ default:
+ usb_debug_printf("invalid state in usb_dwc3_ep0_handle_xfer_done: %d, %s\n",
+ dev->ep0_state, ep0_state_names[dev->ep0_state]);
+ usb_dwc3_ep_set_stall(dev, 0, 1);
+ dev->ep0_state = USB_DWC3_EP0_STATE_IDLE;
+ }
+}
+
+static void usb_dwc3_ep0_handle_xfer_not_ready(dwc3_dev_t *dev,
+ const struct dwc3_event_depevt event)
+{
+ switch (dev->ep0_state) {
+ case USB_DWC3_EP0_STATE_IDLE:
+ usb_dwc3_start_setup_phase(dev);
+ dev->ep0_state = USB_DWC3_EP0_STATE_SETUP_HANDLE;
+ break;
+
+ case USB_DWC3_EP0_STATE_DATA_SEND:
+ if (usb_dwc3_ep0_start_data_send_phase(dev))
+ usb_debug_printf("cannot start xtrl xfer data phase for EP 1.\n");
+ dev->ep0_state = USB_DWC3_EP0_STATE_DATA_SEND_DONE;
+ break;
+
+ case USB_DWC3_EP0_STATE_DATA_RECV:
+ if (usb_dwc3_ep0_start_data_recv_phase(dev))
+ usb_debug_printf("cannot start xtrl xfer data phase for EP 0.\n");
+ dev->ep0_state = USB_DWC3_EP0_STATE_DATA_RECV_DONE;
+ break;
+
+ case USB_DWC3_EP0_STATE_DATA_RECV_STATUS:
+ usb_dwc3_start_status_phase(dev, USB_LEP_CTRL_OUT);
+ dev->ep0_state = USB_DWC3_EP0_STATE_DATA_RECV_STATUS_DONE;
+ break;
+
+ case USB_DWC3_EP0_STATE_DATA_SEND_STATUS:
+ usb_dwc3_start_status_phase(dev, USB_LEP_CTRL_IN);
+ dev->ep0_state = USB_DWC3_EP0_STATE_DATA_SEND_STATUS_DONE;
+ break;
+
+ default:
+ usb_debug_printf(
+ "invalid state in usb_dwc3_ep0_handle_xfer_not_ready: %d, %s for ep %d (%x)\n",
+ dev->ep0_state, ep0_state_names[dev->ep0_state], event.endpoint_number,
+ event.endpoint_event);
+ usb_dwc3_ep_set_stall(dev, 0, 1);
+ dev->ep0_state = USB_DWC3_EP0_STATE_IDLE;
+ }
+}
+
+ringbuffer_t *usb_dwc3_cdc_get_ringbuffer(dwc3_dev_t *dev, u8 endpoint_number)
+{
+ switch (endpoint_number) {
+ case USB_LEP_CDC_BULK_IN:
+ return dev->pipe[CDC_ACM_PIPE_0].device2host;
+ case USB_LEP_CDC_BULK_OUT:
+ return dev->pipe[CDC_ACM_PIPE_0].host2device;
+ case USB_LEP_CDC_BULK_IN_2:
+ return dev->pipe[CDC_ACM_PIPE_1].device2host;
+ case USB_LEP_CDC_BULK_OUT_2:
+ return dev->pipe[CDC_ACM_PIPE_1].host2device;
+ default:
+ return NULL;
+ }
+}
+
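+/*
+ * Queue a single XFER_SIZE bulk OUT transfer, but only while the host2device
+ * ring buffer can absorb a full transfer, so the completion handler can
+ * always copy the received bytes without overflowing the ring.
+ */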
+static void usb_dwc3_cdc_start_bulk_out_xfer(dwc3_dev_t *dev, u8 endpoint_number)
+{
+ struct dwc3_trb *trb;
+ uintptr_t trb_iova;
+
+ if (dev->endpoints[endpoint_number].xfer_in_progress)
+ return;
+
+ ringbuffer_t *host2device = usb_dwc3_cdc_get_ringbuffer(dev, endpoint_number);
+ if (!host2device)
+ return;
+
+ if (ringbuffer_get_free(host2device) < XFER_SIZE)
+ return;
+
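+    /* pre-fill with 0xaa so stale or unwritten bytes are easy to spot while debugging */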
+ memset(dev->endpoints[endpoint_number].xfer_buffer, 0xaa, XFER_SIZE);
+ trb_iova = usb_dwc3_init_trb(dev, endpoint_number, &trb);
+ trb->ctrl |= DWC3_TRBCTL_NORMAL;
+ trb->size = DWC3_TRB_SIZE_LENGTH(XFER_SIZE);
+
+ usb_dwc3_ep_start_transfer(dev, endpoint_number, trb_iova);
+ dev->endpoints[endpoint_number].xfer_in_progress = true;
+}
+
+static void usb_dwc3_cdc_start_bulk_in_xfer(dwc3_dev_t *dev, u8 endpoint_number)
+{
+ struct dwc3_trb *trb;
+ uintptr_t trb_iova;
+
+ if (dev->endpoints[endpoint_number].xfer_in_progress)
+ return;
+
+ ringbuffer_t *device2host = usb_dwc3_cdc_get_ringbuffer(dev, endpoint_number);
+ if (!device2host)
+ return;
+
+ size_t len =
+ ringbuffer_read(dev->endpoints[endpoint_number].xfer_buffer, XFER_SIZE, device2host);
+
+ if (!len && !dev->endpoints[endpoint_number].zlp_pending)
+ return;
+
+ trb_iova = usb_dwc3_init_trb(dev, endpoint_number, &trb);
+ trb->ctrl |= DWC3_TRBCTL_NORMAL;
+ trb->size = DWC3_TRB_SIZE_LENGTH(len);
+
+ usb_dwc3_ep_start_transfer(dev, endpoint_number, trb_iova);
+ dev->endpoints[endpoint_number].xfer_in_progress = true;
+    /* a transfer whose final packet was full-size (512) must be terminated by a ZLP */
+    dev->endpoints[endpoint_number].zlp_pending = len != 0 && (len % 512) == 0;
+}
+
+static void usb_dwc3_cdc_handle_bulk_out_xfer_done(dwc3_dev_t *dev,
+ const struct dwc3_event_depevt event)
+{
+ ringbuffer_t *host2device = usb_dwc3_cdc_get_ringbuffer(dev, event.endpoint_number);
+ if (!host2device)
+ return;
+ size_t len = min(XFER_SIZE, ringbuffer_get_free(host2device));
+ ringbuffer_write(dev->endpoints[event.endpoint_number].xfer_buffer,
+ len - dev->endpoints[event.endpoint_number].trb->size, host2device);
+}
+
+static void usb_dwc3_handle_event_ep(dwc3_dev_t *dev, const struct dwc3_event_depevt event)
+{
+ if (event.endpoint_event == DWC3_DEPEVT_XFERCOMPLETE) {
+ dev->endpoints[event.endpoint_number].xfer_in_progress = false;
+
+ switch (event.endpoint_number) {
+ case USB_LEP_CTRL_IN:
+ case USB_LEP_CTRL_OUT:
+ return usb_dwc3_ep0_handle_xfer_done(dev, event);
+ case USB_LEP_CDC_INTR_IN: // [[fallthrough]]
+ case USB_LEP_CDC_INTR_IN_2:
+ return;
+ case USB_LEP_CDC_BULK_IN: // [[fallthrough]]
+ case USB_LEP_CDC_BULK_IN_2:
+ return;
+ case USB_LEP_CDC_BULK_OUT: // [[fallthrough]]
+ case USB_LEP_CDC_BULK_OUT_2:
+ return usb_dwc3_cdc_handle_bulk_out_xfer_done(dev, event);
+ }
+ } else if (event.endpoint_event == DWC3_DEPEVT_XFERNOTREADY) {
+        /*
+         * This might be a controller bug: we sometimes get spurious events
+         * like these here. Ignoring them works just fine, though.
+         */
+ if (dev->endpoints[event.endpoint_number].xfer_in_progress)
+ return;
+
+ switch (event.endpoint_number) {
+ case USB_LEP_CTRL_IN:
+ case USB_LEP_CTRL_OUT:
+ return usb_dwc3_ep0_handle_xfer_not_ready(dev, event);
+ case USB_LEP_CDC_INTR_IN: // [[fallthrough]]
+ case USB_LEP_CDC_INTR_IN_2:
+ return;
+ case USB_LEP_CDC_BULK_IN: // [[fallthrough]]
+ case USB_LEP_CDC_BULK_IN_2:
+ return usb_dwc3_cdc_start_bulk_in_xfer(dev, event.endpoint_number);
+ case USB_LEP_CDC_BULK_OUT: // [[fallthrough]]
+ case USB_LEP_CDC_BULK_OUT_2:
+ return usb_dwc3_cdc_start_bulk_out_xfer(dev, event.endpoint_number);
+ }
+ }
+
+ usb_debug_printf("unhandled EP %02x event: %s (0x%02x) (%d)\n", event.endpoint_number,
+ depvt_names[event.endpoint_event], event.endpoint_event,
+ dev->endpoints[event.endpoint_number].xfer_in_progress);
+    usb_dwc3_ep_set_stall(dev, event.endpoint_number, 1);
+}
+
+static void usb_dwc3_handle_event_usbrst(dwc3_dev_t *dev)
+{
+ /* clear STALL mode for all endpoints */
+ dev->endpoints[0].xfer_in_progress = false;
+ for (int i = 1; i < MAX_ENDPOINTS; ++i) {
+ dev->endpoints[i].xfer_in_progress = false;
+ memset(dev->endpoints[i].xfer_buffer, 0, XFER_BUFFER_BYTES_PER_EP);
+ memset(dev->endpoints[i].trb, 0, TRBS_PER_EP * sizeof(struct dwc3_trb));
+ usb_dwc3_ep_set_stall(dev, i, 0);
+ }
+
+ /* set device address back to zero */
+ mask32(dev->regs + DWC3_DCFG, DWC3_DCFG_DEVADDR_MASK, DWC3_DCFG_DEVADDR(0));
+
+ /* only keep control endpoints enabled */
+ write32(dev->regs + DWC3_DALEPENA, DWC3_DALEPENA_EP(0) | DWC3_DALEPENA_EP(1));
+}
+
+static void usb_dwc3_handle_event_connect_done(dwc3_dev_t *dev)
+{
+ u32 speed = read32(dev->regs + DWC3_DSTS) & DWC3_DSTS_CONNECTSPD;
+
+ if (speed != DWC3_DSTS_HIGHSPEED) {
+ usb_debug_printf(
+ "WARNING: we only support high speed right now but %02x was requested in DSTS\n",
+ speed);
+ }
+
+ usb_dwc3_start_setup_phase(dev);
+ dev->ep0_state = USB_DWC3_EP0_STATE_SETUP_HANDLE;
+}
+
+static void usb_dwc3_handle_event_dev(dwc3_dev_t *dev, const struct dwc3_event_devt event)
+{
+ usb_debug_printf("device event: %s (0x%02x)\n", devt_names[event.type], event.type);
+ switch (event.type) {
+ case DWC3_DEVT_USBRST:
+ usb_dwc3_handle_event_usbrst(dev);
+ break;
+ case DWC3_DEVT_CONNECTDONE:
+ usb_dwc3_handle_event_connect_done(dev);
+ break;
+ default:
+ usb_debug_printf("unhandled device event: %s (0x%02x)\n", devt_names[event.type],
+ event.type);
+ }
+}
+
+static void usb_dwc3_handle_event(dwc3_dev_t *dev, const union dwc3_event event)
+{
+ if (!event.type.is_devspec)
+ usb_dwc3_handle_event_ep(dev, event.depevt);
+ else if (event.type.type == DWC3_EVENT_TYPE_DEV)
+ usb_dwc3_handle_event_dev(dev, event.devt);
+ else
+ usb_debug_printf("unknown event %08x\n", event.raw);
+}
+
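+/*
+ * Event handling model: the controller posts 4-byte events into a single
+ * ring buffer; GEVNTCOUNT(0) holds the number of pending bytes, and writing
+ * the number of consumed bytes back acknowledges them to the controller.
+ */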
+void usb_dwc3_handle_events(dwc3_dev_t *dev)
+{
+ if (!dev)
+ return;
+
+ u32 n_events = read32(dev->regs + DWC3_GEVNTCOUNT(0)) / sizeof(union dwc3_event);
+ if (n_events == 0)
+ return;
+
+ dma_rmb();
+
+ const union dwc3_event *evtbuffer = dev->evtbuffer;
+ for (u32 i = 0; i < n_events; ++i) {
+ usb_dwc3_handle_event(dev, evtbuffer[dev->evt_buffer_offset]);
+
+ dev->evt_buffer_offset =
+ (dev->evt_buffer_offset + 1) % (DWC3_EVENT_BUFFERS_SIZE / sizeof(union dwc3_event));
+ }
+
+ write32(dev->regs + DWC3_GEVNTCOUNT(0), sizeof(union dwc3_event) * n_events);
+}
+
+dwc3_dev_t *usb_dwc3_init(uintptr_t regs, dart_dev_t *dart)
+{
+ /* sanity check */
+ u32 snpsid = read32(regs + DWC3_GSNPSID);
+ if ((snpsid & DWC3_GSNPSID_MASK) != 0x33310000) {
+ debug_printf("no DWC3 core found at 0x%lx: %08x\n", regs, snpsid);
+ return NULL;
+ }
+
+ dwc3_dev_t *dev = malloc(sizeof(*dev));
+ if (!dev)
+ return NULL;
+
+ memset(dev, 0, sizeof(*dev));
+ for (int i = 0; i < CDC_ACM_PIPE_MAX; i++)
+ memcpy(dev->pipe[i].cdc_line_coding, cdc_default_line_coding,
+ sizeof(cdc_default_line_coding));
+
+ dev->regs = regs;
+ dev->dart = dart;
+
+ /* allocate and map dma buffers */
+ dev->evtbuffer = memalign(SZ_16K, max(DWC3_EVENT_BUFFERS_SIZE, SZ_16K));
+ if (!dev->evtbuffer)
+ goto error;
+
+ dev->scratchpad = memalign(SZ_16K, max(DWC3_SCRATCHPAD_SIZE, SZ_16K));
+ if (!dev->scratchpad)
+ goto error;
+
+ dev->trbs = memalign(SZ_16K, TRB_BUFFER_SIZE);
+ if (!dev->trbs)
+ goto error;
+
+ dev->xferbuffer = memalign(SZ_16K, XFER_BUFFER_SIZE);
+ if (!dev->xferbuffer)
+ goto error;
+
+ memset(dev->evtbuffer, 0xaa, max(DWC3_EVENT_BUFFERS_SIZE, SZ_16K));
+ memset(dev->scratchpad, 0, max(DWC3_SCRATCHPAD_SIZE, SZ_16K));
+ memset(dev->xferbuffer, 0, XFER_BUFFER_SIZE);
+ memset(dev->trbs, 0, TRB_BUFFER_SIZE);
+
+ if (dart_map(dev->dart, EVENT_BUFFER_IOVA, dev->evtbuffer,
+ max(DWC3_EVENT_BUFFERS_SIZE, SZ_16K)))
+ goto error;
+ if (dart_map(dev->dart, SCRATCHPAD_IOVA, dev->scratchpad, max(DWC3_SCRATCHPAD_SIZE, SZ_16K)))
+ goto error;
+ if (dart_map(dev->dart, TRB_BUFFER_IOVA, dev->trbs, TRB_BUFFER_SIZE))
+ goto error;
+ if (dart_map(dev->dart, XFER_BUFFER_IOVA, dev->xferbuffer, XFER_BUFFER_SIZE))
+ goto error;
+
+ /* prepare endpoint buffers */
+ for (int i = 0; i < MAX_ENDPOINTS; ++i) {
+ u32 xferbuffer_offset = i * XFER_BUFFER_BYTES_PER_EP;
+ dev->endpoints[i].xfer_buffer = dev->xferbuffer + xferbuffer_offset;
+ dev->endpoints[i].xfer_buffer_iova = XFER_BUFFER_IOVA + xferbuffer_offset;
+
+        u32 trb_offset = i * TRBS_PER_EP;
+        dev->endpoints[i].trb = &dev->trbs[trb_offset];
+        dev->endpoints[i].trb_iova = TRB_BUFFER_IOVA + trb_offset * sizeof(struct dwc3_trb);
+ }
+
+ /* reset the device side of the controller */
+ set32(dev->regs + DWC3_DCTL, DWC3_DCTL_CSFTRST);
+ if (poll32(dev->regs + DWC3_DCTL, DWC3_DCTL_CSFTRST, 0, 1000)) {
+ usb_debug_printf("timeout while waiting for DWC3_DCTL_CSFTRST to clear.\n");
+ goto error;
+ }
+
+ /* soft reset the core and phy */
+ set32(dev->regs + DWC3_GCTL, DWC3_GCTL_CORESOFTRESET);
+ set32(dev->regs + DWC3_GUSB3PIPECTL(0), DWC3_GUSB3PIPECTL_PHYSOFTRST);
+ set32(dev->regs + DWC3_GUSB2PHYCFG(0), DWC3_GUSB2PHYCFG_PHYSOFTRST);
+ mdelay(100);
+ clear32(dev->regs + DWC3_GUSB3PIPECTL(0), DWC3_GUSB3PIPECTL_PHYSOFTRST);
+ clear32(dev->regs + DWC3_GUSB2PHYCFG(0), DWC3_GUSB2PHYCFG_PHYSOFTRST);
+ mdelay(100);
+ clear32(dev->regs + DWC3_GCTL, DWC3_GCTL_CORESOFTRESET);
+ mdelay(100);
+
+ /* disable unused features */
+ clear32(dev->regs + DWC3_GCTL, DWC3_GCTL_SCALEDOWN_MASK | DWC3_GCTL_DISSCRAMBLE);
+
+ /* switch to device-only mode */
+ mask32(dev->regs + DWC3_GCTL, DWC3_GCTL_PRTCAPDIR(DWC3_GCTL_PRTCAP_OTG),
+ DWC3_GCTL_PRTCAPDIR(DWC3_GCTL_PRTCAP_DEVICE));
+
+ /* stick to USB 2.0 high speed for now */
+ mask32(dev->regs + DWC3_DCFG, DWC3_DCFG_SPEED_MASK, DWC3_DCFG_HIGHSPEED);
+
+ /* setup scratchpad at SCRATCHPAD_IOVA */
+ if (usb_dwc3_command(dev, DWC3_DGCMD_SET_SCRATCHPAD_ADDR_LO, SCRATCHPAD_IOVA)) {
+ usb_debug_printf("DWC3_DGCMD_SET_SCRATCHPAD_ADDR_LO failed.");
+ goto error;
+ }
+ if (usb_dwc3_command(dev, DWC3_DGCMD_SET_SCRATCHPAD_ADDR_HI, 0)) {
+ usb_debug_printf("DWC3_DGCMD_SET_SCRATCHPAD_ADDR_HI failed.");
+ goto error;
+ }
+
+ /* setup a single event buffer at EVENT_BUFFER_IOVA */
+ write32(dev->regs + DWC3_GEVNTADRLO(0), EVENT_BUFFER_IOVA);
+ write32(dev->regs + DWC3_GEVNTADRHI(0), 0);
+ write32(dev->regs + DWC3_GEVNTSIZ(0), DWC3_EVENT_BUFFERS_SIZE);
+ write32(dev->regs + DWC3_GEVNTCOUNT(0), 0);
+
+ /* enable connect, disconnect and reset events */
+ write32(dev->regs + DWC3_DEVTEN,
+ DWC3_DEVTEN_DISCONNEVTEN | DWC3_DEVTEN_USBRSTEN | DWC3_DEVTEN_CONNECTDONEEN);
+
+ if (usb_dwc3_ep_command(dev, 0, DWC3_DEPCMD_DEPSTARTCFG, 0, 0, 0)) {
+ usb_debug_printf("cannot issue initial DWC3_DEPCMD_DEPSTARTCFG.\n");
+ goto error;
+ }
+
+ /* prepare control endpoint 0 IN and OUT */
+ if (usb_dwc3_ep_configure(dev, USB_LEP_CTRL_OUT, DWC3_DEPCMD_TYPE_CONTROL, 64))
+ goto error;
+ if (usb_dwc3_ep_configure(dev, USB_LEP_CTRL_IN, DWC3_DEPCMD_TYPE_CONTROL, 64))
+ goto error;
+
+ /* prepare CDC ACM interfaces */
+
+ dev->pipe[CDC_ACM_PIPE_0].ep_intr = USB_LEP_CDC_INTR_IN;
+ dev->pipe[CDC_ACM_PIPE_0].ep_in = USB_LEP_CDC_BULK_IN;
+ dev->pipe[CDC_ACM_PIPE_0].ep_out = USB_LEP_CDC_BULK_OUT;
+
+ dev->pipe[CDC_ACM_PIPE_1].ep_intr = USB_LEP_CDC_INTR_IN_2;
+ dev->pipe[CDC_ACM_PIPE_1].ep_in = USB_LEP_CDC_BULK_IN_2;
+ dev->pipe[CDC_ACM_PIPE_1].ep_out = USB_LEP_CDC_BULK_OUT_2;
+
+ for (int i = 0; i < CDC_ACM_PIPE_MAX; i++) {
+ dev->pipe[i].host2device = ringbuffer_alloc(CDC_BUFFER_SIZE);
+ if (!dev->pipe[i].host2device)
+ goto error;
+ dev->pipe[i].device2host = ringbuffer_alloc(CDC_BUFFER_SIZE);
+ if (!dev->pipe[i].device2host)
+ goto error;
+
+ /* prepare INTR endpoint so that we don't have to reconfigure this device later */
+ if (usb_dwc3_ep_configure(dev, dev->pipe[i].ep_intr, DWC3_DEPCMD_TYPE_INTR, 64))
+ goto error;
+
+ /* prepare BULK endpoints so that we don't have to reconfigure this device later */
+ if (usb_dwc3_ep_configure(dev, dev->pipe[i].ep_in, DWC3_DEPCMD_TYPE_BULK, 512))
+ goto error;
+ if (usb_dwc3_ep_configure(dev, dev->pipe[i].ep_out, DWC3_DEPCMD_TYPE_BULK, 512))
+ goto error;
+ }
+
+ /* prepare first control transfer */
+ dev->ep0_state = USB_DWC3_EP0_STATE_IDLE;
+
+ /* only enable control endpoints for now */
+ write32(dev->regs + DWC3_DALEPENA,
+ DWC3_DALEPENA_EP(USB_LEP_CTRL_IN) | DWC3_DALEPENA_EP(USB_LEP_CTRL_OUT));
+
+ /* and finally kick the device controller to go live! */
+ set32(dev->regs + DWC3_DCTL, DWC3_DCTL_RUN_STOP);
+
+ return dev;
+
+error:
+ usb_dwc3_shutdown(dev);
+ return NULL;
+}
+
+void usb_dwc3_shutdown(dwc3_dev_t *dev)
+{
+ for (int i = 0; i < CDC_ACM_PIPE_MAX; i++)
+ dev->pipe[i].ready = false;
+
+ /* stop all ongoing transfers */
+ for (int i = 1; i < MAX_ENDPOINTS; ++i) {
+ if (!dev->endpoints[i].xfer_in_progress)
+ continue;
+
+ if (usb_dwc3_ep_command(dev, i, DWC3_DEPCMD_ENDTRANSFER, 0, 0, 0))
+ usb_debug_printf("cannot issue DWC3_DEPCMD_ENDTRANSFER for EP %02x.\n", i);
+ }
+
+ /* disable events and all endpoints and stop the device controller */
+ write32(dev->regs + DWC3_DEVTEN, 0);
+ write32(dev->regs + DWC3_DALEPENA, 0);
+ clear32(dev->regs + DWC3_DCTL, DWC3_DCTL_RUN_STOP);
+
+ /* wait until the controller is shut down */
+ if (poll32(dev->regs + DWC3_DSTS, DWC3_DSTS_DEVCTRLHLT, DWC3_DSTS_DEVCTRLHLT, 1000))
+ usb_debug_printf("timeout while waiting for DWC3_DSTS_DEVCTRLHLT during shutdown.\n");
+
+ /* reset the device side of the controller just to be safe */
+ set32(dev->regs + DWC3_DCTL, DWC3_DCTL_CSFTRST);
+ if (poll32(dev->regs + DWC3_DCTL, DWC3_DCTL_CSFTRST, 0, 1000))
+ usb_debug_printf("timeout while waiting for DWC3_DCTL_CSFTRST to clear during shutdown.\n");
+
+ /* unmap and free dma buffers */
+ dart_unmap(dev->dart, TRB_BUFFER_IOVA, TRB_BUFFER_SIZE);
+ dart_unmap(dev->dart, XFER_BUFFER_IOVA, XFER_BUFFER_SIZE);
+ dart_unmap(dev->dart, SCRATCHPAD_IOVA, max(DWC3_SCRATCHPAD_SIZE, SZ_16K));
+ dart_unmap(dev->dart, EVENT_BUFFER_IOVA, max(DWC3_EVENT_BUFFERS_SIZE, SZ_16K));
+
+ free(dev->evtbuffer);
+ free(dev->scratchpad);
+ free(dev->xferbuffer);
+ free(dev->trbs);
+ for (int i = 0; i < CDC_ACM_PIPE_MAX; i++) {
+ ringbuffer_free(dev->pipe[i].device2host);
+ ringbuffer_free(dev->pipe[i].host2device);
+ }
+
+ if (dev->dart)
+ dart_shutdown(dev->dart);
+ free(dev);
+}
+
+u8 usb_dwc3_getbyte(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe)
+{
+ ringbuffer_t *host2device = dev->pipe[pipe].host2device;
+ if (!host2device)
+ return 0;
+
+ u8 ep = dev->pipe[pipe].ep_out;
+
+ u8 c;
+ while (ringbuffer_read(&c, 1, host2device) < 1) {
+ usb_dwc3_handle_events(dev);
+ usb_dwc3_cdc_start_bulk_out_xfer(dev, ep);
+ }
+ return c;
+}
+
+void usb_dwc3_putbyte(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe, u8 byte)
+{
+ ringbuffer_t *device2host = dev->pipe[pipe].device2host;
+ if (!device2host)
+ return;
+
+ u8 ep = dev->pipe[pipe].ep_in;
+
+ while (ringbuffer_write(&byte, 1, device2host) < 1) {
+ usb_dwc3_handle_events(dev);
+ usb_dwc3_cdc_start_bulk_in_xfer(dev, ep);
+ }
+}
+
+size_t usb_dwc3_queue(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe, const void *buf, size_t count)
+{
+ const u8 *p = buf;
+ size_t wrote, sent = 0;
+
+ if (!dev || !dev->pipe[pipe].ready)
+ return 0;
+
+ ringbuffer_t *device2host = dev->pipe[pipe].device2host;
+ if (!device2host)
+ return 0;
+
+ u8 ep = dev->pipe[pipe].ep_in;
+
+ while (count) {
+ wrote = ringbuffer_write(p, count, device2host);
+ count -= wrote;
+ p += wrote;
+ sent += wrote;
+ if (count) {
+ usb_dwc3_handle_events(dev);
+ usb_dwc3_cdc_start_bulk_in_xfer(dev, ep);
+ }
+ }
+
+ return sent;
+}
+
+size_t usb_dwc3_write(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe, const void *buf, size_t count)
+{
+ if (!dev)
+        return 0;
+
+ u8 ep = dev->pipe[pipe].ep_in;
+ size_t ret = usb_dwc3_queue(dev, pipe, buf, count);
+
+ usb_dwc3_cdc_start_bulk_in_xfer(dev, ep);
+
+ return ret;
+}
+
+size_t usb_dwc3_read(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe, void *buf, size_t count)
+{
+ u8 *p = buf;
+ size_t read, recvd = 0;
+
+ if (!dev || !dev->pipe[pipe].ready)
+ return 0;
+
+ ringbuffer_t *host2device = dev->pipe[pipe].host2device;
+ if (!host2device)
+ return 0;
+
+ u8 ep = dev->pipe[pipe].ep_out;
+
+ while (count) {
+ read = ringbuffer_read(p, count, host2device);
+ count -= read;
+ p += read;
+ recvd += read;
+ usb_dwc3_handle_events(dev);
+ usb_dwc3_cdc_start_bulk_out_xfer(dev, ep);
+ }
+
+ return recvd;
+}
+
+ssize_t usb_dwc3_can_read(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe)
+{
+ if (!dev || !dev->pipe[pipe].ready)
+ return 0;
+
+ ringbuffer_t *host2device = dev->pipe[pipe].host2device;
+ if (!host2device)
+ return 0;
+
+ return ringbuffer_get_used(host2device);
+}
+
+bool usb_dwc3_can_write(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe)
+{
+ (void)pipe;
+ if (!dev)
+ return false;
+
+ return dev->pipe[pipe].ready;
+}
+
+void usb_dwc3_flush(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe)
+{
+ if (!dev || !dev->pipe[pipe].ready)
+ return;
+
+ ringbuffer_t *device2host = dev->pipe[pipe].device2host;
+ if (!device2host)
+ return;
+
+ u8 ep = dev->pipe[pipe].ep_in;
+
+ while (ringbuffer_get_used(device2host) != 0 || dev->endpoints[ep].xfer_in_progress) {
+ usb_dwc3_handle_events(dev);
+ }
+}
diff --git a/tools/src/usb_dwc3.h b/tools/src/usb_dwc3.h
new file mode 100644
index 0000000..6b23c7c
--- /dev/null
+++ b/tools/src/usb_dwc3.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef USB_DWC3_H
+#define USB_DWC3_H
+
+#include "dart.h"
+#include "types.h"
+
+typedef struct dwc3_dev dwc3_dev_t;
+
+typedef enum _cdc_acm_pipe_id_t {
+ CDC_ACM_PIPE_0,
+ CDC_ACM_PIPE_1,
+ CDC_ACM_PIPE_MAX
+} cdc_acm_pipe_id_t;
+
+dwc3_dev_t *usb_dwc3_init(uintptr_t regs, dart_dev_t *dart);
+void usb_dwc3_shutdown(dwc3_dev_t *dev);
+
+void usb_dwc3_handle_events(dwc3_dev_t *dev);
+
+ssize_t usb_dwc3_can_read(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe);
+bool usb_dwc3_can_write(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe);
+
+u8 usb_dwc3_getbyte(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe);
+void usb_dwc3_putbyte(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe, u8 byte);
+
+size_t usb_dwc3_read(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe, void *buf, size_t count);
+size_t usb_dwc3_write(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe, const void *buf, size_t count);
+size_t usb_dwc3_queue(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe, const void *buf, size_t count);
+void usb_dwc3_flush(dwc3_dev_t *dev, cdc_acm_pipe_id_t pipe);
+
+#endif
diff --git a/tools/src/usb_dwc3_regs.h b/tools/src/usb_dwc3_regs.h
new file mode 100644
index 0000000..9c3d9ca
--- /dev/null
+++ b/tools/src/usb_dwc3_regs.h
@@ -0,0 +1,625 @@
+/**
+ * core.h - DesignWare USB3 DRD Core Header
+ * linux commit 7bc5a6ba369217e0137833f5955cf0b0f08b0712 before
+ * the switch to GPLv2 only
+ *
+ * Copyright (C) 2010-2011 Texas Instruments Incorporated - http://www.ti.com
+ *
+ * Authors: Felipe Balbi <balbi@ti.com>,
+ * Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the above-listed copyright holders may not be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2, as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DRIVERS_USB_DWC3_CORE_H
+#define __DRIVERS_USB_DWC3_CORE_H
+
+#include "types.h"
+
+/* Global constants */
+#define DWC3_EP0_BOUNCE_SIZE 512
+#define DWC3_ENDPOINTS_NUM 32
+#define DWC3_XHCI_RESOURCES_NUM 2
+
+#define DWC3_EVENT_SIZE 4 /* bytes */
+#define DWC3_EVENT_MAX_NUM 64 /* 2 events/endpoint */
+#define DWC3_EVENT_BUFFERS_SIZE (DWC3_EVENT_SIZE * DWC3_EVENT_MAX_NUM)
+#define DWC3_EVENT_TYPE_MASK 0xfe
+
+#define DWC3_EVENT_TYPE_DEV 0
+#define DWC3_EVENT_TYPE_CARKIT 3
+#define DWC3_EVENT_TYPE_I2C 4
+
+#define DWC3_DEVICE_EVENT_DISCONNECT 0
+#define DWC3_DEVICE_EVENT_RESET 1
+#define DWC3_DEVICE_EVENT_CONNECT_DONE 2
+#define DWC3_DEVICE_EVENT_LINK_STATUS_CHANGE 3
+#define DWC3_DEVICE_EVENT_WAKEUP 4
+#define DWC3_DEVICE_EVENT_HIBER_REQ 5
+#define DWC3_DEVICE_EVENT_EOPF 6
+#define DWC3_DEVICE_EVENT_SOF 7
+#define DWC3_DEVICE_EVENT_ERRATIC_ERROR 9
+#define DWC3_DEVICE_EVENT_CMD_CMPL 10
+#define DWC3_DEVICE_EVENT_OVERFLOW 11
+
+#define DWC3_GEVNTCOUNT_MASK 0xfffc
+#define DWC3_GSNPSID_MASK 0xffff0000
+#define DWC3_GSNPSREV_MASK 0xffff
+
+/* DWC3 registers memory space boundaries */
+#define DWC3_XHCI_REGS_START 0x0
+#define DWC3_XHCI_REGS_END 0x7fff
+#define DWC3_GLOBALS_REGS_START 0xc100
+#define DWC3_GLOBALS_REGS_END 0xc6ff
+#define DWC3_DEVICE_REGS_START 0xc700
+#define DWC3_DEVICE_REGS_END 0xcbff
+#define DWC3_OTG_REGS_START 0xcc00
+#define DWC3_OTG_REGS_END 0xccff
+
+/* Global Registers */
+#define DWC3_GSBUSCFG0 0xc100
+#define DWC3_GSBUSCFG1 0xc104
+#define DWC3_GTXTHRCFG 0xc108
+#define DWC3_GRXTHRCFG 0xc10c
+#define DWC3_GCTL 0xc110
+#define DWC3_GEVTEN 0xc114
+#define DWC3_GSTS 0xc118
+#define DWC3_GSNPSID 0xc120
+#define DWC3_GGPIO 0xc124
+#define DWC3_GUID 0xc128
+#define DWC3_GUCTL 0xc12c
+#define DWC3_GBUSERRADDR0 0xc130
+#define DWC3_GBUSERRADDR1 0xc134
+#define DWC3_GPRTBIMAP0 0xc138
+#define DWC3_GPRTBIMAP1 0xc13c
+#define DWC3_GHWPARAMS0 0xc140
+#define DWC3_GHWPARAMS1 0xc144
+#define DWC3_GHWPARAMS2 0xc148
+#define DWC3_GHWPARAMS3 0xc14c
+#define DWC3_GHWPARAMS4 0xc150
+#define DWC3_GHWPARAMS5 0xc154
+#define DWC3_GHWPARAMS6 0xc158
+#define DWC3_GHWPARAMS7 0xc15c
+#define DWC3_GDBGFIFOSPACE 0xc160
+#define DWC3_GDBGLTSSM 0xc164
+#define DWC3_GPRTBIMAP_HS0 0xc180
+#define DWC3_GPRTBIMAP_HS1 0xc184
+#define DWC3_GPRTBIMAP_FS0 0xc188
+#define DWC3_GPRTBIMAP_FS1 0xc18c
+
+#define DWC3_GUSB2PHYCFG(n) (0xc200 + (n * 0x04))
+#define DWC3_GUSB2I2CCTL(n) (0xc240 + (n * 0x04))
+
+#define DWC3_GUSB2PHYACC(n) (0xc280 + (n * 0x04))
+
+#define DWC3_GUSB3PIPECTL(n) (0xc2c0 + (n * 0x04))
+
+#define DWC3_GTXFIFOSIZ(n) (0xc300 + (n * 0x04))
+#define DWC3_GRXFIFOSIZ(n) (0xc380 + (n * 0x04))
+
+#define DWC3_GEVNTADRLO(n) (0xc400 + (n * 0x10))
+#define DWC3_GEVNTADRHI(n) (0xc404 + (n * 0x10))
+#define DWC3_GEVNTSIZ(n) (0xc408 + (n * 0x10))
+#define DWC3_GEVNTCOUNT(n) (0xc40c + (n * 0x10))
+
+#define DWC3_GHWPARAMS8 0xc600
+
+/* Device Registers */
+#define DWC3_DCFG 0xc700
+#define DWC3_DCTL 0xc704
+#define DWC3_DEVTEN 0xc708
+#define DWC3_DSTS 0xc70c
+#define DWC3_DGCMDPAR 0xc710
+#define DWC3_DGCMD 0xc714
+#define DWC3_DALEPENA 0xc720
+#define DWC3_DEPCMDPAR2(n) (0xc800 + (n * 0x10))
+#define DWC3_DEPCMDPAR1(n) (0xc804 + (n * 0x10))
+#define DWC3_DEPCMDPAR0(n) (0xc808 + (n * 0x10))
+#define DWC3_DEPCMD(n) (0xc80c + (n * 0x10))
+
+/* OTG Registers */
+#define DWC3_OCFG 0xcc00
+#define DWC3_OCTL 0xcc04
+#define DWC3_OEVT 0xcc08
+#define DWC3_OEVTEN 0xcc0C
+#define DWC3_OSTS 0xcc10
+
+/* Bit fields */
+
+/* Global Configuration Register */
+#define DWC3_GCTL_PWRDNSCALE(n) ((n) << 19)
+#define DWC3_GCTL_U2RSTECN (1 << 16)
+#define DWC3_GCTL_RAMCLKSEL(x) (((x)&DWC3_GCTL_CLK_MASK) << 6)
+#define DWC3_GCTL_CLK_BUS (0)
+#define DWC3_GCTL_CLK_PIPE (1)
+#define DWC3_GCTL_CLK_PIPEHALF (2)
+#define DWC3_GCTL_CLK_MASK (3)
+
+#define DWC3_GCTL_PRTCAP(n) (((n) & (3 << 12)) >> 12)
+#define DWC3_GCTL_PRTCAPDIR(n) ((n) << 12)
+#define DWC3_GCTL_PRTCAP_HOST 1
+#define DWC3_GCTL_PRTCAP_DEVICE 2
+#define DWC3_GCTL_PRTCAP_OTG 3
+
+#define DWC3_GCTL_CORESOFTRESET (1 << 11)
+#define DWC3_GCTL_SCALEDOWN(n) ((n) << 4)
+#define DWC3_GCTL_SCALEDOWN_MASK DWC3_GCTL_SCALEDOWN(3)
+#define DWC3_GCTL_DISSCRAMBLE (1 << 3)
+#define DWC3_GCTL_GBLHIBERNATIONEN (1 << 1)
+#define DWC3_GCTL_DSBLCLKGTNG (1 << 0)
+
+/* Global USB2 PHY Configuration Register */
+#define DWC3_GUSB2PHYCFG_PHYSOFTRST (1 << 31)
+#define DWC3_GUSB2PHYCFG_SUSPHY (1 << 6)
+
+/* Global USB3 PIPE Control Register */
+#define DWC3_GUSB3PIPECTL_PHYSOFTRST (1 << 31)
+#define DWC3_GUSB3PIPECTL_SUSPHY (1 << 17)
+
+/* Global TX Fifo Size Register */
+#define DWC3_GTXFIFOSIZ_TXFDEF(n) ((n)&0xffff)
+#define DWC3_GTXFIFOSIZ_TXFSTADDR(n) ((n)&0xffff0000)
+
+/* Global HWPARAMS1 Register */
+#define DWC3_GHWPARAMS1_EN_PWROPT(n) (((n) & (3 << 24)) >> 24)
+#define DWC3_GHWPARAMS1_EN_PWROPT_NO 0
+#define DWC3_GHWPARAMS1_EN_PWROPT_CLK 1
+#define DWC3_GHWPARAMS1_EN_PWROPT_HIB 2
+#define DWC3_GHWPARAMS1_PWROPT(n) ((n) << 24)
+#define DWC3_GHWPARAMS1_PWROPT_MASK DWC3_GHWPARAMS1_PWROPT(3)
+
+/* Global HWPARAMS4 Register */
+#define DWC3_GHWPARAMS4_HIBER_SCRATCHBUFS(n) (((n) & (0x0f << 13)) >> 13)
+#define DWC3_MAX_HIBER_SCRATCHBUFS 15
+
+/* Device Configuration Register */
+#define DWC3_DCFG_LPM_CAP (1 << 22)
+#define DWC3_DCFG_DEVADDR(addr) ((addr) << 3)
+#define DWC3_DCFG_DEVADDR_MASK DWC3_DCFG_DEVADDR(0x7f)
+
+#define DWC3_DCFG_SPEED_MASK (7 << 0)
+#define DWC3_DCFG_SUPERSPEED (4 << 0)
+#define DWC3_DCFG_HIGHSPEED (0 << 0)
+#define DWC3_DCFG_FULLSPEED2 (1 << 0)
+#define DWC3_DCFG_LOWSPEED (2 << 0)
+#define DWC3_DCFG_FULLSPEED1 (3 << 0)
+
+/* Device Control Register */
+#define DWC3_DCTL_RUN_STOP (1 << 31)
+#define DWC3_DCTL_CSFTRST (1 << 30)
+#define DWC3_DCTL_LSFTRST (1 << 29)
+
+#define DWC3_DCTL_HIRD_THRES_MASK (0x1f << 24)
+#define DWC3_DCTL_HIRD_THRES(n) ((n) << 24)
+
+#define DWC3_DCTL_APPL1RES (1 << 23)
+
+/* These apply for core versions 1.87a and earlier */
+#define DWC3_DCTL_TRGTULST_MASK (0x0f << 17)
+#define DWC3_DCTL_TRGTULST(n) ((n) << 17)
+#define DWC3_DCTL_TRGTULST_U2 (DWC3_DCTL_TRGTULST(2))
+#define DWC3_DCTL_TRGTULST_U3 (DWC3_DCTL_TRGTULST(3))
+#define DWC3_DCTL_TRGTULST_SS_DIS (DWC3_DCTL_TRGTULST(4))
+#define DWC3_DCTL_TRGTULST_RX_DET (DWC3_DCTL_TRGTULST(5))
+#define DWC3_DCTL_TRGTULST_SS_INACT (DWC3_DCTL_TRGTULST(6))
+
+/* These apply for core versions 1.94a and later */
+#define DWC3_DCTL_KEEP_CONNECT (1 << 19)
+#define DWC3_DCTL_L1_HIBER_EN (1 << 18)
+#define DWC3_DCTL_CRS (1 << 17)
+#define DWC3_DCTL_CSS (1 << 16)
+
+#define DWC3_DCTL_INITU2ENA (1 << 12)
+#define DWC3_DCTL_ACCEPTU2ENA (1 << 11)
+#define DWC3_DCTL_INITU1ENA (1 << 10)
+#define DWC3_DCTL_ACCEPTU1ENA (1 << 9)
+#define DWC3_DCTL_TSTCTRL_MASK (0xf << 1)
+
+#define DWC3_DCTL_ULSTCHNGREQ_MASK (0x0f << 5)
+#define DWC3_DCTL_ULSTCHNGREQ(n) (((n) << 5) & DWC3_DCTL_ULSTCHNGREQ_MASK)
+
+#define DWC3_DCTL_ULSTCHNG_NO_ACTION (DWC3_DCTL_ULSTCHNGREQ(0))
+#define DWC3_DCTL_ULSTCHNG_SS_DISABLED (DWC3_DCTL_ULSTCHNGREQ(4))
+#define DWC3_DCTL_ULSTCHNG_RX_DETECT (DWC3_DCTL_ULSTCHNGREQ(5))
+#define DWC3_DCTL_ULSTCHNG_SS_INACTIVE (DWC3_DCTL_ULSTCHNGREQ(6))
+#define DWC3_DCTL_ULSTCHNG_RECOVERY (DWC3_DCTL_ULSTCHNGREQ(8))
+#define DWC3_DCTL_ULSTCHNG_COMPLIANCE (DWC3_DCTL_ULSTCHNGREQ(10))
+#define DWC3_DCTL_ULSTCHNG_LOOPBACK (DWC3_DCTL_ULSTCHNGREQ(11))
+
+/* Device Event Enable Register */
+#define DWC3_DEVTEN_VNDRDEVTSTRCVEDEN (1 << 12)
+#define DWC3_DEVTEN_EVNTOVERFLOWEN (1 << 11)
+#define DWC3_DEVTEN_CMDCMPLTEN (1 << 10)
+#define DWC3_DEVTEN_ERRTICERREN (1 << 9)
+#define DWC3_DEVTEN_SOFEN (1 << 7)
+#define DWC3_DEVTEN_EOPFEN (1 << 6)
+#define DWC3_DEVTEN_HIBERNATIONREQEVTEN (1 << 5)
+#define DWC3_DEVTEN_WKUPEVTEN (1 << 4)
+#define DWC3_DEVTEN_ULSTCNGEN (1 << 3)
+#define DWC3_DEVTEN_CONNECTDONEEN (1 << 2)
+#define DWC3_DEVTEN_USBRSTEN (1 << 1)
+#define DWC3_DEVTEN_DISCONNEVTEN (1 << 0)
+
+/* Device Status Register */
+#define DWC3_DSTS_DCNRD (1 << 29)
+
+/* This applies for core versions 1.87a and earlier */
+#define DWC3_DSTS_PWRUPREQ (1 << 24)
+
+/* These apply for core versions 1.94a and later */
+#define DWC3_DSTS_RSS (1 << 25)
+#define DWC3_DSTS_SSS (1 << 24)
+
+#define DWC3_DSTS_COREIDLE (1 << 23)
+#define DWC3_DSTS_DEVCTRLHLT (1 << 22)
+
+#define DWC3_DSTS_USBLNKST_MASK (0x0f << 18)
+#define DWC3_DSTS_USBLNKST(n) (((n)&DWC3_DSTS_USBLNKST_MASK) >> 18)
+
+#define DWC3_DSTS_RXFIFOEMPTY (1 << 17)
+
+#define DWC3_DSTS_SOFFN_MASK (0x3fff << 3)
+#define DWC3_DSTS_SOFFN(n) (((n)&DWC3_DSTS_SOFFN_MASK) >> 3)
+
+#define DWC3_DSTS_CONNECTSPD (7 << 0)
+
+#define DWC3_DSTS_SUPERSPEED (4 << 0)
+#define DWC3_DSTS_HIGHSPEED (0 << 0)
+#define DWC3_DSTS_FULLSPEED2 (1 << 0)
+#define DWC3_DSTS_LOWSPEED (2 << 0)
+#define DWC3_DSTS_FULLSPEED1 (3 << 0)
+
+/* Device Generic Command Register */
+#define DWC3_DGCMD_SET_LMP 0x01
+#define DWC3_DGCMD_SET_PERIODIC_PAR 0x02
+#define DWC3_DGCMD_XMIT_FUNCTION 0x03
+
+/* These apply for core versions 1.94a and later */
+#define DWC3_DGCMD_SET_SCRATCHPAD_ADDR_LO 0x04
+#define DWC3_DGCMD_SET_SCRATCHPAD_ADDR_HI 0x05
+
+#define DWC3_DGCMD_SELECTED_FIFO_FLUSH 0x09
+#define DWC3_DGCMD_ALL_FIFO_FLUSH 0x0a
+#define DWC3_DGCMD_SET_ENDPOINT_NRDY 0x0c
+#define DWC3_DGCMD_RUN_SOC_BUS_LOOPBACK 0x10
+
+#define DWC3_DGCMD_STATUS(n) (((n) >> 15) & 1)
+#define DWC3_DGCMD_CMDACT (1 << 10)
+#define DWC3_DGCMD_CMDIOC (1 << 8)
+
+/* Device Generic Command Parameter Register */
+#define DWC3_DGCMDPAR_FORCE_LINKPM_ACCEPT (1 << 0)
+#define DWC3_DGCMDPAR_FIFO_NUM(n) ((n) << 0)
+#define DWC3_DGCMDPAR_RX_FIFO (0 << 5)
+#define DWC3_DGCMDPAR_TX_FIFO (1 << 5)
+#define DWC3_DGCMDPAR_LOOPBACK_DIS (0 << 0)
+#define DWC3_DGCMDPAR_LOOPBACK_ENA (1 << 0)
+
+/* Device Endpoint Command Register */
+#define DWC3_DEPCMD_PARAM_SHIFT 16
+#define DWC3_DEPCMD_PARAM(x) ((x) << DWC3_DEPCMD_PARAM_SHIFT)
+#define DWC3_DEPCMD_GET_RSC_IDX(x) (((x) >> DWC3_DEPCMD_PARAM_SHIFT) & 0x7f)
+#define DWC3_DEPCMD_STATUS(x) (((x) >> 15) & 1)
+#define DWC3_DEPCMD_HIPRI_FORCERM (1 << 11)
+#define DWC3_DEPCMD_CMDACT (1 << 10)
+#define DWC3_DEPCMD_CMDIOC (1 << 8)
+
+#define DWC3_DEPCMD_DEPSTARTCFG (0x09 << 0)
+#define DWC3_DEPCMD_ENDTRANSFER (0x08 << 0)
+#define DWC3_DEPCMD_UPDATETRANSFER (0x07 << 0)
+#define DWC3_DEPCMD_STARTTRANSFER (0x06 << 0)
+#define DWC3_DEPCMD_CLEARSTALL (0x05 << 0)
+#define DWC3_DEPCMD_SETSTALL (0x04 << 0)
+/* This applies for core versions 1.90a and earlier */
+#define DWC3_DEPCMD_GETSEQNUMBER (0x03 << 0)
+/* This applies for core versions 1.94a and later */
+#define DWC3_DEPCMD_GETEPSTATE (0x03 << 0)
+#define DWC3_DEPCMD_SETTRANSFRESOURCE (0x02 << 0)
+#define DWC3_DEPCMD_SETEPCONFIG (0x01 << 0)
+
+/* The EP number goes 0..31 so ep0 is always out and ep1 is always in */
+#define DWC3_DALEPENA_EP(n) (1 << (n))
+
+#define DWC3_DEPCMD_TYPE_CONTROL 0
+#define DWC3_DEPCMD_TYPE_ISOC 1
+#define DWC3_DEPCMD_TYPE_BULK 2
+#define DWC3_DEPCMD_TYPE_INTR 3
+
+#define DWC3_EVENT_PENDING BIT(0)
+
+#define DWC3_EP_FLAG_STALLED (1 << 0)
+#define DWC3_EP_FLAG_WEDGED (1 << 1)
+
+#define DWC3_EP_DIRECTION_TX true
+#define DWC3_EP_DIRECTION_RX false
+
+#define DWC3_TRB_NUM 32
+#define DWC3_TRB_MASK (DWC3_TRB_NUM - 1)
+
+#define DWC3_EP_ENABLED (1 << 0)
+#define DWC3_EP_STALL (1 << 1)
+#define DWC3_EP_WEDGE (1 << 2)
+#define DWC3_EP_BUSY (1 << 4)
+#define DWC3_EP_PENDING_REQUEST (1 << 5)
+#define DWC3_EP_MISSED_ISOC (1 << 6)
+
+/* This last one is specific to EP0 */
+#define DWC3_EP0_DIR_IN (1 << 31)
+
+enum dwc3_link_state {
+ /* In SuperSpeed */
+ DWC3_LINK_STATE_U0 = 0x00, /* in HS, means ON */
+ DWC3_LINK_STATE_U1 = 0x01,
+ DWC3_LINK_STATE_U2 = 0x02, /* in HS, means SLEEP */
+ DWC3_LINK_STATE_U3 = 0x03, /* in HS, means SUSPEND */
+ DWC3_LINK_STATE_SS_DIS = 0x04,
+ DWC3_LINK_STATE_RX_DET = 0x05, /* in HS, means Early Suspend */
+ DWC3_LINK_STATE_SS_INACT = 0x06,
+ DWC3_LINK_STATE_POLL = 0x07,
+ DWC3_LINK_STATE_RECOV = 0x08,
+ DWC3_LINK_STATE_HRESET = 0x09,
+ DWC3_LINK_STATE_CMPLY = 0x0a,
+ DWC3_LINK_STATE_LPBK = 0x0b,
+ DWC3_LINK_STATE_RESET = 0x0e,
+ DWC3_LINK_STATE_RESUME = 0x0f,
+ DWC3_LINK_STATE_MASK = 0x0f,
+};
+
+/* TRB Length, PCM and Status */
+#define DWC3_TRB_SIZE_MASK (0x00ffffff)
+#define DWC3_TRB_SIZE_LENGTH(n) ((n)&DWC3_TRB_SIZE_MASK)
+#define DWC3_TRB_SIZE_PCM1(n) (((n)&0x03) << 24)
+#define DWC3_TRB_SIZE_TRBSTS(n) (((n) & (0x0f << 28)) >> 28)
+
+#define DWC3_TRBSTS_OK 0
+#define DWC3_TRBSTS_MISSED_ISOC 1
+#define DWC3_TRBSTS_SETUP_PENDING 2
+#define DWC3_TRB_STS_XFER_IN_PROG 4
+
+/* TRB Control */
+#define DWC3_TRB_CTRL_HWO (1 << 0)
+#define DWC3_TRB_CTRL_LST (1 << 1)
+#define DWC3_TRB_CTRL_CHN (1 << 2)
+#define DWC3_TRB_CTRL_CSP (1 << 3)
+#define DWC3_TRB_CTRL_TRBCTL(n) (((n)&0x3f) << 4)
+#define DWC3_TRB_CTRL_ISP_IMI (1 << 10)
+#define DWC3_TRB_CTRL_IOC (1 << 11)
+#define DWC3_TRB_CTRL_SID_SOFN(n) (((n)&0xffff) << 14)
+
+#define DWC3_TRBCTL_NORMAL DWC3_TRB_CTRL_TRBCTL(1)
+#define DWC3_TRBCTL_CONTROL_SETUP DWC3_TRB_CTRL_TRBCTL(2)
+#define DWC3_TRBCTL_CONTROL_STATUS2 DWC3_TRB_CTRL_TRBCTL(3)
+#define DWC3_TRBCTL_CONTROL_STATUS3 DWC3_TRB_CTRL_TRBCTL(4)
+#define DWC3_TRBCTL_CONTROL_DATA DWC3_TRB_CTRL_TRBCTL(5)
+#define DWC3_TRBCTL_ISOCHRONOUS_FIRST DWC3_TRB_CTRL_TRBCTL(6)
+#define DWC3_TRBCTL_ISOCHRONOUS DWC3_TRB_CTRL_TRBCTL(7)
+#define DWC3_TRBCTL_LINK_TRB DWC3_TRB_CTRL_TRBCTL(8)
+
+/**
+ * struct dwc3_trb - transfer request block (hw format)
+ * @bpl: DW0-3
+ * @bph: DW4-7
+ * @size: DW8-B
+ * @ctrl: DWC-F
+ */
+struct dwc3_trb {
+ u32 bpl;
+ u32 bph;
+ u32 size;
+ u32 ctrl;
+} PACKED;
+
+/* HWPARAMS0 */
+#define DWC3_MODE(n) ((n)&0x7)
+
+#define DWC3_MODE_DEVICE 0
+#define DWC3_MODE_HOST 1
+#define DWC3_MODE_DRD 2
+#define DWC3_MODE_HUB 3
+
+#define DWC3_MDWIDTH(n) (((n)&0xff00) >> 8)
+
+/* HWPARAMS1 */
+#define DWC3_NUM_INT(n) (((n) & (0x3f << 15)) >> 15)
+
+/* HWPARAMS3 */
+#define DWC3_NUM_IN_EPS_MASK (0x1f << 18)
+#define DWC3_NUM_EPS_MASK (0x3f << 12)
+#define DWC3_NUM_EPS(p) (((p)->hwparams3 & (DWC3_NUM_EPS_MASK)) >> 12)
+#define DWC3_NUM_IN_EPS(p) (((p)->hwparams3 & (DWC3_NUM_IN_EPS_MASK)) >> 18)
+
+/* HWPARAMS7 */
+#define DWC3_RAM1_DEPTH(n) ((n)&0xffff)
+
+#define DWC3_REVISION_173A 0x5533173a
+#define DWC3_REVISION_175A 0x5533175a
+#define DWC3_REVISION_180A 0x5533180a
+#define DWC3_REVISION_183A 0x5533183a
+#define DWC3_REVISION_185A 0x5533185a
+#define DWC3_REVISION_187A 0x5533187a
+#define DWC3_REVISION_188A 0x5533188a
+#define DWC3_REVISION_190A 0x5533190a
+#define DWC3_REVISION_194A 0x5533194a
+#define DWC3_REVISION_200A 0x5533200a
+#define DWC3_REVISION_202A 0x5533202a
+#define DWC3_REVISION_210A 0x5533210a
+#define DWC3_REVISION_220A 0x5533220a
+#define DWC3_REVISION_230A 0x5533230a
+#define DWC3_REVISION_240A 0x5533240a
+#define DWC3_REVISION_250A 0x5533250a
+
+/* -------------------------------------------------------------------------- */
+
+struct dwc3_event_type {
+ u32 is_devspec : 1;
+ u32 type : 7;
+ u32 reserved8_31 : 24;
+} PACKED;
+
+#define DWC3_DEPEVT_XFERCOMPLETE 0x01
+#define DWC3_DEPEVT_XFERINPROGRESS 0x02
+#define DWC3_DEPEVT_XFERNOTREADY 0x03
+#define DWC3_DEPEVT_RXTXFIFOEVT 0x04
+#define DWC3_DEPEVT_STREAMEVT 0x06
+#define DWC3_DEPEVT_EPCMDCMPLT 0x07
+
+/**
+ * struct dwc3_event_depevt - Device Endpoint Events
+ * @one_bit: indicates this is an endpoint event (not used)
+ * @endpoint_number: number of the endpoint
+ * @endpoint_event: The event we have:
+ * 0x00 - Reserved
+ * 0x01 - XferComplete
+ * 0x02 - XferInProgress
+ * 0x03 - XferNotReady
+ * 0x04 - RxTxFifoEvt (IN->Underrun, OUT->Overrun)
+ * 0x05 - Reserved
+ * 0x06 - StreamEvt
+ * 0x07 - EPCmdCmplt
+ * @reserved11_10: Reserved, don't use.
+ * @status: Indicates the status of the event. Refer to databook for
+ * more information.
+ * @parameters: Parameters of the current event. Refer to databook for
+ * more information.
+ */
+struct dwc3_event_depevt {
+ u32 one_bit : 1;
+ u32 endpoint_number : 5;
+ u32 endpoint_event : 4;
+ u32 reserved11_10 : 2;
+ u32 status : 4;
+
+/* Within XferNotReady */
+#define DEPEVT_STATUS_TRANSFER_ACTIVE (1 << 3)
+
+/* Within XferComplete */
+#define DEPEVT_STATUS_BUSERR (1 << 0)
+#define DEPEVT_STATUS_SHORT (1 << 1)
+#define DEPEVT_STATUS_IOC (1 << 2)
+#define DEPEVT_STATUS_LST (1 << 3)
+
+/* Stream event only */
+#define DEPEVT_STREAMEVT_FOUND 1
+#define DEPEVT_STREAMEVT_NOTFOUND 2
+
+/* Control-only Status */
+#define DEPEVT_STATUS_CONTROL_DATA 1
+#define DEPEVT_STATUS_CONTROL_STATUS 2
+
+ u32 parameters : 16;
+} PACKED;
+
+#define DWC3_DEVT_DISCONN 0x00
+#define DWC3_DEVT_USBRST 0x01
+#define DWC3_DEVT_CONNECTDONE 0x02
+#define DWC3_DEVT_ULSTCHNG 0x03
+#define DWC3_DEVT_WKUPEVT 0x04
+#define DWC3_DEVT_EOPF 0x06
+#define DWC3_DEVT_SOF 0x07
+#define DWC3_DEVT_ERRTICERR 0x09
+#define DWC3_DEVT_CMDCMPLT 0x0a
+#define DWC3_DEVT_EVNTOVERFLOW 0x0b
+#define DWC3_DEVT_VNDRDEVTSTRCVED 0x0c
+
+/**
+ * struct dwc3_event_devt - Device Events
+ * @one_bit: indicates this is a non-endpoint event (not used)
+ * @device_event: indicates it's a device event. Should read as 0x00
+ * @type: indicates the type of device event.
+ * 0 - DisconnEvt
+ * 1 - USBRst
+ * 2 - ConnectDone
+ * 3 - ULStChng
+ * 4 - WkUpEvt
+ * 5 - Reserved
+ * 6 - EOPF
+ * 7 - SOF
+ * 8 - Reserved
+ * 9 - ErrticErr
+ * 10 - CmdCmplt
+ * 11 - EvntOverflow
+ * 12 - VndrDevTstRcved
+ * @reserved15_12: Reserved, not used
+ * @event_info: Information about this event
+ * @reserved31_24: Reserved, not used
+ */
+struct dwc3_event_devt {
+ u32 one_bit : 1;
+ u32 device_event : 7;
+ u32 type : 4;
+ u32 reserved15_12 : 4;
+ u32 event_info : 8;
+ u32 reserved31_24 : 8;
+} PACKED;
+
+/**
+ * struct dwc3_event_gevt - Other Core Events
+ * @one_bit: indicates this is a non-endpoint event (not used)
+ * @device_event: indicates it's (0x03) Carkit or (0x04) I2C event.
+ * @phy_port_number: self-explanatory
+ * @reserved31_12: Reserved, not used.
+ */
+struct dwc3_event_gevt {
+ u32 one_bit : 1;
+ u32 device_event : 7;
+ u32 phy_port_number : 4;
+ u32 reserved31_12 : 20;
+} PACKED;
+
+union dwc3_event {
+ u32 raw;
+ struct dwc3_event_type type;
+ struct dwc3_event_depevt depevt;
+ struct dwc3_event_devt devt;
+ struct dwc3_event_gevt gevt;
+};
+
+#define DWC3_DEPCFG_EP_TYPE(n) (((n)&0x3) << 1)
+#define DWC3_DEPCFG_EP_NUMBER(n) (((n)&0x1f) << 25)
+#define DWC3_DEPCFG_FIFO_NUMBER(n) (((n)&0xf) << 17)
+#define DWC3_DEPCFG_MAX_PACKET_SIZE(n) (((n)&0x7ff) << 3)
+
+#define DWC3_DEPCFG_INT_NUM(n) (((n)&0x1f) << 0)
+#define DWC3_DEPCFG_XFER_COMPLETE_EN BIT(8)
+#define DWC3_DEPCFG_XFER_IN_PROGRESS_EN BIT(9)
+#define DWC3_DEPCFG_XFER_NOT_READY_EN BIT(10)
+#define DWC3_DEPCFG_FIFO_ERROR_EN BIT(11)
+#define DWC3_DEPCFG_STREAM_EVENT_EN BIT(13)
+#define DWC3_DEPCFG_BINTERVAL_M1(n) (((n)&0xff) << 16)
+#define DWC3_DEPCFG_STREAM_CAPABLE BIT(24)
+#define DWC3_DEPCFG_EP_NUMBER(n) (((n)&0x1f) << 25)
+#define DWC3_DEPCFG_BULK_BASED BIT(30)
+#define DWC3_DEPCFG_FIFO_BASED BIT(31)
+
+#endif /* __DRIVERS_USB_DWC3_CORE_H */
diff --git a/tools/src/usb_types.h b/tools/src/usb_types.h
new file mode 100644
index 0000000..2571a1a
--- /dev/null
+++ b/tools/src/usb_types.h
@@ -0,0 +1,209 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef USB_TYPES_H
+#define USB_TYPES_H
+
+#include "types.h"
+
+#define USB_REQUEST_TYPE_DIRECTION_SHIFT 7
+#define USB_REQUEST_TYPE_DIRECTION(d) ((d) << USB_REQUEST_TYPE_DIRECTION_SHIFT)
+#define USB_REQUEST_TYPE_DIRECTION_HOST2DEVICE 0
+#define USB_REQUEST_TYPE_DIRECTION_DEVICE2HOST 1
+
+#define USB_REQUEST_TYPE_SHIFT 5
+#define USB_REQUEST_TYPE(t) ((t) << USB_REQUEST_TYPE_SHIFT)
+#define USB_REQUEST_TYPE_STANDARD USB_REQUEST_TYPE(0b00)
+#define USB_REQUEST_TYPE_CLASS USB_REQUEST_TYPE(0b01)
+#define USB_REQUEST_TYPE_VENDOR USB_REQUEST_TYPE(0b10)
+#define USB_REQUEST_TYPE_MASK USB_REQUEST_TYPE(0b11)
+
+#define USB_REQUEST_TYPE_RECIPIENT_DEVICE 0
+#define USB_REQUEST_TYPE_RECIPIENT_INTERFACE 1
+#define USB_REQUEST_TYPE_RECIPIENT_ENDPOINT 2
+#define USB_REQUEST_TYPE_RECIPIENT_OTHER 3
+#define USB_REQUEST_TYPE_RECIPIENT_MASK 0b11
+
+#define USB_REQUEST_GET_STATUS 0x00
+#define USB_REQUEST_CLEAR_FEATURE 0x01
+#define USB_REQUEST_SET_FEATURE 0x03
+#define USB_REQUEST_SET_ADDRESS 0x05
+#define USB_REQUEST_GET_DESCRIPTOR 0x06
+#define USB_REQUEST_SET_DESCRIPTOR 0x07
+#define USB_REQUEST_GET_CONFIGURATION 0x08
+#define USB_REQUEST_SET_CONFIGURATION 0x09
+
+#define USB_EP_REQUEST_CLEAR_FEATURE 0x01
+#define USB_EP_REQUEST_SET_FEATURE 0x03
+
+#define USB_FEATURE_ENDPOINT_HALT 0x00
+
+#define USB_REQUEST_CDC_SET_LINE_CODING 0x20
+#define USB_REQUEST_CDC_GET_LINE_CODING 0x21
+#define USB_REQUEST_CDC_SET_CTRL_LINE_STATE 0x22
+
+struct usb_setup_packet_raw {
+ u8 bmRequestType;
+ u8 bRequest;
+ u16 wValue;
+ u16 wIndex;
+ u16 wLength;
+} PACKED;
+
+struct usb_setup_packet_get_descriptor {
+ u8 bmRequestType;
+ u8 bRequest;
+ u8 index;
+ u8 type;
+ u16 language;
+ u16 wLength;
+} PACKED;
+
+struct usb_set_packet_set_address {
+ u8 bmRequestType;
+ u8 bRequest;
+ u16 address;
+ u16 zero0;
+ u16 zero1;
+} PACKED;
+
+struct usb_set_packet_set_configuration {
+ u8 bmRequestType;
+ u8 bRequest;
+ u16 configuration;
+ u16 zero0;
+ u16 zero1;
+} PACKED;
+
+struct usb_setup_packet_feature {
+ u8 bmRequestType;
+ u8 bRequest;
+ u16 wFeatureSelector;
+ u16 wEndpoint;
+ u16 wLength;
+} PACKED;
+
+union usb_setup_packet {
+ struct usb_setup_packet_raw raw;
+ struct usb_setup_packet_get_descriptor get_descriptor;
+ struct usb_set_packet_set_address set_address;
+ struct usb_set_packet_set_configuration set_configuration;
+ struct usb_setup_packet_feature feature;
+};
+
+#define USB_DEVICE_DESCRIPTOR 0x01
+#define USB_CONFIGURATION_DESCRIPTOR 0x02
+#define USB_STRING_DESCRIPTOR 0x03
+#define USB_INTERFACE_DESCRIPTOR 0x04
+#define USB_ENDPOINT_DESCRIPTOR 0x05
+#define USB_DEVICE_QUALIFIER_DESCRIPTOR 0x06
+#define USB_OTHER_SPEED_CONFIGURATION_DESCRIPTOR 0x07
+
+#define USB_CDC_INTERFACE_FUNCTIONAL_DESCRIPTOR 0x24
+#define USB_CDC_UNION_SUBTYPE 0x06
+
+#define USB_CONFIGURATION_SELF_POWERED 0x40
+#define USB_CONFIGURATION_ATTRIBUTE_RES1 0x80
+
+#define USB_ENDPOINT_ADDR_IN(ep) (0x80 | (ep))
+#define USB_ENDPOINT_ADDR_OUT(ep) (0x00 | (ep))
+
+#define USB_ENDPOINT_ATTR_TYPE_CONTROL 0b00
+#define USB_ENDPOINT_ATTR_TYPE_ISOCHRONOUS 0b01
+#define USB_ENDPOINT_ATTR_TYPE_BULK 0b10
+#define USB_ENDPOINT_ATTR_TYPE_INTERRUPT 0b11
+
+#define USB_LANGID_EN_US 0x0409
+
+struct usb_device_descriptor {
+ u8 bLength;
+ u8 bDescriptorType;
+ u16 bcdUSB;
+ u8 bDeviceClass;
+ u8 bDeviceSubClass;
+ u8 bDeviceProtocol;
+ u8 bMaxPacketSize0;
+ u16 idVendor;
+ u16 idProduct;
+ u16 bcdDevice;
+ u8 iManufacturer;
+ u8 iProduct;
+ u8 iSerialNumber;
+ u8 bNumConfigurations;
+} PACKED;
+
+struct usb_configuration_descriptor {
+ u8 bLength;
+ u8 bDescriptorType;
+ u16 wTotalLength;
+ u8 bNumInterfaces;
+ u8 bConfigurationValue;
+ u8 iConfiguration;
+ u8 bmAttributes;
+ u8 bMaxPower;
+} PACKED;
+
+struct usb_interface_descriptor {
+ u8 bLength;
+ u8 bDescriptorType;
+ u8 bInterfaceNumber;
+ u8 bAlternateSetting;
+ u8 bNumEndpoints;
+ u8 bInterfaceClass;
+ u8 bInterfaceSubClass;
+ u8 bInterfaceProtocol;
+ u8 iInterface;
+} PACKED;
+
+struct usb_endpoint_descriptor {
+ u8 bLength;
+ u8 bDescriptorType;
+ u8 bEndpointAddress;
+ u8 bmAttributes;
+ u16 wMaxPacketSize;
+ u8 bInterval;
+} PACKED;
+
+struct usb_string_descriptor {
+ u8 bLength;
+ u8 bDescriptorType;
+ u16 bString[];
+} PACKED;
+
+struct usb_string_descriptor_languages {
+ u8 bLength;
+ u8 bDescriptorType;
+ u16 wLANGID[];
+} PACKED;
+
+struct cdc_union_functional_descriptor {
+ u8 bFunctionLength;
+ u8 bDescriptorType;
+ u8 bDescriptorSubtype;
+ u8 bControlInterface;
+ u8 bDataInterface;
+} PACKED;
+
+struct usb_device_qualifier_descriptor {
+ u8 bLength;
+ u8 bDescriptorType;
+ u16 bcdUSB;
+ u8 bDeviceClass;
+ u8 bDeviceSubClass;
+ u8 bDeviceProtocol;
+ u8 bMaxPacketSize0;
+ u8 bNumConfigurations;
+ u8 bReserved;
+} PACKED;
+
+/*
+ * This macro is required because we need to convert string literals to
+ * UTF-16 and to compute the correct total size of the string descriptor.
+ */
+#define make_usb_string_descriptor(str) \
+ { \
+ .bLength = sizeof(struct usb_string_descriptor) + sizeof(u##str), \
+ .bDescriptorType = USB_STRING_DESCRIPTOR, .bString = u##str \
+ }
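+
+/*
+ * Usage sketch (hypothetical descriptor name; relies on the GCC extension
+ * that permits a static initializer for a flexible array member):
+ *
+ *   static const struct usb_string_descriptor str_product =
+ *       make_usb_string_descriptor("m1n1 uartproxy");
+ */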
+
+#endif
diff --git a/tools/src/utils.c b/tools/src/utils.c
new file mode 100644
index 0000000..2343476
--- /dev/null
+++ b/tools/src/utils.c
@@ -0,0 +1,182 @@
+/* SPDX-License-Identifier: MIT */
+
+#include <assert.h>
+#include <stdarg.h>
+
+#include "utils.h"
+#include "iodev.h"
+#include "smp.h"
+#include "types.h"
+#include "vsprintf.h"
+#include "xnuboot.h"
+
+static char ascii(char s)
+{
+ if (s < 0x20)
+ return '.';
+ if (s > 0x7E)
+ return '.';
+ return s;
+}
+
+void hexdump(const void *d, size_t len)
+{
+ u8 *data;
+ size_t i, off;
+ data = (u8 *)d;
+ for (off = 0; off < len; off += 16) {
+ printf("%08lx ", off);
+ for (i = 0; i < 16; i++) {
+ if ((i + off) >= len)
+ printf(" ");
+ else
+ printf("%02x ", data[off + i]);
+ }
+
+ printf(" ");
+ for (i = 0; i < 16; i++) {
+ if ((i + off) >= len)
+ printf(" ");
+ else
+ printf("%c", ascii(data[off + i]));
+ }
+ printf("\n");
+ }
+}
+
+void regdump(u64 addr, size_t len)
+{
+ u64 i, off;
+ for (off = 0; off < len; off += 32) {
+ printf("%016lx ", addr + off);
+ for (i = 0; i < 32; i += 4) {
+ printf("%08x ", read32(addr + off + i));
+ }
+ printf("\n");
+ }
+}
+
+int snprintf(char *buffer, size_t size, const char *fmt, ...)
+{
+ va_list args;
+ int i;
+
+ va_start(args, fmt);
+ i = vsnprintf(buffer, size, fmt, args);
+ va_end(args);
+ return i;
+}
+
+int debug_printf(const char *fmt, ...)
+{
+ va_list args;
+ char buffer[512];
+ int i;
+
+ va_start(args, fmt);
+ i = vsnprintf(buffer, sizeof(buffer), fmt, args);
+ va_end(args);
+
+ iodev_console_write(buffer, min(i, (int)(sizeof(buffer) - 1)));
+
+ return i;
+}
+
+void __assert_fail(const char *assertion, const char *file, unsigned int line, const char *function)
+{
+ printf("Assertion failed: '%s' on %s:%d:%s\n", assertion, file, line, function);
+ flush_and_reboot();
+}
+
+void udelay(u32 d)
+{
+ u64 delay = ((u64)d) * mrs(CNTFRQ_EL0) / 1000000;
+ u64 val = mrs(CNTPCT_EL0);
+ while ((mrs(CNTPCT_EL0) - val) < delay)
+ ;
+ sysop("isb");
+}
+
+u64 ticks_to_msecs(u64 ticks)
+{
+ // NOTE: only accurate if freq is even kHz
+ return ticks / (mrs(CNTFRQ_EL0) / 1000);
+}
+
+u64 ticks_to_usecs(u64 ticks)
+{
+ // NOTE: only accurate if freq is even MHz
+ return ticks / (mrs(CNTFRQ_EL0) / 1000000);
+}
+
+u64 timeout_calculate(u32 usec)
+{
+ u64 delay = ((u64)usec) * mrs(CNTFRQ_EL0) / 1000000;
+ return mrs(CNTPCT_EL0) + delay;
+}
+
+bool timeout_expired(u64 timeout)
+{
+ bool expired = mrs(CNTPCT_EL0) > timeout;
+ sysop("isb");
+ return expired;
+}
+
+void flush_and_reboot(void)
+{
+ iodev_console_flush();
+ reboot();
+}
+
+void spin_init(spinlock_t *lock)
+{
+ lock->lock = -1;
+ lock->count = 0;
+}
+
+void spin_lock(spinlock_t *lock)
+{
+ s64 tmp;
+ s64 me = smp_id();
+ if (__atomic_load_n(&lock->lock, __ATOMIC_ACQUIRE) == me) {
+ lock->count++;
+ return;
+ }
+
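+    /*
+     * Slow path: CASA tries to swap -1 (unlocked) for our CPU id with acquire
+     * semantics. If the lock is held, LDXR arms the exclusive monitor so WFE
+     * sleeps until the owner's release store generates an event, then retry.
+     */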
+ __asm__ volatile("1:\n"
+ "mov\t%0, -1\n"
+ "2:\n"
+ "\tcasa\t%0, %2, %1\n"
+ "\tcmn\t%0, 1\n"
+ "\tbeq\t3f\n"
+ "\tldxr\t%0, %1\n"
+ "\tcmn\t%0, 1\n"
+ "\tbeq\t2b\n"
+ "\twfe\n"
+ "\tb\t1b\n"
+ "3:"
+ : "=&r"(tmp), "+m"(lock->lock)
+ : "r"(me)
+ : "cc", "memory");
+
+ assert(__atomic_load_n(&lock->lock, __ATOMIC_RELAXED) == me);
+ lock->count++;
+}
+
+void spin_unlock(spinlock_t *lock)
+{
+ s64 me = smp_id();
+ assert(__atomic_load_n(&lock->lock, __ATOMIC_RELAXED) == me);
+ assert(lock->count > 0);
+ if (!--lock->count)
+ __atomic_store_n(&lock->lock, -1L, __ATOMIC_RELEASE);
+}
+
+bool is_heap(void *addr)
+{
+ u64 p = (u64)addr;
+ u64 top_of_kernel_data = (u64)cur_boot_args.top_of_kernel_data;
+ u64 top_of_ram = cur_boot_args.mem_size + cur_boot_args.phys_base;
+
+ return p > top_of_kernel_data && p < top_of_ram;
+}
diff --git a/tools/src/utils.h b/tools/src/utils.h
new file mode 100644
index 0000000..1d053d2
--- /dev/null
+++ b/tools/src/utils.h
@@ -0,0 +1,444 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef UTILS_H
+#define UTILS_H
+
+#include "types.h"
+
+#define printf(...) debug_printf(__VA_ARGS__)
+
+#ifdef DEBUG
+#define dprintf(...) debug_printf(__VA_ARGS__)
+#else
+#define dprintf(...) \
+ do { \
+ } while (0)
+#endif
+
+#define ARRAY_SIZE(s) (sizeof(s) / sizeof((s)[0]))
+
+#define BIT(x) (1UL << (x))
+#define MASK(x) (BIT(x) - 1)
+#define GENMASK(msb, lsb) ((BIT(((msb) + 1) - (lsb)) - 1) << (lsb))
+/* _FIELD_LSB isolates the lowest set bit of a mask so FIELD_PREP/FIELD_GET
+   can shift values in and out of the field via multiply/divide */
+#define _FIELD_LSB(field) ((field) & ~((field)-1))
+#define FIELD_PREP(field, val) ((val) * (_FIELD_LSB(field)))
+#define FIELD_GET(field, val) (((val) & (field)) / _FIELD_LSB(field))
+
+#define ALIGN_UP(x, a) (((x) + ((a)-1)) & ~((a)-1))
+#define ALIGN_DOWN(x, a) ((x) & ~((a)-1))
+
+#define min(a, b) (((a) < (b)) ? (a) : (b))
+#define max(a, b) (((a) > (b)) ? (a) : (b))
+
+#define USEC_PER_SEC 1000000L
+
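+/*
+ * The MMIO accessors below are written in inline asm rather than as volatile
+ * pointer dereferences to guarantee a single access instruction of the exact
+ * width, which is what device registers require.
+ */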
+static inline u64 read64(u64 addr)
+{
+ u64 data;
+ __asm__ volatile("ldr\t%0, [%1]" : "=r"(data) : "r"(addr) : "memory");
+ return data;
+}
+
+static inline void write64(u64 addr, u64 data)
+{
+ __asm__ volatile("str\t%0, [%1]" : : "r"(data), "r"(addr) : "memory");
+}
+
+static inline u64 set64(u64 addr, u64 set)
+{
+ u64 data;
+ __asm__ volatile("ldr\t%0, [%1]\n"
+ "\torr\t%0, %0, %2\n"
+ "\tstr\t%0, [%1]"
+ : "=&r"(data)
+ : "r"(addr), "r"(set)
+ : "memory");
+ return data;
+}
+
+static inline u64 clear64(u64 addr, u64 clear)
+{
+ u64 data;
+ __asm__ volatile("ldr\t%0, [%1]\n"
+ "\tbic\t%0, %0, %2\n"
+ "\tstr\t%0, [%1]"
+ : "=&r"(data)
+ : "r"(addr), "r"(clear)
+ : "memory");
+ return data;
+}
+
+static inline u64 mask64(u64 addr, u64 clear, u64 set)
+{
+ u64 data;
+ __asm__ volatile("ldr\t%0, [%1]\n"
+ "\tbic\t%0, %0, %3\n"
+ "\torr\t%0, %0, %2\n"
+ "\tstr\t%0, [%1]"
+ : "=&r"(data)
+ : "r"(addr), "r"(set), "r"(clear)
+ : "memory");
+ return data;
+}
+
+static inline u64 writeread64(u64 addr, u64 data)
+{
+ write64(addr, data);
+ return read64(addr);
+}
+
+static inline u32 read32(u64 addr)
+{
+ u32 data;
+ __asm__ volatile("ldr\t%w0, [%1]" : "=r"(data) : "r"(addr) : "memory");
+ return data;
+}
+
+static inline void write32(u64 addr, u32 data)
+{
+ __asm__ volatile("str\t%w0, [%1]" : : "r"(data), "r"(addr) : "memory");
+}
+
+static inline u32 writeread32(u64 addr, u32 data)
+{
+ write32(addr, data);
+ return read32(addr);
+}
+
+static inline u32 set32(u64 addr, u32 set)
+{
+ u32 data;
+ __asm__ volatile("ldr\t%w0, [%1]\n"
+ "\torr\t%w0, %w0, %w2\n"
+ "\tstr\t%w0, [%1]"
+ : "=&r"(data)
+ : "r"(addr), "r"(set)
+ : "memory");
+ return data;
+}
+
+static inline u32 clear32(u64 addr, u32 clear)
+{
+ u32 data;
+ __asm__ volatile("ldr\t%w0, [%1]\n"
+ "\tbic\t%w0, %w0, %w2\n"
+ "\tstr\t%w0, [%1]"
+ : "=&r"(data)
+ : "r"(addr), "r"(clear)
+ : "memory");
+ return data;
+}
+
+static inline u32 mask32(u64 addr, u32 clear, u32 set)
+{
+ u32 data;
+ __asm__ volatile("ldr\t%w0, [%1]\n"
+ "\tbic\t%w0, %w0, %w3\n"
+ "\torr\t%w0, %w0, %w2\n"
+ "\tstr\t%w0, [%1]"
+ : "=&r"(data)
+ : "r"(addr), "r"(set), "r"(clear)
+ : "memory");
+ return data;
+}
+
+static inline u16 read16(u64 addr)
+{
+ u32 data;
+ __asm__ volatile("ldrh\t%w0, [%1]" : "=r"(data) : "r"(addr) : "memory");
+ return data;
+}
+
+static inline void write16(u64 addr, u16 data)
+{
+ __asm__ volatile("strh\t%w0, [%1]" : : "r"(data), "r"(addr) : "memory");
+}
+
+static inline u16 set16(u64 addr, u16 set)
+{
+ u16 data;
+ __asm__ volatile("ldrh\t%w0, [%1]\n"
+ "\torr\t%w0, %w0, %w2\n"
+ "\tstrh\t%w0, [%1]"
+ : "=&r"(data)
+ : "r"(addr), "r"(set)
+ : "memory"
+
+ );
+ return data;
+}
+
+static inline u16 clear16(u64 addr, u16 clear)
+{
+ u16 data;
+ __asm__ volatile("ldrh\t%w0, [%1]\n"
+ "\tbic\t%w0, %w0, %w2\n"
+ "\tstrh\t%w0, [%1]"
+ : "=&r"(data)
+ : "r"(addr), "r"(clear)
+ : "memory");
+ return data;
+}
+
+static inline u16 mask16(u64 addr, u16 clear, u16 set)
+{
+ u16 data;
+ __asm__ volatile("ldrh\t%w0, [%1]\n"
+ "\tbic\t%w0, %w0, %w3\n"
+ "\torr\t%w0, %w0, %w2\n"
+ "\tstrh\t%w0, [%1]"
+ : "=&r"(data)
+ : "r"(addr), "r"(set), "r"(clear)
+ : "memory");
+ return data;
+}
+
+static inline u16 writeread16(u64 addr, u16 data)
+{
+ write16(addr, data);
+ return read16(addr);
+}
+
+static inline u8 read8(u64 addr)
+{
+ u32 data;
+ __asm__ volatile("ldrb\t%w0, [%1]" : "=r"(data) : "r"(addr) : "memory");
+ return data;
+}
+
+static inline void write8(u64 addr, u8 data)
+{
+ __asm__ volatile("strb\t%w0, [%1]" : : "r"(data), "r"(addr) : "memory");
+}
+
+static inline u8 set8(u64 addr, u8 set)
+{
+ u8 data;
+ __asm__ volatile("ldrb\t%w0, [%1]\n"
+ "\torr\t%w0, %w0, %w2\n"
+ "\tstrb\t%w0, [%1]"
+ : "=&r"(data)
+ : "r"(addr), "r"(set)
+ : "memory");
+ return data;
+}
+
+static inline u8 clear8(u64 addr, u8 clear)
+{
+ u8 data;
+ __asm__ volatile("ldrb\t%w0, [%1]\n"
+ "\tbic\t%w0, %w0, %w2\n"
+ "\tstrb\t%w0, [%1]"
+ : "=&r"(data)
+ : "r"(addr), "r"(clear)
+ : "memory");
+ return data;
+}
+
+static inline u8 mask8(u64 addr, u8 clear, u8 set)
+{
+ u8 data;
+ __asm__ volatile("ldrb\t%w0, [%1]\n"
+ "\tbic\t%w0, %w0, %w3\n"
+ "\torr\t%w0, %w0, %w2\n"
+ "\tstrb\t%w0, [%1]"
+ : "=&r"(data)
+ : "r"(addr), "r"(set), "r"(clear)
+ : "memory");
+ return data;
+}
+
+static inline u8 writeread8(u64 addr, u8 data)
+{
+ write8(addr, data);
+ return read8(addr);
+}
+
+static inline void write64_lo_hi(u64 addr, u64 val)
+{
+ write32(addr, val);
+ write32(addr + 4, val >> 32);
+}
+
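+/*
+ * Token-pasting glue for mrs()/msr(): a plain register name passes through
+ * unchanged, while a register defined as an (op0, op1, CRn, CRm, op2) tuple
+ * is pasted into the s<op0>_<op1>_c<CRn>_c<CRm>_<op2> form the assembler
+ * accepts for unnamed system registers.
+ */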
+#define _concat(a, _1, b, ...) a##b
+
+#define _sr_tkn_S(_0, _1, op0, op1, CRn, CRm, op2) s##op0##_##op1##_c##CRn##_c##CRm##_##op2
+
+#define _sr_tkn(a) a
+
+#define sr_tkn(...) _concat(_sr_tkn, __VA_ARGS__, )(__VA_ARGS__)
+
+#define __mrs(reg) \
+ ({ \
+ u64 val; \
+ __asm__ volatile("mrs\t%0, " #reg : "=r"(val)); \
+ val; \
+ })
+#define _mrs(reg) __mrs(reg)
+
+#define __msr(reg, val) \
+ ({ \
+ u64 __val = (u64)val; \
+ __asm__ volatile("msr\t" #reg ", %0" : : "r"(__val)); \
+ })
+#define _msr(reg, val) __msr(reg, val)
+
+#define mrs(reg) _mrs(sr_tkn(reg))
+#define msr(reg, val) _msr(sr_tkn(reg), val)
+#define msr_sync(reg, val) \
+ ({ \
+ _msr(sr_tkn(reg), val); \
+ sysop("isb"); \
+ })
+
+#define reg_clr(reg, bits) _msr(sr_tkn(reg), _mrs(sr_tkn(reg)) & ~(bits))
+#define reg_set(reg, bits) _msr(sr_tkn(reg), _mrs(sr_tkn(reg)) | (bits))
+#define reg_mask(reg, clr, set) _msr(sr_tkn(reg), (_mrs(sr_tkn(reg)) & ~(clr)) | (set))
+
+#define reg_clr_sync(reg, bits) \
+ ({ \
+ reg_clr(sr_tkn(reg), bits); \
+ sysop("isb"); \
+ })
+#define reg_set_sync(reg, bits) \
+ ({ \
+ reg_set(sr_tkn(reg), bits); \
+ sysop("isb"); \
+ })
+#define reg_mask_sync(reg, clr, set) \
+ ({ \
+ reg_mask(sr_tkn(reg), clr, set); \
+ sysop("isb"); \
+ })
+
+#define sysop(op) __asm__ volatile(op ::: "memory")
+
+#define cacheop(op, val) ({ __asm__ volatile(op ", %0" : : "r"(val) : "memory"); })
+
+#define ic_ialluis() sysop("ic ialluis")
+#define ic_iallu() sysop("ic iallu")
+#define ic_iavau(p) cacheop("ic ivau", p)
+#define dc_ivac(p) cacheop("dc ivac", p)
+#define dc_isw(p) cacheop("dc isw", p)
+#define dc_csw(p) cacheop("dc csw", p)
+#define dc_cisw(p) cacheop("dc cisw", p)
+#define dc_zva(p) cacheop("dc zva", p)
+#define dc_cvac(p) cacheop("dc cvac", p)
+#define dc_cvau(p) cacheop("dc cvau", p)
+#define dc_civac(p) cacheop("dc civac", p)
+
+#define dma_mb() sysop("dmb osh")
+#define dma_rmb() sysop("dmb oshld")
+#define dma_wmb() sysop("dmb oshst")
+
+static inline int is_ecore(void)
+{
+ return !(mrs(MPIDR_EL1) & (1 << 16));
+}
+
+static inline int in_el2(void)
+{
+ return (mrs(CurrentEL) >> 2) == 2;
+}
+
+static inline int is_primary_core(void)
+{
+ return mrs(MPIDR_EL1) == 0x80000000;
+}
+
+extern char _base[];
+extern char _rodata_end[];
+extern char _end[];
+extern char _payload_start[];
+extern char _payload_end[];
+
+/*
+ * These functions are guaranteed to copy by reading from src and writing to
+ * dst in <n>-bit units. If size is not a multiple of the unit size, the
+ * remaining bytes are not copied.
+ */
+void memcpy128(void *dst, void *src, size_t size);
+void memset64(void *dst, u64 value, size_t size);
+void memcpy64(void *dst, void *src, size_t size);
+void memset32(void *dst, u32 value, size_t size);
+void memcpy32(void *dst, void *src, size_t size);
+void memset16(void *dst, u16 value, size_t size);
+void memcpy16(void *dst, void *src, size_t size);
+void memset8(void *dst, u8 value, size_t size);
+void memcpy8(void *dst, void *src, size_t size);
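+
+/*
+ * For example, memcpy64(dst, src, 20) copies only 16 bytes (two 64-bit
+ * units) and leaves the trailing 4 bytes of dst untouched.
+ */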
+
+void get_simd_state(void *state);
+void put_simd_state(void *state);
+
+void hexdump(const void *d, size_t len);
+void regdump(u64 addr, size_t len);
+int snprintf(char *str, size_t size, const char *fmt, ...);
+int debug_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
+void udelay(u32 d);
+
+static inline u64 get_ticks(void)
+{
+ return mrs(CNTPCT_EL0);
+}
+u64 ticks_to_msecs(u64 ticks);
+u64 ticks_to_usecs(u64 ticks);
+
+void reboot(void) __attribute__((noreturn));
+void flush_and_reboot(void) __attribute__((noreturn));
+
+u64 timeout_calculate(u32 usec);
+bool timeout_expired(u64 timeout);
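+
+/*
+ * Sketch of the intended pattern (semantics inferred from the names):
+ *
+ *   u64 t = timeout_calculate(10000); // ~10ms budget
+ *   while (!device_ready()) {         // hypothetical condition
+ *       if (timeout_expired(t))
+ *           return -1;
+ *   }
+ */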
+
+#define SPINLOCK_ALIGN 64
+
+typedef struct {
+ s64 lock;
+ int count;
+} spinlock_t ALIGNED(SPINLOCK_ALIGN);
+
+#define SPINLOCK_INIT \
+ { \
+ -1, 0 \
+ }
+#define DECLARE_SPINLOCK(n) spinlock_t n = SPINLOCK_INIT;
+
+void spin_init(spinlock_t *lock);
+void spin_lock(spinlock_t *lock);
+void spin_unlock(spinlock_t *lock);
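+
+/*
+ * Typical usage (sketch; my_lock is a hypothetical lock):
+ *
+ *   DECLARE_SPINLOCK(my_lock);
+ *   spin_lock(&my_lock);
+ *   ... critical section ...
+ *   spin_unlock(&my_lock);
+ */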
+
+#define mdelay(m) udelay((m)*1000)
+
+#define panic(fmt, ...) \
+ do { \
+ debug_printf(fmt, ##__VA_ARGS__); \
+ flush_and_reboot(); \
+ } while (0)
+
+static inline int poll32(u64 addr, u32 mask, u32 target, u32 timeout)
+{
+ while (--timeout > 0) {
+ u32 value = read32(addr) & mask;
+ if (value == target)
+ return 0;
+ udelay(1);
+ }
+
+ return -1;
+}
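+
+/*
+ * Example (REG_STATUS/STATUS_READY are hypothetical names): wait up to
+ * ~1000us for a status bit to assert:
+ *
+ *   if (poll32(base + REG_STATUS, STATUS_READY, STATUS_READY, 1000) < 0)
+ *       debug_printf("timed out\n");
+ */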
+
+typedef u64(generic_func)(u64, u64, u64, u64, u64);
+
+struct vector_args {
+ generic_func *entry;
+ u64 args[5];
+ bool restore_logo;
+};
+
+extern u32 board_id, chip_id;
+
+extern struct vector_args next_stage;
+
+void deep_wfi(void);
+
+bool is_heap(void *addr);
+
+#endif
diff --git a/tools/src/utils_asm.S b/tools/src/utils_asm.S
new file mode 100644
index 0000000..7fe1cea
--- /dev/null
+++ b/tools/src/utils_asm.S
@@ -0,0 +1,182 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "cpu_regs.h"
+
+.text
+
+.globl memcpy128
+.type memcpy128, @function
+memcpy128:
+ ands x2, x2, #~15
+ beq 2f
+1: ldp x3, x4, [x1], #16
+ stp x3, x4, [x0], #16
+ subs x2, x2, #16
+ bne 1b
+2:
+ ret
+
+.globl memcpy64
+.type memcpy64, @function
+memcpy64:
+ ands x2, x2, #~7
+ beq 2f
+1: ldr x3, [x1], #8
+ str x3, [x0], #8
+ subs x2, x2, #8
+ bne 1b
+2:
+ ret
+
+.globl memset64
+.type memset64, @function
+memset64:
+ ands x2, x2, #~7
+ beq 2f
+1: str x1, [x0], #8
+ subs x2, x2, #8
+ bne 1b
+2:
+ ret
+
+.globl memcpy32
+.type memcpy32, @function
+memcpy32:
+ ands x2, x2, #~3
+ beq 2f
+1: ldr w3, [x1], #4
+ str w3, [x0], #4
+ subs x2, x2, #4
+ bne 1b
+2:
+ ret
+
+.globl memset32
+.type memset32, @function
+memset32:
+ ands x2, x2, #~3
+ beq 2f
+1: str w1, [x0], #4
+ subs x2, x2, #4
+ bne 1b
+2:
+ ret
+
+.globl memcpy16
+.type memcpy16, @function
+memcpy16:
+ ands x2, x2, #~1
+ beq 2f
+1: ldrh w3, [x1], #2
+ strh w3, [x0], #2
+ subs x2, x2, #2
+ bne 1b
+2:
+ ret
+
+.globl memset16
+.type memset16, @function
+memset16:
+ ands x2, x2, #~1
+ beq 2f
+1: strh w1, [x0], #2
+ subs x2, x2, #2
+ bne 1b
+2:
+ ret
+
+.globl memcpy8
+.type memcpy8, @function
+memcpy8:
+ cmp x2, #0
+ beq 2f
+1: ldrb w3, [x1], #1
+ strb w3, [x0], #1
+ subs x2, x2, #1
+ bne 1b
+2:
+ ret
+
+.globl memset8
+.type memset8, @function
+memset8:
+ cmp x2, #0
+ beq 2f
+1: strb w1, [x0], #1
+ subs x2, x2, #1
+ bne 1b
+2:
+ ret
+
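+/*
+ * get_simd_state/put_simd_state save/restore all 32 NEON/FP vector
+ * registers (q0-q31) to/from a 512-byte buffer at x0.
+ */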
+.globl get_simd_state
+.type get_simd_state, @function
+get_simd_state:
+ stp q0, q1, [x0], #32
+ stp q2, q3, [x0], #32
+ stp q4, q5, [x0], #32
+ stp q6, q7, [x0], #32
+ stp q8, q9, [x0], #32
+ stp q10, q11, [x0], #32
+ stp q12, q13, [x0], #32
+ stp q14, q15, [x0], #32
+ stp q16, q17, [x0], #32
+ stp q18, q19, [x0], #32
+ stp q20, q21, [x0], #32
+ stp q22, q23, [x0], #32
+ stp q24, q25, [x0], #32
+ stp q26, q27, [x0], #32
+ stp q28, q29, [x0], #32
+ stp q30, q31, [x0], #32
+ ret
+
+.globl put_simd_state
+.type put_simd_state, @function
+put_simd_state:
+ ldp q0, q1, [x0], #32
+ ldp q2, q3, [x0], #32
+ ldp q4, q5, [x0], #32
+ ldp q6, q7, [x0], #32
+ ldp q8, q9, [x0], #32
+ ldp q10, q11, [x0], #32
+ ldp q12, q13, [x0], #32
+ ldp q14, q15, [x0], #32
+ ldp q16, q17, [x0], #32
+ ldp q18, q19, [x0], #32
+ ldp q20, q21, [x0], #32
+ ldp q22, q23, [x0], #32
+ ldp q24, q25, [x0], #32
+ ldp q26, q27, [x0], #32
+ ldp q28, q29, [x0], #32
+ ldp q30, q31, [x0], #32
+ ret
+
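+/*
+ * deep_wfi saves the callee-saved registers x18-x30 around the wfi. Bits
+ * 25:24 of SYS_IMP_APL_CYC_OVRD are set before the wfi and bit 24 cleared
+ * afterwards; these are implementation-defined Apple bits that appear to
+ * select a deeper WFI power state.
+ */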
+.globl deep_wfi
+.type deep_wfi, @function
+deep_wfi:
+ str x30, [sp, #-16]!
+ stp x28, x29, [sp, #-16]!
+ stp x26, x27, [sp, #-16]!
+ stp x24, x25, [sp, #-16]!
+ stp x22, x23, [sp, #-16]!
+ stp x20, x21, [sp, #-16]!
+ stp x18, x19, [sp, #-16]!
+
+ mrs x0, SYS_IMP_APL_CYC_OVRD
+ orr x0, x0, #(3L << 24)
+ msr SYS_IMP_APL_CYC_OVRD, x0
+
+ wfi
+
+ mrs x0, SYS_IMP_APL_CYC_OVRD
+ bic x0, x0, #(1L << 24)
+ msr SYS_IMP_APL_CYC_OVRD, x0
+
+ ldp x18, x19, [sp], #16
+ ldp x20, x21, [sp], #16
+ ldp x22, x23, [sp], #16
+ ldp x24, x25, [sp], #16
+ ldp x26, x27, [sp], #16
+ ldp x28, x29, [sp], #16
+ ldr x30, [sp], #16
+
+ ret
diff --git a/tools/src/vsprintf.c b/tools/src/vsprintf.c
new file mode 100644
index 0000000..daa5d29
--- /dev/null
+++ b/tools/src/vsprintf.c
@@ -0,0 +1,703 @@
+/*
+ * Copyright (c) 1995 Patrick Powell.
+ *
+ * This code is based on code written by Patrick Powell <papowell@astart.com>.
+ * It may be used for any purpose as long as this notice remains intact on all
+ * source code distributions.
+ */
+
+/*
+ * Copyright (c) 2008 Holger Weiss.
+ *
+ * This version of the code is maintained by Holger Weiss <holger@jhweiss.de>.
+ * My changes to the code may freely be used, modified and/or redistributed for
+ * any purpose. It would be nice if additions and fixes to this file (including
+ * trivial code cleanups) would be sent back in order to let me include them in
+ * the version available at <http://www.jhweiss.de/software/snprintf.html>.
+ * However, this is not a requirement for using or redistributing (possibly
+ * modified) versions of this file, nor is leaving this notice intact mandatory.
+ */
+
+/*
+ * History
+ *
+ * 2009-03-05 Hector Martin "marcan" <marcan@marcansoft.com>
+ *
+ * Hacked up and removed a lot of stuff including floating-point support,
+ * a bunch of ifs and defines, locales, and tests
+ *
+ * 2008-01-20 Holger Weiss <holger@jhweiss.de> for C99-snprintf 1.1:
+ *
+ * Fixed the detection of infinite floating point values on IRIX (and
+ * possibly other systems) and applied another few minor cleanups.
+ *
+ * 2008-01-06 Holger Weiss <holger@jhweiss.de> for C99-snprintf 1.0:
+ *
+ * Added a lot of new features, fixed many bugs, and incorporated various
+ * improvements done by Andrew Tridgell <tridge@samba.org>, Russ Allbery
+ * <rra@stanford.edu>, Hrvoje Niksic <hniksic@xemacs.org>, Damien Miller
+ * <djm@mindrot.org>, and others for the Samba, INN, Wget, and OpenSSH
+ * projects. The additions include: support the "e", "E", "g", "G", and
+ * "F" conversion specifiers (and use conversion style "f" or "F" for the
+ * still unsupported "a" and "A" specifiers); support the "hh", "ll", "j",
+ * "t", and "z" length modifiers; support the "#" flag and the (non-C99)
+ * "'" flag; use localeconv(3) (if available) to get both the current
+ * locale's decimal point character and the separator between groups of
+ * digits; fix the handling of various corner cases of field width and
+ * precision specifications; fix various floating point conversion bugs;
+ * handle infinite and NaN floating point values; don't attempt to write to
+ * the output buffer (which may be NULL) if a size of zero was specified;
+ * check for integer overflow of the field width, precision, and return
+ * values and during the floating point conversion; use the OUTCHAR() macro
+ * instead of a function for better performance; provide asprintf(3) and
+ * vasprintf(3) functions; add new test cases. The replacement functions
+ * have been renamed to use an "rpl_" prefix, the function calls in the
+ * main project (and in this file) must be redefined accordingly for each
+ * replacement function which is needed (by using Autoconf or other means).
+ * Various other minor improvements have been applied and the coding style
+ * was cleaned up for consistency.
+ *
+ * 2007-07-23 Holger Weiss <holger@jhweiss.de> for Mutt 1.5.13:
+ *
+ * C99 compliant snprintf(3) and vsnprintf(3) functions return the number
+ * of characters that would have been written to a sufficiently sized
+ * buffer (excluding the '\0'). The original code simply returned the
+ * length of the resulting output string, so that's been fixed.
+ *
+ * 1998-03-05 Michael Elkins <me@mutt.org> for Mutt 0.90.8:
+ *
+ * The original code assumed that both snprintf(3) and vsnprintf(3) were
+ * missing. Some systems only have snprintf(3) but not vsnprintf(3), so
+ * the code is now broken down under HAVE_SNPRINTF and HAVE_VSNPRINTF.
+ *
+ * 1998-01-27 Thomas Roessler <roessler@does-not-exist.org> for Mutt 0.89i:
+ *
+ * The PGP code was using unsigned hexadecimal formats. Unfortunately,
+ * unsigned formats simply didn't work.
+ *
+ * 1997-10-22 Brandon Long <blong@fiction.net> for Mutt 0.87.1:
+ *
+ * Ok, added some minimal floating point support, which means this probably
+ * requires libm on most operating systems. Don't yet support the exponent
+ * (e,E) and sigfig (g,G). Also, fmtint() was pretty badly broken, it just
+ * wasn't being exercised in ways which showed it, so that's been fixed.
+ * Also, formatted the code to Mutt conventions, and removed dead code left
+ * over from the original. Also, there is now a builtin-test, run with:
+ * gcc -DTEST_SNPRINTF -o snprintf snprintf.c -lm && ./snprintf
+ *
+ * 1996-09-15 Brandon Long <blong@fiction.net> for Mutt 0.43:
+ *
+ * This was ugly. It is still ugly. I opted out of floating point
+ * numbers, but the formatter understands just about everything from the
+ * normal C string format, at least as far as I can tell from the Solaris
+ * 2.5 printf(3S) man page.
+ */
+
+#include <stdarg.h>
+
+#include "types.h"
+
+#define VA_START(ap, last) va_start(ap, last)
+#define VA_SHIFT(ap, value, type) /* No-op for ANSI C. */
+
+#define ULLONG unsigned long long
+#define UINTMAX_T unsigned long
+#define LLONG long
+#define INTMAX_T long
+
+/* Support for uintptr_t. */
+#ifndef UINTPTR_T
+#if HAVE_UINTPTR_T || defined(uintptr_t)
+#define UINTPTR_T uintptr_t
+#else
+#define UINTPTR_T unsigned long int
+#endif /* HAVE_UINTPTR_T || defined(uintptr_t) */
+#endif /* !defined(UINTPTR_T) */
+
+/* Support for ptrdiff_t. */
+#ifndef PTRDIFF_T
+#if HAVE_PTRDIFF_T || defined(ptrdiff_t)
+#define PTRDIFF_T ptrdiff_t
+#else
+#define PTRDIFF_T long int
+#endif /* HAVE_PTRDIFF_T || defined(ptrdiff_t) */
+#endif /* !defined(PTRDIFF_T) */
+
+/*
+ * We need an unsigned integer type corresponding to ptrdiff_t (cf. C99:
+ * 7.19.6.1, 7). However, we'll simply use PTRDIFF_T and convert it to an
+ * unsigned type if necessary. This should work just fine in practice.
+ */
+#ifndef UPTRDIFF_T
+#define UPTRDIFF_T PTRDIFF_T
+#endif /* !defined(UPTRDIFF_T) */
+
+/*
+ * We need a signed integer type corresponding to size_t (cf. C99: 7.19.6.1, 7).
+ * However, we'll simply use size_t and convert it to a signed type if
+ * necessary. This should work just fine in practice.
+ */
+#ifndef SSIZE_T
+#define SSIZE_T size_t
+#endif /* !defined(SSIZE_T) */
+
+/*
+ * Buffer size to hold the octal string representation of UINT128_MAX without
+ * nul-termination ("3777777777777777777777777777777777777777777").
+ */
+#ifdef MAX_CONVERT_LENGTH
+#undef MAX_CONVERT_LENGTH
+#endif /* defined(MAX_CONVERT_LENGTH) */
+#define MAX_CONVERT_LENGTH 43
+
+/* Format read states. */
+#define PRINT_S_DEFAULT 0
+#define PRINT_S_FLAGS 1
+#define PRINT_S_WIDTH 2
+#define PRINT_S_DOT 3
+#define PRINT_S_PRECISION 4
+#define PRINT_S_MOD 5
+#define PRINT_S_CONV 6
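+
+/*
+ * For example, "%-08.4lx" walks the states in order: FLAGS consumes '-'
+ * and '0', WIDTH consumes '8', DOT consumes '.', PRECISION consumes '4',
+ * MOD consumes 'l', and CONV handles 'x'.
+ */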
+
+/* Format flags. */
+#define PRINT_F_MINUS (1 << 0)
+#define PRINT_F_PLUS (1 << 1)
+#define PRINT_F_SPACE (1 << 2)
+#define PRINT_F_NUM (1 << 3)
+#define PRINT_F_ZERO (1 << 4)
+#define PRINT_F_QUOTE (1 << 5)
+#define PRINT_F_UP (1 << 6)
+#define PRINT_F_UNSIGNED (1 << 7)
+#define PRINT_F_TYPE_G (1 << 8)
+#define PRINT_F_TYPE_E (1 << 9)
+
+/* Conversion flags. */
+#define PRINT_C_CHAR 1
+#define PRINT_C_SHORT 2
+#define PRINT_C_LONG 3
+#define PRINT_C_LLONG 4
+// #define PRINT_C_LDOUBLE 5
+#define PRINT_C_SIZE 6
+#define PRINT_C_PTRDIFF 7
+#define PRINT_C_INTMAX 8
+
+#ifndef MAX
+#define MAX(x, y) (((x) >= (y)) ? (x) : (y))
+#endif /* !defined(MAX) */
+#ifndef CHARTOINT
+#define CHARTOINT(ch) ((ch) - '0')
+#endif /* !defined(CHARTOINT) */
+#ifndef ISDIGIT
+#define ISDIGIT(ch) ('0' <= (unsigned char)(ch) && (unsigned char)(ch) <= '9')
+#endif /* !defined(ISDIGIT) */
+
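+/*
+ * OUTCHAR stores a character only while it still fits (leaving room for the
+ * terminating NUL) but always advances len, so the final value of len is the
+ * full untruncated output length, as C99 snprintf(3) requires.
+ */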
+#define OUTCHAR(str, len, size, ch) \
+ do { \
+ if (len + 1 < size) \
+ str[len] = ch; \
+ (len)++; \
+ } while (/* CONSTCOND */ 0)
+
+static void fmtstr(char *, size_t *, size_t, const char *, int, int, int);
+static void fmtint(char *, size_t *, size_t, INTMAX_T, int, int, int, int);
+static void printsep(char *, size_t *, size_t);
+static int getnumsep(int);
+static int convert(UINTMAX_T, char *, size_t, int, int);
+
+int vsnprintf(char *str, size_t size, const char *format, va_list args)
+{
+ INTMAX_T value;
+ unsigned char cvalue;
+ const char *strvalue;
+ INTMAX_T *intmaxptr;
+ PTRDIFF_T *ptrdiffptr;
+ SSIZE_T *sizeptr;
+ LLONG *llongptr;
+ long int *longptr;
+ int *intptr;
+ short int *shortptr;
+ signed char *charptr;
+ size_t len = 0;
+ int overflow = 0;
+ int base = 0;
+ int cflags = 0;
+ int flags = 0;
+ int width = 0;
+ int precision = -1;
+ int state = PRINT_S_DEFAULT;
+ char ch = *format++;
+
+ /*
+ * C99 says: "If `n' is zero, nothing is written, and `s' may be a null
+ * pointer." (7.19.6.5, 2) We're forgiving and allow a NULL pointer
+ * even if a size larger than zero was specified. At least NetBSD's
+ * snprintf(3) does the same, as well as other versions of this file.
+ * (Though some of these versions will write to a non-NULL buffer even
+ * if a size of zero was specified, which violates the standard.)
+ */
+ if (str == NULL && size != 0)
+ size = 0;
+
+ while (ch != '\0')
+ switch (state) {
+ case PRINT_S_DEFAULT:
+ if (ch == '%')
+ state = PRINT_S_FLAGS;
+ else
+ OUTCHAR(str, len, size, ch);
+ ch = *format++;
+ break;
+ case PRINT_S_FLAGS:
+ switch (ch) {
+ case '-':
+ flags |= PRINT_F_MINUS;
+ ch = *format++;
+ break;
+ case '+':
+ flags |= PRINT_F_PLUS;
+ ch = *format++;
+ break;
+ case ' ':
+ flags |= PRINT_F_SPACE;
+ ch = *format++;
+ break;
+ case '#':
+ flags |= PRINT_F_NUM;
+ ch = *format++;
+ break;
+ case '0':
+ flags |= PRINT_F_ZERO;
+ ch = *format++;
+ break;
+ case '\'': /* SUSv2 flag (not in C99). */
+ flags |= PRINT_F_QUOTE;
+ ch = *format++;
+ break;
+ default:
+ state = PRINT_S_WIDTH;
+ break;
+ }
+ break;
+ case PRINT_S_WIDTH:
+ if (ISDIGIT(ch)) {
+ ch = CHARTOINT(ch);
+ if (width > (INT_MAX - ch) / 10) {
+ overflow = 1;
+ goto out;
+ }
+ width = 10 * width + ch;
+ ch = *format++;
+ } else if (ch == '*') {
+ /*
+ * C99 says: "A negative field width argument is
+ * taken as a `-' flag followed by a positive
+ * field width." (7.19.6.1, 5)
+ */
+ if ((width = va_arg(args, int)) < 0) {
+ flags |= PRINT_F_MINUS;
+ width = -width;
+ }
+ ch = *format++;
+ state = PRINT_S_DOT;
+ } else
+ state = PRINT_S_DOT;
+ break;
+ case PRINT_S_DOT:
+ if (ch == '.') {
+ state = PRINT_S_PRECISION;
+ ch = *format++;
+ } else
+ state = PRINT_S_MOD;
+ break;
+ case PRINT_S_PRECISION:
+ if (precision == -1)
+ precision = 0;
+ if (ISDIGIT(ch)) {
+ ch = CHARTOINT(ch);
+ if (precision > (INT_MAX - ch) / 10) {
+ overflow = 1;
+ goto out;
+ }
+ precision = 10 * precision + ch;
+ ch = *format++;
+ } else if (ch == '*') {
+ /*
+ * C99 says: "A negative precision argument is
+ * taken as if the precision were omitted."
+ * (7.19.6.1, 5)
+ */
+ if ((precision = va_arg(args, int)) < 0)
+ precision = -1;
+ ch = *format++;
+ state = PRINT_S_MOD;
+ } else
+ state = PRINT_S_MOD;
+ break;
+ case PRINT_S_MOD:
+ switch (ch) {
+ case 'h':
+ ch = *format++;
+ if (ch == 'h') { /* It's a char. */
+ ch = *format++;
+ cflags = PRINT_C_CHAR;
+ } else
+ cflags = PRINT_C_SHORT;
+ break;
+ case 'l':
+ ch = *format++;
+ if (ch == 'l') { /* It's a long long. */
+ ch = *format++;
+ cflags = PRINT_C_LLONG;
+ } else
+ cflags = PRINT_C_LONG;
+ break;
+ case 'j':
+ cflags = PRINT_C_INTMAX;
+ ch = *format++;
+ break;
+ case 't':
+ cflags = PRINT_C_PTRDIFF;
+ ch = *format++;
+ break;
+ case 'z':
+ cflags = PRINT_C_SIZE;
+ ch = *format++;
+ break;
+ }
+ state = PRINT_S_CONV;
+ break;
+ case PRINT_S_CONV:
+ switch (ch) {
+ case 'd':
+ /* FALLTHROUGH */
+ case 'i':
+ switch (cflags) {
+ case PRINT_C_CHAR:
+ value = (signed char)va_arg(args, int);
+ break;
+ case PRINT_C_SHORT:
+ value = (short int)va_arg(args, int);
+ break;
+ case PRINT_C_LONG:
+ value = va_arg(args, long int);
+ break;
+ case PRINT_C_LLONG:
+ value = va_arg(args, LLONG);
+ break;
+ case PRINT_C_SIZE:
+ value = va_arg(args, SSIZE_T);
+ break;
+ case PRINT_C_INTMAX:
+ value = va_arg(args, INTMAX_T);
+ break;
+ case PRINT_C_PTRDIFF:
+ value = va_arg(args, PTRDIFF_T);
+ break;
+ default:
+ value = va_arg(args, int);
+ break;
+ }
+ fmtint(str, &len, size, value, 10, width, precision, flags);
+ break;
+ case 'X':
+ flags |= PRINT_F_UP;
+ /* FALLTHROUGH */
+ case 'x':
+ base = 16;
+ /* FALLTHROUGH */
+ case 'o':
+ if (base == 0)
+ base = 8;
+ /* FALLTHROUGH */
+ case 'u':
+ if (base == 0)
+ base = 10;
+ flags |= PRINT_F_UNSIGNED;
+ switch (cflags) {
+ case PRINT_C_CHAR:
+ value = (unsigned char)va_arg(args, unsigned int);
+ break;
+ case PRINT_C_SHORT:
+ value = (unsigned short int)va_arg(args, unsigned int);
+ break;
+ case PRINT_C_LONG:
+ value = va_arg(args, unsigned long int);
+ break;
+ case PRINT_C_LLONG:
+ value = va_arg(args, ULLONG);
+ break;
+ case PRINT_C_SIZE:
+ value = va_arg(args, size_t);
+ break;
+ case PRINT_C_INTMAX:
+ value = va_arg(args, UINTMAX_T);
+ break;
+ case PRINT_C_PTRDIFF:
+ value = va_arg(args, UPTRDIFF_T);
+ break;
+ default:
+ value = va_arg(args, unsigned int);
+ break;
+ }
+ fmtint(str, &len, size, value, base, width, precision, flags);
+ break;
+ case 'c':
+ cvalue = va_arg(args, int);
+ OUTCHAR(str, len, size, cvalue);
+ break;
+ case 's':
+ strvalue = va_arg(args, char *);
+ fmtstr(str, &len, size, strvalue, width, precision, flags);
+ break;
+ case 'p':
+ /*
+ * C99 says: "The value of the pointer is
+ * converted to a sequence of printing
+ * characters, in an implementation-defined
+ * manner." (C99: 7.19.6.1, 8)
+ */
+ if ((strvalue = va_arg(args, void *)) == NULL)
+ /*
+ * We use the glibc format. BSD prints
+ * "0x0", SysV "0".
+ */
+ fmtstr(str, &len, size, "(nil)", width, -1, flags);
+ else {
+ /*
+ * We use the BSD/glibc format. SysV
+ * omits the "0x" prefix (which we emit
+ * using the PRINT_F_NUM flag).
+ */
+ flags |= PRINT_F_NUM;
+ flags |= PRINT_F_UNSIGNED;
+ fmtint(str, &len, size, (UINTPTR_T)strvalue, 16, width, precision,
+ flags);
+ }
+ break;
+ case 'n':
+ switch (cflags) {
+ case PRINT_C_CHAR:
+ charptr = va_arg(args, signed char *);
+ *charptr = len;
+ break;
+ case PRINT_C_SHORT:
+ shortptr = va_arg(args, short int *);
+ *shortptr = len;
+ break;
+ case PRINT_C_LONG:
+ longptr = va_arg(args, long int *);
+ *longptr = len;
+ break;
+ case PRINT_C_LLONG:
+ llongptr = va_arg(args, LLONG *);
+ *llongptr = len;
+ break;
+ case PRINT_C_SIZE:
+ /*
+ * C99 says that with the "z" length
+ * modifier, "a following `n' conversion
+ * specifier applies to a pointer to a
+ * signed integer type corresponding to
+ * size_t argument." (7.19.6.1, 7)
+ */
+ sizeptr = va_arg(args, SSIZE_T *);
+ *sizeptr = len;
+ break;
+ case PRINT_C_INTMAX:
+ intmaxptr = va_arg(args, INTMAX_T *);
+ *intmaxptr = len;
+ break;
+ case PRINT_C_PTRDIFF:
+ ptrdiffptr = va_arg(args, PTRDIFF_T *);
+ *ptrdiffptr = len;
+ break;
+ default:
+ intptr = va_arg(args, int *);
+ *intptr = len;
+ break;
+ }
+ break;
+ case '%': /* Print a "%" character verbatim. */
+ OUTCHAR(str, len, size, ch);
+ break;
+ default: /* Skip other characters. */
+ break;
+ }
+ ch = *format++;
+ state = PRINT_S_DEFAULT;
+ base = cflags = flags = width = 0;
+ precision = -1;
+ break;
+ }
+out:
+ if (len < size)
+ str[len] = '\0';
+ else if (size > 0)
+ str[size - 1] = '\0';
+
+ if (overflow || len >= INT_MAX) {
+ return -1;
+ }
+ return (int)len;
+}
+
+static void fmtstr(char *str, size_t *len, size_t size, const char *value, int width, int precision,
+ int flags)
+{
+ int padlen, strln; /* Amount to pad. */
+ int noprecision = (precision == -1);
+
+ if (value == NULL) /* We're forgiving. */
+ value = "(null)";
+
+ /* If a precision was specified, don't read the string past it. */
+ for (strln = 0; value[strln] != '\0' && (noprecision || strln < precision); strln++)
+ continue;
+
+ if ((padlen = width - strln) < 0)
+ padlen = 0;
+ if (flags & PRINT_F_MINUS) /* Left justify. */
+ padlen = -padlen;
+
+ while (padlen > 0) { /* Leading spaces. */
+ OUTCHAR(str, *len, size, ' ');
+ padlen--;
+ }
+ while (*value != '\0' && (noprecision || precision-- > 0)) {
+ OUTCHAR(str, *len, size, *value);
+ value++;
+ }
+ while (padlen < 0) { /* Trailing spaces. */
+ OUTCHAR(str, *len, size, ' ');
+ padlen++;
+ }
+}
+
+static void fmtint(char *str, size_t *len, size_t size, INTMAX_T value, int base, int width,
+ int precision, int flags)
+{
+ UINTMAX_T uvalue;
+ char iconvert[MAX_CONVERT_LENGTH];
+ char sign = 0;
+ char hexprefix = 0;
+ int spadlen = 0; /* Amount to space pad. */
+ int zpadlen = 0; /* Amount to zero pad. */
+ int pos;
+ int separators = (flags & PRINT_F_QUOTE);
+ int noprecision = (precision == -1);
+
+ if (flags & PRINT_F_UNSIGNED)
+ uvalue = value;
+ else {
+ uvalue = (value >= 0) ? value : -value;
+ if (value < 0)
+ sign = '-';
+ else if (flags & PRINT_F_PLUS) /* Do a sign. */
+ sign = '+';
+ else if (flags & PRINT_F_SPACE)
+ sign = ' ';
+ }
+
+ pos = convert(uvalue, iconvert, sizeof(iconvert), base, flags & PRINT_F_UP);
+
+ if (flags & PRINT_F_NUM && uvalue != 0) {
+ /*
+ * C99 says: "The result is converted to an `alternative form'.
+ * For `o' conversion, it increases the precision, if and only
+ * if necessary, to force the first digit of the result to be a
+ * zero (if the value and precision are both 0, a single 0 is
+ * printed). For `x' (or `X') conversion, a nonzero result has
+ * `0x' (or `0X') prefixed to it." (7.19.6.1, 6)
+ */
+ switch (base) {
+ case 8:
+ if (precision <= pos)
+ precision = pos + 1;
+ break;
+ case 16:
+ hexprefix = (flags & PRINT_F_UP) ? 'X' : 'x';
+ break;
+ }
+ }
+
+ if (separators) /* Get the number of group separators we'll print. */
+ separators = getnumsep(pos);
+
+ zpadlen = precision - pos - separators;
+ spadlen = width /* Minimum field width. */
+ - separators /* Number of separators. */
+ - MAX(precision, pos) /* Number of integer digits. */
+ - ((sign != 0) ? 1 : 0) /* Will we print a sign? */
+ - ((hexprefix != 0) ? 2 : 0); /* Will we print a prefix? */
+
+ if (zpadlen < 0)
+ zpadlen = 0;
+ if (spadlen < 0)
+ spadlen = 0;
+
+ /*
+ * C99 says: "If the `0' and `-' flags both appear, the `0' flag is
+ * ignored. For `d', `i', `o', `u', `x', and `X' conversions, if a
+ * precision is specified, the `0' flag is ignored." (7.19.6.1, 6)
+ */
+ if (flags & PRINT_F_MINUS) /* Left justify. */
+ spadlen = -spadlen;
+ else if (flags & PRINT_F_ZERO && noprecision) {
+ zpadlen += spadlen;
+ spadlen = 0;
+ }
+ while (spadlen > 0) { /* Leading spaces. */
+ OUTCHAR(str, *len, size, ' ');
+ spadlen--;
+ }
+ if (sign != 0) /* Sign. */
+ OUTCHAR(str, *len, size, sign);
+ if (hexprefix != 0) { /* A "0x" or "0X" prefix. */
+ OUTCHAR(str, *len, size, '0');
+ OUTCHAR(str, *len, size, hexprefix);
+ }
+ while (zpadlen > 0) { /* Leading zeros. */
+ OUTCHAR(str, *len, size, '0');
+ zpadlen--;
+ }
+ while (pos > 0) { /* The actual digits. */
+ pos--;
+ OUTCHAR(str, *len, size, iconvert[pos]);
+ if (separators > 0 && pos > 0 && pos % 3 == 0)
+ printsep(str, len, size);
+ }
+ while (spadlen < 0) { /* Trailing spaces. */
+ OUTCHAR(str, *len, size, ' ');
+ spadlen++;
+ }
+}
+
+static void printsep(char *str, size_t *len, size_t size)
+{
+ OUTCHAR(str, *len, size, ',');
+}
+
+static int getnumsep(int digits)
+{
+ int separators = (digits - ((digits % 3 == 0) ? 1 : 0)) / 3;
+ return separators;
+}
+
+static int convert(UINTMAX_T value, char *buf, size_t size, int base, int caps)
+{
+ const char *digits = caps ? "0123456789ABCDEF" : "0123456789abcdef";
+ size_t pos = 0;
+
+ /* We return an unterminated buffer with the digits in reverse order. */
+ do {
+ buf[pos++] = digits[value % base];
+ value /= base;
+ } while (value != 0 && pos < size);
+
+ return (int)pos;
+}
+
+int vsprintf(char *buf, const char *fmt, va_list args)
+{
+ return vsnprintf(buf, INT_MAX, fmt, args);
+}
diff --git a/tools/src/vsprintf.h b/tools/src/vsprintf.h
new file mode 100644
index 0000000..cff6c93
--- /dev/null
+++ b/tools/src/vsprintf.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef VSPRINTF_H
+#define VSPRINTF_H
+
+#include <stdarg.h>
+
+#include "types.h"
+
+int vsprintf(char *buf, const char *fmt, va_list args);
+int vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
+
+#endif
diff --git a/tools/src/wdt.c b/tools/src/wdt.c
new file mode 100644
index 0000000..a3ebe3a
--- /dev/null
+++ b/tools/src/wdt.c
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "wdt.h"
+#include "adt.h"
+#include "types.h"
+#include "utils.h"
+
+#define WDT_COUNT 0x10
+#define WDT_ALARM 0x14
+#define WDT_CTL 0x1c
+
+static u64 wdt_base = 0;
+
+void wdt_disable(void)
+{
+ int path[8];
+ int node = adt_path_offset_trace(adt, "/arm-io/wdt", path);
+
+ if (node < 0) {
+ printf("WDT node not found!\n");
+ return;
+ }
+
+ if (adt_get_reg(adt, path, "reg", 0, &wdt_base, NULL)) {
+ printf("Failed to get WDT reg property!\n");
+ return;
+ }
+
+ printf("WDT registers @ 0x%lx\n", wdt_base);
+
+ write32(wdt_base + WDT_CTL, 0);
+
+ printf("WDT disabled\n");
+}
+
+void wdt_reboot(void)
+{
+ if (!wdt_base)
+ return;
+
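+ /*
+ * Arm the watchdog: program an alarm value, zero the counter, and set
+ * what appears to be the reset-enable bit, so the system resets once
+ * the count reaches the alarm value.
+ */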
+ write32(wdt_base + WDT_ALARM, 0x100000);
+ write32(wdt_base + WDT_COUNT, 0);
+ write32(wdt_base + WDT_CTL, 4);
+}
diff --git a/tools/src/wdt.h b/tools/src/wdt.h
new file mode 100644
index 0000000..6a48601
--- /dev/null
+++ b/tools/src/wdt.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef __WDT_H__
+#define __WDT_H__
+
+void wdt_disable(void);
+void wdt_reboot(void);
+
+#endif
diff --git a/tools/src/xnuboot.h b/tools/src/xnuboot.h
new file mode 100644
index 0000000..32623b3
--- /dev/null
+++ b/tools/src/xnuboot.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef XNUBOOT_H
+#define XNUBOOT_H
+
+#define CMDLINE_LENGTH 608
+
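+/*
+ * Layout of the boot arguments that Apple's iBoot passes to the next boot
+ * stage; boot_video describes the firmware-set-up framebuffer (base address,
+ * bytes-per-line stride, dimensions, and depth).
+ */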
+struct boot_video {
+ u64 base;
+ u64 display;
+ u64 stride;
+ u64 width;
+ u64 height;
+ u64 depth;
+};
+
+struct boot_args {
+ u16 revision;
+ u16 version;
+ u64 virt_base;
+ u64 phys_base;
+ u64 mem_size;
+ u64 top_of_kernel_data;
+ struct boot_video video;
+ u32 machine_type;
+ void *devtree;
+ u32 devtree_size;
+ char cmdline[CMDLINE_LENGTH];
+ u64 boot_flags;
+ u64 mem_size_actual;
+};
+
+extern u64 boot_args_addr;
+extern struct boot_args cur_boot_args;
+
+#endif