diff --git a/Makefile b/Makefile index de12e29..b5276e4 100644 --- a/Makefile +++ b/Makefile @@ -5,6 +5,9 @@ MAKEFLAGS += --no-builtin-rules +SHELL = /bin/bash +.SHELLFLAGS = -o pipefail -c + #### Defaults #### # If COMPARE is 1, check the output md5sum after building @@ -20,6 +23,8 @@ RUN_CC_CHECK ?= 1 CC_CHECK_COMP ?= gcc # Dump build object files OBJDUMP_BUILD ?= 0 +# Disassembles matched functions and migrated data as well +FULL_DISASM ?= 0 # Number of threads to compress with N_THREADS ?= $(shell nproc) @@ -69,7 +74,7 @@ $(error Native Windows is currently unsupported for building this repository, us else ifeq ($(UNAME_S),Linux) DETECTED_OS := linux else ifeq ($(UNAME_S),Darwin) - DETECTED_OS := mac + DETECTED_OS := macos MAKE := gmake CPPFLAGS += -xc++ endif @@ -157,6 +162,11 @@ ifeq ($(NON_MATCHING),0) COMPFLAGS += --matching endif +SPLAT_FLAGS ?= +ifneq ($(FULL_DISASM), 0) + SPLAT_FLAGS += --disassemble-all +endif + #### Files #### $(shell mkdir -p asm bin linker_scripts/$(VERSION)/auto) @@ -215,7 +225,7 @@ setup: extract: $(RM) -r asm/$(VERSION) bin/$(VERSION) $(CAT) yamls/$(VERSION)/header.yaml yamls/$(VERSION)/makerom.yaml yamls/$(VERSION)/main.yaml > $(SPLAT_YAML) - $(SPLAT) $(SPLAT_YAML) + $(SPLAT) $(SPLAT_FLAGS) $(SPLAT_YAML) diff-init: uncompressed $(RM) -rf expected/ @@ -255,7 +265,7 @@ $(BUILD_DIR)/%.o: %.bin $(OBJCOPY) -I binary -O elf32-big $< $@ $(BUILD_DIR)/%.o: %.s - $(CPP) $(CPPFLAGS) $(BUILD_DEFINES) $(IINC) -I $(dir $*) $(COMMON_DEFINES) $(RELEASE_DEFINES) $(GBI_DEFINES) $(AS_DEFINES) $< | $(ICONV) $(ICONV_FLAGS) | $(AS) $(ASFLAGS) $(ENDIAN) $(IINC) -I $(dir $*) -o $@ + $(ICONV) $(ICONV_FLAGS) $< | $(AS) $(ASFLAGS) $(ENDIAN) $(IINC) -I $(dir $*) -o $@ $(OBJDUMP_CMD) $(BUILD_DIR)/%.o: %.c diff --git a/tools/splat/.github/workflows/publish_docs_to_wiki.yml b/tools/splat/.github/workflows/publish_docs_to_wiki.yml new file mode 100644 index 0000000..41a15b2 --- /dev/null +++ b/tools/splat/.github/workflows/publish_docs_to_wiki.yml @@ -0,0 +1,54 
@@ +# Based on script from https://github.com/orgs/community/discussions/25929 + +name: Publish docs to Wiki + +# Trigger this action only if there are changes pushed to the docs/** directory under the main branch +on: + push: + paths: + - docs/** # This includes all sub folders + branches: + - main # This can be changed to any branch of your preference + +jobs: + publish_docs_to_wiki: + name: Publish docs to Wiki + runs-on: ubuntu-latest + steps: + # Clone the wiki repository + - name: Checkout Wiki repository + uses: actions/checkout@v4 + with: + repository: ${{ github.event.repository.owner.name }}/${{ github.event.repository.name }}.wiki + path: wiki_repo + + # Clone the main repository + - name: Checkout main repository + uses: actions/checkout@v4 + with: + repository: ${{ github.event.repository.owner.name }}/${{ github.event.repository.name }} + path: splat_repo + + - name: Get the new Wiki files + run: | + cd wiki_repo + rm *.md + cp ../splat_repo/docs/* . + + # `git log -1 --pretty=%aN` prints the current commit's author name + # `git log -1 --pretty=%aE` prints the current commit's author mail + - name: Stage new files + run: | + cd wiki_repo + git config user.name $(git log -1 --pretty=%aN) + git config user.email $(git log -1 --pretty=%aE) + git add . + + # `git diff-index --quiet HEAD` returns non-zero if there are any changes. + # This allows to avoid making a commit/push if there are no changes to the Wiki files + + # `git log -1 --pretty=%B` prints the current commit's message + - name: Push new files to the Wiki + run: | + cd wiki_repo + git diff-index --quiet HEAD || (git commit -m "$(git log -1 --pretty=%B)" && git push) diff --git a/tools/splat/.gitrepo b/tools/splat/.gitrepo index 3426205..ee67429 100644 --- a/tools/splat/.gitrepo +++ b/tools/splat/.gitrepo @@ -1,12 +1,12 @@ ; DO NOT EDIT (unless you know what you are doing) ; ; This subdirectory is a git "subrepo", and this file is maintained by the -; git-subrepo command. 
See https://github.com/git-commands/git-subrepo#readme +; git-subrepo command. See https://github.com/ingydotnet/git-subrepo#readme ; [subrepo] - remote = https://github.com/ethteck/splat.git - branch = master - commit = 172022a8141f4e25b20c61885bc94c94c06062b0 - parent = 73c802a3bb33f347e761786c5b40b7bf6f272582 + remote = git@github.com:ethteck/splat.git + branch = main + commit = 3e9e7cb7342f1b53084f4faffe0ed05e4ccf577c + parent = 8d6aac5bdfa6573d40a8d11dfe94448d64f75d46 method = merge cmdver = 0.4.6 diff --git a/tools/splat/CHANGELOG.md b/tools/splat/CHANGELOG.md index 20f3eab..b11f4c3 100644 --- a/tools/splat/CHANGELOG.md +++ b/tools/splat/CHANGELOG.md @@ -1,6 +1,132 @@ # splat Release Notes +### 0.19.6 + +* The `*_END` linker symbol of every section for each segment is now aligned to the configured alignment by default. +* New yaml option: `ld_align_section_vram_end` + * Allows to toggle aligning the `*_END` linker symbol of each section. + * Defaults to `True`. + +### 0.19.5 + +* The `*_VRAM_END` linker symbol for each segment is now aligned to the configured alignment by default. +* New yaml option: `ld_align_segment_vram_end` + * Allows to toggle aligning the `*_VRAM_END` linker symbol. + * Defaults to `True`. + +### 0.19.4 + +* Fix `ld_fill_value` not accepting `null` as a valid value on the yaml + +### 0.19.3 + +* New yaml option: `ld_bss_is_noload` + * Allows to control if `bss` sections (and derivatived sections) will be put on a `NOLOAD` segment on the generated linker script or not. + * Applies to all `bss` (`sbss`, `common`, `scommon`, etc) sections. + * Defaults to `True`, meaning `bss` sections will be put on `NOLOAD` segments. + +### 0.19.2 + +* `named_regs_for_c_funcs` (default True): Can be disabled to make c functions' disassembled functions contain numeric registers. + +### 0.19.1 + +* Fixed disassembly of certain ps2 instructions to properly re-assemble in a compatible and matching way. 
+ +### 0.19.0: vram_classes + +* New top-level yaml feature: `vram_classes`. This allows you to make common definitions for vram locations that can be applied to multiple segments. Please see the [documentation](docs/VramClasses.md) for more details! + * Renamed `ld_use_follows` to `ld_use_symbolic_vram_addresses` to more accurately describe what it's doing + * Renamed `vram_of_symbol` segment option to `vram_symbol` to provide consistency between the segment-level option and the vram class field. + * Removed `appears_after_overlays_addr` symbol_addrs option in favor of specifying this behavior with `vram_classes` +* Removed `dead` symbol_addrs option +* A warning is now emitted when the `sha1` top-level yaml option is not provided. Adding this is highly recommended, as it prevents errors using splat in which the wrong binary is provided. + +### 0.18.3 + +* splat now will emit a `FILL(0)` statement on each segment of a linker script by default, to customize this behavior use the `ld_fill_value` yaml option or the per-segment `ld_fill_value` option. +* New yaml option: `ld_fill_value` + * Allows to specify the value of the `FILL` statement generated on every segment of the linker script. + * It must be either an integer, which will be used as the parameter for the `FILL` statement, or `null`, which tells splat to not emit `FILL` statements. + * This behavior can be customized per segment too. +* New per segment option: `ld_fill_value` + * Allows to specify the value of the `FILL` statement generated for this specific top-level segment of the linker script, ignoring the global configuration. + * If not set, then the global configuration is used. + +### 0.18.2 + +* Fix rodata migration for `.rdata` sections (and other rodata sections that don't use the name `.rodata`) +* `spimdisasm` 1.18.0 or above is now required. 
+ +### 0.18.1 + +* New yaml options: `check_consecutive_segment_types` + * Allows to turn off checking for segment types not being in a consecutive order +* New option for segments: `linker_section_order` and `linker_section` + * `linker_section_order`: Allows overriding the section order used for linker script generation. Useful when a section of a file is not between the other sections of the same type in the ROM, for example a file having its data section between other files's rodata. + * `linker_section`: Allows to override the `.section` directive that will be used when generating the disassembly of the corresponding section, without needing to write an extension segment. This also affects the section name that will be used during link time. Useful for sections with special names, like an executable section named `.start` + +### 0.18.0 + +* `symbol_addrs` parsing checks: + * Enforce lines contain a single `;` + * Enforce no duplicates (same vram, same rom) + +### 0.17.3 + +* Move wiki to the `docs` folder +* Added the ability to specify `find_file_boundaries` on a per segment basis +* Fix `cpp` segment not symbolizing rodata symbols properly + +### 0.17.2 + +* Added more support for PS2 elf files + +### 0.17.1 + +* New yaml options: `ld_sections_allowlist` and `ld_sections_denylist` + * `ld_sections_allowlist`: A list of sections to preserve during link time. It can be useful to preserve debugging sections. + * `ld_sections_denylist`: A list of sections to discard during link time. It can be useful to avoid using the wildcard discard. Note that this option does not turn off `ld_discard_section`. + +### 0.17.0 + +* BREAKING: Linker script generation now imposes the specified `section_order`, which may not completely reflect the yaml order. + * In case this new linker script generation can't be properly adapted to a repo, the old generation can be reenabled by using the `ld_legacy_generation` flag as a temporary solution. 
Keep in mind this option may be removed in the future. +* New yaml options related to linker script generation: `ld_partial_linking`, `ld_partial_scripts_path`, `ld_partial_build_segments_path`, `elf_path`, `ld_dependencies` + * `ld_partial_linking`: Changes how the linker script is generated, allowing partially linking each segment. This allows for faster linking times when making changes to files at the cost of a slower build time from a clean build and losing filepaths in the mapfile. This is also known as "incremental linking". This option requires both `ld_partial_scripts_path` and `ld_partial_build_segments_path`. + * `ld_partial_scripts_path`: Folder where each intermediary linker script will be written to. + * `ld_partial_build_segments_path`: Folder where the built partially linked segments will be placed by the build system. + * `elf_path`: Path to the final elf target. + * `ld_dependencies`: Generate a dependency file for every linker script generated, including the main linker script and the ones for partial linking. Dependency files will have the same path and name as the corresponding linker script, but changing the extension to `.d`. Requires `elf_path` to be set. +* New misc yaml options: `asm_function_alt_macro` and `ique_symbols` + * `asm_function_alt_macro`: Allows to use a different label on symbols that are in the middle of functions (that are not branch targets of any kind) than the one used for the label for functions, allowing for alternative function entrypoints. + * `ique_symbols` Automatically fills libultra symbols that are exclusive for iQue. This option is ignored if platform is not N64. +* New "incbin" segments: `textbin`, `databin` and `rodatabin` + * Allows to specify binary blobs to be linked in a specific section instead of the data default. + * If a `textbin` section has a corresponding `databin` and/or `rodatabin` section with the same name then those will be included in the same generated assembly file.
+ * If a known symbol matches the vram of an incbin section then it will be emitted properly, allowing for better integration with the rest of splat's symbol system. +* `spimdisasm` 1.17.0 or above is now required. + +### 0.16.10 + +* Produce an error if subsegments do not have an ascending vram order. + * This can happen because bss subsegments need their vram to be specified explicitly. + +### 0.16.9 + +* Add command line argument `--disassemble-all`, which has the same effect as the `disassemble_all` yaml option so it will disassemble already matched functions as well as migrated data. + * Note: the command line argument takes precedence over the yaml, so will take effect even if the yaml option is set to false. + +### 0.16.8 + +* Avoid ignoring the `align` defined in a segment for `code` segments + +### 0.16.7 + +* Use `pylibyaml` to speed-up yaml parsing + ### 0.16.6 + * Add option `ld_rom_start`. * Allows offsetting rom address linker symbols by some arbitrary value. * Useful for SN64 games which often have rom addresses offset by 0xB0000000.
@@ -462,10 +588,10 @@ Internally, there's a new Symbol class which stores information about a symbol a ## 0.5 The Rename Update * n64splat name changed to splat - * Some refactoring was done to support other platforms besides n64 in the future + * Some refactoring was done to support other platforms besides n64 in the future * New `platform` option, which defaults to `n64` * This will cause breaking changes in custom segments, so please refer to one of the changes in one of the n64 base segments for details -* Support for custom artifact paths +* Support for custom artifact paths * New `undefined_syms_auto_path` option * New `undefined_funcs_auto_path` option * New `cache_path` option diff --git a/tools/splat/README.md b/tools/splat/README.md index de7abc7..219be68 100644 --- a/tools/splat/README.md +++ b/tools/splat/README.md @@ -1,7 +1,7 @@ # splat A binary splitting tool to assist with decompilation and modding projects -Currently, only N64 and PSX binaries are supported. +Currently, only N64, PSX, and PS2 binaries are supported. Please check out the [wiki](https://github.com/ethteck/splat/wiki) for more information including [examples](https://github.com/ethteck/splat/wiki/Examples) of projects that use splat. diff --git a/tools/splat/create_config.py b/tools/splat/create_config.py index cf34481..9289858 100755 --- a/tools/splat/create_config.py +++ b/tools/splat/create_config.py @@ -4,15 +4,16 @@ import sys from pathlib import Path -from segtypes.gc.rarc import GcSegRarc from util.gc import gcinfo - from util.n64 import find_code_length, rominfo +from util.psx import psxexeinfo parser = argparse.ArgumentParser( - description="Create a splat config from an N64 ROM or a GameCube disc image." + description="Create a splat config from an N64 ROM, PSX executable, or a GameCube disc image." 
+) +parser.add_argument( + "file", help="Path to a .z64/.n64 ROM, PSX executable, or .iso/.gcm GameCube image" ) -parser.add_argument("file", help="Path to a .z64/.n64 ROM or .iso/.gcm GameCube image") def main(file_path: Path): @@ -31,6 +32,12 @@ def main(file_path: Path): # Check for GC disc image if int.from_bytes(file_bytes[0x1C:0x20], byteorder="big") == 0xC2339F3D: create_gc_config(file_path, file_bytes) + return + + # Check for PSX executable + if file_bytes[0:8] == b"PS-X EXE": + create_psx_config(file_path, file_bytes) + return def create_n64_config(rom_path: Path): @@ -45,25 +52,50 @@ def create_n64_config(rom_path: Path): options: basename: {basename} target_path: {rom_path.with_suffix(".z64")} + elf_path: build/{basename}.elf base_path: . - compiler: {rom.compiler} - find_file_boundaries: True - header_encoding: {rom.header_encoding} platform: n64 - # undefined_funcs_auto: True - # undefined_funcs_auto_path: undefined_funcs_auto.txt - # undefined_syms_auto: True - # undefined_syms_auto_path: undefined_syms_auto.txt - # symbol_addrs_path: symbol_addrs.txt + compiler: {rom.compiler} + # asm_path: asm # src_path: src # build_path: build - # extensions_path: tools/splat_ext - # mips_abi_float_regs: o32 + # create_asm_dependencies: True + + ld_script_path: {basename}.ld + ld_dependencies: True + + find_file_boundaries: True + header_encoding: {rom.header_encoding} + + o_as_suffix: True + use_legacy_include_asm: False + mips_abi_float_regs: o32 + + asm_function_macro: glabel + asm_jtbl_label_macro: jlabel + asm_data_macro: dlabel + # section_order: [".text", ".data", ".rodata", ".bss"] # auto_all_sections: [".data", ".rodata", ".bss"] + + symbol_addrs_path: + - symbol_addrs.txt + reloc_addrs_path: + - reloc_addrs.txt + + # undefined_funcs_auto_path: undefined_funcs_auto.txt + # undefined_syms_auto_path: undefined_syms_auto.txt + + extensions_path: tools/splat_ext + + # string_encoding: ASCII + # data_string_encoding: ASCII + rodata_string_guesser_level: 2 + 
data_string_guesser_level: 2 # libultra_symbols: True # hardware_regs: True + # gfx_ucode: # one of [f3d, f3db, f3dex, f3dexb, f3dex2] """ first_section_end = find_code_length.run(rom_bytes, 0x1000, rom.entry_point) @@ -187,6 +219,102 @@ def create_gc_config(iso_path: Path, iso_bytes: bytes): f.write(segments) +def create_psx_config(exe_path: Path, exe_bytes: bytes): + exe = psxexeinfo.PsxExe.get_info(exe_path, exe_bytes) + basename = exe_path.name.replace(" ", "").lower() + + header = f"""\ +name: {exe_path.name} +sha1: {exe.sha1} +options: + basename: {basename} + target_path: {exe_path} + base_path: . + platform: psx + compiler: GCC + + # asm_path: asm + # src_path: src + # build_path: build + # create_asm_dependencies: True + + ld_script_path: {basename}.ld + + find_file_boundaries: False + gp_value: 0x{exe.initial_gp:08X} + + o_as_suffix: True + use_legacy_include_asm: False + + asm_function_macro: glabel + asm_jtbl_label_macro: jlabel + asm_data_macro: dlabel + + section_order: [".rodata", ".text", ".data", ".bss"] + # auto_all_sections: [".data", ".rodata", ".bss"] + + symbol_addrs_path: + - symbol_addrs.txt + reloc_addrs_path: + - reloc_addrs.txt + + # undefined_funcs_auto_path: undefined_funcs_auto.txt + # undefined_syms_auto_path: undefined_syms_auto.txt + + extensions_path: tools/splat_ext + + subalign: 2 + + string_encoding: ASCII + data_string_encoding: ASCII + rodata_string_guesser_level: 2 + data_string_guesser_level: 2 +""" + + segments = f"""\ +segments: + - name: header + type: header + start: 0x0 + + - name: main + type: code + start: 0x800 + vram: 0x{exe.destination_vram:X} + bss_size: 0x{exe.bss_size:X} + subsegments: +""" + text_offset = exe.text_offset + if text_offset != 0x800: + segments += f"""\ + - [0x800, rodata, 800] +""" + segments += f"""\ + - [0x{text_offset:X}, asm, {text_offset:X}] +""" + + if exe.data_vram != 0 and exe.data_size != 0: + data_offset = exe.data_offset + segments += f"""\ + - [0x{data_offset:X}, data, 
{data_offset:X}] +""" + + if exe.bss_size != 0: + segments += f"""\ + - {{ start: 0x{exe.size:X}, type: bss, name: {exe.bss_vram:X}, vram: 0x{exe.bss_vram:X} }} +""" + + segments += f"""\ + - [0x{exe.size:X}] +""" + + out_file = f"{basename}.yaml" + with open(out_file, "w", newline="\n") as f: + print(f"Writing config to {out_file}") + f.write(header) + f.write(segments) + + if __name__ == "__main__": args = parser.parse_args() main(Path(args.file)) diff --git a/tools/splat/disassembler/spimdisasm_disassembler.py b/tools/splat/disassembler/spimdisasm_disassembler.py index c3e49a7..335c9d2 100644 --- a/tools/splat/disassembler/spimdisasm_disassembler.py +++ b/tools/splat/disassembler/spimdisasm_disassembler.py @@ -8,7 +8,7 @@ class SpimdisasmDisassembler(disassembler.Disassembler): # This value should be kept in sync with the version listed on requirements.txt - SPIMDISASM_MIN = (1, 16, 0) + SPIMDISASM_MIN = (1, 18, 0) def configure(self, opts: SplatOpts): # Configure spimdisasm @@ -80,6 +80,7 @@ def configure(self, opts: SplatOpts): spimdisasm.common.GlobalConfig.GP_VALUE = opts.gp spimdisasm.common.GlobalConfig.ASM_TEXT_LABEL = opts.asm_function_macro + spimdisasm.common.GlobalConfig.ASM_TEXT_ALT_LABEL = opts.asm_function_alt_macro spimdisasm.common.GlobalConfig.ASM_JTBL_LABEL = opts.asm_jtbl_label_macro spimdisasm.common.GlobalConfig.ASM_DATA_LABEL = opts.asm_data_macro spimdisasm.common.GlobalConfig.ASM_TEXT_END_LABEL = opts.asm_end_label @@ -102,12 +103,13 @@ def configure(self, opts: SplatOpts): opts.allow_data_addends ) - spimdisasm.common.GlobalConfig.ASM_GENERATED_BY = opts.asm_generated_by - spimdisasm.common.GlobalConfig.DISASSEMBLE_UNKNOWN_INSTRUCTIONS = ( opts.disasm_unknown ) + if opts.compiler == compiler.GCC and opts.platform == "ps2": + rabbitizer.config.toolchainTweaks_treatJAsUnconditionalBranch = False + def check_version(self, skip_version_check: bool, splat_version: str): if not skip_version_check and spimdisasm.__version_info__ < 
self.SPIMDISASM_MIN: log.error( diff --git a/tools/splat/disassembler_section.py b/tools/splat/disassembler_section.py index 2eb745a..b9e1cad 100644 --- a/tools/splat/disassembler_section.py +++ b/tools/splat/disassembler_section.py @@ -1,12 +1,9 @@ -import spimdisasm -from util import symbols -from typing import Optional, Set, Tuple -from segtypes.segment import Segment -from util import log, options, symbols +from abc import ABC, abstractmethod +from typing import Optional +import spimdisasm -from abc import ABC, abstractmethod -from typing import Callable +from util import options, symbols class DisassemblerSection(ABC): diff --git a/tools/splat/docs/Adding-Symbols.md b/tools/splat/docs/Adding-Symbols.md new file mode 100644 index 0000000..8bb0deb --- /dev/null +++ b/tools/splat/docs/Adding-Symbols.md @@ -0,0 +1,136 @@ +Symbols (i.e. labelling a function or variable) are controlled by the `symbol_addrs.txt` file. + +The format for defining symbols is: + +```ini +symbol = address; // option1:value1 option2:value2 +``` +e.g. +```ini +osInitialize = 0x801378C0; // type:func +``` + +:information_source: The file used can be overridden via the `symbol_addrs_path` setting in the global `options` section of the splat yaml. This option can also accept a list of paths, allowing for symbols to be organized in multiple files. + +## symbol + +This is the name of the symbol and can be any valid C variable name, e.g. `myCoolFunction` or `gReticulatedSplineCounter` + +## address + +This is the VRAM address expressed in hexadecimal, e.g. `0x80001050` + +## options + +An optional `key:value` list of settings, note that each option should be separated by whitespace, but there should be no whitespace between the key:value pairs themselves.
+ +### `type` + +Override splat's automatic type detection, possible values are: +- `func`: Functions +- `jtbl`: Jumptables +- `jtbl_label`: Jumptables labels (inside functions) +- `label`: Branch labels (inside functions) +- `s8`, `u8`: To specify data/rodata to be disassembled as `.byte`s +- `s16`, `u16`: To specify data/rodata to be disassembled as `.short`s +- `s32`, `u32`: To specify data/rodata to be disassembled as `.word`s (the default) +- `s64`, `u64`: :man_shrugging: +- `f32`, `Vec3f`: To specify data/rodata to be disassembled as `.float`s +- `f64`: To specify data/rodata to be disassembled as `.double`s +- `asciz`, `char*`, `char`: C strings (disassembled as `.asciz`) +- Any custom type starting with a capital letter (will default to `.word`s) + +Any other type will produce an error. + +**Example:** +```ini +minFrameSize = 0x80241D08; // type:s32 +``` + +### `size` + +The size of the function or the size of the data depending on the type of the symbol. It specifies a size in bytes. e.g. `size:0x10`. + +**Example:** +```ini +RawHuffmanTable = 0x8022E0E0; // type:symbol size:0x100 +``` + +### `rom` + +The ROM offset for the symbol, useful (potentially mandatory) for symbols in overlays where multiple symbols could share the same VRAM address. + +**Example:** +```ini +create_particle_effect = 0x802D5F4C; // type:func rom:0x6E75FC +``` + +### `segment` + +Allows specifying to which specific segment this symbol belongs to, useful to disambiguate symbols from segments that share the same VRAM address. This name must be the same as the name of a segment listed in the yaml. + +**Example:** +```ini +sMenuTexture = 0x06004040; // segment:menu_assets +``` + +### `name_end` + +Emits a symbol after the end of the data of the current symbol. Useful to reference the end of an assembly symbol, like RSP data. + +**Example:** +```ini +rspbootTextStart = 0x80084690; // name_end:rspbootTextEnd +``` + +### `defined` + +Forces the symbol to be defined - i.e. 
prevent it from appearing in `undefined_syms_auto.txt` should splat not encounter the symbol during the symbol detection phase. + +**Example:** +```ini +__osDpDeviceBusy = 0x8014B3D0; // defined:true +``` + +### `extract` + +TBD + +### `ignore` + +Prevents an address from being symbolized and referenced. Useful to get a finer control over the disassembled output. + +**Example:** +```ini +D_A0000000 = 0xA0000000; // ignore:true +``` + +It can also be combined with the `size` attribute to avoid a range of addresses of being symbolized. + +**Example:** +```ini +D_80000000 = 0x80000000; // ignore:true size:0x10 +``` + +### `force_migration` and `force_not_migration` + +Grants a finer control over the automatic rodata migration to functions. This may be required because of the migration heuristic failing to migrate (or to not migrate) a symbol, producing a disordered rodata section. Forcing the migration of a rodata symbol to a function will only work if that function references said rodata symbol. Forcing the not-migration of a rodata symbol always works. + +This attribute is ignored if the `migrate_rodata_to_functions` option is disabled. + +**Example:** +```ini +jtbl_800B13D0 = 0x800B13D0; // type:jtbl force_migration:True +STR_800B32A8 = 0x800C9520; // type:asciz force_not_migration:True +``` + +### `allow_addend` and `dont_allow_addend` + +Allows this symbol to reference (or not reference) other symbols with an addend. + +This attribute overrides the global `allow_data_addends` option. 
+ +**Example:** +```ini +aspMainTextStart = 0x80084760; // dont_allow_addend:True +``` diff --git a/tools/splat/docs/Advanced.md b/tools/splat/docs/Advanced.md new file mode 100644 index 0000000..fe9b497 --- /dev/null +++ b/tools/splat/docs/Advanced.md @@ -0,0 +1,7 @@ +## Writing custom segment handler + +The following list contains examples of custom segments: + +- [RNC](https://github.com/mkst/sssv/blob/master/tools/splat_ext/rnc.py) +- [Vtx](https://github.com/mkst/sssv/blob/master/tools/splat_ext/sssv_vtx.py) +- [Multiple](https://github.com/pmret/papermario/tree/main/tools/splat_ext) diff --git a/tools/splat/docs/Configuration.md b/tools/splat/docs/Configuration.md new file mode 100644 index 0000000..53d31c9 --- /dev/null +++ b/tools/splat/docs/Configuration.md @@ -0,0 +1,643 @@ +Splat has various options for configuration, all of which are listed under the `options` section of the yaml file. + +## Project configuration + +### base_path + +Path that all other configured paths are relative to. + +#### Usage + +```yaml +base_path: path/to/base/folder +``` + +#### Default + +`.` *(Current directory)* + + +### target_path + +Path to target binary. + +#### Usage + +```yaml +target_path: path/to/target/binary +``` + +### elf_path + +Path to the final elf target + +#### Default +Path to the binary that was used as the input to `create_config.py` + + +### platform + +The target platform for the binary. Options are +- `n64` (Nintendo 64) +- `psx` (PlayStation 1) +- `ps2` (PlayStation 2) +- `gc` (GameCube) + +#### Usage +```yaml +platform: psx +``` + + +### compiler + +Compiler used to build the binary. + +splat recognizes the following compilers, and it will adapt its behavior accordingly for them, but unknown compilers can be passed as well: +- GCC +- SN64 +- IDO + +#### Usage +```yaml +compiler: IDO +``` + +#### Default +`ido` + + +### endianness + +Determines the endianness of the target binary. If not set, the endianness will be guessed from the selected platform.
+ +Valid values: +- big +- little + + +### section_order + +Determines the default section order of the target binary. This can be overridden per-segment. + +Expects a list of strings. + + +### generated_c_preamble + +String that is placed before the contents of newly-generated `.c` files. + +#### Usage + +```yaml +generated_c_preamble: '#include "header.h"' +``` + +#### Default + +`#include "common.h"` + + +### generated_s_preamble + +String that is placed before the contents of newly-generated assembly (`.s`) files. + +#### Usage + +```yaml +generated_s_preamble: .set fp=64 +``` + + +### o_as_suffix + +Determines whether to replace the suffix of the file to `.o` or to append `.o` to the suffix of the file. + + +### gp_value + +The value of the `$gp` register to correctly calculate offset to `%gp_rel` relocs. + + +### check_consecutive_segment_types + +By default splat will check and error if there are any non consecutive segment types. +This option disables said feature. + +#### Usage + +```yaml +# Disable checking for non-consecutive segments +check_consecutive_segment_types: False +``` + + +## Paths + + +### asset_path + +Path to output split asset files. + +#### Usage + +```yaml +asset_path: path/to/assets/folder +``` + +#### Default + +`assets` + + +### symbol_addrs_path + +Determines the path to the symbol addresses file(s). A `symbol_addrs` file is to be updated/curated manually and contains addresses of symbols as well as optional metadata such as rom address, type, and more + +It's possible to use more than one file by supplying a list instead of a string + +#### Usage +```yaml +symbol_addrs_path: path/to/symbol_addrs +``` + +#### Default +`symbol_addrs.txt` + + + +### reloc_addrs_path + + + +### build_path +Path where built files will be found. Used for generation of the linker script. + +#### Usage +```yaml +build_path: path/to/build/folder +``` + +#### Default +`build` + + +### src_path +Path to split `.c` files.
+ +#### Usage +```yaml +src_path: path/to/src/folder +``` + +#### Default +`src` + + +### asm_path +Path to output split assembly files. + +#### Usage +```yaml +asm_path: path/to/asm/folder +``` + +#### Default +`asm` + + +### data_path + +Determines the path to the asm data directory + + +### nonmatchings_path + +Determines the path to the asm nonmatchings directory + + +### cache_path +Path to splat cache + +#### Usage +```yaml +cache_path: path/to/splat/cache +``` + +#### Default +`.splat_cache` + + +### create_undefined_funcs_auto +If `True`, splat will generate an `undefined_funcs_auto.txt` file. + +#### Usage +```yaml +create_undefined_funcs_auto: False +``` + +#### Default +`True` + + +### undefined_funcs_auto_path +Path to file containing automatically defined functions. + +#### Usage +```yaml +undefined_funcs_auto_path: path/to/undefined_funcs_auto.txt +``` + +#### Default +`undefined_funcs_auto.txt` + + + +### create_undefined_syms_auto +If `True`, splat will generate an `undefined_syms_auto.txt` file. + +#### Usage +```yaml +create_undefined_syms_auto: False +``` + +#### Default +`True` + + +### undefined_syms_auto_path +Path to file containing automatically defined symbols. + +#### Usage +```yaml +undefined_syms_auto_path: path/to/undefined_syms_auto.txt +``` + +#### Default +`undefined_syms_auto.txt` + + +### extensions_path +If you are using splat extension(s), this is the path they will be loaded from. + +#### Usage +```yaml +extensions_path: path/to/extensions/folder +``` + +#### Default +`tools/splat_ext` + + +### lib_path + +Determines the path to library files that are to be linked into the target binary + + +### elf_section_list_path +Path to file containing elf section list. + +#### Usage +```yaml +elf_section_list_path: path/to/elf_sections +``` + +#### Default +`elf_sections.txt` + + +## Linker script + + +### subalign + +Sub-alignment (in bytes) of sections. 
+ +#### Usage +```yaml +subalign: 4 +``` + +#### Default +`16` + + +### auto_all_sections + +TODO + +### ld_script_path + +Path to output ld script. + +#### Usage + +```yaml +ld_script_path: path/to/ld/script.ld +``` + +#### Default + +`{basename}.ld` + + +### ld_symbol_header_path + +Path to output a header containing linker symbols. + +#### Usage +```yaml +ld_symbol_header_path: path/to/linker_symbol_header +``` + +### ld_discard_section + +Determines whether to add a discard section to the linker script + +### ld_section_labels + +Determines the list of section labels that are to be added to the linker script + +### ld_wildcard_sections + +Determines whether to add wildcards for section linking in the linker script (.rodata* for example) + +### ld_use_symbolic_vram_addresses + +Determines whether to use `follows_vram` (segment option) and `vram_symbol` / `follows_classes` (vram_class options) to calculate vram addresses in the linker script. +Enabled by default. If disabled, this uses the plain integer values for vram addresses defined in the yaml. + +### ld_partial_linking + +Change linker script generation to allow partially linking segments. Requires both `ld_partial_scripts_path` and `ld_partial_build_segments_path` to be set. + +### ld_partial_scripts_path + +Folder where each intermediary linker script will be written to. + +### ld_partial_build_segments_path + +Folder where the built partially linked segments will be placed by the build system. + +### ld_dependencies + +Generate a dependency file for every linker script generated. Dependency files will have the same path and name as the corresponding linker script, but changing the extension to `.d`. Requires `elf_path` to be set. + +### ld_legacy_generation + +Legacy linker script generation does not impose the section_order specified in the yaml options or per-segment options.
+ +### segment_end_before_align + +If enabled, the end symbol for each segment will be placed before the alignment directive for the segment + +### segment_symbols_style + +Controls the style of the auto-generated segment symbols in the linker script. + +Possible values: +- splat +- makerom + + +### ld_rom_start + +Specifies the starting offset for rom address symbols in the linker script. + + +### ld_fill_value + +Allows to specify the value of the `FILL` statement generated on every segment of the linker script. + +It must be either an integer, which will be used as the parameter for the `FILL` statement, or `null`, which tells splat to not emit `FILL` statements. + +This behavior can be customized per segment too. See [ld_fill_value](Segments.md#ld_fill_value) on the Segments section. + +Defaults to 0. + + +### ld_bss_is_noload + +Allows to control if `bss` sections (and derived sections) will be put on a `NOLOAD` segment on the generated linker script or not. + +Applies to all `bss` (`sbss`, `common`, `scommon`, etc) sections. + +Defaults to `True`, meaning `bss` sections will be put on `NOLOAD` segments. + + +### ld_align_segment_vram_end + +Allows to toggle aligning the `*_VRAM_END` linker symbol of each segment. + +Setting this to `True` will align the `*_VRAM_END` symbol to the configured alignment of the segment. + +Defaults to `True`. + + +### ld_align_section_vram_end + +Allows to toggle aligning the `*_VRAM_END` linker symbol of each section for every segment. + +Setting this to `True` will align the `*_END` linker symbol of every section to the configured alignment of the segment. + +Defaults to `True`.
+ + +## C file options + +### create_c_files + +Determines whether to create new c files if they don't exist + +### auto_decompile_empty_functions + +Determines whether to "auto-decompile" empty functions + +### do_c_func_detection + +Determines whether to detect matched/unmatched functions in existing c files so we can avoid creating `.s` files for already-decompiled functions. + +### c_newline + +Determines the newline char(s) to be used in c files + + +## (Dis)assembly-related options + +### symbol_name_format + +Determine the format that symbols should be named by default + +### symbol_name_format_no_rom + +Same as `symbol_name_format` but for symbols with no rom address + +### find_file_boundaries + +Determines whether to detect and hint to the user about likely file splits when disassembling. + +This setting can also be set on a per segment basis, if you'd like to enable or disable detection for specific segments. This could be useful when you are confident you identified all subsegments in a segment, yet `splat` still hints that subsegments could be split. 
+ +### pair_rodata_to_text + +Determines whether to detect and hint to the user about possible rodata sections corresponding to a text section + +### migrate_rodata_to_functions + +Determines whether to attempt to automatically migrate rodata into functions + +### asm_inc_header + +Determines the header to be used in every asm file that's included from c files + +### asm_function_macro + +Determines the macro used to declare functions in asm files + +### asm_function_alt_macro + +Determines the macro used to declare symbols in the middle of functions in asm files (which may be alternative entries) + +### asm_jtbl_label_macro + +Determines the macro used to declare jumptable labels in asm files + +### asm_data_macro + +Determines the macro used to declare data symbols in asm files + +### asm_end_label + +Determines the macro used at the end of a function, such as endlabel or .end + +### asm_emit_size_directive + +Toggles the .size directive emitted by the disassembler + +### include_macro_inc + +Determines whether to include the macro.inc file on non-migrated rodata variables + +### mnemonic_ljust + +Determines the number of characters to left align before the instruction + +### rom_address_padding + +Determines whether to pad the rom address + +### mips_abi_gpr + +Determines which ABI names to use for general purpose registers + +### mips_abi_float_regs + +Determines which ABI names to use for floating point registers. + +Valid values: +- numeric +- o32 +- n32 +- n64 + +`o32` is highly recommended, as it provides logically named registers for floating point instructions.
+For more info, see https://gist.github.com/EllipticEllipsis/27eef11205c7a59d8ea85632bc49224d + +### named_regs_for_c_funcs + +Determines whether functions inside c files should have named registers + +### add_set_gp_64 + +Determines whether to add ".set gp=64" to asm/hasm files + +### create_asm_dependencies + +Generate `.asmproc.d` dependency files for each C file which still references functions in assembly files + +### string_encoding + +Global option for rodata string encoding. This can be overridden per segment + +### data_string_encoding + +Global option for data string encoding. This can be overridden per segment + +### rodata_string_guesser_level + +Global option for the rodata string guesser. 0 disables the guesser completely. + +### data_string_guesser_level + +Global option for the data string guesser. 0 disables the guesser completely. + +### allow_data_addends + +Global option for allowing data symbols using addends on symbol references. It can be overridden per symbol + +### disasm_unknown + +Tells the disassembler to try disassembling functions with unknown instructions instead of falling back to disassembling as raw data + +### detect_redundant_function_end + +Tries to detect redundant and unreferenced function ends and merge them together. This option is ignored if the compiler is not set to IDO. + +### disassemble_all + +Don't skip disassembling already matched functions and migrated sections + + +## N64-specific options + +### header_encoding + +Used to specify what encoding should be used when parsing the N64 ROM header. + +#### Default + +`ASCII` + + +### gfx_ucode + +Determines the type of gfx ucode (used by gfx segments) + +Valid options are: +- f3d +- f3db +- f3dex +- f3dexb +- f3dex2 + +### libultra_symbols + +Use named libultra symbols by default. Those will need to be added to a linker script manually by the user + +### ique_symbols + +Use named iQue symbols by default.
Those will need to be added to a linker script manually by the user + +### hardware_regs + +Use named hardware register symbols by default. Those will need to be added to a linker script manually by the user + + +## Gamecube-specific options + +### filesystem_path + +Path where the iso's filesystem will be extracted to + +## Compiler-specific options + +### use_legacy_include_asm +If `True`, generate c files using the longer `INCLUDE_ASM` macro. This is defaulted to `True` to by-default support projects using the longer macro. + +#### Usage +```yaml +use_legacy_include_asm: False +``` + +#### Default +`True` diff --git a/tools/splat/docs/Examples.md b/tools/splat/docs/Examples.md new file mode 100644 index 0000000..82bf8cb --- /dev/null +++ b/tools/splat/docs/Examples.md @@ -0,0 +1,32 @@ +The following is a list of projects known to be using **splat** along with the compilers used: + +## N64 Projects + +- [Aidyn Chronicles](https://github.com/blackgamma7/Aidyn) `unknown` (does not build source) +- [Animal Forest](https://github.com/zeldaret/af) `ido7.1` +- [Banjo Kazooie](https://gitlab.com/banjo.decomp/banjo-kazooie) `ido5.3` +- [Conker's Bad Fur Day](https://github.com/mkst/conker) `ido5.3` +- [Dinosaur Planet](https://github.com/zestydevy/dinosaur-planet) `ido5.3` +- [Dr. 
Mario 64](https://github.com/AngheloAlf/drmario64) `kmc gcc2.7.2` & `egcs gcc2.91.66` +- [Gauntlet Legends](https://github.com/Drahsid/gauntlet-legends) `kmc gcc2.7.2` (uses old KMC GCC wrapper - do not use for reference) +- [Mario Party 3](https://github.com/PartyPlanner64/mp3) `gcc2.7.2 TBD` +- [Mischief Makers](https://github.com/Drahsid/mischief-makers) `ido5.3` +- [Neon Genesis Evangelion 64](https://github.com/farisawan-2000/evangelion) `kmc gcc2.7.2` +- [Paper Mario](https://github.com/pmret/papermario) `gcc2.8.1` +- [Pokemon Snap](https://github.com/ethteck/pokemonsnap) `ido7.1` +- [Pokemon Stadium](https://github.com/ethteck/pokemonstadium) `ido7.1` +- [Pokémon Puzzle League](https://github.com/AngheloAlf/puzzleleague64) `kmc gcc2.7.2` +- [Rocket Robot on Wheels](https://github.com/RocketRet/Rocket-Robot-On-Wheels) `SN64 (build 970404)` +- [Space Station Silicon Valley](https://github.com/mkst/sssv) `ido5.3` +- [Turok 3](https://github.com/drahsid/turok3) `psyq gcc2.8.0` +- [Yoshi's Story](https://github.com/decompals/yoshis-story) `ido7.1` + +## PS1 Projects + +- [Evo's Space Adventures](https://github.com/mkst/esa) `psyq 4.6 (gcc2.95)` +- [Final Fantasy 7](https://github.com/Drahsid/ffvii) `psyq <= 4.1 (gcc2.7.2)` +- [Silent Hill](https://github.com/Vatuu/silent-hill-decomp) `psyq <= 4.1 (gcc2.7.2) TBD` + +## PS2 Projects + +- [Kingdom Hearts](https://github.com/ethteck/kh1) TBD diff --git a/tools/splat/docs/General-Workflow.md b/tools/splat/docs/General-Workflow.md new file mode 100644 index 0000000..947f2b3 --- /dev/null +++ b/tools/splat/docs/General-Workflow.md @@ -0,0 +1,179 @@ +This describes an example of how to iteratively edit the splat segments config in order to maximise code and data migration from the binary. 
+ +# 1 Initial configuration + +After successfully following the [Quickstart](https://github.com/ethteck/splat/wiki/Quickstart), you should have an initial configuration like the one below: + +```yaml +- name: main + type: code + start: 0x1060 + vram: 0x80070C60 + follows_vram: entry + bss_size: 0x3AE70 + subsegments: + - [0x1060, asm] + # ... a lot of additional `asm` sections + # This section is found out to contain __osViSwapContext + - [0x25C20, asm, energy_orb_wave] + # ... a lot of additional `asm` sections + - [0x2E450, data] + + - [0x3E330, rodata] + # ... a lot of additional `rodata` sections + - { start: 0x3F1B0, type: bss, vram: 0x800E9C20 } + +- [0x3F1B0, bin] +``` + +## 1.1 Match `rodata` to `asm` sections + +It's good practice to start pairing `rodata` sections with `asm` sections _before_ changing the `asm` sections into `c` files. This is because rodata may need to be explicitly included within the `c` file (via `INCLUDE_RODATA` or `GLOBAL_ASM` macros). + +`splat` provides hints about which `rodata` segments are referenced by which `asm` segments based on references to these symbols within the disassembled functions. + +These messages are output when splitting and look like: + +``` +Rodata segment '3EE10' may belong to the text segment 'energy_orb_wave' + Based on the usage from the function func_0xXXXXXXXX to the symbol D_800AEA10 +``` + +To pair these two sections, simply add the _name_ of the suggested text (i.e. `asm`) segment to the `rodata` segment: + +```yaml +- [0x3EE10, rodata, energy_orb_wave] # segment will be paired with a text (i.e. asm or c) segment named "energy_orb_wave" +``` + +**NOTE:** + +By default `migrate_rodata_to_functions` functionality is enabled. This causes splat to include paired rodata along with the disassembled assembly code, allowing it to be linked via `.rodata` segments from the get-go. This guide assumes that you will disable this functionality until you have successfully paired up the segments. 
+ +### Troubleshooting + +#### Multiple `rodata` segments for a single text segment + +Using the following configuration: +```yaml +# ... +- [0x3E900, rodata] +- [0x3E930, rodata] +# ... +``` + +`splat` outputs a hint that doesn't immediately seem to make sense: + +``` +Rodata segment '3E900' may belong to the text segment '16100' + Based on the usage from the function func_80085DA0 to the symbol jtbl_800AE500 + +Rodata segment '3E930' may belong to the text segment '16100' + Based on the usage from the function func_800862C0 to the symbol jtbl_800AE530 +``` + +This hint tells you that `splat` believes one text segment references two `rodata` sections. This usually means that either the `rodata` should not be split at `0x3E930`, or that there is a missing split in the `asm` at `0x16100`, as a text segment can only have one `rodata` segment. + +If we assume that the rodata split is incorrect, we can remove the extraneous split: + +```yaml +# ... +- [0x3E900, rodata, "16100"] +# ... +``` + +**NOTE:** Splat uses heuristics to determine `rodata` and `asm` splits and is not perfect - false positives are possible and, if in doubt, double-check the assembly yourself before changing the splits. + + +### Multiple `asm` segments referring to the same `rodata` segment + +Sometimes the opposite is true, and `splat` believes two `asm` segments belong to a single `rodata` segment. In this case, you can split the `asm` segment to make sure two files are not paired with the same `rodata`. Note that this too can be a false positive. + + +# 2 Disassemble text, data, rodata + +Let's say you want to start decompiling the subsegment at `0x25C20` (`energy_orb_wave`). Start by replacing the `asm` type with `c`, and then re-run splat. + +```yaml +- [0x25C20, c, energy_orb_wave] +# ... +- [0x3EE10, rodata, energy_orb_wave] +``` + +This will disassemble the ROM at `0x25C20` as code, creating individual `.s` files for each function found. 
The output will be located in `{asm_path}/nonmatchings/energy_orb_wave/<function_name>.s`. + +Assuming `data` and `rodata` segments have been paired with the `c` segment, splat will generate `{asm_path}/energy_orb_wave.data.s` and `{asm_path}/energy_orb_wave.rodata.s` respectively. + +Finally, splat will generate a C file, at `{src_path}/energy_orb_wave.c` containing macros that will be used to include all disassembled function assembly. + +**NOTE:** +- the path for where assembly is written can be configured via `asm_path`, the default is `{base_path}/asm` +- the source code path can be configured via `src_path`, the default is `{base_path}/src` + +## Macros + +The macros to include text/rodata assembly are different for GCC vs IDO compiler: + +**GCC**: `INCLUDE_ASM` & `INCLUDE_RODATA` (text/rodata respectively) +**IDO**: `GLOBAL_ASM` + +These macros must be defined in an included header, which splat currently does not produce. + +For a GCC example, see the [include_asm.h](https://github.com/AngheloAlf/drmario64/blob/master/include/include_asm.h) from the Dr. Mario project. + +For IDO, you will need to use [asm-processor](https://github.com/simonlindholm/asm-processor) in order to include assembly code within the c files. + + +# 3 Decompile text + +This involves back and forth between `.c` and `.s` files: + +- editing the `data.s`, `rodata.s` files to add/fixup symbols at the proper locations +- decompiling functions, declaring symbols (`extern`s) in the `.c` + +The linker script links +- `.text` (only) from the `.o` built from `energy_orb_wave.c` +- `.data` (only) from the `.o` built from `energy_orb_wave.data.s` +- `.rodata` (only) from the `.o` built from `energy_orb_wave.rodata.s` + +# 4 Decompile (ro)data + +Migrate data to the .c file, using raw values, lists or structs as appropriate. + +Once you have paired the rodata and text segments together, you can enable `migrate_rodata_to_functions`.
This will add the paired rodata into each individual function's assembly file, and therefore, the rodata will end up in the compiled .o file. + +To link the .data/.rodata from the .o built from the .c file (instead of from the .s files), the subsegments must be changed from: + +```yaml +- [0x42100, c, energy_orb_wave] +- [0x42200, data, energy_orb_wave] # extract data at this ROM address as energy_orb_wave.data.s +- [0x42300, rodata, energy_orb_wave] # extract rodata at this ROM address as energy_orb_wave.rodata.s +``` + +to: + +```yaml +- [0x42100, c, energy_orb_wave] +- [0x42200, .data, energy_orb_wave] # take the .data section from the compiled c file named energy_orb_wave +- [0x42300, .rodata, energy_orb_wave] # take the .rodata section from the compiled c file named energy_orb_wave +``` + + +**NOTE:** +If using `auto_all_sections` and there are no other `data`/`.data`/`rodata`/`.rodata` in the subsegments in the code segment, the subsegments can also be changed to + +```yaml +- [0x42100, c, energy_orb_wave] +- [0x42200] +``` + +# 5 Decompile bss + +`bss` works in a similar way to data/rodata. However, `bss` is usually discarded from the final binary, which makes it somewhat trickier to migrate. + +The `bss` segment will create assembly files that are full of `space`. The `.bss` segment will link the `.bss` section of the referenced `c` file. + +# 6 Done! + +`.text`, `.data`, `.rodata` and `.bss` are linked from the .o built from `energy_orb_wave.c` which now has everything to match when building + +The assembly files (functions .s, data.s and rodata.s files) can be deleted diff --git a/tools/splat/docs/Home.md b/tools/splat/docs/Home.md new file mode 100644 index 0000000..33307ac --- /dev/null +++ b/tools/splat/docs/Home.md @@ -0,0 +1,25 @@ +### What is splat? + +**splat** is a binary splitting tool, written in Python. Its goal is to support the successful disassembly and then rebuilding of binary data.
+ +It is the spiritual successor to [n64split](https://github.com/queueRAM/sm64tools/blob/master/n64split.c), originally written to handle N64 ROMs, it now has limited support for PSX and PS2 binaries. + +MIPS code disassembly is handled via [spimdisasm](https://github.com/Decompollaborate/spimdisasm/). + +There are a number of asset types built-in (e.g. various image formats, N64 Vtx data, etc), and it is designed to be simple to extend by writing your own custom types that can do anything you want as part of the **splat** pipeline. + + +### How does it work? + +**splat** takes a [yaml](https://en.wikipedia.org/wiki/YAML) configuration file which tells it *where* and *how* to split a given file. Symbols can be mapped to addresses (and their types provided) via an optional "symbol_addrs" file. + +**splat** runs two distinct phases: scan and split. + +The _scan_ phase makes a first pass over the data and performs the initial disassembly of code and data. During the _split_ phase, information gathered during the _scan_ phase is used and files & data are written out to disk. + +After scanning and splitting, **splat** will output a linker script that can be used as part of re-building the input file. + + +### Sounds great, how do I get started? + +Have a look at the [Quickstart](https://github.com/ethteck/splat/wiki/Quickstart), or check out the [Examples](https://github.com/ethteck/splat/wiki/Examples) page to see projects that are using **splat**. diff --git a/tools/splat/docs/Quickstart.md b/tools/splat/docs/Quickstart.md new file mode 100644 index 0000000..f871144 --- /dev/null +++ b/tools/splat/docs/Quickstart.md @@ -0,0 +1,153 @@ +> **Note**: This quickstart is written with N64 ROMs in mind, and the assumption that you are using Ubuntu 20.04 either natively, via WSL2 or via Docker. + +For the purposes of this quickstart, we will assume that we are going to split a game called `mygame` and we have the ROM in `.z64` format named `baserom.z64`. 
+ +Create a directory for `~/mygame` and `cd` into it: + +```sh +mkdir -p ${HOME}/mygame && cd ${HOME}/mygame +``` + +Copy the `baserom.z64` file into the `mygame` directory inside your home directory. + +### System packages + +#### Python 3.8 + +Ensure you have **Python 3.8** or higher installed: + +```sh +$ python3 --version +Python 3.8.10 +``` + +If you get `bash: python3: command not found`, install it with the following command: + +```sh +sudo apt-get update && sudo apt-get install -y python3 python3-pip +``` + +#### Git + +Ensure you have **git**: + +```sh +$ git --version +``` + +If you get `bash: git: command not found`, install it with the following command: + +```sh +sudo apt-get update && sudo apt-get install -y git +``` + +## Checkout the repository + +We will clone **splat** into a `tools` directory to keep things organised: + +```sh +git clone https://github.com/ethteck/splat.git tools/splat +``` + +## Python packages + +Run the following to install the prerequisite Python packages: + +```sh +python3 -m pip install -r ./tools/splat/requirements.txt +``` + +## Create a config file for your baserom + +**splat** has a script that will generate a `yaml` file for your ROM. + +```sh +python3 tools/splat/create_config.py baserom.z64 +``` + +The `yaml` file generated will be named based upon the name of the ROM (taken from its header). The example below is for Super Mario 64: + +```yaml +$ cat supermario64.yaml +name: Super Mario 64 (North America) +sha1: 9bef1128717f958171a4afac3ed78ee2bb4e86ce +options: + basename: supermario64 + target_path: baserom.z64 + base_path: .
+ compiler: IDO + find_file_boundaries: True + # platform: n64 + # undefined_funcs_auto_path: undefined_funcs_auto.txt + # undefined_syms_auto_path: undefined_syms_auto.txt + # symbol_addrs_path: symbol_addrs.txt + # undefined_syms_path: undefined_syms.txt + # asm_path: asm + # src_path: src + # build_path: build + # extensions_path: tools/splat_ext + # auto_all_sections: True +segments: + - name: header + type: header + start: 0x0 + - name: boot + type: bin + start: 0x40 + - name: main + type: code + start: 0x1000 + vram: 0x80246000 + subsegments: + - [0x1000, asm] + - type: bin + start: 0xE6430 + - [0x800000] +``` + +This is a bare-bones configuration and there is a lot of work required to map out the different sections of the ROM. + +## Run splat with your configuration + +```sh +python3 tools/splat/split.py supermario64.yaml +``` + +The output will look something like this: +``` +splat 0.7.10.1 +Loading and processing symbols +Starting scan +..Segment 1000, function at vram 80246DF8 ends with extra nops, indicating a likely file split. +File split suggestions for this segment will follow in config yaml format: + - [0x1E70, asm] + - [0x3C40, asm] + - [0x45E0, asm] + - [0x6FF0, asm] +# < -- snip --> + - [0xE6060, asm] + - [0xE61F0, asm] + - [0xE6200, asm] + - [0xE6260, asm] +.. +Starting split +.... +Split 943 KB (11.24%) in defined segments + header: 64 B (0.00%) 1 split, 0 cached + bin: 4 KB (0.05%) 1 split, 0 cached + code: 939 KB (11.19%) 1 split, 0 cached + unknown: 7 MB (88.76%) from unknown bin files +``` + +Notice that **splat** has found some potential file splits (function start/end with 16 byte alignment padded with nops). + +It's up to you to figure out the layout of the ROM. + + +## Next Steps + +The reassembly of the ROM is currently out of scope of this quickstart, as is switching out the `asm` segments for `c`. 
+ +You can find a general workflow for using `splat` at [General Workflow](https://github.com/ethteck/splat/wiki/General-Workflow) + +Please feel free to improve this guide! diff --git a/tools/splat/docs/Segments.md b/tools/splat/docs/Segments.md new file mode 100644 index 0000000..c5346e3 --- /dev/null +++ b/tools/splat/docs/Segments.md @@ -0,0 +1,312 @@ +# Segments + +The configuration file for **splat** consists of a number of well-defined segments. + +Most segments can be defined as a either a dictionary or a list, however the list syntax is only suitable for simple cases as it does not allow for specifying many of the options a segment type has to offer. + +Splat segments' behavior generally falls under two categories: extraction and linking. Some segments will only do extraction, some will only do linking, some both, and some neither. Generally, segments will describe both extraction and linking behavior. Additionally, a segment type whose name starts with a dot (.) will only focus on linking. + +## `asm` + +**Description:** + +Segments designated Assembly, `asm`, will be disassembled via [spimdisasm](https://github.com/Decompollaborate/spimdisasm) and enriched with Symbols based on the contents of the `symbol_addrs` configuration. + +**Example:** + +```yaml +# as list +- [0xABC, asm, filepath1] +- [0xABC, asm, dir1/filepath2] # this will create filepath2.s inside a directory named dir1 + +# as dictionary +- name: filepath + type: asm + start: 0xABC +``` + +### `hasm` + +**Description:** + +Hand-written Assembly, `hasm`, similar to `asm` except it will not overwrite any existing files. Useful when assembly has been manually edited. + +**Example:** + +```yaml +# as list +- [0xABC, hasm, filepath] + +# as dictionary +- name: filepath + type: hasm + start: 0xABC +``` + +## `bin` + +**Description:** + +The `bin`(ary) segment type is for raw data, or data where the type is yet to be determined, data will be written out as raw `.bin` files. 
+ +**Example:** + +```yaml +# as list +- [0xABC, bin, filepath] + +# as dictionary +- name: filepath + type: bin + start: 0xABC +``` + +## `code` + +**Description:** + +The 'code' segment type, `code` is a group that can have many `subsegments`. Useful to group sections of code together (e.g. all files part of the same overlay). + +**Example:** + +```yaml +# must be a dictionary +- name: main + type: code + start: 0x00001000 + vram: 0x80125900 + subsegments: + - [0x1000, asm, entrypoint] + - [0x1050, c, main] +``` + +## `c` + +**Description:** + +The C code segments have two behaviors: + +- If the target `.c` file does not exist, a new file will be generated with macros to include the original assembly (macros differ for IDO vs GCC compiler). +- Otherwise the target `.c` file is scanned to determine what assembly needs to be extracted from the ROM. + +Assembly that is extracted due to a `c` segment will be written to a `nonmatchings` folder, with one function per file. + +**Example:** + +```yaml +# as list +- [0xABC, c, filepath] + +# as dictionary +- name: filepath + type: c + start: 0xABC +``` + +## `header` + +**Description:** + +This is platform specific; parses the data and interprets as a header for e.g. N64 or PS1 elf. + +**Example:** + +```yaml +# as list +- [0xABC, header, filepath] + +# as dictionary +- name: filepath + type: header + start: 0xABC +``` + +## `data` + +**Description:** + +Data located in the ROM. Extracted as assembly; integer, float and string types will be attempted to be inferred by the disassembler. + +**Example:** + +```yaml +# as list +- [0xABC, data, filepath] + +# as dictionary +- name: filepath + type: data + start: 0xABC +``` + +This will created `filepath.data.s` within the `asm` folder. + +## `.data` + +**Description:** + +Data located in the ROM that is linked from a C file. Use the `.data` segment to tell the linker to pull the `.data` section from the compiled object of corresponding `c` segment. 
+ +**Example:** + +```yaml +# as list +- [0xABC, .data, filepath] + +# as dictionary +- name: filepath + type: .data + start: 0xABC +``` + +**NOTE:** `splat` will not generate any `.data.s` files for these `.` (dot) sections. + +## `rodata` + +**Description:** + +Read-only data located in the ROM, e.g. floats, strings and jump tables. Extracted as assembly; integer, float and string types will be attempted to be inferred by the disassembler. + +**Example:** + +```yaml +# as list +- [0xABC, rodata, filepath] + +# as dictionary +- name: filepath + type: rodata + start: 0xABC +``` + +This will created `filepath.rodata.s` within the `asm` folder. + +## `.rodata` + +**Description:** + +Read-only data located in the ROM, linked to a C file. Use the `.rodata` segment to tell the linker to pull the `.rodata` section from the compiled object of corresponding `c` segment. + +**Example:** + +```yaml +# as list +- [0xABC, .rodata, filepath] + +# as dictionary +- name: filepath + type: .rodata + start: 0xABC +``` + +**NOTE:** `splat` will not generate any `.rodata.s` files for these `.` (dot) sections. + +## `bss` + +**Description:** + +`bss` is where variables are placed that have been declared but are not given an initial value. These sections are usually discarded from the final binary (although PSX binaries seem to include them!). + +Note that the `bss_size` option needs to be set at segment level for `bss` segments to work correctly. + +**Example:** + +```yaml +- { start: 0x7D1AD0, type: bss, name: filepath, vram: 0x803C0420 } +``` + +## `.bss` + +**Description:** + +Links the `.bss` section of the associated `c` file. + +**Example:** + +```yaml +- { start: 0x7D1AD0, type: .bss, name: filepath, vram: 0x803C0420 } +``` + +## Images + +**Description:** + +**splat** supports most of the [N64 image formats](https://n64squid.com/homebrew/n64-sdk/textures/image-formats/): + +- `i`, i.e. `i4` and `i8` +- `ia`, i.e. `ia4`, `ia8`, and `ia16` +- `ci`, i.e. 
`ci4` and `ci8` +- `rgb`, i.e. `rgba32` and `rgba16` + +These segments will parse the image data and dump out a `png` file. + +**Note:** Using the dictionary syntax allows for richer configuration. + +**Example:** + +```yaml +# as list +- [0xABC, i4, filename, width, height] +# as a dictionary +- name: filename + type: i4 + start: 0xABC + width: 64 + height: 64 + flip_x: yes + flip_y: no +``` + +## General segment options + +All splat's segments can be passed extra options for finer configuration. Note that those extra options require to rewrite the entry using the dictionary yaml notation instead of the list one. + +### `linker_section_order` + +**Description:** + +Allows overriding the section order used for linker script generation. + +Useful when a section of a file is not between the other sections of the same type in the ROM, for example a file having its data section between other files's rodata. + +Take in mind this option may need the [`check_consecutive_segment_types`](Configuration.md#check_consecutive_segment_types) yaml option to be turned off. + +**Example:** + +```yaml +- [0x400, data, file1] +# data ends + +# rodata starts +- [0x800, rodata, file2] +- { start: 0xA00, type: data, name: file3, linker_section_order: .rodata } +- [0xC00, rodata, file4] +``` + +This will created `file3.data.s` within the `asm` folder, but won't be reordered in the generated linker script to be placed on the data section. + +### `linker_section` + +**Description:** + +Allows to override the `.section` directive that will be used when generating the disassembly of the corresponding section, without needing to write an extension segment. This also affects the section name that will be used during link time. 
+ +Useful for sections with special names, like an executable section named `.start` + +**Example:** + +```yaml +- { start: 0x1000, type: asm, name: snmain, linker_section: .start } +- [0x1070, rdata, libc] +- [0x10A0, rdata, main_030] +``` + +### `ld_fill_value` + +Allows to specify the value of the `FILL` statement generated for this specific top-level segment of the linker script, ignoring the global configuration. + +It must be either an integer, which will be used as the parameter for the `FILL` statement, or `null`, which tells splat to not emit a `FILL` statement for this segment. + +If not set, then the global configuration is used. See [ld_fill_value](Configuration.md#ld_fill_value) on the Configuration section. + +Defaults to the value of the global option. diff --git a/tools/splat/docs/VramClasses.md b/tools/splat/docs/VramClasses.md new file mode 100644 index 0000000..c889919 --- /dev/null +++ b/tools/splat/docs/VramClasses.md @@ -0,0 +1,59 @@ +# vram classes + +Version 0.19.0 introduced `vram_classes`, a new top-level yaml section that can be used to help reduce duplicated data in your yaml and more clearly organize its memory layout. + +## Introduction +Before vram classes, you might have had something like this in your yaml: + +```yaml +- type: code + start: 0x4269D0 + vram: 0x802A9000 + vram_symbol: battle_move_end + subsegments: ... +- type: code + start: 0x4273B0 + vram: 0x802A9000 # notice same `vram` and `vram_symbol` for both segments + vram_symbol: battle_move_end + subsegments: ... +``` + +Having to duplicate the vram address and vram_symbol properties for potentially dozens or hundreds of overlay segments is tedious and pollutes your yaml with repeated information that can become out of sync. Enter vram_classes! + +```yaml +- type: code + start: 0x4269D0 + vram_class: maps + subsegments: ... +- type: code + start: 0x4273B0 + vram_class: maps + subsegments: ...
+``` + +Here, we are telling splat that both of these segments use the `maps` vram class. We are now effectively pointing both segments to the same source of information. Now let's look at how vram classes are defined: + +## Format + +```yaml +options: + ... + ld_use_symbolic_vram_addresses: True + ... +vram_classes: + - { name: maps, vram: 0x802A9000, vram_symbol: battle_move_end } +``` + +`vram_classes` is a top-level yaml section that contains a list of vram classes. You can either define them in dict form (as seen above) or list form. However, for list form, only `name` and `vram` are supported (`[maps, 0x802A9000]`). If you want to specify other options, please use the dict form. The fields supported are as follows: + +- `name` (required): The name of the class + +- `vram` (required): The vram address to be used during disassembly. If `ld_use_symbolic_vram_addresses` is disabled or no `vram_symbol` or `follows_classes` properties are provided, this address will be used in the linker script. + +The following properties are optional and only take effect if `ld_use_symbolic_vram_addresses` is enabled: + +- `vram_symbol`: The name of the symbol to use in the linker script for this class. + +- `follows_classes`: A list of vram class names that this class must come after in memory. If we added `follows_classes: [apples, bananas]` to our above vram_class, this would make all `maps` segments start at the end of all `apples` and `bananas` segments. + +The internal linker script symbol name that is chosen for `follows_classes` is the name of the class followed by `_CLASS_VRAM`. You can override this by also specifying `vram_symbol`. 
\ No newline at end of file diff --git a/tools/splat/platforms/n64.py b/tools/splat/platforms/n64.py index ed3ee1c..5836b66 100644 --- a/tools/splat/platforms/n64.py +++ b/tools/splat/platforms/n64.py @@ -1,4 +1,4 @@ -from util import compiler, log, options, palettes, symbols +from util import options, symbols def init(target_bytes: bytes): @@ -6,5 +6,7 @@ def init(target_bytes: bytes): if options.opts.libultra_symbols: symbols.spim_context.globalSegment.fillLibultraSymbols() + if options.opts.ique_symbols: + symbols.spim_context.globalSegment.fillIQueSymbols() if options.opts.hardware_regs: symbols.spim_context.globalSegment.fillHardwareRegs(True) diff --git a/tools/splat/platforms/ps2.py b/tools/splat/platforms/ps2.py new file mode 100644 index 0000000..09da192 --- /dev/null +++ b/tools/splat/platforms/ps2.py @@ -0,0 +1,2 @@ +def init(target_bytes: bytes): + pass diff --git a/tools/splat/requirements.txt b/tools/splat/requirements.txt index 429fdea..af8287d 100644 --- a/tools/splat/requirements.txt +++ b/tools/splat/requirements.txt @@ -4,7 +4,7 @@ tqdm intervaltree colorama # This value should be keep in sync with the version listed on disassembler/spimdisasm_disassembler.py -spimdisasm>=1.16.0 -rabbitizer>=1.7.0 +spimdisasm>=1.18.0 +rabbitizer>=1.8.0 pygfxd n64img>=0.1.4 diff --git a/tools/splat/run_tests.sh b/tools/splat/run_tests.sh index ca362d7..e57f8b3 100644 --- a/tools/splat/run_tests.sh +++ b/tools/splat/run_tests.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # docker build container docker build --tag splat-build:latest . 
&& \ # get compilers and tools diff --git a/tools/splat/segtypes/common/bin.py b/tools/splat/segtypes/common/bin.py index b72a201..b6b64a5 100644 --- a/tools/splat/segtypes/common/bin.py +++ b/tools/splat/segtypes/common/bin.py @@ -7,6 +7,10 @@ class CommonSegBin(CommonSegment): + @staticmethod + def is_data() -> bool: + return True + def out_path(self) -> Optional[Path]: return options.opts.asset_path / self.dir / f"{self.name}.bin" diff --git a/tools/splat/segtypes/common/bss.py b/tools/splat/segtypes/common/bss.py index 4bc74d1..b4b9d81 100644 --- a/tools/splat/segtypes/common/bss.py +++ b/tools/splat/segtypes/common/bss.py @@ -11,6 +11,8 @@ def get_linker_section(self) -> str: @staticmethod def is_noload() -> bool: + if not options.opts.ld_bss_is_noload: + return False return True def disassemble_data(self, rom_bytes: bytes): diff --git a/tools/splat/segtypes/common/c.py b/tools/splat/segtypes/common/c.py index f23e9cd..4e09b64 100644 --- a/tools/splat/segtypes/common/c.py +++ b/tools/splat/segtypes/common/c.py @@ -1,8 +1,9 @@ import os import re from pathlib import Path -from typing import Optional, Set, List, Tuple +from typing import Optional, Set, List +import rabbitizer import spimdisasm from util import log, options, symbols @@ -10,27 +11,30 @@ from util.symbols import Symbol from segtypes.common.codesubsegment import CommonSegCodeSubsegment -from segtypes.common.group import CommonSegGroup from segtypes.common.rodata import CommonSegRodata -class CommonSegC(CommonSegCodeSubsegment): - defined_funcs: Set[str] = set() - global_asm_funcs: Set[str] = set() - global_asm_rodata_syms: Set[str] = set() +STRIP_C_COMMENTS_RE = re.compile( + r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', + re.DOTALL | re.MULTILINE, +) + +C_FUNC_RE = re.compile( + r"^(?:static\s+)?[^\s]+\s+([^\s(]+)\(([^;)]*)\)[^;]+?{", re.MULTILINE +) + +C_GLOBAL_ASM_IDO_RE = re.compile(r"GLOBAL_ASM\(\"(\w+\/)*(\w+)\.s\"\)", re.MULTILINE) - STRIP_C_COMMENTS_RE = re.compile( - 
r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', - re.DOTALL | re.MULTILINE, - ) - C_FUNC_RE = re.compile( - r"^(?:static\s+)?[^\s]+\s+([^\s(]+)\(([^;)]*)\)[^;]+?{", re.MULTILINE - ) +class CommonSegC(CommonSegCodeSubsegment): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + self.defined_funcs: Set[str] = set() + self.global_asm_funcs: Set[str] = set() + self.global_asm_rodata_syms: Set[str] = set() - C_GLOBAL_ASM_IDO_RE = re.compile( - r"GLOBAL_ASM\(\"(\w+\/)*(\w+)\.s\"\)", re.MULTILINE - ) + self.file_extension = "c" @staticmethod def strip_c_comments(text): @@ -41,14 +45,14 @@ def replacer(match): else: return s - return re.sub(CommonSegC.STRIP_C_COMMENTS_RE, replacer, text) + return re.sub(STRIP_C_COMMENTS_RE, replacer, text) @staticmethod def get_funcs_defined_in_c(c_file: Path) -> Set[str]: with open(c_file, "r") as f: text = CommonSegC.strip_c_comments(f.read()) - return set(m.group(1) for m in CommonSegC.C_FUNC_RE.finditer(text)) + return set(m.group(1) for m in C_FUNC_RE.finditer(text)) @staticmethod def find_all_instances(string: str, sub: str): @@ -104,9 +108,7 @@ def get_global_asm_funcs(c_file: Path) -> Set[str]: if options.opts.compiler in [GCC, SN64]: return set(CommonSegC.find_include_asm(text)) else: - return set( - m.group(2) for m in CommonSegC.C_GLOBAL_ASM_IDO_RE.finditer(text) - ) + return set(m.group(2) for m in C_GLOBAL_ASM_IDO_RE.finditer(text)) @staticmethod def get_global_asm_rodata_syms(c_file: Path) -> Set[str]: @@ -115,16 +117,14 @@ def get_global_asm_rodata_syms(c_file: Path) -> Set[str]: if options.opts.compiler in [GCC, SN64]: return set(CommonSegC.find_include_rodata(text)) else: - return set( - m.group(2) for m in CommonSegC.C_GLOBAL_ASM_IDO_RE.finditer(text) - ) + return set(m.group(2) for m in C_GLOBAL_ASM_IDO_RE.finditer(text)) @staticmethod def is_text() -> bool: return True def out_path(self) -> Optional[Path]: - return options.opts.src_path / self.dir / f"{self.name}.c" + return 
options.opts.src_path / self.dir / f"{self.name}.{self.file_extension}" def scan(self, rom_bytes: bytes): if ( @@ -156,6 +156,7 @@ def split(self, rom_bytes: bytes): self.spim_section.get_section(), spimdisasm.mips.sections.SectionText ), f"{self.name}, rom_start:{self.rom_start}, rom_end:{self.rom_end}" + rodata_section_type = "" rodata_spim_segment: Optional[spimdisasm.mips.sections.SectionRodata] = None if ( options.opts.migrate_rodata_to_functions @@ -164,6 +165,9 @@ def split(self, rom_bytes: bytes): assert isinstance( self.rodata_sibling, CommonSegRodata ), self.rodata_sibling.type + rodata_section_type = ( + self.rodata_sibling.get_linker_section_linksection() + ) if self.rodata_sibling.spim_section is not None: assert isinstance( self.rodata_sibling.spim_section.get_section(), @@ -193,6 +197,8 @@ def split(self, rom_bytes: bytes): # Produce the asm files for functions for entry in symbols_entries: + entry.sectionText = self.get_linker_section_linksection() + entry.sectionRodata = rodata_section_type if entry.function is not None: if ( entry.function.getName() in self.global_asm_funcs @@ -276,7 +282,13 @@ def create_c_asm_file( options.opts.c_newline.join(options.opts.asm_inc_header.split("\n")) ) + named_registers_opt = rabbitizer.config.regNames_namedRegisters + + rabbitizer.config.regNames_namedRegisters = ( + options.opts.named_regs_for_c_funcs + ) func_rodata_entry.writeToFile(f) + rabbitizer.config.regNames_namedRegisters = named_registers_opt if func_rodata_entry.function is not None: self.check_gaps_in_migrated_rodata( @@ -405,7 +417,10 @@ def create_asm_dependencies_file( dep_path = build_path / c_path.with_suffix(".asmproc.d") dep_path.parent.mkdir(parents=True, exist_ok=True) with dep_path.open("w") as f: - o_path = build_path / c_path.with_suffix(".o") + if options.opts.use_o_as_suffix: + o_path = build_path / c_path.with_suffix(".o") + else: + o_path = build_path / c_path.with_suffix(c_path.suffix + ".o") f.write(f"{o_path}:") depend_list = [] 
for entry in symbols_entries: diff --git a/tools/splat/segtypes/common/code.py b/tools/splat/segtypes/common/code.py index eda9936..3c2cf25 100644 --- a/tools/splat/segtypes/common/code.py +++ b/tools/splat/segtypes/common/code.py @@ -7,9 +7,7 @@ from util.symbols import Symbol from segtypes.common.group import CommonSegGroup -from segtypes.segment import Segment - -CODE_TYPES = ["c", "asm", "hasm"] +from segtypes.segment import Segment, parse_segment_align def dotless_type(type: str) -> str: @@ -44,7 +42,10 @@ def __init__( self.jtbl_glabels_to_add: Set[int] = set() self.jumptables: Dict[int, Tuple[int, int]] = {} self.rodata_syms: Dict[int, List[Symbol]] = {} - self.align = 0x10 + + self.align = parse_segment_align(yaml) + if self.align is None: + self.align = 0x10 @property def needs_symbols(self) -> bool: @@ -160,6 +161,7 @@ def parse_subsegments(self, segment_yaml) -> List[Segment]: base_segments: OrderedDict[str, Segment] = OrderedDict() ret = [] prev_start: Optional[int] = -1 + prev_vram: Optional[int] = -1 inserts: OrderedDict[ str, int ] = ( @@ -197,14 +199,16 @@ def parse_subsegments(self, segment_yaml) -> List[Segment]: else: if cur_section != typ: # We're changing sections - if found_sections[cur_section].has_end(): - log.error( - f"Section {cur_section} end encountered but was already ended earlier!" - ) - if found_sections[typ].has_start(): - log.error( - f"Section {typ} start encounted but has already started earlier!" - ) + + if options.opts.check_consecutive_segment_types: + if found_sections[cur_section].has_end(): + log.error( + f"Section {cur_section} end encountered but was already ended earlier!" + ) + if found_sections[typ].has_start(): + log.error( + f"Section {typ} start encounted but has already started earlier!" 
+ ) # End the current section found_sections[cur_section].end = i @@ -269,7 +273,7 @@ def parse_subsegments(self, segment_yaml) -> List[Segment]: if start is not None and prev_start is not None and start < prev_start: log.error( - f"Error: Group segment {self.name} contains subsegments which are out of ascending rom order (0x{prev_start:X} followed by 0x{start:X})" + f"Error: Group segment '{self.name}' contains subsegments which are out of ascending rom order (0x{prev_start:X} followed by 0x{start:X})" ) vram = None @@ -287,6 +291,16 @@ def parse_subsegments(self, segment_yaml) -> List[Segment]: segment_class, subsegment_yaml, start, end, vram ) + if ( + segment.vram_start is not None + and prev_vram is not None + and segment.vram_start < prev_vram + ): + log.error( + f"Error: Group segment '{self.name}' contains subsegments which are out of ascending vram order (0x{prev_vram:X} followed by 0x{segment.vram_start:X}).\n" + + f"Detected when processing file '{segment.name}' of type '{segment.type}'" + ) + segment.sibling = base_segments.get(segment.name, None) if segment.sibling is not None: @@ -300,6 +314,16 @@ def parse_subsegments(self, segment_yaml) -> List[Segment]: segment.rodata_sibling = segment.sibling segment.sibling.sibling = segment + if self.section_order.index(".text") < self.section_order.index( + ".data" + ): + if segment.is_data(): + segment.sibling.data_sibling = segment + else: + if segment.is_text() and segment.sibling.is_data(): + segment.data_sibling = segment.sibling + segment.sibling.sibling = segment + segment.parent = self if segment.special_vram_segment: self.special_vram_segment = True @@ -324,6 +348,7 @@ def parse_subsegments(self, segment_yaml) -> List[Segment]: base_segments[segment.name] = segment prev_start = start + prev_vram = segment.vram_start if end is not None: last_rom_end = end @@ -385,10 +410,10 @@ def parse_subsegments(self, segment_yaml) -> List[Segment]: def scan(self, rom_bytes): # Always scan code first for sub in 
self.subsegments: - if sub.type in CODE_TYPES and sub.should_scan(): + if sub.is_text() and sub.should_scan(): sub.scan(rom_bytes) # Scan everyone else for sub in self.subsegments: - if sub.type not in CODE_TYPES and sub.should_scan(): + if not sub.is_text() and sub.should_scan(): sub.scan(rom_bytes) diff --git a/tools/splat/segtypes/common/codesubsegment.py b/tools/splat/segtypes/common/codesubsegment.py index 9731c11..a70a38f 100644 --- a/tools/splat/segtypes/common/codesubsegment.py +++ b/tools/splat/segtypes/common/codesubsegment.py @@ -130,6 +130,16 @@ def process_insns( # Main loop for i, insn in enumerate(func_spim.instructions): + if options.opts.platform == "ps2": + from segtypes.common.c import CommonSegC + from rabbitizer import TrinaryValue + + if isinstance(self, CommonSegC): + insn.flag_r5900UseDollar = TrinaryValue.FALSE + else: + insn.flag_r5900UseDollar = TrinaryValue.TRUE + insn.flag_r5900DisasmAsData = TrinaryValue.TRUE + instr_offset = i * 4 # update pointer accesses from this function @@ -148,7 +158,7 @@ def process_insns( self.parent.check_rodata_sym(func_spim.vram, sym) def print_file_boundaries(self): - if not options.opts.find_file_boundaries or not self.spim_section: + if not self.show_file_boundaries or not self.spim_section: return assert isinstance(self.rom_start, int) diff --git a/tools/splat/segtypes/common/cpp.py b/tools/splat/segtypes/common/cpp.py new file mode 100644 index 0000000..c163059 --- /dev/null +++ b/tools/splat/segtypes/common/cpp.py @@ -0,0 +1,8 @@ +from segtypes.common.c import CommonSegC + + +class CommonSegCpp(CommonSegC): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + self.file_extension = "cpp" diff --git a/tools/splat/segtypes/common/data.py b/tools/splat/segtypes/common/data.py index 81c648e..c96c787 100644 --- a/tools/splat/segtypes/common/data.py +++ b/tools/splat/segtypes/common/data.py @@ -10,6 +10,10 @@ class CommonSegData(CommonSegCodeSubsegment, CommonSegGroup): + 
@staticmethod + def is_data() -> bool: + return True + def asm_out_path(self) -> Path: typ = self.type if typ.startswith("."): diff --git a/tools/splat/segtypes/common/databin.py b/tools/splat/segtypes/common/databin.py new file mode 100644 index 0000000..6162886 --- /dev/null +++ b/tools/splat/segtypes/common/databin.py @@ -0,0 +1,45 @@ +from pathlib import Path +from typing import Optional + +from util import log, options + +from segtypes.common.textbin import CommonSegTextbin + + +class CommonSegDatabin(CommonSegTextbin): + @staticmethod + def is_text() -> bool: + return False + + @staticmethod + def is_data() -> bool: + return True + + def get_linker_section(self) -> str: + return ".data" + + def get_section_flags(self) -> Optional[str]: + return "wa" + + def split(self, rom_bytes): + if self.rom_end is None: + log.error( + f"segment {self.name} needs to know where it ends; add a position marker [0xDEADBEEF] after it" + ) + + self.write_bin(rom_bytes) + + if self.sibling is None: + # textbin will write the incbin instead + + s_path = self.out_path() + assert s_path is not None + s_path.parent.mkdir(parents=True, exist_ok=True) + + with s_path.open("w") as f: + f.write('.include "macro.inc"\n\n') + preamble = options.opts.generated_s_preamble + if preamble: + f.write(preamble + "\n") + + self.write_asm_contents(rom_bytes, f) diff --git a/tools/splat/segtypes/common/decompressor.py b/tools/splat/segtypes/common/decompressor.py index 3bdc5ea..152521e 100644 --- a/tools/splat/segtypes/common/decompressor.py +++ b/tools/splat/segtypes/common/decompressor.py @@ -43,6 +43,8 @@ def get_linker_entries(self): options.opts.asset_path / self.dir / f"{self.name}.{self.compression_type}", - self.get_linker_section(), + self.get_linker_section_order(), + self.get_linker_section_linksection(), + self.is_noload(), ) ] diff --git a/tools/splat/segtypes/common/header.py b/tools/splat/segtypes/common/header.py index b03b0ae..55bfc25 100644 --- 
a/tools/splat/segtypes/common/header.py +++ b/tools/splat/segtypes/common/header.py @@ -6,6 +6,10 @@ class CommonSegHeader(CommonSegment): + @staticmethod + def is_data() -> bool: + return True + def should_split(self): return self.extract and options.opts.is_mode_active("code") diff --git a/tools/splat/segtypes/common/lib.py b/tools/splat/segtypes/common/lib.py index 51371c1..17d4c12 100644 --- a/tools/splat/segtypes/common/lib.py +++ b/tools/splat/segtypes/common/lib.py @@ -50,4 +50,13 @@ def get_linker_entries(self): object_path = Path(f"{path}.a:{self.object}.o") - return [LinkerEntry(self, [path], object_path, self.get_linker_section())] + return [ + LinkerEntry( + self, + [path], + object_path, + self.get_linker_section_order(), + self.get_linker_section_linksection(), + self.is_noload(), + ) + ] diff --git a/tools/splat/segtypes/common/rodata.py b/tools/splat/segtypes/common/rodata.py index bac0826..599aacb 100644 --- a/tools/splat/segtypes/common/rodata.py +++ b/tools/splat/segtypes/common/rodata.py @@ -12,6 +12,10 @@ class CommonSegRodata(CommonSegData): def get_linker_section(self) -> str: return ".rodata" + @staticmethod + def is_data() -> bool: + return False + @staticmethod def is_rodata() -> bool: return True diff --git a/tools/splat/segtypes/common/rodatabin.py b/tools/splat/segtypes/common/rodatabin.py new file mode 100644 index 0000000..7cd5916 --- /dev/null +++ b/tools/splat/segtypes/common/rodatabin.py @@ -0,0 +1,45 @@ +from pathlib import Path +from typing import Optional + +from util import log, options + +from segtypes.common.textbin import CommonSegTextbin + + +class CommonSegRodatabin(CommonSegTextbin): + @staticmethod + def is_text() -> bool: + return False + + @staticmethod + def is_rodata() -> bool: + return True + + def get_linker_section(self) -> str: + return ".rodata" + + def get_section_flags(self) -> Optional[str]: + return "a" + + def split(self, rom_bytes): + if self.rom_end is None: + log.error( + f"segment {self.name} needs to 
know where it ends; add a position marker [0xDEADBEEF] after it" + ) + + self.write_bin(rom_bytes) + + if self.sibling is None: + # textbin will write the incbin instead + + s_path = self.out_path() + assert s_path is not None + s_path.parent.mkdir(parents=True, exist_ok=True) + + with s_path.open("w") as f: + f.write('.include "macro.inc"\n\n') + preamble = options.opts.generated_s_preamble + if preamble: + f.write(preamble + "\n") + + self.write_asm_contents(rom_bytes, f) diff --git a/tools/splat/segtypes/common/textbin.py b/tools/splat/segtypes/common/textbin.py new file mode 100644 index 0000000..5edf7aa --- /dev/null +++ b/tools/splat/segtypes/common/textbin.py @@ -0,0 +1,114 @@ +from pathlib import Path +from typing import Optional, TextIO + +from util import log, options + +from segtypes.common.segment import CommonSegment + + +class CommonSegTextbin(CommonSegment): + @staticmethod + def is_text() -> bool: + return True + + def get_linker_section(self) -> str: + return ".text" + + def get_section_flags(self) -> Optional[str]: + return "ax" + + def out_path(self) -> Optional[Path]: + return options.opts.data_path / self.dir / f"{self.name}.s" + + def bin_path(self) -> Path: + typ = self.type + if typ.startswith("."): + typ = typ[1:] + + return options.opts.asset_path / self.dir / f"{self.name}.{typ}.bin" + + def write_bin(self, rom_bytes): + binpath = self.bin_path() + binpath.parent.mkdir(parents=True, exist_ok=True) + + assert isinstance(self.rom_start, int) + assert isinstance(self.rom_end, int) + + binpath.write_bytes(rom_bytes[self.rom_start : self.rom_end]) + + self.log(f"Wrote {self.name} to {binpath}") + + def write_asm_contents(self, rom_bytes, f: TextIO): + binpath = self.bin_path() + asm_label = options.opts.asm_function_macro + if not self.is_text(): + asm_label = options.opts.asm_data_macro + + assert isinstance(self.rom_start, int) + assert isinstance(self.rom_end, int) + + f.write(f".section {self.get_linker_section()}") + section_flags = 
self.get_section_flags() + if section_flags: + f.write(f', "{section_flags}"') + f.write("\n\n") + + # Check if there's a symbol at this address + sym = None + vram = self.rom_to_ram(self.rom_start) + if vram is not None: + sym = self.get_symbol(vram, in_segment=True) + + if sym is not None: + f.write(f"{asm_label} {sym.name}\n") + sym.defined = True + + f.write(f'.incbin "{binpath}"\n') + + if sym is not None: + if self.is_text() and options.opts.asm_end_label != "": + f.write(f"{options.opts.asm_end_label} {sym.name}\n") + + if sym.given_name_end is not None: + if ( + sym.given_size is None + or sym.given_size == self.rom_end - self.rom_start + ): + f.write(f"{asm_label} {sym.given_name_end}\n") + + def split(self, rom_bytes): + if self.rom_end is None: + log.error( + f"segment {self.name} needs to know where it ends; add a position marker [0xDEADBEEF] after it" + ) + + self.write_bin(rom_bytes) + + s_path = self.out_path() + assert s_path is not None + s_path.parent.mkdir(parents=True, exist_ok=True) + + with s_path.open("w") as f: + f.write('.include "macro.inc"\n\n') + preamble = options.opts.generated_s_preamble + if preamble: + f.write(preamble + "\n") + + self.write_asm_contents(rom_bytes, f) + + # We check against CommonSegTextbin instead of the specific type because the other incbins inherit from this class + if isinstance(self.data_sibling, CommonSegTextbin): + f.write("\n") + self.data_sibling.write_asm_contents(rom_bytes, f) + + if isinstance(self.rodata_sibling, CommonSegTextbin): + f.write("\n") + self.rodata_sibling.write_asm_contents(rom_bytes, f) + + def should_scan(self) -> bool: + return self.rom_start is not None and self.rom_end is not None + + def should_split(self) -> bool: + return ( + self.extract and self.should_scan() + ) # only split if the segment was scanned first diff --git a/tools/splat/segtypes/linker_entry.py b/tools/splat/segtypes/linker_entry.py index fa49e57..d845012 100644 --- a/tools/splat/segtypes/linker_entry.py +++ 
b/tools/splat/segtypes/linker_entry.py @@ -1,10 +1,8 @@ import os import re -from dataclasses import dataclass from functools import lru_cache from pathlib import Path from typing import Dict, List, OrderedDict, Set, Tuple, Union -from segtypes.n64.palette import N64SegPalette from util import options @@ -57,17 +55,6 @@ def write_file_if_different(path: Path, new_content: str): f.write(new_content) -def segment_cname(segment: Segment) -> str: - name = segment.name - if segment.parent: - name = segment.parent.name + "_" + name - - if isinstance(segment, N64SegPalette): - name += "_pal" - - return to_cname(name) - - def get_segment_rom_start(cname: str) -> str: if options.opts.segment_symbols_style == "makerom": return f"_{cname}SegmentRomStart" @@ -124,30 +111,26 @@ def get_segment_section_size(segment_name: str, section_type: str) -> str: def get_segment_vram_end_symbol_name(segment: Segment) -> str: - return get_segment_vram_end(segment_cname(segment)) - - -@dataclass -class LinkerSection: - name: str - started: bool = False - ended: bool = False - - @property - def section_type(self) -> str: - if self.name == ".rdata": - return ".rodata" - return self.name + return get_segment_vram_end(segment.get_cname()) class LinkerEntry: def __init__( - self, segment: Segment, src_paths: List[Path], object_path: Path, section: str + self, + segment: Segment, + src_paths: List[Path], + object_path: Path, + section_order: str, + section_link: str, + noload: bool = False, ): self.segment = segment self.src_paths = [clean_up_path(p) for p in src_paths] - self.section = section - if self.section == "linker" or self.section == "linker_offset": + self.section_order = section_order + self.section_link = section_link + self.noload = noload + self.bss_contains_common = segment.bss_contains_common + if self.section_link == "linker" or self.section_link == "linker_offset": self.object_path = None elif self.segment.type == "lib": self.object_path = object_path @@ -155,30 +138,44 @@ def 
__init__( self.object_path = path_to_object_path(object_path) @property - def section_type(self) -> str: - if self.section == ".rdata": + def section_order_type(self) -> str: + if self.section_order == ".rdata": + return ".rodata" + else: + return self.section_order + + @property + def section_link_type(self) -> str: + if self.section_link == ".rdata": return ".rodata" else: - return self.section + return self.section_link class LinkerWriter: - def __init__(self): + def __init__(self, is_partial: bool = False): self.linker_discard_section: bool = options.opts.ld_discard_section + self.sections_allowlist: List[str] = options.opts.ld_sections_allowlist + self.sections_denylist: List[str] = options.opts.ld_sections_denylist # Used to store all the linker entries - build tools may want this information self.entries: List[LinkerEntry] = [] + self.dependencies_entries: List[LinkerEntry] = [] self.buffer: List[str] = [] self.header_symbols: Set[str] = set() + self.is_partial: bool = is_partial + self._indent_level = 0 self._writeln("SECTIONS") self._begin_block() - self._writeln(f"__romPos = {options.opts.ld_rom_start};") - if options.opts.gp is not None: - self._writeln("_gp = " + f"0x{options.opts.gp:X};") + if not self.is_partial: + self._writeln(f"__romPos = {options.opts.ld_rom_start};") + + if options.opts.gp is not None: + self._writeln("_gp = " + f"0x{options.opts.gp:X};") # Write a series of statements which compute a symbol that represents the highest address among a list of segments' end addresses def write_max_vram_end_sym(self, symbol: str, overlays: List[Segment]): @@ -196,145 +193,251 @@ def write_max_vram_end_sym(self, symbol: str, overlays: List[Segment]): def add(self, segment: Segment, max_vram_syms: List[Tuple[str, List[Segment]]]): entries = segment.get_linker_entries() self.entries.extend(entries) + self.dependencies_entries.extend(entries) - seg_name = segment_cname(segment) + seg_name = segment.get_cname() for sym, segs in max_vram_syms: 
self.write_max_vram_end_sym(sym, segs) - section_labels: OrderedDict[str, LinkerSection] = OrderedDict( - { - l: LinkerSection(l) - for l in options.opts.section_order - if l in options.opts.ld_section_labels - } - ) + if options.opts.ld_legacy_generation: + self.add_legacy(segment, entries) + return - # Start the first linker section + section_entries: OrderedDict[str, List[LinkerEntry]] = OrderedDict() + for l in segment.section_order: + if l in options.opts.ld_section_labels: + section_entries[l] = [] + + # Add all entries to section_entries + prev_entry = None + for entry in entries: + if entry.section_order_type in section_entries: + # Search for the very first section type + # This is required in case the very first entry is a type that's not listed on ld_section_labels (like linker_offset) because it would be dropped + prev_entry = entry.section_order_type + break + + any_load = False + any_noload = False + for entry in entries: + if entry.section_order_type in section_entries: + section_entries[entry.section_order_type].append(entry) + elif prev_entry is not None: + # If this section is not present in section_order or ld_section_labels then pretend it is part of the last seen section, mainly for handling linker_offset + section_entries[prev_entry].append(entry) + any_load = any_load or not entry.noload + any_noload = any_noload or entry.noload + prev_entry = entry.section_order_type seg_rom_start = get_segment_rom_start(seg_name) self._write_symbol(seg_rom_start, "__romPos") - if entries[0].section_type == ".bss": - self._begin_bss_segment(segment, is_first=True) - seg_bss_start = get_segment_section_start(seg_name, ".bss") - self._write_symbol(seg_bss_start, ".") - if ".bss" in section_labels: - section_labels[".bss"].started = True - else: - self._begin_segment(segment) + is_first = True + if any_load: + # Only emit normal segment if there's at least one normal entry + self._write_segment_sections( + segment, seg_name, section_entries, noload=False, 
is_first=is_first + ) + is_first = False - last_seen_sections: Dict[LinkerEntry, str] = {} + if any_noload: + # Only emit NOLOAD segment if there is at least one noload entry + self._write_segment_sections( + segment, seg_name, section_entries, noload=True, is_first=is_first + ) + is_first = False + + self._end_segment(segment, all_bss=not any_load) + + def add_legacy(self, segment: Segment, entries: List[LinkerEntry]): + seg_name = segment.get_cname() + + # To keep track which sections has been started + started_sections: Dict[str, bool] = { + l: False for l in options.opts.ld_section_labels + } # Find where sections are last seen + last_seen_sections: Dict[LinkerEntry, str] = {} for entry in reversed(entries): if ( - entry.section_type in section_labels.keys() - and entry.section_type not in last_seen_sections.values() + entry.section_order_type in options.opts.ld_section_labels + and entry.section_order_type not in last_seen_sections.values() ): - last_seen_sections[entry] = entry.section_type + last_seen_sections[entry] = entry.section_order_type + + seg_rom_start = get_segment_rom_start(seg_name) + self._write_symbol(seg_rom_start, "__romPos") + + self._begin_segment(segment, seg_name, noload=False, is_first=True) - cur_section = None - prev_section = None + i = 0 for entry in entries: - entering_bss = False - leaving_bss = False - cur_section = entry.section_type + if entry.noload: + break - if cur_section == "linker_offset": - self._write_symbol(f"{segment_cname(entry.segment)}_OFFSET", ".") - continue + started = started_sections.get(entry.section_order_type, True) + if not started: + self._begin_section(seg_name, entry.section_order_type) + started_sections[entry.section_order_type] = True - for i, section in enumerate(section_labels.values()): - # If we haven't seen this section yet - if not section.started and section.section_type == entry.section_type: - if prev_section == ".bss": - leaving_bss = True - elif cur_section == ".bss": - entering_bss = True 
- - if not (entering_bss or leaving_bss): - # Don't write a START symbol if we are about to end the section - section_start = get_segment_section_start( - seg_name, entry.section_type - ) - self._write_symbol(section_start, ".") - section_labels[entry.section_type].started = True + self._write_linker_entry(entry) - if ( - entry.object_path - and cur_section == ".data" - and entry.segment.type != "lib" - ): - path_cname = re.sub( - r"[^0-9a-zA-Z_]", - "_", - str(entry.segment.dir / entry.segment.name) - + ".".join(entry.object_path.suffixes[:-1]), - ) - self._write_symbol(path_cname, ".") + if entry in last_seen_sections: + self._end_section(seg_name, entry.section_order_type, segment) - wildcard = "*" if options.opts.ld_wildcard_sections else "" + i += 1 + + if any(entry.noload for entry in entries): + self._end_block() + + self._begin_segment(segment, seg_name, noload=True, is_first=False) + + for entry in entries[i:]: + started = started_sections.get(entry.section_order_type, True) + if not started: + self._begin_section(seg_name, entry.section_order_type) + started_sections[entry.section_order_type] = True + + self._write_linker_entry(entry) + + if entry in last_seen_sections: + self._end_section(seg_name, entry.section_order_type, segment) + + self._end_segment(segment, all_bss=False) + + def add_referenced_partial_segment( + self, segment: Segment, max_vram_syms: List[Tuple[str, List[Segment]]] + ): + entries = segment.get_linker_entries() + self.entries.extend(entries) + + segments_path = options.opts.ld_partial_build_segments_path + assert segments_path is not None + + seg_name = segment.get_cname() - # Create new linker section for BSS - if entering_bss or leaving_bss: - # If this is the last entry of its type, add the END marker for the section we're ending - if ( - entry in last_seen_sections - and section_labels[entry.section_type].started - ): - self._end_section( - seg_name, last_seen_sections[entry], section_labels - ) + for sym, segs in 
max_vram_syms: + self.write_max_vram_end_sym(sym, segs) + + seg_rom_start = get_segment_rom_start(seg_name) + self._write_symbol(seg_rom_start, "__romPos") + any_load = any(not e.noload for e in entries) + is_first = True + if any_load: + # Only emit normal segment if there's at least one normal entry + + self._begin_segment(segment, seg_name, noload=False, is_first=is_first) + + for l in segment.section_order: + if l not in options.opts.ld_section_labels: + continue + if l == ".bss": + continue + + entry = LinkerEntry( + segment, [], segments_path / f"{seg_name}.o", l, l, noload=False + ) + self.dependencies_entries.append(entry) + self._write_linker_entry(entry) + is_first = False + + if any(e.noload for e in entries): + # Only emit NOLOAD segment if there is at least one noload entry + + if not is_first: self._end_block() - if entering_bss: - self._begin_bss_segment(segment) - else: - self._begin_segment(segment) + self._begin_segment(segment, seg_name, noload=True, is_first=is_first) + + # Check if any section has the bss_contains_common option + bss_contains_common = False + for entry in entries: + if entry.segment.bss_contains_common: + bss_contains_common = True + break + + entry = LinkerEntry( + segment, + [], + segments_path / f"{seg_name}.o", + ".bss", + ".bss", + noload=True, + ) + entry.bss_contains_common = bss_contains_common + self.dependencies_entries.append(entry) + self._write_linker_entry(entry) - section_start = get_segment_section_start(seg_name, entry.section_type) - self._write_symbol(section_start, ".") - section_labels[cur_section].started = True + self._end_segment(segment, all_bss=not any_load) - # Write THIS linker entry - self._writeln(f"{entry.object_path}({entry.section}{wildcard});") - else: - # Write THIS linker entry - if entry.section == ".bss" and entry.segment.bss_contains_common: - self._writeln(f"{entry.object_path}(.bss COMMON .scommon);") - else: - self._writeln(f"{entry.object_path}({entry.section}{wildcard});") + def 
add_partial_segment(self, segment: Segment): + entries = segment.get_linker_entries() + self.entries.extend(entries) + self.dependencies_entries.extend(entries) - # If this is the last entry of its type, add the END marker for the section we're ending - if entry in last_seen_sections: - self._end_section(seg_name, cur_section, section_labels) + seg_name = segment.get_cname() - prev_section = cur_section + section_entries: OrderedDict[str, List[LinkerEntry]] = OrderedDict() + for l in segment.section_order: + if l in options.opts.ld_section_labels: + section_entries[l] = [] - # End all un-ended sections - for section in section_labels.values(): - if section.started and not section.ended: - self._end_section(seg_name, section.name, section_labels) + # Add all entries to section_entries + prev_entry = None + for entry in entries: + if entry.section_order_type in section_entries: + section_entries[entry.section_order_type].append(entry) + elif prev_entry is not None: + # If this section is not present in section_order or ld_section_labels then pretend it is part of the last seen section, mainly for handling linker_offset + section_entries[prev_entry].append(entry) + prev_entry = entry.section_order_type + + for section_name, entries in section_entries.items(): + if len(entries) == 0: + continue + first_entry = entries[0] + + self._begin_partial_segment(section_name, segment, first_entry.noload) + + self._begin_section(seg_name, section_name) + + for entry in entries: + self._write_linker_entry(entry) + + self._end_section(seg_name, section_name, segment) - all_bss = all(e.section == ".bss" for e in entries) - self._end_segment(segment, all_bss) + self._end_partial_segment(section_name) - def save_linker_script(self): - if self.linker_discard_section: + def save_linker_script(self, output_path: Path): + if len(self.sections_allowlist) > 0: + address = " 0" + if self.is_partial: + address = "" + for sect in self.sections_allowlist: + self._writeln(f"{sect}{address} :") + 
self._begin_block() + self._writeln(f"*({sect});") + self._end_block() + + self._writeln("") + + if self.linker_discard_section or len(self.sections_denylist) > 0: self._writeln("/DISCARD/ :") self._begin_block() - self._writeln("*(*);") + for sect in self.sections_denylist: + self._writeln(f"*({sect});") + if self.linker_discard_section: + self._writeln("*(*);") self._end_block() self._end_block() # SECTIONS assert self._indent_level == 0 - write_file_if_different( - options.opts.ld_script_path, "\n".join(self.buffer) + "\n" - ) + write_file_if_different(output_path, "\n".join(self.buffer) + "\n") def save_symbol_header(self): path = options.opts.ld_symbol_header_path @@ -347,11 +450,28 @@ def save_symbol_header(self): "\n" '#include "common.h"\n' "\n" - + "".join(f"extern Addr {symbol};\n" for symbol in self.header_symbols) + + "".join( + f"extern Addr {symbol};\n" for symbol in sorted(self.header_symbols) + ) + "\n" "#endif\n", ) + def save_dependencies_file(self, output_path: Path, target_elf_path: Path): + output = f"{target_elf_path}:" + + for entry in self.dependencies_entries: + if entry.object_path is None: + continue + output += f" \\\n {entry.object_path}" + + output += "\n" + for entry in self.dependencies_entries: + if entry.object_path is None: + continue + output += f"{entry.object_path}:\n" + write_file_if_different(output_path, output) + def _writeln(self, line: str): if len(line) == 0: self.buffer.append(line) @@ -376,32 +496,14 @@ def _write_symbol(self, symbol: str, value: Union[str, int]): self.header_symbols.add(symbol) - def _begin_segment(self, segment: Segment): - if options.opts.ld_use_follows and segment.vram_of_symbol: - vram_str = segment.vram_of_symbol + " " - else: - vram_str = ( - f"0x{segment.vram_start:X} " - if isinstance(segment.vram_start, int) - else "" - ) - - name = segment_cname(segment) - - seg_vram_start = get_segment_vram_start(name) - self._write_symbol(seg_vram_start, f"ADDR(.{name})") - - seg_rom_start = 
get_segment_rom_start(name) - line = f".{name} {vram_str}: AT({seg_rom_start})" - if segment.subalign != None: - line += f" SUBALIGN({segment.subalign})" - - self._writeln(line) - self._begin_block() - - def _begin_bss_segment(self, segment: Segment, is_first: bool = False): - if options.opts.ld_use_follows and segment.vram_of_symbol: - vram_str = segment.vram_of_symbol + " " + def _begin_segment( + self, segment: Segment, seg_name: str, noload: bool, is_first: bool + ): + if ( + options.opts.ld_use_symbolic_vram_addresses + and segment.vram_symbol is not None + ): + vram_str = segment.vram_symbol + " " else: vram_str = ( f"0x{segment.vram_start:X} " @@ -409,27 +511,33 @@ def _begin_bss_segment(self, segment: Segment, is_first: bool = False): else "" ) - name = segment_cname(segment) + "_bss" - - seg_vram_start = get_segment_vram_start(name) - self._write_symbol(seg_vram_start, f"ADDR(.{name})") - + addr_str = " " if is_first: - addr_str = vram_str + "(NOLOAD)" - else: - addr_str = "(NOLOAD)" - - line = f".{name} {addr_str} :" + addr_str += f"{vram_str}" + if noload: + seg_name += "_bss" + addr_str += "(NOLOAD) " + + seg_vram_start = get_segment_vram_start(seg_name) + self._write_symbol(seg_vram_start, f"ADDR(.{seg_name})") + + line = f".{seg_name}{addr_str}:" + if not noload: + seg_rom_start = get_segment_rom_start(seg_name) + line += f" AT({seg_rom_start})" if segment.subalign != None: line += f" SUBALIGN({segment.subalign})" self._writeln(line) self._begin_block() + if segment.ld_fill_value is not None: + self._writeln(f"FILL(0x{segment.ld_fill_value:08X});") + def _end_segment(self, segment: Segment, all_bss=False): self._end_block() - name = segment_cname(segment) + name = segment.get_cname() if not all_bss: self._writeln(f"__romPos += SIZEOF(.{name});") @@ -438,6 +546,8 @@ def _end_segment(self, segment: Segment, all_bss=False): if not options.opts.segment_end_before_align: if segment.align: self._writeln(f"__romPos = ALIGN(__romPos, {segment.align});") + if 
options.opts.ld_align_segment_vram_end: + self._writeln(f". = ALIGN(., {segment.align});") seg_rom_end = get_segment_rom_end(name) self._write_symbol(seg_rom_end, "__romPos") @@ -447,21 +557,91 @@ def _end_segment(self, segment: Segment, all_bss=False): if options.opts.segment_end_before_align: if segment.align: self._writeln(f"__romPos = ALIGN(__romPos, {segment.align});") + if options.opts.ld_align_segment_vram_end: + self._writeln(f". = ALIGN(., {segment.align});") self._writeln("") - def _end_section( - self, - seg_name: str, - cur_section: str, - section_labels: OrderedDict[str, LinkerSection], - ) -> None: + def _begin_partial_segment(self, section_name: str, segment: Segment, noload: bool): + line = f"{section_name}" + if noload: + line += " (NOLOAD)" + line += " :" + if segment.subalign != None: + line += f" SUBALIGN({segment.subalign})" + + self._writeln(line) + self._begin_block() + + def _end_partial_segment(self, section_name: str, all_bss=False): + self._end_block() + + self._writeln("") + + def _begin_section(self, seg_name: str, cur_section: str) -> None: + section_start = get_segment_section_start(seg_name, cur_section) + self._write_symbol(section_start, ".") + + def _end_section(self, seg_name: str, cur_section: str, segment: Segment) -> None: section_start = get_segment_section_start(seg_name, cur_section) section_end = get_segment_section_end(seg_name, cur_section) section_size = get_segment_section_size(seg_name, cur_section) + if options.opts.ld_align_section_vram_end and segment.align is not None: + self._writeln(f". = ALIGN(., {segment.align});") self._write_symbol(section_end, ".") self._write_symbol( section_size, f"ABSOLUTE({section_end} - {section_start})", ) - section_labels[cur_section].ended = True + + def _write_linker_entry(self, entry: LinkerEntry): + if entry.section_link_type == "linker_offset": + self._write_symbol(f"{entry.segment.get_cname()}_OFFSET", ".") + return + + # TODO: option to turn this off? 
+ if ( + entry.object_path + and entry.section_link_type == ".data" + and entry.segment.type != "lib" + ): + path_cname = re.sub( + r"[^0-9a-zA-Z_]", + "_", + str(entry.segment.dir / entry.segment.name) + + ".".join(entry.object_path.suffixes[:-1]), + ) + self._write_symbol(path_cname, ".") + + if entry.noload and entry.bss_contains_common: + self._writeln(f"{entry.object_path}(.bss COMMON .scommon);") + else: + wildcard = "*" if options.opts.ld_wildcard_sections else "" + + self._writeln(f"{entry.object_path}({entry.section_link}{wildcard});") + + def _write_segment_sections( + self, + segment: Segment, + seg_name: str, + section_entries: OrderedDict[str, List[LinkerEntry]], + noload: bool, + is_first: bool, + ): + if not is_first: + self._end_block() + + self._begin_segment(segment, seg_name, noload=noload, is_first=is_first) + + for section_name, entries in section_entries.items(): + if len(entries) == 0: + continue + + first_entry = entries[0] + if first_entry.noload != noload: + continue + + self._begin_section(seg_name, section_name) + for entry in entries: + self._write_linker_entry(entry) + self._end_section(seg_name, section_name, segment) diff --git a/tools/splat/segtypes/n64/linker_offset.py b/tools/splat/segtypes/n64/linker_offset.py index ac4d945..b13a190 100644 --- a/tools/splat/segtypes/n64/linker_offset.py +++ b/tools/splat/segtypes/n64/linker_offset.py @@ -7,4 +7,8 @@ class N64SegLinker_offset(N64Segment): def get_linker_entries(self): from segtypes.linker_entry import LinkerEntry - return [LinkerEntry(self, [], Path(self.name), "linker_offset")] + return [ + LinkerEntry( + self, [], Path(self.name), "linker_offset", "linker_offset", False + ) + ] diff --git a/tools/splat/segtypes/n64/palette.py b/tools/splat/segtypes/n64/palette.py index 553f642..7ede8d0 100644 --- a/tools/splat/segtypes/n64/palette.py +++ b/tools/splat/segtypes/n64/palette.py @@ -6,6 +6,7 @@ from util.color import unpack_color from segtypes.n64.segment import N64Segment +from 
util.symbols import to_cname if TYPE_CHECKING: from segtypes.n64.ci import N64SegCi as Raster @@ -60,6 +61,9 @@ def __init__(self, *args, **kwargs): f"Error: {self.name} (0x{actual_len:X} bytes) is not a valid palette size ({', '.join(hex(s) for s in VALID_SIZES)})\n{hint_msg}" ) + def get_cname(self) -> str: + return super().get_cname() + "_pal" + def split(self, rom_bytes): if self.raster is None: # TODO: output with no raster @@ -94,7 +98,9 @@ def get_linker_entries(self): self, [options.opts.asset_path / self.dir / f"{self.name}.png"], options.opts.asset_path / self.dir / f"{self.name}.pal", - self.get_linker_section(), + self.get_linker_section_order(), + self.get_linker_section_linksection(), + self.is_noload(), ) ] diff --git a/tools/splat/segtypes/ps2/__init__.py b/tools/splat/segtypes/ps2/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tools/splat/segtypes/ps2/asm.py b/tools/splat/segtypes/ps2/asm.py new file mode 100644 index 0000000..53e4a44 --- /dev/null +++ b/tools/splat/segtypes/ps2/asm.py @@ -0,0 +1,23 @@ +from util import options + +from segtypes.common.asm import CommonSegAsm + + +class Ps2SegAsm(CommonSegAsm): + @staticmethod + def get_file_header(): + ret = [] + + ret.append('.include "macro.inc"') + ret.append("") + ret.append(".set noat") + ret.append(".set noreorder") + ret.append("") + preamble = options.opts.generated_s_preamble + if preamble: + ret.append(preamble) + ret.append("") + ret.append('.section .text, "ax"') + ret.append("") + + return ret diff --git a/tools/splat/segtypes/segment.py b/tools/splat/segtypes/segment.py index 6f03426..94e6d2f 100644 --- a/tools/splat/segtypes/segment.py +++ b/tools/splat/segtypes/segment.py @@ -5,7 +5,9 @@ from typing import Dict, List, Optional, Set, Type, TYPE_CHECKING, Union from intervaltree import Interval, IntervalTree +from util import vram_classes +from util.vram_classes import VramClass from util import log, options, symbols from util.symbols import Symbol, to_cname @@ 
-21,6 +23,25 @@ def parse_segment_vram(segment: Union[dict, list]) -> Optional[int]: return None +def parse_segment_vram_symbol(segment: Union[dict, list]) -> Optional[str]: + if isinstance(segment, dict) and "vram_symbol" in segment: + return str(segment["vram_symbol"]) + else: + return None + + +def parse_segment_vram_class(segment: Union[dict, list]) -> Optional[VramClass]: + if isinstance(segment, dict) and "vram_class" in segment: + return vram_classes.resolve(segment["vram_class"]) + return None + + +def parse_segment_follows_vram(segment: Union[dict, list]) -> Optional[str]: + if isinstance(segment, dict): + return segment.get("follows_vram", None) + return None + + def parse_segment_align(segment: Union[dict, list]) -> Optional[int]: if isinstance(segment, dict) and "align" in segment: return int(segment["align"]) @@ -44,18 +65,6 @@ def parse_segment_section_order(segment: Union[dict, list]) -> List[str]: return default -def parse_segment_follows_vram(segment: Union[dict, list]) -> Optional[str]: - if isinstance(segment, dict): - return segment.get("follows_vram", None) - return None - - -def parse_segment_vram_of_symbol(segment: Union[dict, list]) -> Optional[str]: - if isinstance(segment, dict): - return segment.get("vram_of_symbol", segment.get("follows_vram_symbol", None)) - return None - - class Segment: require_unique_name = True @@ -177,6 +186,26 @@ def parse_segment_bss_contains_common(segment: Union[dict, list]) -> bool: else: return False + @staticmethod + def parse_linker_section_order(yaml: Union[dict, list]) -> Optional[str]: + if isinstance(yaml, dict) and "linker_section_order" in yaml: + return str(yaml["linker_section_order"]) + return None + + @staticmethod + def parse_linker_section(yaml: Union[dict, list]) -> Optional[str]: + if isinstance(yaml, dict) and "linker_section" in yaml: + return str(yaml["linker_section"]) + return None + + @staticmethod + def parse_ld_fill_value( + yaml: Union[dict, list], default: Optional[int] + ) -> 
Optional[int]: + if isinstance(yaml, dict) and "ld_fill_value" in yaml: + return yaml["ld_fill_value"] + return default + def __init__( self, rom_start: Optional[int], @@ -198,6 +227,9 @@ def __init__( self.exclusive_ram_id: Optional[str] = None self.given_dir: Path = Path() + # Default to global options. + self.given_find_file_boundaries: Optional[bool] = None + # Symbols known to be in this segment self.given_seg_symbols: Dict[int, List[Symbol]] = {} @@ -206,8 +238,10 @@ def __init__( self.symbol_ranges_rom: IntervalTree = IntervalTree() self.given_section_order: List[str] = options.opts.section_order + + self.vram_class: Optional[VramClass] = None self.given_follows_vram: Optional[str] = None - self.vram_of_symbol: Optional[str] = None + self.given_vram_symbol: Optional[str] = None self.given_symbol_name_format: str = options.opts.symbol_name_format self.given_symbol_name_format_no_rom: str = ( @@ -216,6 +250,7 @@ def __init__( self.parent: Optional[Segment] = None self.sibling: Optional[Segment] = None + self.data_sibling: Optional[Segment] = None self.rodata_sibling: Optional[Segment] = None self.file_path: Optional[Path] = None @@ -236,6 +271,14 @@ def __init__( # For segments which are not in the usual VRAM segment space, like N64's IPL3 which lives in 0xA4... 
self.special_vram_segment: bool = False + self.linker_section_order: Optional[str] = self.parse_linker_section_order(yaml) + self.linker_section: Optional[str] = self.parse_linker_section(yaml) + + # If not defined on the segment then default to the global option + self.ld_fill_value: Optional[int] = self.parse_ld_fill_value( + yaml, options.opts.ld_fill_value + ) + if self.rom_start is not None and self.rom_end is not None: if self.rom_start > self.rom_end: log.error( @@ -252,7 +295,15 @@ def from_yaml( ): type = Segment.parse_segment_type(yaml) name = Segment.parse_segment_name(cls, rom_start, yaml) - vram_start = vram if vram is not None else parse_segment_vram(yaml) + + vram_class = parse_segment_vram_class(yaml) + + if vram is not None: + vram_start = vram + elif vram_class: + vram_start = vram_class.vram + else: + vram_start = parse_segment_vram(yaml) args: List[str] = [] if isinstance(yaml, dict) else yaml[3:] @@ -267,11 +318,14 @@ def from_yaml( ) ret.given_section_order = parse_segment_section_order(yaml) ret.given_subalign = parse_segment_subalign(yaml) + if isinstance(yaml, dict): ret.extract = bool(yaml.get("extract", ret.extract)) ret.exclusive_ram_id = yaml.get("exclusive_ram_id") ret.given_dir = Path(yaml.get("dir", "")) ret.has_linker_entry = bool(yaml.get("linker_entry", True)) + ret.given_find_file_boundaries = yaml.get("find_file_boundaries", None) + ret.given_symbol_name_format = Segment.parse_segment_symbol_name_format(yaml) ret.given_symbol_name_format_no_rom = ( Segment.parse_segment_symbol_name_format_no_rom(yaml) @@ -279,11 +333,20 @@ def from_yaml( ret.file_path = Segment.parse_segment_file_path(yaml) ret.bss_contains_common = Segment.parse_segment_bss_contains_common(yaml) - if not ret.given_follows_vram: - ret.given_follows_vram = parse_segment_follows_vram(yaml) - if not ret.vram_of_symbol: - ret.vram_of_symbol = parse_segment_vram_of_symbol(yaml) + ret.given_follows_vram = parse_segment_follows_vram(yaml) + ret.given_vram_symbol = 
parse_segment_vram_symbol(yaml) + + if vram_class: + ret.vram_class = vram_class + if ret.given_follows_vram: + log.error( + f"Error: segment {ret.name} has both a vram class and a follows_vram property" + ) + if ret.given_vram_symbol: + log.error( + f"Error: segment {ret.name} has both a vram class and a vram_symbol property" + ) if not ret.align: ret.align = parse_segment_align(yaml) @@ -294,6 +357,11 @@ def from_yaml( def is_text() -> bool: return False + # For read-write segments (.data); like data + @staticmethod + def is_data() -> bool: + return False + # For readonly segments (.rodata); like rodata or rdata @staticmethod def is_rodata() -> bool: @@ -319,6 +387,18 @@ def dir(self) -> Path: else: return self.given_dir + @property + def show_file_boundaries(self) -> bool: + # If the segment has explicitly set `find_file_boundaries`, use it. + if self.given_find_file_boundaries is not None: + return self.given_find_file_boundaries + + # If the segment has no parent, use options as default. 
+ if not self.parent: + return options.opts.find_file_boundaries + + return self.parent.show_file_boundaries + @property def symbol_name_format(self) -> str: return self.given_symbol_name_format @@ -334,6 +414,15 @@ def subalign(self) -> int: else: return self.given_subalign + @property + def vram_symbol(self) -> Optional[str]: + if self.vram_class and self.vram_class.vram_symbol: + return self.vram_class.vram_symbol + elif self.given_vram_symbol: + return self.given_vram_symbol + else: + return None + def get_exclusive_ram_id(self) -> Optional[str]: if self.parent: return self.parent.get_exclusive_ram_id() @@ -383,6 +472,13 @@ def rodata_follows_data(self) -> bool: self.section_order.index(".rodata") - self.section_order.index(".data") == 1 ) + def get_cname(self) -> str: + name = self.name + if self.parent: + name = self.parent.name + "_" + name + + return to_cname(name) + def contains_vram(self, vram: int) -> bool: if self.vram_start is not None and self.vram_end is not None: return vram >= self.vram_start and vram < self.vram_end @@ -428,6 +524,24 @@ def cache(self): def get_linker_section(self) -> str: return ".data" + def get_linker_section_order(self) -> str: + """ + Used to override the linking _order_ of a specific section + + Useful for files that may have non-conventional orderings (like putting .data with the other .rodata sections) + """ + if self.linker_section_order is not None: + return self.linker_section_order + return self.get_linker_section() + + def get_linker_section_linksection(self) -> str: + """ + The actual section that will be used when linking + """ + if self.linker_section is not None: + return self.linker_section + return self.get_linker_section() + def get_section_flags(self) -> Optional[str]: """ Allows specifying flags for a section. 
@@ -466,7 +580,16 @@ def get_linker_entries(self) -> "List[LinkerEntry]": path = self.out_path() if path: - return [LinkerEntry(self, [path], path, self.get_linker_section())] + return [ + LinkerEntry( + self, + [path], + path, + self.get_linker_section_order(), + self.get_linker_section_linksection(), + self.is_noload(), + ) + ] else: return [] @@ -553,7 +676,6 @@ def get_symbol( reference: bool = False, search_ranges: bool = False, local_only: bool = False, - dead: bool = True, ) -> Optional[Symbol]: ret: Optional[Symbol] = None rom: Optional[int] = None @@ -584,10 +706,6 @@ def get_symbol( if cands: ret = cands.pop().data - # Reject dead symbols unless we allow them - if not dead and ret and ret.dead: - ret = None - # Create the symbol if it doesn't exist if not ret and create: ret = Symbol(addr, rom=rom, type=type) @@ -623,7 +741,6 @@ def create_symbol( reference: bool = False, search_ranges: bool = False, local_only: bool = False, - dead: bool = True, ) -> Symbol: ret = self.get_symbol( addr, @@ -634,7 +751,6 @@ def create_symbol( reference=reference, search_ranges=search_ranges, local_only=local_only, - dead=dead, ) assert ret is not None diff --git a/tools/splat/split.py b/tools/splat/split.py index 1ca67fd..5cc15c9 100755 --- a/tools/splat/split.py +++ b/tools/splat/split.py @@ -5,9 +5,14 @@ import importlib import pickle from typing import Any, Dict, List, Optional, Set, Tuple, Union +from pathlib import Path from disassembler import disassembler_instance -from util import progress_bar +from util import progress_bar, vram_classes + +# This unused import makes the yaml library faster. 
don't remove +import pylibyaml # pyright: ignore import yaml + from colorama import Fore, Style from intervaltree import Interval, IntervalTree import sys @@ -15,12 +20,11 @@ from segtypes.linker_entry import ( LinkerWriter, get_segment_vram_end_symbol_name, - to_cname, ) from segtypes.segment import Segment from util import log, options, palettes, symbols, relocs -VERSION = "0.16.5" +VERSION = "0.19.6" parser = argparse.ArgumentParser( description="Split a rom given a rom, a config, and output directory" @@ -39,6 +43,11 @@ parser.add_argument( "--stdout-only", help="Print all output to stdout", action="store_true" ) +parser.add_argument( + "--disassemble-all", + help="Disasemble matched functions and migrated data", + action="store_true", +) linker_writer: LinkerWriter config: Dict[str, Any] @@ -123,7 +132,7 @@ def initialize_segments(config_segments: Union[dict, list]) -> List[Segment]: log.error( f"segment '{segment.given_follows_vram}', the 'follows_vram' value for segment '{segment.name}', does not exist" ) - segment.vram_of_symbol = get_segment_vram_end_symbol_name( + segment.given_vram_symbol = get_segment_vram_end_symbol_name( segments_by_name[segment.given_follows_vram] ) @@ -213,6 +222,32 @@ def brief_seg_name(seg: Segment, limit: int, ellipsis="…") -> str: return s +# Return a mapping of vram classes to segments that need to be part of their vram symbol's calculation +def calc_segment_dependences( + all_segments: List[Segment], +) -> Dict[vram_classes.VramClass, List[Segment]]: + # Map vram class names to segments that have that vram class + vram_class_to_segments: Dict[str, List[Segment]] = {} + for seg in all_segments: + if seg.vram_class is not None: + if seg.vram_class.name not in vram_class_to_segments: + vram_class_to_segments[seg.vram_class.name] = [] + vram_class_to_segments[seg.vram_class.name].append(seg) + + # Map vram class names to segments that the vram class follows + vram_class_to_follows_segments: Dict[vram_classes.VramClass, 
List[Segment]] = {} + for vram_class in vram_classes._vram_classes.values(): + if vram_class.follows_classes: + vram_class_to_follows_segments[vram_class] = [] + + for follows_class in vram_class.follows_classes: + if follows_class in vram_class_to_segments: + vram_class_to_follows_segments[ + vram_class + ] += vram_class_to_segments[follows_class] + return vram_class_to_follows_segments + + def main( config_path, modes, @@ -220,6 +255,7 @@ def main( use_cache=True, skip_version_check=False, stdout_only=False, + disassemble_all=False, ): global config @@ -233,7 +269,9 @@ def main( additional_config = yaml.load(f.read(), Loader=yaml.SafeLoader) config = merge_configs(config, additional_config) - options.initialize(config, config_path, modes, verbose) + vram_classes.initialize(config.get("vram_classes")) + + options.initialize(config, config_path, modes, verbose, disassemble_all) disassembler_instance.create_disassembler_instance(options.opts.platform) disassembler_instance.get_instance().check_version(skip_version_check, VERSION) @@ -246,6 +284,8 @@ def main( e_sha1 = config["sha1"].lower() if e_sha1 != sha1: log.error(f"sha1 mismatch: expected {e_sha1}, was {sha1}") + else: + log.write("Warning: no sha1 in config") # Create main output dir options.opts.base_path.mkdir(parents=True, exist_ok=True) @@ -360,47 +400,88 @@ def main( if ( options.opts.is_mode_active("ld") and options.opts.platform != "gc" ): # TODO move this to platform initialization when it gets implemented - # Calculate list of segments for which we need to find the largest, so we can safely place the symbol after it - max_vram_end_syms: Dict[str, List[Segment]] = {} - for sym in symbols.appears_after_overlays_syms: - max_vram_end_syms[sym.name] = [ - seg - for seg in all_segments - if isinstance(seg.vram_start, int) - and seg.vram_start == sym.appears_after_overlays_addr - ] - max_vram_end_sym_names: Set[str] = set(max_vram_end_syms.keys()) + vram_class_dependencies = 
calc_segment_dependences(all_segments) + vram_classes_to_search = set(vram_class_dependencies.keys()) max_vram_end_insertion_points: Dict[ Segment, List[Tuple[str, List[Segment]]] ] = {} - # Find the last segment whose vram_of_symbol is one of the max_vram_end_syms - for segment in reversed(all_segments): - vram_of_sym = segment.vram_of_symbol - if vram_of_sym is not None and vram_of_sym in max_vram_end_sym_names: - if segment not in max_vram_end_insertion_points: - max_vram_end_insertion_points[segment] = [] - max_vram_end_insertion_points[segment].append( - (vram_of_sym, max_vram_end_syms[vram_of_sym]) + for seg in reversed(all_segments): + if seg.vram_class in vram_classes_to_search: + assert seg.vram_class.vram_symbol is not None + if seg not in max_vram_end_insertion_points: + max_vram_end_insertion_points[seg] = [] + max_vram_end_insertion_points[seg].append( + ( + seg.vram_class.vram_symbol, + vram_class_dependencies[seg.vram_class], + ) ) - max_vram_end_sym_names.remove(vram_of_sym) + vram_classes_to_search.remove(seg.vram_class) global linker_writer linker_writer = LinkerWriter() linker_bar = progress_bar.get_progress_bar(all_segments) + partial_linking = options.opts.ld_partial_linking + partial_scripts_path = options.opts.ld_partial_scripts_path + segments_path = options.opts.ld_partial_build_segments_path + if partial_linking: + if partial_scripts_path is None: + log.error( + "Partial linking is enabled but `ld_partial_scripts_path` has not been set" + ) + if options.opts.ld_partial_build_segments_path is None: + log.error( + "Partial linking is enabled but `ld_partial_build_segments_path` has not been set" + ) + for segment in linker_bar: + assert isinstance(segment, Segment) linker_bar.set_description(f"Linker script {brief_seg_name(segment, 20)}") - linker_writer.add(segment, max_vram_end_insertion_points.get(segment, [])) - linker_writer.save_linker_script() + max_vram_syms = max_vram_end_insertion_points.get(segment, []) + + if 
options.opts.ld_partial_linking: + linker_writer.add_referenced_partial_segment(segment, max_vram_syms) + + # Create linker script for segment + sub_linker_writer = LinkerWriter(is_partial=True) + sub_linker_writer.add_partial_segment(segment) + + assert partial_scripts_path is not None + assert segments_path is not None + + seg_name = segment.get_cname() + + sub_linker_writer.save_linker_script( + partial_scripts_path / f"{seg_name}.ld" + ) + if options.opts.ld_dependencies: + sub_linker_writer.save_dependencies_file( + partial_scripts_path / f"{seg_name}.d", + segments_path / f"{seg_name}.o", + ) + else: + linker_writer.add(segment, max_vram_syms) + + linker_writer.save_linker_script(options.opts.ld_script_path) linker_writer.save_symbol_header() + if options.opts.ld_dependencies: + elf_path = options.opts.elf_path + if elf_path is None: + log.error( + "Generation of dependency file for linker script requested but `elf_path` was not provided in the yaml options" + ) + linker_writer.save_dependencies_file( + options.opts.ld_script_path.with_suffix(".d"), elf_path + ) # write elf_sections.txt - this only lists the generated sections in the elf, not subsections # that the elf combines into one section if options.opts.elf_section_list_path: section_list = "" for segment in all_segments: - section_list += "." + to_cname(segment.name) + "\n" + section_list += "." 
+ segment.get_cname() + "\n" with open(options.opts.elf_section_list_path, "w", newline="\n") as f: f.write(section_list) @@ -409,7 +490,7 @@ def main( to_write = [ s for s in symbols.all_symbols - if s.referenced and not s.defined and not s.dead and s.type == "func" + if s.referenced and not s.defined and s.type == "func" ] to_write.sort(key=lambda x: x.vram_start) @@ -424,7 +505,6 @@ def main( for s in symbols.all_symbols if s.referenced and not s.defined - and not s.dead and s.type not in {"func", "label", "jtbl_label"} ] to_write.sort(key=lambda x: x.vram_start) @@ -456,14 +536,12 @@ def main( pickle.dump(cache, f4) if options.opts.dump_symbols and options.opts.is_mode_active("code"): - from pathlib import Path - splat_hidden_folder = Path(".splat/") splat_hidden_folder.mkdir(exist_ok=True) with open(splat_hidden_folder / "splat_symbols.csv", "w") as f: f.write( - "vram_start,given_name,name,type,given_size,size,rom,defined,user_declared,referenced,dead,extract\n" + "vram_start,given_name,name,type,given_size,size,rom,defined,user_declared,referenced,extract\n" ) for s in sorted(symbols.all_symbols, key=lambda x: x.vram_start): f.write(f"{s.vram_start:X},{s.given_name},{s.name},{s.type},") @@ -476,9 +554,7 @@ def main( f.write(f"0x{s.rom:X},") else: f.write("None,") - f.write( - f"{s.defined},{s.user_declared},{s.referenced},{s.dead},{s.extract}\n" - ) + f.write(f"{s.defined},{s.user_declared},{s.referenced},{s.extract}\n") symbols.spim_context.saveContextToFile(splat_hidden_folder / "spim_context.csv") @@ -492,4 +568,5 @@ def main( args.use_cache, args.skip_version_check, args.stdout_only, + args.disassemble_all, ) diff --git a/tools/splat/test.py b/tools/splat/test.py index 3685541..549c112 100644 --- a/tools/splat/test.py +++ b/tools/splat/test.py @@ -47,6 +47,7 @@ def get_right_only_files(self, dcmp, out): self.get_right_only_files(sub_dcmp, out) def test_basic_app(self): + spimdisasm.common.GlobalConfig.ASM_GENERATED_BY = False 
main(["test/basic_app/splat.yaml"], None, None) comparison = filecmp.dircmp("test/basic_app/split", "test/basic_app/expected") @@ -340,8 +341,7 @@ def test_attrs(self): test_init() sym_addrs_lines = [ - "func_1 = 0x100 // type:func size:10 rom:100 segment:test_segment name_end:the_name_end " - "appears_after_overlays_addr:1234" + "func_1 = 0x100; // type:func size:10 rom:100 segment:test_segment name_end:the_name_end " ] all_segments = [ @@ -365,7 +365,6 @@ def test_attrs(self): assert symbols.all_symbols[0].rom == 100 assert symbols.all_symbols[0].segment == all_segments[0] assert symbols.all_symbols[0].given_name_end == "the_name_end" - assert symbols.appears_after_overlays_syms[0] == symbols.all_symbols[0] def test_boolean_attrs(self): import pathlib @@ -374,7 +373,7 @@ def test_boolean_attrs(self): test_init() sym_addrs_lines = [ - "func_1 = 0x100 // dead:True defined:True extract:True force_migration:True force_not_migration:True " + "func_1 = 0x100; // defined:True extract:True force_migration:True force_not_migration:True " "allow_addend:True dont_allow_addend:True" ] @@ -393,7 +392,6 @@ def test_boolean_attrs(self): symbols.handle_sym_addrs( pathlib.Path("/tmp/thing"), sym_addrs_lines, all_segments ) - assert symbols.all_symbols[0].dead == True assert symbols.all_symbols[0].defined == True assert symbols.all_symbols[0].force_migration == True assert symbols.all_symbols[0].force_not_migration == True @@ -407,7 +405,7 @@ def test_ignore(self): symbols.reset_symbols() test_init() - sym_addrs_lines = ["func_1 = 0x100 // ignore:True size:4"] + sym_addrs_lines = ["func_1 = 0x100; // ignore:True size:4"] all_segments = [ Segment( @@ -424,8 +422,8 @@ def test_ignore(self): symbols.handle_sym_addrs( pathlib.Path("/tmp/thing"), sym_addrs_lines, all_segments ) - assert symbols.spim_context.bannedRangedSymbols[0].start == 16 - assert symbols.spim_context.bannedRangedSymbols[0].end == 20 + assert symbols.spim_context.bannedRangedSymbols[0].start == 0x100 + assert 
symbols.spim_context.bannedRangedSymbols[0].end == 0x100 + 4 class InitializeSpimContext(unittest.TestCase): diff --git a/tools/splat/test/basic_app/.gitignore b/tools/splat/test/basic_app/.gitignore new file mode 100644 index 0000000..df3d40d --- /dev/null +++ b/tools/splat/test/basic_app/.gitignore @@ -0,0 +1,3 @@ +build/ +split/ +gcc-2.7.2/ diff --git a/tools/splat/test/basic_app/build.sh b/tools/splat/test/basic_app/build.sh index 0e11e36..0eb0ff1 100755 --- a/tools/splat/test/basic_app/build.sh +++ b/tools/splat/test/basic_app/build.sh @@ -1,3 +1,7 @@ +#!/bin/bash + +set -e + mkdir -p build echo "Building..." export PATH=/gcc-2.7.2:$PATH diff --git a/tools/splat/test/basic_app/expected/.splache b/tools/splat/test/basic_app/expected/.splache index c855f15..8f17e13 100644 Binary files a/tools/splat/test/basic_app/expected/.splache and b/tools/splat/test/basic_app/expected/.splache differ diff --git a/tools/splat/test/basic_app/expected/basic_app.ld b/tools/splat/test/basic_app/expected/basic_app.ld index 0779b7e..57ec4ae 100644 --- a/tools/splat/test/basic_app/expected/basic_app.ld +++ b/tools/splat/test/basic_app/expected/basic_app.ld @@ -6,6 +6,7 @@ SECTIONS header_VRAM = ADDR(.header); .header : AT(header_ROM_START) SUBALIGN(16) { + FILL(0x00000000); header_DATA_START = .; header_s = .; build/split/asm/header.s.o(.data); @@ -20,14 +21,17 @@ SECTIONS dummy_ipl3_VRAM = ADDR(.dummy_ipl3); .dummy_ipl3 0xA4000040 : AT(dummy_ipl3_ROM_START) SUBALIGN(16) { + FILL(0x00000000); dummy_ipl3_DATA_START = .; dummy_ipl3_bin = .; build/split/assets/dummy_ipl3.bin.o(.data); + . = ALIGN(., 16); dummy_ipl3_DATA_END = .; dummy_ipl3_DATA_SIZE = ABSOLUTE(dummy_ipl3_DATA_END - dummy_ipl3_DATA_START); } __romPos += SIZEOF(.dummy_ipl3); __romPos = ALIGN(__romPos, 16); + . 
= ALIGN(., 16); dummy_ipl3_ROM_END = __romPos; dummy_ipl3_VRAM_END = .; @@ -35,31 +39,38 @@ SECTIONS boot_VRAM = ADDR(.boot); .boot 0x80000400 : AT(boot_ROM_START) SUBALIGN(16) { + FILL(0x00000000); boot_TEXT_START = .; build/split/src/main.c.o(.text); build/split/asm/handwritten.s.o(.text); + . = ALIGN(., 16); boot_TEXT_END = .; boot_TEXT_SIZE = ABSOLUTE(boot_TEXT_END - boot_TEXT_START); boot_DATA_START = .; main_data__s = .; build/split/asm/data/main.data.s.o(.data); + . = ALIGN(., 16); boot_DATA_END = .; boot_DATA_SIZE = ABSOLUTE(boot_DATA_END - boot_DATA_START); boot_RODATA_START = .; build/split/asm/data/main.rodata.s.o(.rodata); + . = ALIGN(., 16); boot_RODATA_END = .; boot_RODATA_SIZE = ABSOLUTE(boot_RODATA_END - boot_RODATA_START); } boot_bss_VRAM = ADDR(.boot_bss); .boot_bss (NOLOAD) : SUBALIGN(16) { + FILL(0x00000000); boot_BSS_START = .; build/split/asm/data/main.bss.s.o(.bss); + . = ALIGN(., 16); boot_BSS_END = .; boot_BSS_SIZE = ABSOLUTE(boot_BSS_END - boot_BSS_START); } __romPos += SIZEOF(.boot); __romPos = ALIGN(__romPos, 16); + . 
= ALIGN(., 16); boot_ROM_END = __romPos; boot_VRAM_END = .; diff --git a/tools/splat/test/basic_app/splat.yaml b/tools/splat/test/basic_app/splat.yaml index c1d2cf9..d4d4a41 100644 --- a/tools/splat/test/basic_app/splat.yaml +++ b/tools/splat/test/basic_app/splat.yaml @@ -12,7 +12,6 @@ options: undefined_funcs_auto_path: split/undefined_funcs_auto.txt undefined_syms_auto_path: split/undefined_syms_auto.txt asset_path: split/assets - asm_generated_by: False compiler: GCC segments: - name: header diff --git a/tools/splat/util/compiler.py b/tools/splat/util/compiler.py index e1666dc..3c67e59 100644 --- a/tools/splat/util/compiler.py +++ b/tools/splat/util/compiler.py @@ -6,6 +6,7 @@ class Compiler: name: str asm_function_macro: str = "glabel" + asm_function_alt_macro: str = "glabel" asm_jtbl_label_macro: str = "glabel" asm_data_macro: str = "glabel" asm_end_label: str = "" diff --git a/tools/splat/util/options.py b/tools/splat/util/options.py index bcb7b20..6d26db2 100644 --- a/tools/splat/util/options.py +++ b/tools/splat/util/options.py @@ -20,6 +20,8 @@ class SplatOpts: base_path: Path # Determines the path to the target binary target_path: Path + # Path to the final elf target + elf_path: Optional[Path] # Determines the platform of the target binary platform: str # Determines the compiler used to compile the target binary @@ -37,6 +39,8 @@ class SplatOpts: use_o_as_suffix: bool # the value of the $gp register to correctly calculate offset to %gp_rel relocs gp: Optional[int] + # Checks and errors if there are any non consecutive segment types + check_consecutive_segment_types: bool # Paths asset_path: Path @@ -93,21 +97,44 @@ class SplatOpts: # Determines the desired path to the linker symbol header, # which exposes externed definitions for all segment ram/rom start/end locations ld_symbol_header_path: Optional[Path] - # Determines whether to add a discard section to the linker script + # Determines whether to add a discard section with a wildcard to the linker 
script ld_discard_section: bool + # A list of sections to preserve during link time. It can be useful to preserve debugging sections + ld_sections_allowlist: List[str] + # A list of sections to discard during link time. It can be useful to avoid using the wildcard discard. Note that this option does not turn off `ld_discard_section` + ld_sections_denylist: List[str] # Determines the list of section labels that are to be added to the linker script ld_section_labels: List[str] # Determines whether to add wildcards for section linking in the linker script (.rodata* for example) ld_wildcard_sections: bool - # Determines whether to use "follows" settings to determine locations of overlays in the linker script. - # If disabled, this effectively ignores "follows" directives in the yaml. - ld_use_follows: bool + # Determines whether to use `follows_vram` (segment option) and + # `vram_symbol` / `follows_classes` (vram_class options) to calculate vram addresses in the linker script. + # If disabled, this uses the plain integer values for vram addresses defined in the yaml. + ld_use_symbolic_vram_addresses: bool + # Change linker script generation to allow partially linking segments. Requires both `ld_partial_scripts_path` and `ld_partial_build_segments_path` to be set. + ld_partial_linking: bool + # Folder were each intermediary linker script will be written to. + ld_partial_scripts_path: Optional[Path] + # Folder where the built partially linked segments will be placed by the build system. + ld_partial_build_segments_path: Optional[Path] + # Generate a dependency file for every linker script generated. Dependency files will have the same path and name as the corresponding linker script, but changing the extension to `.d`. Requires `elf_path` to be set. + ld_dependencies: bool + # Legacy linker script generation does not impose the section_order specified in the yaml options or per-segment options. 
+ ld_legacy_generation: bool # If enabled, the end symbol for each segment will be placed before the alignment directive for the segment segment_end_before_align: bool # Controls the style of the auto-generated segment symbols in the linker script. Possible values: splat, makerom segment_symbols_style: str # Specifies the starting offset for rom address symbols in the linker script. ld_rom_start: int + # The value passed to the FILL statement on each segment. `None` disables using FILL statements on the linker script. Defaults to a fill value of 0. + ld_fill_value: Optional[int] + # Allows to control if `bss` sections (and derivatived sections) will be put on a `NOLOAD` segment on the generated linker script or not. + ld_bss_is_noload: bool + # Allows to toggle aligning the `*_VRAM_END` linker symbol for each segment. + ld_align_segment_vram_end: bool + # Allows to toggle aligning the `*_END` linker symbol for each section of each section. + ld_align_section_vram_end: bool ################################################################################ # C file options @@ -140,6 +167,8 @@ class SplatOpts: asm_inc_header: str # Determines the macro used to declare functions in asm files asm_function_macro: str + # Determines the macro used to declare symbols in the middle of functions in asm files (which may be alternative entries) + asm_function_alt_macro: str # Determines the macro used to declare jumptable labels in asm files asm_jtbl_label_macro: str # Determines the macro used to declare data symbols in asm files @@ -161,6 +190,8 @@ class SplatOpts: # o32 is highly recommended, as it provides logically named registers for floating point instructions # For more info, see https://gist.github.com/EllipticEllipsis/27eef11205c7a59d8ea85632bc49224d mips_abi_float_regs: str + # Determines whether functions inside c files should have named registers + named_regs_for_c_funcs: bool # Determines whether to add ".set gp=64" to asm/hasm files add_set_gp_64: bool # Generate 
.asmproc.d dependency files for each C file which still reference functions in assembly files @@ -175,8 +206,6 @@ class SplatOpts: data_string_guesser_level: Optional[int] # Global option for allowing data symbols using addends on symbol references. It can be overriden per symbol allow_data_addends: bool - # Determines whether to include the "Generated by spimdisasm" text in the asm - asm_generated_by: bool # Tells the disassembler to try disassembling functions with unknown instructions instead of falling back to disassembling as raw data disasm_unknown: bool # Tries to detect redundant and unreferenced functions ends and merge them together. This option is ignored if the compiler is not set to IDO. @@ -194,6 +223,8 @@ class SplatOpts: gfx_ucode: str # Use named libultra symbols by default. Those will need to be added to a linker script manually by the user libultra_symbols: bool + # Use named libultra symbols by default. Those will need to be added to a linker script manually by the user + ique_symbols: bool # Use named hardware register symbols by default. 
Those will need to be added to a linker script manually by the user hardware_regs: bool @@ -246,6 +277,19 @@ def parse_optional_opt(self, opt: str, t: Type[T]) -> Optional[T]: return None return self.parse_opt(opt, t) + def parse_optional_opt_with_default( + self, opt: str, t: Type[T], default: Optional[T] + ) -> Optional[T]: + if opt not in self._yaml: + return default + self._read_opts.add(opt) + value = self._yaml[opt] + if value is None or isinstance(value, t): + return value + if t is float and isinstance(value, int): + return cast(T, float(value)) + raise ValueError(f"Expected {opt} to have type {t}, got {type(value)}") + def parse_opt_within( self, opt: str, t: Type[T], within: List[T], default: Optional[T] = None ) -> T: @@ -285,6 +329,7 @@ def _parse_yaml( config_paths: List[str], modes: List[str], verbose: bool = False, + disasm_all: bool = False, ) -> SplatOpts: p = OptParser(yaml) @@ -323,6 +368,7 @@ def parse_endianness() -> Literal["big", "little"]: modes=modes, base_path=base_path, target_path=p.parse_path(base_path, "target_path"), + elf_path=p.parse_optional_path(base_path, "elf_path"), platform=platform, compiler=comp, endianness=parse_endianness(), @@ -335,6 +381,9 @@ def parse_endianness() -> Literal["big", "little"]: generated_s_preamble=p.parse_opt("generated_s_preamble", str, ""), use_o_as_suffix=p.parse_opt("o_as_suffix", bool, False), gp=p.parse_opt("gp_value", int, 0), + check_consecutive_segment_types=p.parse_opt( + "check_consecutive_segment_types", bool, True + ), asset_path=p.parse_path(base_path, "asset_path", "assets"), symbol_addrs_paths=p.parse_path_list( base_path, "symbol_addrs_path", "symbol_addrs.txt" @@ -370,18 +419,35 @@ def parse_endianness() -> Literal["big", "little"]: ld_script_path=p.parse_path(base_path, "ld_script_path", f"{basename}.ld"), ld_symbol_header_path=p.parse_optional_path(base_path, "ld_symbol_header_path"), ld_discard_section=p.parse_opt("ld_discard_section", bool, True), + 
ld_sections_allowlist=p.parse_opt("ld_sections_allowlist", list, []), + ld_sections_denylist=p.parse_opt("ld_sections_denylist", list, []), ld_section_labels=p.parse_opt( "ld_section_labels", list, [".text", ".data", ".rodata", ".bss"], ), ld_wildcard_sections=p.parse_opt("ld_wildcard_sections", bool, False), - ld_use_follows=p.parse_opt("ld_use_follows", bool, True), + ld_use_symbolic_vram_addresses=p.parse_opt( + "ld_use_symbolic_vram_addresses", bool, True + ), + ld_partial_linking=p.parse_opt("ld_partial_linking", bool, False), + ld_partial_scripts_path=p.parse_optional_path( + base_path, "ld_partial_scripts_path" + ), + ld_partial_build_segments_path=p.parse_optional_path( + base_path, "ld_partial_build_segments_path" + ), + ld_dependencies=p.parse_opt("ld_dependencies", bool, False), + ld_legacy_generation=p.parse_opt("ld_legacy_generation", bool, False), segment_end_before_align=p.parse_opt("segment_end_before_align", bool, False), segment_symbols_style=p.parse_opt_within( "segment_symbols_style", str, ["splat", "makerom"], "splat" ), ld_rom_start=p.parse_opt("ld_rom_start", int, 0), + ld_fill_value=p.parse_optional_opt_with_default("ld_fill_value", int, 0), + ld_bss_is_noload=p.parse_opt("ld_bss_is_noload", bool, True), + ld_align_segment_vram_end=p.parse_opt("ld_align_segment_vram_end", bool, True), + ld_align_section_vram_end=p.parse_opt("ld_align_section_vram_end", bool, True), create_c_files=p.parse_opt("create_c_files", bool, True), auto_decompile_empty_functions=p.parse_opt( "auto_decompile_empty_functions", bool, True @@ -401,6 +467,9 @@ def parse_endianness() -> Literal["big", "little"]: asm_function_macro=p.parse_opt( "asm_function_macro", str, comp.asm_function_macro ), + asm_function_alt_macro=p.parse_opt( + "asm_function_alt_macro", str, comp.asm_function_alt_macro + ), asm_jtbl_label_macro=p.parse_opt( "asm_jtbl_label_macro", str, comp.asm_jtbl_label_macro ), @@ -424,6 +493,7 @@ def parse_endianness() -> Literal["big", "little"]: ["numeric", 
"o32", "n32", "n64"], "numeric", ), + named_regs_for_c_funcs=p.parse_opt("named_regs_for_c_funcs", bool, True), add_set_gp_64=p.parse_opt("add_set_gp_64", bool, True), create_asm_dependencies=p.parse_opt("create_asm_dependencies", bool, False), string_encoding=p.parse_optional_opt("string_encoding", str), @@ -443,15 +513,18 @@ def parse_endianness() -> Literal["big", "little"]: "f3dex2", ), libultra_symbols=p.parse_opt("libultra_symbols", bool, False), + ique_symbols=p.parse_opt("ique_symbols", bool, False), hardware_regs=p.parse_opt("hardware_regs", bool, False), use_legacy_include_asm=p.parse_opt("use_legacy_include_asm", bool, True), filesystem_path=p.parse_optional_path(base_path, "filesystem_path"), - asm_generated_by=p.parse_opt("asm_generated_by", bool, True), disasm_unknown=p.parse_opt("disasm_unknown", bool, False), detect_redundant_function_end=p.parse_opt( "detect_redundant_function_end", bool, True ), - disassemble_all=p.parse_opt("disassemble_all", bool, False), + # Command line argument takes precedence over yaml option + disassemble_all=disasm_all + if disasm_all + else p.parse_opt("disassemble_all", bool, False), ) p.check_no_unread_opts() return ret @@ -462,10 +535,11 @@ def initialize( config_paths: List[str], modes: Optional[List[str]] = None, verbose=False, + disasm_all=False, ): global opts if not modes: modes = ["all"] - opts = _parse_yaml(config["options"], config_paths, modes, verbose) + opts = _parse_yaml(config["options"], config_paths, modes, verbose, disasm_all) diff --git a/tools/splat/util/psx/__init__.py b/tools/splat/util/psx/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tools/splat/util/psx/psxexeinfo.py b/tools/splat/util/psx/psxexeinfo.py new file mode 100755 index 0000000..490bb6d --- /dev/null +++ b/tools/splat/util/psx/psxexeinfo.py @@ -0,0 +1,117 @@ +#! 
/usr/bin/env python3 + +from __future__ import annotations + +import argparse + +import hashlib +import struct + +import dataclasses + +from pathlib import Path + + +@dataclasses.dataclass +class PsxExe: + # Based on https://psx-spx.consoledev.net/cdromdrive/#filenameexe-general-purpose-executable + initial_pc: int # offset: 0x10 + initial_gp: int # offset: 0x14 + destination_vram: int # offset: 0x18 + file_size: int # offset: 0x1C + data_vram: int # offset: 0x20 + data_size: int # offset: 0x24 + bss_vram: int # offset: 0x28 + bss_size: int # offset: 0x2C + initial_sp_base: int # offset: 0x30 + initial_sp_offset: int # offset: 0x34 + + size: int + sha1: str + + @property + def text_offset(self) -> int: + return self.initial_pc - self.destination_vram + 0x800 + + @property + def data_offset(self) -> int: + if self.data_vram == 0 or self.data_size == 0: + return 0 + return self.data_vram - self.destination_vram + 0x800 + + @staticmethod + def get_info(exe_path: Path, exe_bytes: bytes) -> PsxExe: + initial_pc = struct.unpack(" Optional["Segment"]: return segment return None + seen_symbols: Dict[str, "Symbol"] = dict() prog_bar = progress_bar.get_progress_bar(sym_addrs_lines) prog_bar.set_description(f"Loading symbols ({path.stem})") line: str @@ -102,13 +102,14 @@ def get_seg_for_rom(rom: int) -> Optional["Segment"]: line_main = line[:comment_loc].strip() try: + assert line.count(";") == 1, "Line must contain a single semi-colon" line_split = line_main.split("=") name = line_split[0].strip() addr = int(line_split[1].strip()[:-1], 0) except: log.parsing_error_preamble(path, line_num, line) - log.write("Line should be of the form") - log.write(" =
// attr0:val0 attr1:val1 [...]") + log.write("Line must be of the form") + log.write(" =
; // attr0:val0 attr1:val1 [...]") log.write("with
in hex preceded by 0x, or dec") log.write("") raise @@ -180,10 +181,6 @@ def get_seg_for_rom(rom: int) -> Optional["Segment"]: if attr_name == "name_end": sym.given_name_end = attr_val continue - if attr_name == "appears_after_overlays_addr": - sym.appears_after_overlays_addr = int(attr_val, 0) - appears_after_overlays_syms.append(sym) - continue except: log.parsing_error_preamble(path, line_num, line) log.write( @@ -208,9 +205,6 @@ def get_seg_for_rom(rom: int) -> Optional["Segment"]: log.write([*TRUEY_VALS, *FALSEY_VALS]) log.error("") else: - if attr_name == "dead": - sym.dead = tf_val - continue if attr_name == "defined": sym.defined = tf_val continue @@ -250,6 +244,29 @@ def get_seg_for_rom(rom: int) -> Optional["Segment"]: sym.segment.add_symbol(sym) sym.user_declared = True + + if sym.name in seen_symbols: + log.parsing_error_preamble(path, line_num, line) + log.error( + f"Duplicate symbol detected! {sym.name} has already been defined at 0x{seen_symbols[sym.name].vram_start:X}" + ) + + if addr in all_symbols_dict: + items = all_symbols_dict[addr] + for item in items: + if ( + sym.rom == item.rom + or None in (sym.rom, item.rom) + or sym.segment == item.segment + or None in (sym.segment, item.rom) + ): + log.parsing_error_preamble(path, line_num, line) + log.error( + f"Duplicate symbol detected! 
{sym.name} clashes with {item.name} defined at 0x{addr:X}" + ) + + seen_symbols[sym.name] = sym + add_symbol(sym) @@ -544,7 +561,6 @@ class Symbol: defined: bool = False referenced: bool = False - dead: bool = False extract: bool = True user_declared: bool = False @@ -559,8 +575,6 @@ class Symbol: _generated_default_name: Optional[str] = None _last_type: Optional[str] = None - appears_after_overlays_addr: Optional[int] = None - def __str__(self): return self.name @@ -660,10 +674,8 @@ def reset_symbols(): global all_symbols_ranges global ignored_addresses global to_mark_as_defined - global appears_after_overlays_syms all_symbols = [] all_symbols_dict = {} all_symbols_ranges = IntervalTree() ignored_addresses = set() to_mark_as_defined = set() - appears_after_overlays_syms = [] diff --git a/tools/splat/util/vram_classes.py b/tools/splat/util/vram_classes.py new file mode 100644 index 0000000..c296bbb --- /dev/null +++ b/tools/splat/util/vram_classes.py @@ -0,0 +1,102 @@ +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional + +from util import log + + +@dataclass(frozen=True) +class VramClass: + name: str + vram: int + given_vram_symbol: Optional[str] = None + follows_classes: List[str] = field(default_factory=list, compare=False) + + @property + def vram_symbol(self) -> Optional[str]: + if self.given_vram_symbol is not None: + return self.given_vram_symbol + elif self.follows_classes: + return self.name + "_CLASS_VRAM" + else: + return None + + +_vram_classes: Dict[str, VramClass] = {} + + +def initialize(yaml: Any): + global _vram_classes + + _vram_classes = {} + + if yaml is None: + return + + if not isinstance(yaml, list): + log.error("vram_classes must be a list") + + class_names = set() + for vram_class in yaml: + if isinstance(vram_class, dict): + if "name" not in vram_class: + log.error(f"vram_class ({vram_class}) must have a name") + class_names.add(vram_class["name"]) + elif isinstance(vram_class, list): + 
class_names.add(vram_class[0]) + + for vram_class in yaml: + name: str + vram: int + vram_symbol: Optional[str] = None + follows_classes: List[str] = [] + + if isinstance(vram_class, dict): + if "name" not in vram_class: + log.error(f"vram_class ({vram_class}) must have a name") + name = vram_class["name"] + + if "vram" not in vram_class: + log.error(f"vram_class ({vram_class}) must have a vram") + vram = vram_class["vram"] + + if "vram_symbol" in vram_class: + vram_symbol = vram_class["vram_symbol"] + if not isinstance(vram_symbol, str): + log.error( + f"vram_symbol ({vram_symbol})must be a string, got {type(vram_symbol)}" + ) + + if "follows_classes" in vram_class: + follows_classes = vram_class["follows_classes"] + if not isinstance(follows_classes, list): + log.error( + f"vram_symbol ({follows_classes})must be a list, got {type(follows_classes)}" + ) + for follows_class in follows_classes: + if follows_class not in class_names: + log.error( + f"follows_class ({follows_class}) not found in vram_classes" + ) + elif isinstance(vram_class, list): + if len(vram_class) != 2: + log.error( + f"vram_class ({vram_class}) must have 2 elements, got {len(vram_class)}" + ) + name = vram_class[0] + vram = vram_class[1] + else: + log.error(f"vram_class must be a dict or list, got {type(vram_class)}") + + if not isinstance(name, str): + log.error(f"vram_class name ({name}) must be a string, got {type(name)}") + if not isinstance(vram, int): + log.error(f"vram_class vram ({vram}) must be an int, got {type(vram)}") + if name in _vram_classes: + log.error(f"Duplicate vram class name '{name}'") + _vram_classes[name] = VramClass(name, vram, vram_symbol, follows_classes) + + +def resolve(name: str) -> VramClass: + if name not in _vram_classes: + log.error(f"Unknown vram class '{name}'") + return _vram_classes[name] diff --git a/yamls/us/makerom.yaml b/yamls/us/makerom.yaml index a005987..8098a2e 100644 --- a/yamls/us/makerom.yaml +++ b/yamls/us/makerom.yaml @@ -1,7 +1,9 @@ - name: 
makerom type: code + dir: makerom start: 0x000000 + section_order: [".data", ".text", ".rodata", ".bss"] subsegments: - - [0x000000, header, makerom/header] - - [0x000040, bin, makerom/ipl3] - - {start: 0x001000, type: hasm, name: makerom/entry, vram: 0x80000400} + - [0x000000, header, header] + - [0x000040, bin, ipl3] + - {start: 0x001000, type: hasm, name: entry, vram: 0x80000400}