diff --git a/NESDecompiler.Core/Decompilation/CodeRegion.cs b/NESDecompiler.Core/Decompilation/CodeRegion.cs
new file mode 100644
index 0000000..af23496
--- /dev/null
+++ b/NESDecompiler.Core/Decompilation/CodeRegion.cs
@@ -0,0 +1,8 @@
+namespace NESDecompiler.Core.Decompilation;
+
+/// <summary>
+/// A contiguous region of bytes that may contain executable code
+/// </summary>
+/// <param name="BaseAddress">Where the first byte of the region can be found from the CPU's memory map</param>
+/// <param name="Bytes">The set of data to pull code out of</param>
+public record CodeRegion(ushort BaseAddress, ReadOnlyMemory<byte> Bytes);
\ No newline at end of file
diff --git a/NESDecompiler.Core/Decompilation/DecompiledFunction.cs b/NESDecompiler.Core/Decompilation/DecompiledFunction.cs
new file mode 100644
index 0000000..a8d6eee
--- /dev/null
+++ b/NESDecompiler.Core/Decompilation/DecompiledFunction.cs
@@ -0,0 +1,57 @@
+using NESDecompiler.Core.Disassembly;
+
+namespace NESDecompiler.Core.Decompilation;
+
+/// <summary>
+/// Represents an independently decompiled function
+/// </summary>
+public class DecompiledFunction
+{
+    /// <summary>
+    /// The CPU address where the function's first instruction is located
+    /// </summary>
+    public ushort Address { get; }
+
+    /// <summary>
+    /// The instructions that make up this function in the correct order in which they should be
+    /// executed.
+    /// </summary>
+    public IReadOnlyList<DisassembledInstruction> OrderedInstructions { get; }
+
+    /// <summary>
+    /// Location and the labels of all jump and branch targets within this function
+    /// </summary>
+    public IReadOnlyDictionary<ushort, string> JumpTargets { get; }
+
+    /// <summary>
+    /// Creates a function from the instructions that were traced for it
+    /// </summary>
+    /// <param name="address">The CPU address of the function's entry point</param>
+    /// <param name="instructions">Every instruction that belongs to the function, in any order</param>
+    /// <param name="jumpTargets">CPU addresses that are the target of a jump or branch</param>
+    public DecompiledFunction(
+        ushort address,
+        IReadOnlyList<DisassembledInstruction> instructions,
+        IReadOnlySet<ushort> jumpTargets)
+    {
+        Address = address;
+
+        // Only jump targets that actually carry a label are exposed.
+        JumpTargets = instructions
+            .Where(x => x.Label is not null && jumpTargets.Contains(x.CPUAddress))
+            .ToDictionary(x => x.CPUAddress, x => x.Label!);
+
+        // We need to order the instructions so that the starting instruction is the first one encountered.
+        // We can't just rely on the CPU address, because a function may jump to a code point earlier than
+        // the first instruction.
+        var initialInstructions = instructions
+            .Where(x => x.CPUAddress >= address)
+            .OrderBy(x => x.CPUAddress);
+
+        var trailingInstructions = instructions
+            .Where(x => x.CPUAddress < address)
+            .OrderBy(x => x.CPUAddress);
+
+        OrderedInstructions = initialInstructions.Concat(trailingInstructions).ToArray();
+    }
+}
\ No newline at end of file
diff --git a/NESDecompiler.Core/Decompilation/FunctionDecompiler.cs b/NESDecompiler.Core/Decompilation/FunctionDecompiler.cs
new file mode 100644
index 0000000..5f741b2
--- /dev/null
+++ b/NESDecompiler.Core/Decompilation/FunctionDecompiler.cs
@@ -0,0 +1,143 @@
+using NESDecompiler.Core.CPU;
+using NESDecompiler.Core.Disassembly;
+
+namespace NESDecompiler.Core.Decompilation;
+
+/// <summary>
+/// Traces the instructions reachable from an entry point and groups them into a function
+/// </summary>
+public static class FunctionDecompiler
+{
+    /// <summary>
+    /// Traces and decompiles a single function
+    /// </summary>
+    /// <param name="functionAddress">The CPU address of the entry point of the function to decompile</param>
+    /// <param name="codeRegions">All available regions of bytes that could contain instructions for the function</param>
+    /// <returns>The instructions reachable from the entry point, together with their jump targets</returns>
+    /// <exception cref="InvalidOperationException">
+    /// A traced address is not inside any region, holds an unknown opcode, or its instruction is truncated
+    /// </exception>
+    public static DecompiledFunction Decompile(ushort functionAddress, IReadOnlyList<CodeRegion> codeRegions)
+    {
+        ArgumentNullException.ThrowIfNull(codeRegions);
+
+        var instructions = new List<DisassembledInstruction>();
+        var jumpAddresses = new HashSet<ushort>();
+        var seenInstructions = new HashSet<ushort>();
+        var addressQueue = new Queue<ushort>([functionAddress]);
+
+        while (addressQueue.TryDequeue(out var nextAddress))
+        {
+            if (!seenInstructions.Add(nextAddress))
+            {
+                continue;
+            }
+
+            var instruction = GetNextInstruction(nextAddress, codeRegions);
+            instructions.Add(instruction);
+
+            if (IsEndOfFunction(instruction))
+            {
+                continue;
+            }
+
+            if (instruction.TargetAddress is { } target)
+            {
+                jumpAddresses.Add(target);
+                addressQueue.Enqueue(target);
+            }
+
+            if (!instruction.IsJump)
+            {
+                addressQueue.Enqueue((ushort)(nextAddress + instruction.Info.Size));
+            }
+        }
+
+        // Add labels for any jump targets
+        foreach (var instruction in instructions)
+        {
+            if (jumpAddresses.Contains(instruction.CPUAddress))
+            {
+                instruction.Label = $"loc_{instruction.CPUAddress:X4}";
+            }
+        }
+
+        return new DecompiledFunction(functionAddress, instructions, jumpAddresses);
+    }
+
+    /// <summary>
+    /// Decodes the single instruction located at the given CPU address
+    /// </summary>
+    /// <exception cref="InvalidOperationException">
+    /// The address is not inside any region, holds an unknown opcode, or the instruction is truncated
+    /// </exception>
+    private static DisassembledInstruction GetNextInstruction(ushort address, IReadOnlyList<CodeRegion> regions)
+    {
+        // The lower bound is inclusive: an instruction may sit on the very first byte of a region.
+        var relevantRegion = regions
+            .FirstOrDefault(x => x.BaseAddress <= address && x.BaseAddress + x.Bytes.Length > address);
+
+        if (relevantRegion is null)
+        {
+            var message = $"No code region contained the address 0x{address:X4}";
+            throw new InvalidOperationException(message);
+        }
+
+        var offset = address - relevantRegion.BaseAddress;
+        var bytes = relevantRegion.Bytes.Span[offset..];
+        var info = InstructionSet.GetInstruction(bytes[0]);
+        if (!info.IsValid)
+        {
+            // An opcode is a single byte, so it is printed as two hex digits.
+            var message = $"Attempted to get instruction at address 0x{address:X4}, but byte 0x{bytes[0]:X2} " +
+                          $"is not a valid/known opcode";
+
+            throw new InvalidOperationException(message);
+        }
+
+        if (bytes.Length < info.Size)
+        {
+            var message = $"Opcode {info.Mnemonic} at address 0x{address:X4} requires {info.Size} bytes, but only " +
+                          $"{bytes.Length} are available";
+
+            throw new InvalidOperationException(message);
+        }
+
+        var instruction = new DisassembledInstruction
+        {
+            Address = (ushort)offset,
+            CPUAddress = address,
+            Info = info,
+            Bytes = bytes[..info.Size].ToArray(),
+        };
+
+        Disassembler.CalculateTargetAddress(instruction);
+
+        return instruction;
+    }
+
+    /// <summary>
+    /// Determines whether tracing should stop after the given instruction
+    /// </summary>
+    private static bool IsEndOfFunction(DisassembledInstruction instruction)
+    {
+        // RTI and RTS are obviously the end of a function. We consider BRK and JSR
+        // to be the end of a function as well because an RTI or RTS will do a function
+        // call into the next instruction. This is required because RTI/RTS could be
+        // returning based on a modified stack, and therefore we are not guaranteed to
+        // be returning to the expected spot.
+        if (instruction.Info.Mnemonic is "JSR" or "BRK" or "RTI" or "RTS")
+        {
+            return true;
+        }
+
+        // Since we don't know where we are jumping at compile time, this will be treated
+        // as a function call, thus we consider it the end of the function.
+        if (instruction.Info.AddressingMode == AddressingMode.Indirect)
+        {
+            return true;
+        }
+
+        return false;
+    }
+}
\ No newline at end of file
diff --git a/NESDecompiler.Core/Disassembly/Disassembler.cs b/NESDecompiler.Core/Disassembly/Disassembler.cs
index 80f86d3..b25b066 100644
--- a/NESDecompiler.Core/Disassembly/Disassembler.cs
+++ b/NESDecompiler.Core/Disassembly/Disassembler.cs
@@ -438,7 +438,7 @@ private void GenerateLabels()
         /// Calculates the target address for branch and jump instructions
         /// </summary>
         /// <param name="instruction">The instruction to process</param>
-        private void CalculateTargetAddress(DisassembledInstruction instruction)
+        public static void CalculateTargetAddress(DisassembledInstruction instruction)
         {
             if (instruction.Info.AddressingMode == AddressingMode.Relative)
             {