Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions NESDecompiler.Core/Decompilation/CodeRegion.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
namespace NESDecompiler.Core.Decompilation;

/// <summary>
/// A contiguous block of bytes, mapped at a known CPU address, that may contain executable code
/// </summary>
/// <param name="BaseAddress">The address in the CPU's memory map at which the first byte of <paramref name="Bytes"/> is located</param>
/// <param name="Bytes">The raw bytes of the region that instructions are read out of</param>
public record CodeRegion(ushort BaseAddress, ReadOnlyMemory<byte> Bytes);
50 changes: 50 additions & 0 deletions NESDecompiler.Core/Decompilation/DecompiledFunction.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
using NESDecompiler.Core.Disassembly;

namespace NESDecompiler.Core.Decompilation;

/// <summary>
/// The result of decompiling one function in isolation: its entry address, its instructions
/// in emission order, and the labels of its jump/branch targets.
/// </summary>
public class DecompiledFunction
{
    /// <summary>
    /// The CPU address where the function's first instruction is located
    /// </summary>
    public ushort Address { get; }

    /// <summary>
    /// The function's instructions, ordered so that the entry instruction comes first. Instructions
    /// at or after <see cref="Address"/> are listed in ascending address order, followed by any
    /// instructions located before <see cref="Address"/>, also in ascending address order.
    /// </summary>
    public IReadOnlyList<DisassembledInstruction> OrderedInstructions { get; }

    /// <summary>
    /// Maps the CPU address of each labelled jump or branch target within this function to its label
    /// </summary>
    public IReadOnlyDictionary<ushort, string> JumpTargets { get; }

    /// <summary>
    /// Builds a decompiled function from its traced instructions
    /// </summary>
    /// <param name="address">The CPU address of the function's entry point</param>
    /// <param name="instructions">Every instruction belonging to the function, in any order</param>
    /// <param name="jumpTargets">CPU addresses that are targeted by a jump or branch</param>
    public DecompiledFunction(
        ushort address,
        IReadOnlyList<DisassembledInstruction> instructions,
        IReadOnlySet<ushort> jumpTargets)
    {
        Address = address;

        // Only instructions that are both a known target and actually carry a label are recorded.
        JumpTargets = instructions
            .Where(instruction => jumpTargets.Contains(instruction.CPUAddress) && instruction.Label != null)
            .ToDictionary(instruction => instruction.CPUAddress, instruction => instruction.Label!);

        // The entry instruction must come first, but sorting purely by CPU address is not enough:
        // a function may jump to code located before its own entry point. The boolean key sorts
        // "false" (at or after the entry point) ahead of "true" (before the entry point), and the
        // secondary key keeps each of those two groups in ascending address order.
        OrderedInstructions = instructions
            .OrderBy(instruction => instruction.CPUAddress < address)
            .ThenBy(instruction => instruction.CPUAddress)
            .ToArray();
    }
}
125 changes: 125 additions & 0 deletions NESDecompiler.Core/Decompilation/FunctionDecompiler.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
using NESDecompiler.Core.CPU;
using NESDecompiler.Core.Disassembly;

namespace NESDecompiler.Core.Decompilation;

/// <summary>
/// Traces the instructions reachable from a function's entry point and packages them up as a
/// <see cref="DecompiledFunction"/>.
/// </summary>
public static class FunctionDecompiler
{
    /// <summary>
    /// Traces and decompiles a single function
    /// </summary>
    /// <param name="functionAddress">The CPU address of the entry point of the function to decompile</param>
    /// <param name="codeRegions">All available regions of bytes that could contain instructions for the function</param>
    /// <returns>The function's instructions along with the labelled jump/branch targets found while tracing</returns>
    /// <exception cref="InvalidOperationException">
    /// A traced address is not inside any code region, does not hold a valid opcode, or its instruction
    /// runs past the end of its region.
    /// </exception>
    public static DecompiledFunction Decompile(ushort functionAddress, IReadOnlyList<CodeRegion> codeRegions)
    {
        var instructions = new List<DisassembledInstruction>();
        var jumpAddresses = new HashSet<ushort>();
        var seenInstructions = new HashSet<ushort>();
        var addressQueue = new Queue<ushort>([functionAddress]);

        while (addressQueue.TryDequeue(out var nextAddress))
        {
            // Each address is disassembled at most once, which also terminates loops in the code.
            if (!seenInstructions.Add(nextAddress))
            {
                continue;
            }

            var instruction = GetNextInstruction(nextAddress, codeRegions);
            instructions.Add(instruction);

            // Terminating instructions are recorded, but nothing after them (nor their target)
            // is followed.
            if (IsEndOfFunction(instruction))
            {
                continue;
            }

            if (instruction.TargetAddress != null)
            {
                jumpAddresses.Add(instruction.TargetAddress.Value);
                addressQueue.Enqueue(instruction.TargetAddress.Value);
            }

            // NOTE(review): presumably IsJump is only set for unconditional jumps, so that
            // conditional branches still fall through to the next instruction - confirm against
            // DisassembledInstruction.
            if (!instruction.IsJump)
            {
                addressQueue.Enqueue((ushort)(nextAddress + instruction.Info.Size));
            }
        }

        // Add labels for any jump targets
        foreach (var instruction in instructions)
        {
            if (jumpAddresses.Contains(instruction.CPUAddress))
            {
                instruction.Label = $"loc_{instruction.CPUAddress:X4}";
            }
        }

        return new DecompiledFunction(functionAddress, instructions, jumpAddresses);
    }

    /// <summary>
    /// Disassembles the single instruction located at the given CPU address
    /// </summary>
    /// <param name="address">The CPU address of the instruction's opcode byte</param>
    /// <param name="regions">The code regions to search for the address</param>
    /// <returns>The disassembled instruction, with its target address calculated</returns>
    /// <exception cref="InvalidOperationException">
    /// No region contains the address, the byte at the address is not a valid opcode, or the region
    /// ends before the instruction's final byte.
    /// </exception>
    private static DisassembledInstruction GetNextInstruction(ushort address, IReadOnlyList<CodeRegion> regions)
    {
        // A region covers [BaseAddress, BaseAddress + Length). The lower bound is inclusive so that
        // an instruction sitting on the very first byte of a region can be found.
        var relevantRegion = regions.FirstOrDefault(x =>
            x.BaseAddress <= address &&
            x.BaseAddress + x.Bytes.Length > address);

        if (relevantRegion == null)
        {
            var message = $"No code region contained the address 0x{address:X4}";
            throw new InvalidOperationException(message);
        }

        var offset = address - relevantRegion.BaseAddress;
        var bytes = relevantRegion.Bytes.Span[offset..];
        var info = InstructionSet.GetInstruction(bytes[0]);
        if (!info.IsValid)
        {
            var message = $"Attempted to get instruction at address 0x{address:X4}, but byte 0x{bytes[0]:X4} " +
                          $"is not a valid/known opcode";

            throw new InvalidOperationException(message);
        }

        if (bytes.Length < info.Size)
        {
            var message = $"Opcode {info.Mnemonic} at address 0x{address:X4} requires {info.Size} bytes, but only " +
                          $"{bytes.Length} are available";

            throw new InvalidOperationException(message);
        }

        var instruction = new DisassembledInstruction
        {
            // Address is the offset within the containing region; CPUAddress is the memory-map address.
            Address = (ushort)offset,
            CPUAddress = address,
            Info = info,
            Bytes = bytes[..info.Size].ToArray(),
        };

        Disassembler.CalculateTargetAddress(instruction);

        return instruction;
    }

    /// <summary>
    /// Determines whether tracing should stop after the given instruction
    /// </summary>
    /// <param name="instruction">The instruction to check</param>
    /// <returns>
    /// <c>true</c> for JSR, BRK, RTI, RTS, and any instruction using indirect addressing; otherwise <c>false</c>
    /// </returns>
    private static bool IsEndOfFunction(DisassembledInstruction instruction)
    {
        // RTI and RTS are obviously the end of a function. We consider BRK and JSR
        // to be the end of a function as well because an RTI or RTS will do a function
        // call into the next instruction. This is required because RTI/RTS could be
        // returning based on a modified stack, and therefore we are not guaranteed to
        // be returning to the expected spot.
        if (instruction.Info.Mnemonic is "JSR" or "BRK" or "RTI" or "RTS")
        {
            return true;
        }

        // Since we don't know where we are jumping at compile time, this will be treated
        // as a function call, thus we consider it the end of the function.
        if (instruction.Info.AddressingMode == AddressingMode.Indirect)
        {
            return true;
        }

        return false;
    }
}
2 changes: 1 addition & 1 deletion NESDecompiler.Core/Disassembly/Disassembler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ private void GenerateLabels()
/// Calculates the target address for branch and jump instructions
/// </summary>
/// <param name="instruction">The instruction to process</param>
private void CalculateTargetAddress(DisassembledInstruction instruction)
public static void CalculateTargetAddress(DisassembledInstruction instruction)
{
if (instruction.Info.AddressingMode == AddressingMode.Relative)
{
Expand Down
Loading