somewhat improved disassembler

This commit is contained in:
Bryan Bishop
2012-06-10 12:24:33 -05:00
parent 3233cead33
commit 10a5a6518d
2 changed files with 96 additions and 162 deletions

View File

@@ -6,15 +6,18 @@ from copy import copy, deepcopy
from ctypes import c_int8 from ctypes import c_int8
import json import json
import random import random
from romstr import RomStr
spacing = "\t" spacing = "\t"
class XRomStr(str):
def __repr__(self):
return "RomStr(too long)"
def load_rom(filename="../baserom.gbc"): def load_rom(filename="../baserom.gbc"):
"""loads bytes into memory""" """loads bytes into memory"""
global rom global rom
file_handler = open(filename, "r") file_handler = open(filename, "r")
rom = RomStr(file_handler.read()) rom = XRomStr(file_handler.read())
file_handler.close() file_handler.close()
return rom return rom

View File

@@ -147,48 +147,55 @@ class Asm:
# [{"command": 0x20, "bytes": [0x20, 0x40, 0x50], # [{"command": 0x20, "bytes": [0x20, 0x40, 0x50],
# "asm": "jp $5040", "label": "Unknown5040"}] # "asm": "jp $5040", "label": "Unknown5040"}]
asm_commands = [] asm_commands = {}
offset = start_address offset = start_address
current_byte_number = 0
last_hl_address = None last_hl_address = None
last_a_address = None last_a_address = None
used_3d97 = False used_3d97 = False
keep_reading = True keep_reading = True
# for labeling future bytes (like for relative jumps)
byte_labels = {}
while offset <= end_address and keep_reading: while offset <= end_address and keep_reading:
# read the current opcode byte
current_byte = ord(rom[offset]) current_byte = ord(rom[offset])
current_byte_number = len(asm_commands.keys())
is_data = False
maybe_byte = current_byte # setup this next/upcoming command
asm_command = {
"address": offset,
# check if this byte has a label prior to it # This counts how many times relative jumps reference this
# and if not, generate a new label # byte. This is used to determine whether or not to print out a
# This new label might not be used, so it will be # label later.
# removed if the total usage is zero. "references": 0,
if offset in byte_labels.keys(): }
line_label = byte_labels[offset]["name"]
byte_labels[offset]["usage"] += 1
else:
line_label = asm_label(offset)
byte_labels[offset] = {}
byte_labels[offset]["name"] = line_label
byte_labels[offset]["usage"] = 0
byte_labels[offset]["definition"] = True
#find out if there's a two byte key like this # some commands have two opcodes
temp_maybe = maybe_byte next_byte = ord(rom[offset+1])
temp_maybe += ( ord(rom[offset+1]) << 8)
if temp_maybe in opt_table.keys() and ord(rom[offset+1])!=0: # all two-byte opcodes also have their first byte in there somewhere
opstr = opt_table[temp_maybe][0].lower() if current_byte in opt_table.keys():
# this might be a two-byte opcode
possible_opcode = current_byte + (next_byte << 8)
# check if this is a two-byte opcode
if possible_opcode in opt_table.keys():
op_code = possible_opcode
else:
op_code = current_byte
op = opt_table[op_code]
opstr = op[0].lower()
optype = op[1]
asm_command["type"] = "op"
asm_command["id"] = op_code
asm_command["format"] = opstr
asm_command["opnumberthing"] = optype
if "x" in opstr: if "x" in opstr:
for x in range(0, opstr.count("x")): for x in range(0, opstr.count("x")):
insertion = ord(rom[offset + 1]) insertion = ord(rom[offset + 1])
@@ -196,8 +203,8 @@ class Asm:
opstr = opstr[:opstr.find("x")].lower() + insertion + opstr[opstr.find("x")+1:].lower() opstr = opstr[:opstr.find("x")].lower() + insertion + opstr[opstr.find("x")+1:].lower()
current_byte += 1 current_byte_number += 1
offset += 1 offset += 1
if "?" in opstr: if "?" in opstr:
for y in range(0, opstr.count("?")): for y in range(0, opstr.count("?")):
@@ -213,145 +220,69 @@ class Asm:
current_byte_number += 2 current_byte_number += 2
offset += 2 offset += 2
asm_commands.append({"address": offset, "command": opstr}) # Check for relative jumps, construct the formatted asm line.
output += spacing + opstr #+ " ; " + hex(offset) # Also set the usage of labels.
output += "\n" if current_byte in [0x18, 0x20] or current_byte in relative_jumps: # jr or jr nz
# generate a label for the byte we're jumping to
current_byte_number += 2 target_address = offset + 2 + c_int8(ord(rom[offset + 1])).value
offset += 2
elif maybe_byte in opt_table.keys(): if target_address in byte_labels.keys():
op_code = opt_table[maybe_byte] byte_labels[target_address]["usage"] = 1 + byte_labels[target_address]["usage"]
op_code_type = op_code[1] line_label2 = byte_labels[target_address]["name"]
op_code_byte = maybe_byte
else:
#type = -1 when it's the E op line_label2 = asm_label(target_address)
#if op_code_type != -1: byte_labels[target_address] = {}
if op_code_type == 0 and ord(rom[offset]) == op_code_byte: byte_labels[target_address]["name"] = line_label2
op_str = op_code[0].lower() byte_labels[target_address]["usage"] = 1
byte_labels[target_address]["definition"] = False
output += spacing + op_code[0].lower() #+ " ; " + hex(offset)
output += "\n" insertion = line_label2.lower()
include_comment = True
offset += 1 elif current_byte == 0x3e:
current_byte_number += 1 last_a_address = ord(rom[offset + 1])
elif op_code_type == 1 and ord(rom[offset]) == op_code_byte:
oplen = len(op_code[0]) # store the formatted string for the output later
opstr = copy(op_code[0]) asm_command["formatted"] = opstr
xes = op_code[0].count("x")
include_comment = False if current_byte == 0x21:
for x in range(0, xes): last_hl_address = byte1 + (byte2 << 8)
insertion = ord(rom[offset + 1])
insertion = "$" + hex(insertion)[2:] # this is leftover from pokered, might be meaningless
if current_byte == 0xcd:
if current_byte == 0x18 or current_byte==0x20 or current_byte in relative_jumps: #jr or jr nz if number == 0x3d97:
#generate a label for the byte we're jumping to used_3d97 = True
target_address = offset + 2 + c_int8(ord(rom[offset + 1])).value
if target_address in byte_labels.keys(): if current_byte == 0xc3 or current_byte in relative_unconditional_jumps:
byte_labels[target_address]["usage"] = 1 + byte_labels[target_address]["usage"] if current_byte == 0xc3:
line_label2 = byte_labels[target_address]["name"] if number == 0x3d97:
else: used_3d97 = True
line_label2 = asm_label(target_address)
byte_labels[target_address] = {} if not has_outstanding_labels(byte_labels) or all_outstanding_labels_are_reverse(byte_labels, offset):
byte_labels[target_address]["name"] = line_label2 keep_reading = False
byte_labels[target_address]["usage"] = 1 break
byte_labels[target_address]["definition"] = False
# stop reading at a jump, relative jump or return
insertion = line_label2.lower()
include_comment = True
elif current_byte == 0x3e:
last_a_address = ord(rom[offset + 1])
opstr = opstr[:opstr.find("x")].lower() + insertion + opstr[opstr.find("x")+1:].lower()
output += spacing + opstr
if include_comment:
output += " ; " + hex(offset)
if current_byte in relative_jumps:
output += " $" + hex(ord(rom[offset + 1]))[2:]
output += "\n"
current_byte_number += 1
offset += 1
insertion = ""
current_byte_number += 1
offset += 1
include_comment = False
elif op_code_type == 2 and ord(rom[offset]) == op_code_byte:
oplen = len(op_code[0])
opstr = copy(op_code[0])
qes = op_code[0].count("?")
for x in range(0, qes):
byte1 = ord(rom[offset + 1])
byte2 = ord(rom[offset + 2])
number = byte1
number += byte2 << 8;
insertion = "$%.4x" % (number)
if maybe_byte in call_commands or current_byte in relative_unconditional_jumps or current_byte in relative_jumps:
result = find_label(insertion, bank_id)
if result != None:
insertion = result
opstr = opstr[:opstr.find("?")].lower() + insertion + opstr[opstr.find("?")+1:].lower()
output += spacing + opstr #+ " ; " + hex(offset)
output += "\n"
current_byte_number += 2
offset += 2
current_byte_number += 1
offset += 1
if current_byte == 0x21:
last_hl_address = byte1 + (byte2 << 8)
if current_byte == 0xcd:
if number == 0x3d97: used_3d97 = True
#duck out if this is jp $24d7
if current_byte == 0xc3 or current_byte in relative_unconditional_jumps:
if current_byte == 0xc3:
if number == 0x3d97: used_3d97 = True
#if number == 0x24d7: #jp
if not has_outstanding_labels(byte_labels) or all_outstanding_labels_are_reverse(byte_labels, offset):
keep_reading = False
is_data = False
break
else:
is_data = True
#stop reading at a jump, relative jump or return
if current_byte in end_08_scripts_with: if current_byte in end_08_scripts_with:
is_data = False
if not has_outstanding_labels(byte_labels) and all_outstanding_labels_are_reverse(byte_labels, offset): if not has_outstanding_labels(byte_labels) and all_outstanding_labels_are_reverse(byte_labels, offset):
keep_reading = False keep_reading = False
is_data = False #cleanup
break break
else: else:
is_data = False
keep_reading = True keep_reading = True
else: else:
is_data = False
keep_reading = True keep_reading = True
else: else:
#if is_data and keep_reading: # This shouldn't really happen, and means that this area of the
output += spacing + "db $" + hex(ord(rom[offset]))[2:] #+ " ; " + hex(offset) # ROM probably doesn't represent instructions.
output += "\n" asm_command["type"] = "data" # db
offset += 1 asm_command["value"] = current_byte
current_byte_number += 1
#else the while loop would have spit out the opcode # save this new command in the list
asm_commands[current_byte_number] = asm_command
#these two are done prior
#offset += 1
#current_byte_number += 1
# clean up unused labels.. used to be in 'output', but is now in asm_commands
for label_line in byte_labels.keys():
address = label_line
label_line = byte_labels[label_line]
if label_line["usage"] == 0:
output = output.replace((label_line["name"] + "\n").lower(), "")
raise NotImplementedError
def __str__(self): def __str__(self):
""" ASM pretty printer. """ ASM pretty printer.