# T4SP-Server-Plugin/tools/dump_disassembly.py
# 2023-05-05 00:40:57 -07:00
#
# 393 lines
# 14 KiB
# Python

import idautils
import idc
import idaapi
import ida_funcs
import ida_idaapi
import ida_typeinf
import os
# Operand types returned by idc.get_operand_type
# o_void = cvar.o_void
# o_reg = cvar.o_reg
# o_mem = cvar.o_mem
# o_phrase = cvar.o_phrase
# o_displ = cvar.o_displ
# o_imm = cvar.o_imm
# o_far = cvar.o_far # Jump
# o_near = cvar.o_near # Jump
# o_idpspec0 = cvar.o_idpspec0
# o_idpspec1 = cvar.o_idpspec1
# o_idpspec2 = cvar.o_idpspec2
# o_idpspec3 = cvar.o_idpspec3
# o_idpspec4 = cvar.o_idpspec4
# o_idpspec5 = cvar.o_idpspec5
# Path of the file that receives the dumped disassembly of every function
output_file_c = "function_code.c"
# Path of the file that receives one "void name();" declaration per dumped function
output_file_h = "function_declarations.h"
# Number of functions parse_assembly_for_func_calls has started searching so far
dumpable_funcs_searched = 0
# Upper bound used both for the recursive call search and for the dump loop
max_functions_to_dump = 50
# NOTE(review): not referenced anywhere else in the visible source
dumped_func_addr_array = []
# Addresses of the functions selected for dumping, in discovery order
dumpable_func_calls = []
# Names of the functions selected for dumping; kept index-aligned with dumpable_func_calls
dumpable_func_names = []
# NOTE(review): only referenced from commented-out code in the visible source
global_objects_list = {}
# When True, dump_function_code appends operand type/address annotations to each line
debug_dump_extra_data = True
# Function name -> list of addresses that are targets of near jumps inside that function
jump_locs = {}
# Function name -> dict mapping "{}".format(address) to the label name used in the disassembly
jump_loc_names = {}
def is_integer(n):
    """Return True when ``int(n)`` succeeds, False when it raises ValueError.

    Any other exception raised by ``int`` (for example TypeError for
    ``None``) is not handled here and propagates to the caller.
    """
    try:
        int(n)
        return True
    except ValueError:
        return False
def item_in_array(array, item):
    """Return True when *item* is contained in *array*, otherwise False.

    Uses the membership operator instead of ``array.index`` so that any
    container supporting ``in`` (list, tuple, str, set, dict, ...) works,
    not only those that happen to expose an ``index`` method.

    Args:
        array: The container (or string) to search.
        item: The element (or substring) to look for.

    Returns:
        bool: Whether *item* was found.
    """
    return item in array
def is_ida_hex_str(str):
    """Return True when *str* is a numeric literal as printed in IDA listings.

    Two forms are accepted:

    * a plain run of decimal digits, e.g. ``"5"``;
    * hexadecimal digits followed by an ``h`` suffix, e.g. ``"10h"`` or
      ``"0FFh"``.

    The literal must start with a decimal digit. That requirement is what
    keeps register names such as ``ah``/``bh``/``ch``/``dh`` from being
    mistaken for hex literals. Empty strings and a lone ``"h"`` are rejected.

    Args:
        str: Token to classify. (The name shadows the builtin; it is kept
            because it is part of the existing signature.)

    Returns:
        bool: Whether the token is a numeric literal in one of the two forms.
    """
    decimal_digits = "0123456789"
    hex_digits = "0123456789abcdefABCDEF"
    if not str or str[0] not in decimal_digits:
        return False
    if str.endswith("h"):
        return all(char in hex_digits for char in str[:-1])
    return all(char in decimal_digits for char in str)


def convert_ida_hex_str_to_true_hex(str):
    """Convert an IDA-style numeric literal into a ``0x``-prefixed hex string.

    A literal with an ``h`` suffix is parsed as base 16 (``"10h"`` ->
    ``"0x10"``); a literal without the suffix is parsed as base 10
    (``"12"`` -> ``"0xc"``).

    Args:
        str: Token for which ``is_ida_hex_str`` returns True.

    Returns:
        The value formatted by the builtin ``hex``.

    Raises:
        ValueError: If the token is not a numeric literal.
    """
    if not is_ida_hex_str(str):
        raise ValueError("not an IDA numeric literal: {!r}".format(str))
    if str.endswith("h"):
        # Drop the suffix; the remaining digits are hexadecimal.
        return hex(int(str[:-1], 16))
    return hex(int(str, 10))
def clean_func_name(func_name):
    """Strip a trailing ``;`` comment and a ``prefix:`` from an operand token.

    The comment is removed first, so that a colon inside the comment cannot
    be mistaken for the prefix separator and so that a token carrying both
    a prefix and a comment (``"ds:foo;bar"``) is fully cleaned.

    Args:
        func_name: Operand text taken from a disassembly line.

    Returns:
        The text before the first ``;``; if that text contains ``:``, the
        piece between the first and second ``:`` is returned instead.
    """
    name = func_name.split(";")[0]
    if ":" in name:
        name = name.split(":")[1]
    return name
def get_func_name_in_register(insn):
    """Return the token that follows the first stand-alone ``;`` token.

    Args:
        insn: Either a disassembly line (split on whitespace here) or an
            already-split sequence of tokens.

    Returns:
        The token immediately after the first ``";"`` token, or ``""`` when
        there is no such token or nothing follows it.
    """
    tokens = insn.split() if hasattr(insn, "split") else list(insn)
    for index, token in enumerate(tokens):
        if token == ";":
            # Only the first ";" is considered; guard against it being last.
            if index + 1 < len(tokens):
                return tokens[index + 1]
            return ""
    return ""
def funct_is_dumpable(func_name, addr):
    """Decide whether the function at *addr* should be dumped.

    A function qualifies when ``idaapi.is_func_entry`` accepts the function
    object found for *addr*, the segment name reported for *addr* is
    ``.text``, and the function flags do not include ``FUNC_LIB``.

    Args:
        func_name: Unused by the checks below.
        addr: Address of the candidate function.

    Returns:
        bool: True when all three conditions hold.
    """
    containing_func = idaapi.get_func(addr)
    flags = idc.get_func_flags(addr)
    # data_segment_for_addr = idaapi.get_segm_by_name(".idata")
    if idaapi.is_func_entry(containing_func) and idc.get_segm_name(addr) == ".text":
        return not (flags & ida_funcs.FUNC_LIB)
    # if not data_segment_for_addr:
    #     return False
    # if data_segment_for_addr.contains(called_ea):
    #     return False
    return False
# def parse_addr_from_loc_name(loc_name):
# index = 0
# array_size = len(loc_name)
# for char in loc_name:
# while not is_integer(char) and
def parse_assembly_for_func_calls(func_addr):
    """Recursively collect dumpable callees and jump targets of a function.

    Walks every item of the function at *func_addr*. For each ``call`` whose
    target name resolves to a dumpable function, the target is appended to
    ``dumpable_func_calls`` / ``dumpable_func_names`` and searched in turn.
    For every other instruction whose first operand is ``o_near``, the
    target label is recorded in ``jump_locs`` / ``jump_loc_names`` under the
    current function's name.

    The search stops starting new functions once ``dumpable_funcs_searched``
    reaches ``max_functions_to_dump``.

    Args:
        func_addr: Address inside the function to search.
    """
    global dumpable_funcs_searched
    if dumpable_funcs_searched >= max_functions_to_dump:
        print("Hit func search limit {}".format(dumpable_funcs_searched))
        return
    dumpable_funcs_searched += 1

    func = idaapi.get_func(func_addr)
    if not func:
        print("Couldn't find func at addr {}".format(hex(func_addr)))
        return

    primary_func_name = idc.get_func_name(func_addr)
    print("Searching function {}".format(primary_func_name))

    for insn in idautils.FuncItems(func_addr):
        insn2 = idaapi.insn_t()
        idaapi.decode_insn(insn2, insn)
        first_operand_type = idc.get_operand_type(insn, 0)
        mnemonic = insn2.get_canon_mnem()
        insn_tokens = idc.GetDisasm(insn).split()

        if mnemonic == "call":
            if len(insn_tokens) < 2:
                # No operand text to parse a target name from.
                continue
            cleaned_func_name = clean_func_name(insn_tokens[1])
            # Calls through eax cannot be resolved to a name here; skip them.
            if cleaned_func_name == "eax":
                continue
            called_ea = idc.get_name_ea_simple(cleaned_func_name)
            if called_ea == ida_idaapi.BADADDR:
                continue
            if called_ea in dumpable_func_calls:
                continue
            # Pass the cleaned name string (not the clean_func_name function).
            if not funct_is_dumpable(cleaned_func_name, called_ea):
                continue
            dumpable_func_calls.append(called_ea)
            dumpable_func_names.append(cleaned_func_name)
            parse_assembly_for_func_calls(called_ea)
        elif first_operand_type == idaapi.o_near:  # TODO: Handle idaapi.o_far jump type
            if len(insn_tokens) < 2:
                continue
            if insn_tokens[1] == "short":
                if len(insn_tokens) < 3:
                    continue
                start_loc_name = insn_tokens[2]
            else:
                start_loc_name = insn_tokens[1]
            # The per-function entries are created even if the label below
            # turns out to be unresolvable, matching what the dumper expects.
            if primary_func_name not in jump_locs:
                jump_locs[primary_func_name] = []
                jump_loc_names[primary_func_name] = {}
            loc_addr = idc.get_name_ea_simple(start_loc_name)
            if loc_addr == ida_idaapi.BADADDR:
                # TODO: Add logging for this
                continue
            if loc_addr in jump_locs[primary_func_name]:
                continue
            jump_locs[primary_func_name].append(loc_addr)
            jump_loc_names[primary_func_name]["{}".format(loc_addr)] = start_loc_name
# if first_operand_type == idaapi.o_void:
# continue
# if first_operand_type == idaapi.o_mem:
# # get_operand_value # Returns the offset added to the operand
# # TODO: Get the base address of the object too so the offset isn't lost ida_name.get_name_base_ea
# global_object_addr = idc.get_operand_value(insn, 0)
# global_object_name = idc.get_name(global_object_addr) # This function can take in flags that determine what name it returns
# object_addr_as_str = "{}".format(global_object_addr)
# if global_object_addr == ida_idaapi.BADADDR:
# # Add logging warning here
# print("Failed to add global object to list")
# elif not (object_addr_as_str in global_objects_list):
# print("Object is not in list {}".format(hex(global_object_addr)))
# global_objects_list[object_addr_as_str] = global_object_name
# if (second_operand_type == idaapi.o_mem):
# global_object_addr = idc.get_operand_value(insn, 1)
# global_object_name = idc.get_name(global_object_addr)
# object_addr_as_str = "{}".format(global_object_addr)
# if global_object_addr == ida_idaapi.BADADDR:
# # Add logging warning here
# print("Failed to add global object to list")
# elif not (object_addr_as_str in global_objects_list):
# print("Object is not in list {}".format(hex(global_object_addr)))
# global_objects_list[object_addr_as_str] = global_object_name
def addr_is_end_loc(func_name, addr):
    """Return True when *addr* is a recorded jump target of *func_name*.

    Looks *func_name* up in the module-level ``jump_locs`` mapping; a
    missing entry raises KeyError.
    """
    return any(target == addr for target in jump_locs[func_name])
def pre_parse_instruction(insn_str):
    """Space out arithmetic operators and drop ``;`` characters.

    Each character is copied to the result with these rules, checked in
    order:

    1. if the next character is one of ``+ - * /``, the character is
       emitted followed by a space (this applies to ``;`` as well);
    2. if the character itself is one of ``+ - * /``, it is emitted
       followed by a space;
    3. a ``;`` is dropped;
    4. anything else is copied unchanged.
    """
    operators = ("+", "-", "*", "/")
    last_pos = len(insn_str) - 1
    pieces = []
    for pos, ch in enumerate(insn_str):
        followed_by_operator = pos < last_pos and insn_str[pos + 1] in operators
        if followed_by_operator or ch in operators:
            pieces.append(ch + " ")
        elif ch != ";":
            pieces.append(ch)
    return "".join(pieces)
def post_parse_instruction_as_tokens(insn_str):
    """Rebuild an instruction string from its whitespace-separated tokens.

    The tokens ``short`` and ``offset`` are dropped, tokens recognised by
    ``is_ida_hex_str`` are rewritten with ``convert_ida_hex_str_to_true_hex``,
    and the remaining tokens are joined with single spaces. The result has
    no trailing space.

    Args:
        insn_str: Instruction text to normalise.

    Returns:
        The normalised instruction text.
    """
    kept_tokens = []
    for token in insn_str.split():
        if token in ("short", "offset"):
            continue
        if is_ida_hex_str(token):
            token = convert_ida_hex_str_to_true_hex(token)
        kept_tokens.append(token)
    return " ".join(kept_tokens)
def dump_function_code(func_ea, mangled_name, f_c, f_h):
    """Write one function's annotated disassembly and its declaration.

    Output written to *f_c*:

    * a ``/*<hex address>*/`` header line;
    * the function name followed by a comment listing the arguments
      reported by the decompiler (an empty list when none are available);
    * one line per function item holding the disassembly text, preceded by
      a ``<label>:`` line when the item is a recorded jump target;
    * when ``debug_dump_extra_data`` is set, a trailing ``//`` annotation
      with operand types and, for memory operands and calls, operand values.

    Output written to *f_h*: a single ``void <name>();`` declaration.

    Args:
        func_ea: Address of the function to dump.
        mangled_name: Name to emit; also the key used to look up jump labels.
        f_c: Writable text file object receiving the disassembly.
        f_h: Writable text file object receiving the declaration.
    """
    func_name = mangled_name
    f_c.write("\n/*{}*/\n".format(hex(func_ea)))

    # A None result is tolerated so the disassembly can still be dumped
    # without an argument list.
    cfunc_ptr = idaapi.decompile(func_ea)
    arguments = cfunc_ptr.arguments if cfunc_ptr is not None else None

    f_c.write("{}".format(func_name))
    if arguments:
        # TODO: Dump arg registers too
        f_c.write(" /*(")
        f_c.write(",".join(
            " {} {}".format(arg.type().dstr(), arg.name) for arg in arguments
        ))
        f_c.write(" )*/\n")
    else:
        f_c.write("/*( )*/\n")

    # Write the function declaration to the .h file
    f_h.write("void {}();\n".format(func_name))

    func_has_jump_locs = mangled_name in jump_locs
    for insn in idautils.FuncItems(func_ea):
        insn_as_string = idc.GetDisasm(insn)
        insn2 = idaapi.insn_t()
        idaapi.decode_insn(insn2, insn)
        instruction_name = insn2.get_canon_mnem()

        # Emit the label in front of any instruction that is a jump target.
        if func_has_jump_locs and addr_is_end_loc(mangled_name, insn2.ea):
            func_addr_as_str = "{}".format(insn2.ea)
            if func_addr_as_str in jump_loc_names[func_name]:
                f_c.write("{}:\n".format(jump_loc_names[func_name][func_addr_as_str]))

        f_c.write("\t")
        f_c.write(" {}".format(insn_as_string))
        if not debug_dump_extra_data:
            f_c.write("\n")
            continue

        first_operand_type = idc.get_operand_type(insn, 0)
        second_operand_type = idc.get_operand_type(insn, 1)
        wrote_first_operand_addrs = False
        wrote_second_operand_addrs = False

        if first_operand_type == idaapi.o_mem:
            # TODO: Get the base address of the object too so the offset isn't lost
            global_object_addr = idc.get_operand_value(insn, 0)
            f_c.write(" //:Type: <{}> :Addr: <{}>".format(first_operand_type, hex(global_object_addr)))
            wrote_first_operand_addrs = True
        if second_operand_type == idaapi.o_mem:
            global_object_addr = idc.get_operand_value(insn, 1)
            if wrote_first_operand_addrs:
                f_c.write(" {} {}".format(second_operand_type, hex(global_object_addr)))
            else:
                f_c.write(" //:Type: <{}> :Addr: <{}>".format(second_operand_type, hex(global_object_addr)))
            wrote_second_operand_addrs = True
        # A call through a memory operand was already annotated above;
        # skip it here so the same annotation is not written twice.
        if instruction_name == "call" and not wrote_first_operand_addrs:
            call_addr = idc.get_operand_value(insn, 0)
            f_c.write(" //:Type: <{}> :Addr: <{}>".format(first_operand_type, hex(call_addr)))
            wrote_first_operand_addrs = True

        if wrote_first_operand_addrs and not wrote_second_operand_addrs:
            f_c.write(" {}".format(second_operand_type))
        elif not wrote_first_operand_addrs and not wrote_second_operand_addrs:
            f_c.write(" //:Type: <{}> :Type: <{}>".format(first_operand_type, second_operand_type))
        f_c.write("\n")
    f_c.write("\n")
# Driver: collect the root function plus the functions it calls, then dump
# each of them to the .c/.h output files.
with open(output_file_c, "w") as f_c, open(output_file_h, "w") as f_h:
    # Write the header guard for the .h file
    f_h.write("#ifndef FUNCTION_DECLARATIONS_H\n#define FUNCTION_DECLARATIONS_H\n\n")

    # Address of the root function to dump
    # input_func_ea = 0x699E90 # Scr_GetFloat
    input_func_ea = 0x40FAA0 # G_FindItem
    input_func_name = idc.get_func_name(input_func_ea)

    # The root function is always the first entry to be dumped
    dumpable_func_calls.append(input_func_ea)
    dumpable_func_names.append(input_func_name)

    print("Starting search for functions recursively")
    parse_assembly_for_func_calls(input_func_ea)

    index = 0
    function_count = len(dumpable_func_names)
    print("Dumping {} functions max {}".format(function_count, max_functions_to_dump))
    # The two lists are appended to together, so they can be walked in lockstep.
    for func_addr, func_name in zip(dumpable_func_calls, dumpable_func_names):
        if index >= max_functions_to_dump:
            break
        print("Now dumping {} at address {} count {}".format(func_name, hex(func_addr), index))
        dump_function_code(func_addr, func_name, f_c, f_h)
        index += 1
    # Report how many functions were actually written out of those discovered.
    print("Successfully dumped {} of {} total functions".format(index, function_count))

    # Write the footer guard for the .h file
    f_h.write("\n#endif")
print("Function code written to {} and function declarations written to {}".format(output_file_c, output_file_h))