From 3f71e99a0bc07868eb0f663a5cb585e1f27529c9 Mon Sep 17 00:00:00 2001 From: JezuzLizard Date: Fri, 5 May 2023 00:40:57 -0700 Subject: [PATCH] Create dump_disassembly.py --- tools/dump_disassembly.py | 393 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 393 insertions(+) create mode 100644 tools/dump_disassembly.py diff --git a/tools/dump_disassembly.py b/tools/dump_disassembly.py new file mode 100644 index 0000000..27737d4 --- /dev/null +++ b/tools/dump_disassembly.py @@ -0,0 +1,393 @@ +import idautils +import idc +import idaapi +import ida_funcs +import ida_idaapi +import ida_typeinf +import os + +# Operand types returned by get_operand_Type +# o_void = cvar.o_void +# o_reg = cvar.o_reg +# o_mem = cvar.o_mem +# o_phrase = cvar.o_phrase +# o_displ = cvar.o_displ +# o_imm = cvar.o_imm +# o_far = cvar.o_far # Jump +# o_near = cvar.o_near # Jump +# o_idpspec0 = cvar.o_idpspec0 +# o_idpspec1 = cvar.o_idpspec1 +# o_idpspec2 = cvar.o_idpspec2 +# o_idpspec3 = cvar.o_idpspec3 +# o_idpspec4 = cvar.o_idpspec4 +# o_idpspec5 = cvar.o_idpspec5 + +# Output file to write function code +output_file_c = "function_code.c" + +# Output file to write function declarations +output_file_h = "function_declarations.h" + +dumpable_funcs_searched = 0 + +max_functions_to_dump = 50 + +dumped_func_addr_array = [] + +dumpable_func_calls = [] + +dumpable_func_names = [] + +global_objects_list = {} + +debug_dump_extra_data = True + +# array indexed by func names of pairs of a start addr +jump_locs = {} + +jump_loc_names = {} + +def is_integer(n): + try: + int(n) + except ValueError: + return False + else: + return True + +def item_in_array(array, item): + try: + array.index(item) + except ValueError: + return False + else: + return True + +def is_ida_hex_str(str): + for char in str: + if not is_integer(char) and char != "h": + return False + return True + +def convert_ida_hex_str_to_true_hex(str): + int_str = "" + for char in str: + if is_integer(char): + int_str += char + return hex(int(int_str)) + +def clean_func_name(func_name): + has_colon = item_in_array(func_name, ":") + has_semicolon = item_in_array(func_name, ";") + new_name = "" + if has_colon: + new_name = func_name.split(":")[1] + elif has_semicolon: + new_name = func_name.split(";")[0] + else: + new_name = func_name + # index = 0 + # for char in func_name: + # if char == ";": + # continue + # new_name += char + return new_name + +def get_func_name_in_register(insn): + tokens = insn.split() + index = 0 + for token in tokens: + if token == ";": + fwd_index = index + 1 + if fwd_index > len(tokens): + return "" + else: + return tokens[index + 1] + return "" + +def funct_is_dumpable(func_name, addr): + called_func = idaapi.get_func(addr) + called_func_flags = idc.get_func_flags(addr) + # data_segment_for_addr = idaapi.get_segm_by_name(".idata") + if not idaapi.is_func_entry(called_func): + return False + if idc.get_segm_name(addr) != ".text": + return False + if called_func_flags & ida_funcs.FUNC_LIB: + return False + # if not data_segment_for_addr: + # return False + # if data_segment_for_addr.contains(called_ea): + # return False + return True + +# def parse_addr_from_loc_name(loc_name): +# index = 0 +# array_size = len(loc_name) +# for char in loc_name: +# while not is_integer(char) and + +def parse_assembly_for_func_calls(func_addr): + global dumpable_funcs_searched + if dumpable_funcs_searched >= max_functions_to_dump: + print("Hit func search limit {}".format(dumpable_funcs_searched)) + return + dumpable_funcs_searched += 1 + func = idaapi.get_func(func_addr) + if not func: + print("Couldn't find func at addr {}".format(hex(func_addr))) + return + primary_func_name = idc.get_func_name(func_addr) + # Iterate over the function's basic blocks + print("Searching function {}".format(primary_func_name)) + # Iterate over the instructions in the basic block + for insn in idautils.FuncItems(func_addr): + insn2 = idaapi.insn_t() + idaapi.decode_insn(insn2, insn) + # Write the instruction to the .c file + first_operand_type = idc.get_operand_type(insn, 0) + second_operand_type = idc.get_operand_type(insn, 1) + operand_name = insn2.get_canon_mnem() + insn_as_string = idc.GetDisasm(insn) + insn_tokens = insn_as_string.split() + if operand_name == "call": + called_func_name = insn_tokens[1] + cleaned_func_name = clean_func_name(called_func_name) + called_ea = idc.get_name_ea_simple(cleaned_func_name) + + if called_ea == ida_idaapi.BADADDR: + continue + + # Handle functions stored in registers + if cleaned_func_name == "eax": + # if get_func_name_in_register(insn_tokens) == "": + # print("Couldn't resolve function name for {} in primary func {}".format(insn_as_string, primary_func_name)) + # continue + continue + + if item_in_array(dumpable_func_calls, called_ea): + continue + + if not funct_is_dumpable(clean_func_name, called_ea): + continue + + dumpable_func_calls.append(called_ea) + dumpable_func_names.append(cleaned_func_name) + + parse_assembly_for_func_calls(called_ea) + elif first_operand_type == idaapi.o_near: # TODO: Handle idaapi.o_far jump type + if insn_tokens[1] == "short": + start_loc_name = insn_tokens[2] + else: + start_loc_name = insn_tokens[1] + + if not primary_func_name in jump_locs: + jump_locs[primary_func_name] = [] + jump_loc_names[primary_func_name] = {} + + loc_addr = idc.get_name_ea_simple(start_loc_name) + if loc_addr == ida_idaapi.BADADDR: + # TODO: Add logging for this + continue + if loc_addr in jump_locs[primary_func_name]: + continue + jump_locs[primary_func_name].append(loc_addr) + jump_loc_names[primary_func_name]["{}".format(loc_addr)] = start_loc_name + # if first_operand_type == idaapi.o_void: + # continue + # if first_operand_type == idaapi.o_mem: + # # get_operand_value # Returns the offset added to the operand + # # TODO: Get the base address of the object too so the offset isn't lost ida_name.get_name_base_ea + # global_object_addr = idc.get_operand_value(insn, 0) + # global_object_name = idc.get_name(global_object_addr) # This function can take in flags that determine what name it returns + # object_addr_as_str = "{}".format(global_object_addr) + # if global_object_addr == ida_idaapi.BADADDR: + # # Add logging warning here + # print("Failed to add global object to list") + # elif not (object_addr_as_str in global_objects_list): + # print("Object is not in list {}".format(hex(global_object_addr))) + # global_objects_list[object_addr_as_str] = global_object_name + # if (second_operand_type == idaapi.o_mem): + # global_object_addr = idc.get_operand_value(insn, 1) + # global_object_name = idc.get_name(global_object_addr) + # object_addr_as_str = "{}".format(global_object_addr) + # if global_object_addr == ida_idaapi.BADADDR: + # # Add logging warning here + # print("Failed to add global object to list") + # elif not (object_addr_as_str in global_objects_list): + # print("Object is not in list {}".format(hex(global_object_addr))) + # global_objects_list[object_addr_as_str] = global_object_name + +def addr_is_end_loc(func_name, addr): + for jump_loc in jump_locs[func_name]: + # print("addr_is_end_loc: {} {} {}".format(func_name, addr, jump_loc)) + if jump_loc == addr: + return True + return False + +def pre_parse_instruction(insn_str): + index = 0 + pre_string = "" + string_len = len(insn_str) + for char in insn_str: + if (index + 1) < string_len: + fwd_char = insn_str[index + 1] + if fwd_char == "+" or fwd_char == "-" or fwd_char == "*" or fwd_char == "/": + pre_string += char + " " + index += 1 + continue + if char == "+" or char == "-" or char == "*" or char == "/": + pre_string += char + " " + index += 1 + continue + if char == ";": + index += 1 + continue + pre_string += char + index += 1 + return pre_string + +def post_parse_instruction_as_tokens(insn_str): + final_string = "" + index = 0 + tokens = insn_str.split() + max_index = len(tokens) + for token in tokens: + if token == "short" or token == "offset": + index += 1 + continue + if is_ida_hex_str(token): + token = convert_ida_hex_str_to_true_hex(token) + if index != max_index: + final_string += token + " " + else: + final_string += token + index += 1 + return final_string + +def dump_function_code(func_ea, mangled_name, f_c, f_h): + global debug_dump_extra_data + # Try to demangle the name + # demangled_name = idc.demangle_name(mangled_name, idc.INF_SHORT_DN) + func_name = mangled_name + # if demangled_name != "None": + # func_name = demangled_name + f_c.write("\n/*{}*/\n".format(hex(func_ea))) + cfunc_ptr = idaapi.decompile(func_ea) + arguments = cfunc_ptr.arguments + f_c.write("{}".format(func_name)) + # lvar_t info + # type method on arguments index returns a tinfo_t + # name class var on arguments index + if arguments: + f_c.write(" /*(") + # arg is a tinfo_t object + index = 0 + args_len = len(arguments) + # TODO: Dump arg registers too + for arg in arguments: + if index + 1 >= args_len: + f_c.write(" {} {}".format(arg.type().dstr(), arg.name)) + continue + f_c.write(" {} {},".format(arg.type().dstr(), arg.name)) + index += 1 + f_c.write(" )*/\n") + else: + f_c.write("/*( )*/\n") + + # Write the function declaration to the .h file + f_h.write("void {}();\n".format(func_name)) + func_has_jump_locs = mangled_name in jump_locs + # func_object = idaapi.get_func(func_ea) + + # Iterate over the instructions in the function + for insn in idautils.FuncItems(func_ea): + # Write the instruction to the .c file + insn_as_string = idc.GetDisasm(insn) + insn2 = idaapi.insn_t() + idaapi.decode_insn(insn2, insn) + instruction_name = insn2.get_canon_mnem() + if func_has_jump_locs: + + if addr_is_end_loc(mangled_name, insn2.ea): + func_addr_as_str = "{}".format(insn2.ea) + if func_addr_as_str in jump_loc_names[func_name]: + f_c.write("{}:\n".format(jump_loc_names[func_name][func_addr_as_str])) + f_c.write("\t") + # pre_string = pre_parse_instruction(insn_as_string) + # final_string = post_parse_instruction_as_tokens(pre_string) + # f_c.write(final_string + "\n") + first_operand_type = idc.get_operand_type(insn, 0) + second_operand_type = idc.get_operand_type(insn, 1) + wrote_first_operand_addrs = False + wrote_second_operand_addrs = False + f_c.write(" {}".format(insn_as_string)) + if not debug_dump_extra_data: + f_c.write("\n") + continue + if first_operand_type == idaapi.o_mem: + # get_operand_value # Returns the offset added to the operand + # TODO: Get the base address of the object too so the offset isn't lost + global_object_addr = idc.get_operand_value(insn, 0) + # object_addr_as_str = "{}".format(global_object_addr) + f_c.write(" //:Type: <{}> :Addr: <{}>".format(first_operand_type, hex(global_object_addr))) + wrote_first_operand_addrs = True + if second_operand_type == idaapi.o_mem: + global_object_addr = idc.get_operand_value(insn, 1) + # object_addr_as_str = "{}".format(global_object_addr) + if wrote_first_operand_addrs: + f_c.write(" {} {}".format(second_operand_type, hex(global_object_addr))) + else: + f_c.write(" //:Type: <{}> :Addr: <{}>".format(second_operand_type, hex(global_object_addr))) + wrote_second_operand_addrs = True + if instruction_name == "call": + # get_operand_value # Returns the offset added to the operand + # TODO: Get the base address of the object too so the offset isn't lost + call_addr = idc.get_operand_value(insn, 0) + # object_addr_as_str = "{}".format(global_object_addr) + f_c.write(" //:Type: <{}> :Addr: <{}>".format(first_operand_type, hex(call_addr))) + wrote_first_operand_addrs = True + if wrote_first_operand_addrs and not wrote_second_operand_addrs: + f_c.write(" {}".format(second_operand_type)) + elif not wrote_first_operand_addrs and not wrote_second_operand_addrs: + f_c.write(" //:Type: <{}> :Type: <{}>".format(first_operand_type, second_operand_type)) + + f_c.write("\n") + f_c.write("\n") + +# Open the output files in write mode +with open(output_file_c, "w") as f_c, open(output_file_h, "w") as f_h: + + # Write the header guard for the .h file + f_h.write("#ifndef FUNCTION_DECLARATIONS_H\n#define FUNCTION_DECLARATIONS_H\n\n") + + # Name of the input function to dump + # input_func_ea = 0x699E90 # Scr_GetFloat + + input_func_ea = 0x40FAA0 # G_FindItem + + # Get the address of the input function + input_func_name = idc.get_func_name(input_func_ea) + # Define a function to dump the code for a given function + dumpable_func_calls.append(input_func_ea) + dumpable_func_names.append(input_func_name) + # f_c.write(" //op: {}\n".format(insn2.get_canon_mnem())) + # Dump the code for the input function + + print("Starting search for functions recursively") + parse_assembly_for_func_calls(input_func_ea) + + index = 0 + function_count = len(dumpable_func_names) + print("Dumping {} functions max {}". format(function_count, max_functions_to_dump)) + for func_addr in dumpable_func_calls: + if index >= max_functions_to_dump: + break + func_name = dumpable_func_names[index] + print("Now dumping {} at address {} count {}".format(func_name, hex(func_addr), index)) + dump_function_code(func_addr, func_name, f_c, f_h) + index += 1 + print("Successfully dumped {} of {} total functions".format(function_count, max_functions_to_dump)) + # Write the footer guard for the .h file + f_h.write("\n#endif") + print("Function code written to {} and function declarations written to {}".format(output_file_c, output_file_h)) \ No newline at end of file