mirror of
				https://github.com/JezuzLizard/T4SP-Server-Plugin.git
				synced 2025-10-31 01:36:57 +00:00 
			
		
		
		
	Create dump_disassembly.py
This commit is contained in:
		
							
								
								
									
										393
									
								
								tools/dump_disassembly.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										393
									
								
								tools/dump_disassembly.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,393 @@ | ||||
| import idautils | ||||
| import idc | ||||
| import idaapi | ||||
| import ida_funcs | ||||
| import ida_idaapi | ||||
| import ida_typeinf | ||||
| import os | ||||
|  | ||||
| # Operand types returned by get_operand_Type | ||||
| # o_void = cvar.o_void | ||||
| # o_reg = cvar.o_reg | ||||
| # o_mem = cvar.o_mem | ||||
| # o_phrase = cvar.o_phrase | ||||
| # o_displ = cvar.o_displ | ||||
| # o_imm = cvar.o_imm | ||||
| # o_far = cvar.o_far # Jump | ||||
| # o_near = cvar.o_near # Jump | ||||
| # o_idpspec0 = cvar.o_idpspec0 | ||||
| # o_idpspec1 = cvar.o_idpspec1 | ||||
| # o_idpspec2 = cvar.o_idpspec2 | ||||
| # o_idpspec3 = cvar.o_idpspec3 | ||||
| # o_idpspec4 = cvar.o_idpspec4 | ||||
| # o_idpspec5 = cvar.o_idpspec5 | ||||
|  | ||||
| # Output file to write function code | ||||
| output_file_c = "function_code.c" | ||||
|  | ||||
| # Output file to write function declarations | ||||
| output_file_h = "function_declarations.h" | ||||
|  | ||||
| dumpable_funcs_searched = 0 | ||||
|  | ||||
| max_functions_to_dump = 50 | ||||
|  | ||||
| dumped_func_addr_array = [] | ||||
|  | ||||
| dumpable_func_calls = [] | ||||
|  | ||||
| dumpable_func_names = [] | ||||
|  | ||||
| global_objects_list = {} | ||||
|  | ||||
| debug_dump_extra_data = True | ||||
|  | ||||
| # array indexed by func names of pairs of a start addr | ||||
| jump_locs = {} | ||||
|  | ||||
| jump_loc_names = {} | ||||
|  | ||||
| def is_integer(n): | ||||
|     try: | ||||
|         int(n) | ||||
|     except ValueError: | ||||
|         return False | ||||
|     else: | ||||
|         return True | ||||
|  | ||||
| def item_in_array(array, item): | ||||
|     try: | ||||
|         array.index(item) | ||||
|     except ValueError: | ||||
|         return False | ||||
|     else: | ||||
|         return True | ||||
|  | ||||
| def is_ida_hex_str(str): | ||||
|     for char in str: | ||||
|         if not is_integer(char) and char != "h": | ||||
|             return False | ||||
|     return True | ||||
|  | ||||
| def convert_ida_hex_str_to_true_hex(str): | ||||
|     int_str = "" | ||||
|     for char in str: | ||||
|         if is_integer(char): | ||||
|             int_str += char | ||||
|     return hex(int(int_str)) | ||||
|  | ||||
| def clean_func_name(func_name): | ||||
|     has_colon = item_in_array(func_name, ":") | ||||
|     has_semicolon = item_in_array(func_name, ";") | ||||
|     new_name = "" | ||||
|     if has_colon: | ||||
|         new_name = func_name.split(":")[1] | ||||
|     elif has_semicolon: | ||||
|         new_name = func_name.split(";")[0] | ||||
|     else: | ||||
|         new_name = func_name | ||||
|     # index = 0 | ||||
|     # for char in func_name: | ||||
|     #     if char == ";": | ||||
|     #         continue | ||||
|     #     new_name += char | ||||
|     return new_name | ||||
|  | ||||
| def get_func_name_in_register(insn): | ||||
|     tokens = insn.split() | ||||
|     index = 0 | ||||
|     for token in tokens: | ||||
|         if token == ";": | ||||
|             fwd_index = index + 1 | ||||
|             if fwd_index > len(tokens): | ||||
|                 return "" | ||||
|             else: | ||||
|                 return tokens[index + 1] | ||||
|     return "" | ||||
|  | ||||
| def funct_is_dumpable(func_name, addr): | ||||
|     called_func = idaapi.get_func(addr) | ||||
|     called_func_flags = idc.get_func_flags(addr) | ||||
|     # data_segment_for_addr = idaapi.get_segm_by_name(".idata") | ||||
|     if not idaapi.is_func_entry(called_func): | ||||
|         return False | ||||
|     if idc.get_segm_name(addr) != ".text": | ||||
|         return False | ||||
|     if called_func_flags & ida_funcs.FUNC_LIB: | ||||
|         return False | ||||
|     # if not data_segment_for_addr: | ||||
|     #     return False | ||||
|     # if data_segment_for_addr.contains(called_ea): | ||||
|     #     return False | ||||
|     return True | ||||
|  | ||||
| # def parse_addr_from_loc_name(loc_name): | ||||
| #     index = 0 | ||||
| #     array_size = len(loc_name) | ||||
| #     for char in loc_name: | ||||
| #         while not is_integer(char) and  | ||||
|  | ||||
| def parse_assembly_for_func_calls(func_addr): | ||||
|     global dumpable_funcs_searched | ||||
|     if dumpable_funcs_searched >= max_functions_to_dump: | ||||
|         print("Hit func search limit {}".format(dumpable_funcs_searched)) | ||||
|         return | ||||
|     dumpable_funcs_searched += 1 | ||||
|     func = idaapi.get_func(func_addr) | ||||
|     if not func: | ||||
|         print("Couldn't find func at addr {}".format(hex(func_addr))) | ||||
|         return | ||||
|     primary_func_name = idc.get_func_name(func_addr) | ||||
|     # Iterate over the function's basic blocks | ||||
|     print("Searching function {}".format(primary_func_name)) | ||||
|     # Iterate over the instructions in the basic block | ||||
|     for insn in idautils.FuncItems(func_addr): | ||||
|         insn2 = idaapi.insn_t() | ||||
|         idaapi.decode_insn(insn2, insn) | ||||
|         # Write the instruction to the .c file | ||||
|         first_operand_type = idc.get_operand_type(insn, 0) | ||||
|         second_operand_type = idc.get_operand_type(insn, 1) | ||||
|         operand_name = insn2.get_canon_mnem() | ||||
|         insn_as_string = idc.GetDisasm(insn) | ||||
|         insn_tokens = insn_as_string.split() | ||||
|         if operand_name == "call": | ||||
|             called_func_name = insn_tokens[1] | ||||
|             cleaned_func_name = clean_func_name(called_func_name) | ||||
|             called_ea = idc.get_name_ea_simple(cleaned_func_name) | ||||
|              | ||||
|             if called_ea == ida_idaapi.BADADDR: | ||||
|                 continue | ||||
|  | ||||
|             # Handle functions stored in registers | ||||
|             if cleaned_func_name == "eax": | ||||
|                 # if get_func_name_in_register(insn_tokens) == "": | ||||
|                 #     print("Couldn't resolve function name for {} in primary func {}".format(insn_as_string, primary_func_name)) | ||||
|                 #     continue | ||||
|                 continue | ||||
|              | ||||
|             if item_in_array(dumpable_func_calls, called_ea): | ||||
|                 continue | ||||
|              | ||||
|             if not funct_is_dumpable(clean_func_name, called_ea): | ||||
|                 continue | ||||
|  | ||||
|             dumpable_func_calls.append(called_ea) | ||||
|             dumpable_func_names.append(cleaned_func_name) | ||||
|              | ||||
|             parse_assembly_for_func_calls(called_ea) | ||||
|         elif first_operand_type == idaapi.o_near: # TODO: Handle idaapi.o_far jump type | ||||
|             if insn_tokens[1] == "short": | ||||
|                 start_loc_name = insn_tokens[2] | ||||
|             else: | ||||
|                 start_loc_name = insn_tokens[1] | ||||
|              | ||||
|             if not primary_func_name in jump_locs: | ||||
|                 jump_locs[primary_func_name] = [] | ||||
|                 jump_loc_names[primary_func_name] = {} | ||||
|  | ||||
|             loc_addr = idc.get_name_ea_simple(start_loc_name) | ||||
|             if loc_addr == ida_idaapi.BADADDR: | ||||
|                 # TODO: Add logging for this | ||||
|                 continue | ||||
|             if loc_addr in jump_locs[primary_func_name]: | ||||
|                 continue | ||||
|             jump_locs[primary_func_name].append(loc_addr) | ||||
|             jump_loc_names[primary_func_name]["{}".format(loc_addr)] = start_loc_name | ||||
|         # if first_operand_type == idaapi.o_void: | ||||
|         #     continue | ||||
|         # if first_operand_type == idaapi.o_mem: | ||||
|         #     # get_operand_value # Returns the offset added to the operand | ||||
|         #     # TODO: Get the base address of the object too so the offset isn't lost ida_name.get_name_base_ea | ||||
|         #     global_object_addr = idc.get_operand_value(insn, 0) | ||||
|         #     global_object_name = idc.get_name(global_object_addr) # This function can take in flags that determine what name it returns | ||||
|         #     object_addr_as_str = "{}".format(global_object_addr) | ||||
|         #     if global_object_addr == ida_idaapi.BADADDR: | ||||
|         #         # Add logging warning here | ||||
|         #         print("Failed to add global object to list") | ||||
|         #     elif not (object_addr_as_str in global_objects_list): | ||||
|         #         print("Object is not in list {}".format(hex(global_object_addr))) | ||||
|         #         global_objects_list[object_addr_as_str] = global_object_name | ||||
|         # if (second_operand_type == idaapi.o_mem): | ||||
|         #     global_object_addr = idc.get_operand_value(insn, 1) | ||||
|         #     global_object_name = idc.get_name(global_object_addr) | ||||
|         #     object_addr_as_str = "{}".format(global_object_addr) | ||||
|         #     if global_object_addr == ida_idaapi.BADADDR: | ||||
|         #         # Add logging warning here | ||||
|         #         print("Failed to add global object to list") | ||||
|         #     elif not (object_addr_as_str in global_objects_list): | ||||
|         #         print("Object is not in list {}".format(hex(global_object_addr))) | ||||
|         #         global_objects_list[object_addr_as_str] = global_object_name | ||||
|  | ||||
| def addr_is_end_loc(func_name, addr): | ||||
|     for jump_loc in jump_locs[func_name]: | ||||
|         # print("addr_is_end_loc: {} {} {}".format(func_name, addr, jump_loc)) | ||||
|         if jump_loc == addr: | ||||
|             return True | ||||
|     return False | ||||
|  | ||||
| def pre_parse_instruction(insn_str): | ||||
|     index = 0 | ||||
|     pre_string = "" | ||||
|     string_len = len(insn_str) | ||||
|     for char in insn_str: | ||||
|         if (index + 1) < string_len: | ||||
|             fwd_char = insn_str[index + 1] | ||||
|             if fwd_char == "+" or fwd_char == "-" or fwd_char == "*" or fwd_char == "/": | ||||
|                 pre_string += char + " " | ||||
|                 index += 1 | ||||
|                 continue | ||||
|             if char == "+" or char == "-" or char == "*" or char == "/": | ||||
|                 pre_string += char + " " | ||||
|                 index += 1 | ||||
|                 continue | ||||
|             if char == ";": | ||||
|                 index += 1 | ||||
|                 continue | ||||
|             pre_string += char | ||||
|             index += 1 | ||||
|     return pre_string | ||||
|  | ||||
| def post_parse_instruction_as_tokens(insn_str): | ||||
|     final_string = "" | ||||
|     index = 0 | ||||
|     tokens = insn_str.split() | ||||
|     max_index = len(tokens) | ||||
|     for token in tokens: | ||||
|         if token == "short" or token == "offset": | ||||
|             index += 1 | ||||
|             continue | ||||
|         if is_ida_hex_str(token): | ||||
|             token = convert_ida_hex_str_to_true_hex(token) | ||||
|         if index != max_index: | ||||
|             final_string += token + " " | ||||
|         else: | ||||
|             final_string += token | ||||
|         index += 1     | ||||
|     return final_string | ||||
|  | ||||
| def dump_function_code(func_ea, mangled_name, f_c, f_h): | ||||
|     global debug_dump_extra_data | ||||
|     # Try to demangle the name | ||||
|     # demangled_name = idc.demangle_name(mangled_name, idc.INF_SHORT_DN) | ||||
|     func_name = mangled_name | ||||
|     # if demangled_name != "None": | ||||
|     #    func_name = demangled_name | ||||
|     f_c.write("\n/*{}*/\n".format(hex(func_ea))) | ||||
|     cfunc_ptr = idaapi.decompile(func_ea) | ||||
|     arguments = cfunc_ptr.arguments | ||||
|     f_c.write("{}".format(func_name)) | ||||
|     # lvar_t info | ||||
|     # type method on arguments index returns a tinfo_t | ||||
| 	# name class var on arguments index | ||||
|     if arguments: | ||||
|         f_c.write(" /*(") | ||||
|         # arg is a tinfo_t object | ||||
|         index = 0 | ||||
|         args_len = len(arguments) | ||||
|         # TODO: Dump arg registers too | ||||
|         for arg in arguments: | ||||
|             if index + 1 >= args_len: | ||||
|                 f_c.write(" {} {}".format(arg.type().dstr(), arg.name)) | ||||
|                 continue | ||||
|             f_c.write(" {} {},".format(arg.type().dstr(), arg.name)) | ||||
|             index += 1 | ||||
|         f_c.write(" )*/\n") | ||||
|     else: | ||||
|         f_c.write("/*( )*/\n") | ||||
|      | ||||
|     # Write the function declaration to the .h file | ||||
|     f_h.write("void {}();\n".format(func_name)) | ||||
|     func_has_jump_locs = mangled_name in jump_locs | ||||
|     # func_object = idaapi.get_func(func_ea) | ||||
|  | ||||
|     # Iterate over the instructions in the function | ||||
|     for insn in idautils.FuncItems(func_ea): | ||||
|         # Write the instruction to the .c file | ||||
|         insn_as_string = idc.GetDisasm(insn) | ||||
|         insn2 = idaapi.insn_t() | ||||
|         idaapi.decode_insn(insn2, insn) | ||||
|         instruction_name = insn2.get_canon_mnem() | ||||
|         if func_has_jump_locs: | ||||
|  | ||||
|             if addr_is_end_loc(mangled_name, insn2.ea): | ||||
|                 func_addr_as_str = "{}".format(insn2.ea) | ||||
|                 if func_addr_as_str in jump_loc_names[func_name]: | ||||
|                     f_c.write("{}:\n".format(jump_loc_names[func_name][func_addr_as_str])) | ||||
|         f_c.write("\t") | ||||
|         # pre_string = pre_parse_instruction(insn_as_string) | ||||
|         # final_string = post_parse_instruction_as_tokens(pre_string) | ||||
|         # f_c.write(final_string + "\n") | ||||
|         first_operand_type = idc.get_operand_type(insn, 0) | ||||
|         second_operand_type = idc.get_operand_type(insn, 1) | ||||
|         wrote_first_operand_addrs = False | ||||
|         wrote_second_operand_addrs = False | ||||
|         f_c.write(" {}".format(insn_as_string)) | ||||
|         if not debug_dump_extra_data: | ||||
|             f_c.write("\n") | ||||
|             continue | ||||
|         if first_operand_type == idaapi.o_mem: | ||||
|             # get_operand_value # Returns the offset added to the operand | ||||
|             # TODO: Get the base address of the object too so the offset isn't lost | ||||
|             global_object_addr = idc.get_operand_value(insn, 0) | ||||
|             # object_addr_as_str = "{}".format(global_object_addr) | ||||
|             f_c.write(" //:Type: <{}> :Addr: <{}>".format(first_operand_type, hex(global_object_addr))) | ||||
|             wrote_first_operand_addrs = True | ||||
|         if second_operand_type == idaapi.o_mem: | ||||
|             global_object_addr = idc.get_operand_value(insn, 1) | ||||
|             # object_addr_as_str = "{}".format(global_object_addr) | ||||
|             if wrote_first_operand_addrs: | ||||
|                 f_c.write(" {} {}".format(second_operand_type, hex(global_object_addr))) | ||||
|             else: | ||||
|                 f_c.write(" //:Type: <{}> :Addr: <{}>".format(second_operand_type, hex(global_object_addr))) | ||||
|             wrote_second_operand_addrs = True | ||||
|         if instruction_name == "call": | ||||
|             # get_operand_value # Returns the offset added to the operand | ||||
|             # TODO: Get the base address of the object too so the offset isn't lost | ||||
|             call_addr = idc.get_operand_value(insn, 0) | ||||
|             # object_addr_as_str = "{}".format(global_object_addr) | ||||
|             f_c.write(" //:Type: <{}> :Addr: <{}>".format(first_operand_type, hex(call_addr))) | ||||
|             wrote_first_operand_addrs = True | ||||
|         if wrote_first_operand_addrs and not wrote_second_operand_addrs: | ||||
|             f_c.write(" {}".format(second_operand_type)) | ||||
|         elif not wrote_first_operand_addrs and not wrote_second_operand_addrs: | ||||
|             f_c.write(" //:Type: <{}> :Type: <{}>".format(first_operand_type, second_operand_type)) | ||||
|              | ||||
|         f_c.write("\n") | ||||
|     f_c.write("\n") | ||||
|  | ||||
| # Open the output files in write mode | ||||
| with open(output_file_c, "w") as f_c, open(output_file_h, "w") as f_h: | ||||
|  | ||||
|     # Write the header guard for the .h file | ||||
|     f_h.write("#ifndef FUNCTION_DECLARATIONS_H\n#define FUNCTION_DECLARATIONS_H\n\n") | ||||
|  | ||||
|     # Name of the input function to dump | ||||
|     # input_func_ea = 0x699E90 # Scr_GetFloat | ||||
|  | ||||
|     input_func_ea = 0x40FAA0 # G_FindItem | ||||
|  | ||||
|     # Get the address of the input function | ||||
|     input_func_name = idc.get_func_name(input_func_ea) | ||||
|     # Define a function to dump the code for a given function | ||||
|     dumpable_func_calls.append(input_func_ea) | ||||
|     dumpable_func_names.append(input_func_name) | ||||
|                 # f_c.write(" //op: {}\n".format(insn2.get_canon_mnem())) | ||||
|     # Dump the code for the input function | ||||
|  | ||||
|     print("Starting search for functions recursively")	 | ||||
|     parse_assembly_for_func_calls(input_func_ea) | ||||
|      | ||||
|     index = 0 | ||||
|     function_count = len(dumpable_func_names) | ||||
|     print("Dumping {} functions max {}". format(function_count, max_functions_to_dump)) | ||||
|     for func_addr in dumpable_func_calls: | ||||
|         if index >= max_functions_to_dump: | ||||
|             break | ||||
|         func_name = dumpable_func_names[index] | ||||
|         print("Now dumping {} at address {} count {}".format(func_name, hex(func_addr), index)) | ||||
|         dump_function_code(func_addr, func_name, f_c, f_h) | ||||
|         index += 1 | ||||
|     print("Successfully dumped {} of {} total functions".format(function_count, max_functions_to_dump)) | ||||
|     # Write the footer guard for the .h file | ||||
|     f_h.write("\n#endif") | ||||
|     print("Function code written to {} and function declarations written to {}".format(output_file_c, output_file_h)) | ||||
		Reference in New Issue
	
	Block a user