"""Report lines that occur in more than one ``.txt`` file of a folder.

Run as a script, this scans ``out/`` and writes the report to
``out/duplicates/result.txt`` while echoing it to stdout.
"""
import os
from collections import defaultdict

# Folder containing the .txt files
out_folder = "out"
duplicates_folder = os.path.join(out_folder, "duplicates")
result_file_path = os.path.join(duplicates_folder, "result.txt")


def collect_line_sources(folder):
    """Map each non-blank, stripped line to the set of files containing it.

    Only regular files directly inside *folder* whose names end in ``.txt``
    are read; a file named ``result.txt`` is skipped so a report placed in
    the scanned folder is never treated as input.

    Args:
        folder: Path of the directory to scan (not recursive).

    Returns:
        A dict mapping line text to a set of file names (not full paths).

    Raises:
        FileNotFoundError: If *folder* does not exist.
        UnicodeDecodeError: If a scanned file is not valid UTF-8.
    """
    line_to_files = defaultdict(set)
    for filename in os.listdir(folder):
        if not filename.endswith(".txt") or filename == "result.txt":
            continue
        path = os.path.join(folder, filename)
        # A directory may also be named "*.txt"; open() would fail on it.
        if not os.path.isfile(path):
            continue
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if line:
                    line_to_files[line].add(filename)
    # Hand back a plain dict so later lookups cannot create empty entries.
    return dict(line_to_files)


def format_duplicates(line_to_files):
    """Return report lines for every entry found in more than one file.

    Args:
        line_to_files: Mapping of line text to a collection of file names.

    Returns:
        A list of ``"<line> -> in: <file>, <file>"`` strings, ordered by
        line text, with the file names of each entry sorted.
    """
    return [
        f"{line} -> in: {', '.join(sorted(line_to_files[line]))}"
        for line in sorted(line_to_files)
        if len(line_to_files[line]) > 1
    ]


def main(folder=out_folder, result_path=result_file_path):
    """Scan *folder* for duplicated lines, then print and save the report.

    Args:
        folder: Directory whose ``.txt`` files are compared.
        result_path: File the report is written to; its parent directory
            is created when missing.
    """
    # Create the output directory first: with the default paths this also
    # creates the scanned folder, so a fresh run yields an empty report
    # instead of failing in os.listdir().
    parent = os.path.dirname(result_path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    report_lines = format_duplicates(collect_line_sources(folder))

    # Open result file for writing
    with open(result_path, "w", encoding="utf-8") as result_file:
        print("Duplicate lines found in multiple files:\n")
        result_file.write("Duplicate lines found in multiple files:\n\n")
        for info in report_lines:
            print(info)
            result_file.write(info + "\n")

    print(f"\nResults saved to: {result_path}")


if __name__ == "__main__":
    main()