list-iwd/show-duplicates.py
2025-05-21 22:58:45 +02:00

35 lines
1.2 KiB
Python

import os
from collections import defaultdict
# Folder containing the .txt files
out_folder = "out"
duplicates_folder = os.path.join(out_folder, "duplicates")
result_file_path = os.path.join(duplicates_folder, "result.txt")


def _collect_line_sources(folder):
    """Map each non-empty, stripped line to the set of .txt files containing it.

    Only regular files directly inside *folder* whose names end in ".txt" are
    read; a file named "result.txt" is skipped so a report is never scanned
    as input.
    """
    line_to_files = defaultdict(set)
    for filename in os.listdir(folder):
        if not filename.endswith(".txt") or filename == "result.txt":
            continue
        path = os.path.join(folder, filename)
        # os.listdir also yields directories; opening one would raise.
        if not os.path.isfile(path):
            continue
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if line:
                    line_to_files[line].add(filename)
    return line_to_files


def _format_duplicates(line_to_files):
    """Return one report line per text line found in more than one file.

    Report lines are ordered by the duplicated text, and the file names
    within each report line are sorted as well, so the output is stable.
    """
    return [
        f"{line} -> in: {', '.join(sorted(line_to_files[line]))}"
        for line in sorted(line_to_files)
        if len(line_to_files[line]) > 1
    ]


def main(folder=out_folder, result_path=result_file_path):
    """Report lines shared by several .txt files in *folder*.

    The report is printed to stdout and written to *result_path*; the
    directory that holds *result_path* is created when missing.

    Args:
        folder: Directory whose .txt files are scanned.
        result_path: Path of the report file to (over)write.
    """
    # Create the report directory before scanning: with the default paths
    # this also creates the input folder when it does not exist yet.
    result_dir = os.path.dirname(result_path)
    if result_dir:
        os.makedirs(result_dir, exist_ok=True)

    report_lines = _format_duplicates(_collect_line_sources(folder))

    with open(result_path, "w", encoding="utf-8") as result_file:
        print("Duplicate lines found in multiple files:\n")
        result_file.write("Duplicate lines found in multiple files:\n\n")
        for info in report_lines:
            print(info)
            result_file.write(info + "\n")
    print(f"\nResults saved to: {result_path}")


if __name__ == "__main__":
    main()