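"""Find duplicate lines across the .txt files in the "out" folder.

Each non-empty line is mapped to the set of files it appears in; lines that
occur in more than one file are printed and written to out/duplicates/result.txt.
"""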
import os
from collections import defaultdict

# Folder containing the .txt files
out_folder = "out"
duplicates_folder = os.path.join(out_folder, "duplicates")
result_file_path = os.path.join(duplicates_folder, "result.txt")

os.makedirs(duplicates_folder, exist_ok=True)

# Map each line to a set of files that contain it
line_to_files = defaultdict(set)

# Iterate over all .txt files in the out folder
for filename in os.listdir(out_folder):
    if filename.endswith(".txt") and filename != "result.txt":
        path = os.path.join(out_folder, filename)
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if line:
                    line_to_files[line].add(filename)

# Open result file for writing and report lines seen in more than one file
with open(result_file_path, "w", encoding="utf-8") as result_file:
    print("Duplicate lines found in multiple files:\n")
    result_file.write("Duplicate lines found in multiple files:\n\n")
    for line, files in sorted(line_to_files.items()):
        if len(files) > 1:
            info = f"{line} -> in: {', '.join(sorted(files))}"
            print(info)
            result_file.write(info + "\n")

print(f"\nResults saved to: {result_file_path}")