feat: save duplicates
This commit is contained in:
34
show-duplicates.py
Normal file
34
show-duplicates.py
Normal file
@ -0,0 +1,34 @@
|
||||
"""Report lines that appear in more than one .txt file of the output folder.

Every ``.txt`` file directly inside ``out_folder`` is scanned. Lines are
compared after stripping surrounding whitespace and blank lines are ignored.
The report is printed to stdout and also written to ``result_file_path``.
"""

import os
from collections import defaultdict

# Folder containing the .txt files
out_folder = "out"
duplicates_folder = os.path.join(out_folder, "duplicates")
result_file_path = os.path.join(duplicates_folder, "result.txt")


def collect_line_sources(folder):
    """Map each non-blank, stripped line to the set of files containing it.

    Args:
        folder: Directory whose ``.txt`` files (non-recursive) are scanned.

    Returns:
        A ``defaultdict(set)`` keyed by line text; each value is the set of
        file names (not full paths) in which that line occurs.

    Raises:
        OSError: If ``folder`` cannot be listed or a file cannot be read.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    line_to_files = defaultdict(set)
    for filename in os.listdir(folder):
        if not filename.endswith(".txt") or filename == "result.txt":
            continue
        path = os.path.join(folder, filename)
        # A directory whose name ends in ".txt" would make open() raise;
        # only regular files can contribute lines.
        if not os.path.isfile(path):
            continue
        with open(path, "r", encoding="utf-8") as f:
            for raw_line in f:
                line = raw_line.strip()
                if line:
                    line_to_files[line].add(filename)
    return line_to_files


def find_duplicates(line_to_files):
    """Return the lines that occur in more than one file.

    Args:
        line_to_files: Mapping of line text to a collection of file names.

    Returns:
        A list of ``(line, sorted_file_names)`` tuples, ordered by line text,
        containing only lines found in at least two files.
    """
    return [
        (line, sorted(line_to_files[line]))
        for line in sorted(line_to_files)
        if len(line_to_files[line]) > 1
    ]


def format_duplicate(line, files):
    """Format one report entry as ``<line> -> in: <file>, <file>, ...``."""
    return f"{line} -> in: {', '.join(files)}"


def main():
    """Scan ``out_folder`` and print/save the duplicate-line report."""
    # Creating the nested folder also creates out_folder if it is missing,
    # so the scan below never fails on a fresh checkout.
    os.makedirs(duplicates_folder, exist_ok=True)

    # Scan before opening the result file so a read error does not leave a
    # truncated report behind.
    duplicates = find_duplicates(collect_line_sources(out_folder))

    # Open result file for writing
    with open(result_file_path, "w", encoding="utf-8") as result_file:
        print("Duplicate lines found in multiple files:\n")
        result_file.write("Duplicate lines found in multiple files:\n\n")
        for line, files in duplicates:
            info = format_duplicate(line, files)
            print(info)
            result_file.write(info + "\n")

    print(f"\nResults saved to: {result_file_path}")


if __name__ == "__main__":
    main()
|
Reference in New Issue
Block a user