Add a script to find duplicates
This commit is contained in:
parent
10d0f5dbfc
commit
8eada425be
1 changed files with 18 additions and 0 deletions
18
find-duplicates.py
Normal file
18
find-duplicates.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
import os
|
||||
from collections import defaultdict
|
||||
|
||||
|
||||
# Directory scanned when the script is run directly.
SIGNED_DIR = "_data/signed"

# Prefix that marks the line holding an entry's link.
LINK_PREFIX = "link:"

# Entries whose link is exactly this value are never reported
# (presumably a placeholder meaning "no link" -- confirm with the data format).
PLACEHOLDER_LINK = "/#"


def extract_link(contents):
    """Return the stripped value of the first ``link:`` line in *contents*.

    Carriage returns are removed before splitting on newlines, so files with
    Windows line endings are handled the same way as Unix ones.

    Returns ``None`` when no line starts with ``link:``.
    """
    for line in contents.replace("\r", "").split("\n"):
        if line.startswith(LINK_PREFIX):
            return line[len(LINK_PREFIX):].strip()
    return None


def find_duplicates(directory=SIGNED_DIR):
    """Map each link used by more than one file to the files that use it.

    Files in *directory* are visited in sorted name order, so each list of
    file names is sorted as well. Sub-directories are ignored, files whose
    link equals ``PLACEHOLDER_LINK`` are skipped, and files without any
    ``link:`` line are reported on stderr and skipped instead of aborting
    the whole run.

    Returns a dict ``{link: [file_name, ...]}`` containing only links that
    appear in at least two files.
    """
    import sys  # local import: only needed for the warning below

    file_names_by_link = defaultdict(list)

    for file_name in sorted(os.listdir(directory)):
        path = os.path.join(directory, file_name)
        if not os.path.isfile(path):
            continue

        # Explicit encoding so the result does not depend on the platform locale.
        with open(path, encoding="utf-8") as f:
            link = extract_link(f.read())

        if link is None:
            print(f"warning: no 'link:' line in {file_name}", file=sys.stderr)
            continue
        if link == PLACEHOLDER_LINK:
            continue

        file_names_by_link[link].append(file_name)

    return {
        link: file_names
        for link, file_names in file_names_by_link.items()
        if len(file_names) > 1
    }


def main():
    """Print one line per duplicated link found in ``SIGNED_DIR``."""
    for link, file_names in find_duplicates().items():
        print(link, "duplicates:", file_names)


if __name__ == "__main__":
    main()
|
Loading…
Reference in a new issue