finished find duplicate function

This commit is contained in:
aeltheos 2021-11-12 21:40:00 +01:00
parent 29e9dba141
commit b347ac76c1

View file

@ -0,0 +1,43 @@
from django.core.management.base import BaseCommand
import argparse
import hashlib
class Command(BaseCommand):
    """Management command skeleton for listing duplicates in galleries.

    Both hooks are placeholders: no command-line arguments are registered
    and running the command performs no work.
    """

    help = 'List all duplicate for chosen galleries'

    def add_arguments(self, parser):
        """Placeholder: registers no arguments on ``parser``."""

    def handle(self, *args, **options):
        """Placeholder: does nothing and returns ``None``."""
def find_duplicate(gallery):
    """Group the photos of ``gallery`` whose image content is identical.

    Photos are compared by the SHA-256 digest of the bytes returned by
    ``photo.image.read()``. Within a group of identical photos, the one
    kept as the "original" is chosen by slug length: a photo whose slug is
    strictly longer than the current original's slug is recorded as a
    duplicate; otherwise it replaces the current original, which is then
    recorded as a duplicate.

    Args:
        gallery: Object whose ``photos.all()`` yields photos exposing
            ``image.read()`` (returning bytes) and a ``slug`` string.

    Returns:
        A dict values view with one ``[original, [duplicates]]`` list per
        group that contains at least one duplicate. Empty when no two
        photos share the same content.
    """
    # digest -> photo currently considered the original for that content.
    originals = {}
    # digest -> [original, [duplicates]]; only holds groups with duplicates.
    duplicates = {}

    for photo in gallery.photos.all():
        # NOTE(review): this reads the whole image into memory and never
        # closes the file explicitly -- confirm that is acceptable for the
        # storage backend in use.
        digest = hashlib.sha256(photo.image.read()).digest()

        original = originals.get(digest)
        if original is None:
            # First photo seen with this content: not a duplicate (yet).
            originals[digest] = photo
            continue

        # Lookups are keyed by digest (not by photo or slug) so a group is
        # created once and extended afterwards instead of being overwritten.
        group = duplicates.setdefault(digest, [original, []])
        if len(photo.slug) > len(original.slug):
            # Longer slug: the new photo is the duplicate.
            group[1].append(photo)
        else:
            # Shorter (or equal) slug: the new photo becomes the original
            # and the previous original is demoted to duplicate.
            group[0] = photo
            group[1].append(original)
            originals[digest] = photo

    # Only the groups are returned; the digests are no longer needed.
    return duplicates.values()