Added output of found duplicates

Fixed find_duplicate
This commit is contained in:
aeltheos 2021-11-14 00:45:44 +01:00
parent dbb71d088a
commit ff50845a13

View file

@ -1,6 +1,6 @@
from django.core.management.base import BaseCommand, CommandError
from photologue_custom.models import Gallery
import argparse
import hashlib
@ -28,8 +28,15 @@ class Command(BaseCommand):
raise CommandError(
'Slug {} does not correspond to a gallery in the database.'.format(slug))
print('error')
#
# Find duplicates in all galleries
for gallery in galleries:
duplicate = find_duplicate(gallery)
self.stdout.write('Gallery {} :'.format(gallery.slug))
print(duplicate)
for (original, copies) in duplicate:
for copy in copies:
self.stdout.write('{} is duplicate of {}'.format(
copy.slug, original.slug))
def find_duplicate(gallery):
@ -40,22 +47,16 @@ def find_duplicate(gallery):
for photo in gallery.photos.all():
h0 = hashlib.sha256(photo.image.read()).digest()
if photo not in non_duplicate:
if h0 not in non_duplicate:
# Photo is not a duplicate
non_duplicate[h0] = photo
elif len(photo.slug) > len(non_duplicate[h0.slug()]):
# Photo is a duplicate and photo slug is longer
if non_duplicate[h0].slug in duplicate:
elif h0 in duplicate:
if len(photo.slug) > len(duplicate[h0][0].slug):
duplicate[h0][1] += [photo]
else:
duplicate[h0] = (non_duplicate[h0], [photo])
else:
# Photo is a duplicate and photo slug is shorter
if non_duplicate[h0].slug in duplicate:
duplicate[h0][1] += [duplicate[h0][0]]
duplicate[h0][0] = photo
duplicate[h0][1] += [non_duplicate[h0]]
else:
duplicate[h0] = (photo, [non_duplicate[h0]])
non_duplicate[h0] += [photo]
# Return only the values because the hashes aren't needed anymore
else:
duplicate[h0] = [non_duplicate[h0], [photo]]
# Return only the values because the hashes aren't useful to the caller
return duplicate.values()