finished find duplicate function
This commit is contained in:
parent
29e9dba141
commit
b347ac76c1
1 changed files with 43 additions and 0 deletions
43
photologue_custom/management/commands/duplicate.py
Normal file
43
photologue_custom/management/commands/duplicate.py
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
from django.core.management.base import BaseCommand
|
||||
|
||||
import argparse
|
||||
import hashlib
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Management command meant to list duplicate photos in galleries.

    Both hooks are still placeholders: no command-line arguments are
    registered and running the command performs no work.
    """

    help = "List all duplicate for chosen galleries"

    def add_arguments(self, parser):
        """Register command-line arguments (none are defined yet)."""

    def handle(self, *args, **options):
        """Run the command (currently a no-op)."""
|
||||
|
||||
|
||||
def find_duplicate(gallery):
    """Group the photos of *gallery* whose image content is identical.

    Photos are compared by the SHA-256 digest of their image bytes. In each
    group of identical photos the one with the shortest slug is treated as
    the original; a later photo whose slug has the same length as the
    current original replaces it.

    Args:
        gallery: Object exposing ``photos.all()``, an iterable of photos.
            Each photo must provide ``slug`` (a string) and ``image.read()``
            returning the image content as bytes.

    Returns:
        A view of ``[original, [duplicates]]`` lists, one per group that
        contains at least two identical photos. Empty when there are no
        duplicates.
    """
    # Current "original" photo for every content hash seen so far.
    originals = {}
    # Groups with at least one duplicate: {hash: [original, [duplicates]]}.
    duplicates = {}

    for photo in gallery.photos.all():
        # NOTE(review): the whole image is read into memory; this assumes
        # the file is positioned at its start -- confirm for reused files.
        digest = hashlib.sha256(photo.image.read()).digest()

        if digest not in originals:
            # First photo with this content: not a duplicate (yet).
            originals[digest] = photo
            continue

        current = originals[digest]
        if len(photo.slug) > len(current.slug):
            # Longer slug: the new photo is the duplicate.
            keeper, extra = current, photo
        else:
            # Shorter (or equal) slug: the new photo becomes the original
            # and the previous original is demoted to a duplicate.
            keeper, extra = photo, current
            originals[digest] = photo

        if digest in duplicates:
            group = duplicates[digest]
            group[0] = keeper
            group[1].append(extra)
        else:
            duplicates[digest] = [keeper, [extra]]

    # Only the groups are returned; the hashes are no longer needed.
    return duplicates.values()
|
||||
Loading…
Add table
Add a link
Reference in a new issue