"""Management command that lists photos duplicated inside galleries."""
import argparse
import hashlib

from django.core.management.base import BaseCommand, CommandError

from photologue_custom.models import Gallery


class Command(BaseCommand):
    """List, per selected gallery, the photos sharing the same image content."""

    help = 'List all duplicate for chosen galleries'

    def add_arguments(self, parser):
        """Declare the ``--slugs``, ``-a/--all`` and ``-d/--delete`` options."""
        parser.add_argument(
            '--slugs',
            nargs='+',
            help='Try to find duplicate in the selected galleries',
            default=[])
        parser.add_argument(
            '-a', '--all',
            action='store_true',
            help='Try to find duplicate in all galleries, overide any slugs given')
        parser.add_argument('-d', '--delete', action='store_true')

    def handle(self, *args, **options):
        """Select the galleries and write their duplicated photos to stdout.

        ``--all`` takes precedence over ``--slugs``. One line is written per
        group of identical photos: the gallery slug, the slug of the photo
        kept as original, and the slugs of its duplicates.

        Raises:
            CommandError: if one of the given slugs matches no gallery.
        """
        if options['all']:
            galleries = Gallery.objects.all()
        else:
            galleries = _galleries_for_slugs(options['slugs'])

        for gallery in galleries:
            for original, copies in find_duplicate(gallery):
                self.stdout.write('{}: {} is duplicated by {}'.format(
                    gallery.slug,
                    original.slug,
                    ', '.join(copy.slug for copy in copies)))

        if options['delete']:
            # NOTE(review): the flag was declared but never acted upon.
            # Deleting photos is destructive, so it is reported rather than
            # guessed at here.
            self.stderr.write(
                '--delete is not implemented: no photo was deleted.')


def _galleries_for_slugs(slugs):
    """Return the galleries matching ``slugs``, in the order of ``slugs``.

    Raises:
        CommandError: if a slug matches no gallery.
    """
    if not slugs:
        return []

    # Query the galleries once instead of once per requested slug. Should
    # two galleries share a slug, the first one returned by the query wins.
    by_slug = {}
    for gallery in Gallery.objects.all():
        by_slug.setdefault(gallery.slug, gallery)

    selected = []
    for slug in slugs:
        if slug not in by_slug:
            raise CommandError(
                'Slug {} does not correspond to a gallery in the database.'.format(slug))
        selected.append(by_slug[slug])
    return selected


def _content_digest(photo):
    """Return the SHA-256 digest of the content of ``photo.image``.

    Assumes ``photo.image`` is a Django ``FieldFile`` (``open``, ``chunks``
    and ``close`` are used) -- TODO confirm against the ``Photo`` model.
    """
    hasher = hashlib.sha256()
    photo.image.open('rb')
    try:
        # Hash chunk by chunk so a large image is never fully held in memory.
        for chunk in photo.image.chunks():
            hasher.update(chunk)
    finally:
        # Always release the file handle, one is opened per photo.
        photo.image.close()
    return hasher.digest()


def find_duplicate(gallery):
    """Group the photos of ``gallery`` that have identical image content.

    Photos are compared through the SHA-256 digest of their image. Within a
    group, a photo with a strictly longer slug than the current original is
    filed as a duplicate; otherwise it becomes the new original and the
    previous original is filed as a duplicate.

    Returns:
        A view of ``(original, [duplicates])`` tuples, one per group that
        contains at least one duplicate.
    """
    # digest -> photo currently considered the original for that content
    originals = {}
    # digest -> photos found to duplicate that original
    copies = {}
    for photo in gallery.photos.all():
        digest = _content_digest(photo)
        if digest not in originals:
            # First photo seen with this content.
            originals[digest] = photo
            continue

        current = originals[digest]
        if len(photo.slug) > len(current.slug):
            # Longer slug: keep the current original.
            copies.setdefault(digest, []).append(photo)
        else:
            # Shorter (or equal) slug: this photo becomes the original.
            originals[digest] = photo
            copies.setdefault(digest, []).append(current)

    # Digests are only needed while grouping, so only the groups are returned.
    return {
        digest: (originals[digest], duplicates)
        for digest, duplicates in copies.items()
    }.values()