import argparse
import hashlib

from django.core.management.base import BaseCommand


class Command(BaseCommand):
    """Management command intended to list duplicate photos in galleries.

    Only the command-line options are declared so far; ``handle`` is still
    a stub and does not call ``find_duplicate`` yet.
    """

    help = 'List all duplicate for chosen galleries'

    def add_arguments(self, parser):
        """Register the command-line options on *parser*.

        Options:
            --slugs: one or more gallery slugs to inspect (defaults to ``[]``).
            -a / --all: flag, inspect every gallery.
            -d / --delete: flag; its handling is not implemented in this file.
        """
        # The parser method is ``add_argument`` (singular); the plural
        # spelling does not exist and raised AttributeError.
        parser.add_argument(
            '--slugs',
            nargs='+',
            help='Try to find duplicate in the selected galleries',
            default=[])
        parser.add_argument(
            '-a', '--all',
            action='store_true',
            help='Try to find duplicate in all galleries')
        parser.add_argument('-d', '--delete', action='store_true')

    def handle(self, *args, **options):
        """Entry point of the command; not implemented yet."""
        pass


def find_duplicate(gallery):
    """Group the photos of *gallery* whose image content is identical.

    Photos are compared by the SHA-256 digest of ``photo.image.read()``.
    Inside a group, the photo with the shortest slug is treated as the
    original; when two slugs have the same length, the photo seen later
    becomes the original.

    Args:
        gallery: object exposing ``photos.all()``, which yields photos that
            have an ``image`` (with ``read()``) and a ``slug``.

    Returns:
        A view over ``[original, [duplicates]]`` lists, one per group of
        identical images. Photos without any duplicate are not included.
    """
    # digest -> photo currently considered the original for that content
    non_duplicate = {}
    # digest -> [original, [duplicates]]
    duplicate = {}

    for photo in gallery.photos.all():
        h0 = hashlib.sha256(photo.image.read()).digest()

        # Both dicts are keyed by digest, so membership is tested on ``h0``
        # (not on the photo or its slug).
        if h0 not in non_duplicate:
            # First time this content is seen: not a duplicate (so far).
            non_duplicate[h0] = photo
            continue

        current = non_duplicate[h0]
        if len(photo.slug) > len(current.slug):
            # Longer slug: the new photo is the duplicate.
            original, extra = current, photo
        else:
            # Shorter (or equal) slug: the new photo replaces the original,
            # and the previous original is demoted to a duplicate.
            original, extra = photo, current
            non_duplicate[h0] = photo

        if h0 in duplicate:
            duplicate[h0][0] = original
            duplicate[h0][1].append(extra)
        else:
            duplicate[h0] = [original, [extra]]

    # Return values because hashes aren't needed anymore
    return duplicate.values()