import hashlib
from collections import defaultdict

from django.core.management.base import BaseCommand, CommandError
from photologue.models import Gallery

# Number of bytes read per iteration when hashing an image file, so that a
# large image is never loaded into memory in one piece.
_HASH_CHUNK_SIZE = 64 * 1024


class Command(BaseCommand):
    """List, and optionally delete, photos duplicated inside galleries.

    Galleries are selected either by slug (``--slugs``) or all at once
    (``--all``). Two photos of the same gallery are considered duplicates
    when the SHA-256 digests of their image files are equal.
    """

    help = 'List all duplicate for chosen galleries'

    def add_arguments(self, parser):
        """Register the ``--slugs``, ``--all`` and ``--delete`` options."""
        parser.add_argument(
            '--slugs',
            nargs='+',
            help='Try to find duplicate in the selected galleries',
            default=[])
        parser.add_argument(
            '-a', '--all',
            action='store_true',
            help='Try to find duplicate in all galleries, overide any slugs given')
        parser.add_argument('-d', '--delete', action='store_true')

    def handle(self, *args, **options):
        """Report duplicates for the selected galleries and maybe delete them.

        Raises:
            CommandError: if one of the given slugs matches no gallery.
        """
        # Collect all required galleries; --all takes precedence over --slugs.
        if options['all']:
            galleries = Gallery.objects.all()
        else:
            galleries = []
            for slug in options['slugs']:
                gallery_query = Gallery.objects.filter(slug=slug)
                if not gallery_query:
                    raise CommandError(f"Slug {slug} does not correspond to a "
                                       "gallery in the database.")
                galleries += gallery_query

        # Find duplicates in all galleries
        for gallery in galleries:
            duplicates = find_duplicate(gallery)
            self.stdout.write(f"Gallery {gallery.slug}:")
            for original, copies in duplicates:
                # Emit the original and all of its copies as ONE line. Writing
                # each copy separately would end the line after the first
                # copy and scatter the remaining ones over following lines.
                copy_slugs = "".join(f" {copy.slug}" for copy in copies)
                self.stdout.write(
                    f" {original.slug} is duplicated:{copy_slugs}")

            # Delete them if --delete
            if options['delete']:
                self.stdout.write(
                    ' Deleting duplicate in {} :'.format(gallery.slug))
                for _original, copies in duplicates:
                    for copy in copies:
                        self.stdout.write(
                            ' Deleting {}...'.format(copy.slug))
                        copy.delete()


def _hash_photo(photo):
    """Return the SHA-256 digest of ``photo``'s image file, read in chunks."""
    digest = hashlib.sha256()
    with photo.image.open("rb") as f:
        for chunk in iter(lambda: f.read(_HASH_CHUNK_SIZE), b""):
            digest.update(chunk)
    return digest.digest()


def find_duplicate(gallery):
    """Group the photos of ``gallery`` whose image files are identical.

    Args:
        gallery: object whose ``photos.all()`` yields photos exposing a
            ``slug`` string and an ``image`` file that can be opened.

    Returns:
        A list of ``[original, copies]`` pairs, one per set of identical
        images. ``original`` is the photo with the shortest slug (the
        earliest one seen wins a tie) and ``copies`` lists the other photos
        of the set in iteration order. Photos without a duplicate are not
        reported.
    """
    # Bucket every photo by the digest of its image content.
    photos_by_hash = defaultdict(list)
    for photo in gallery.photos.all():
        photos_by_hash[_hash_photo(photo)].append(photo)

    duplicates = []
    for group in photos_by_hash.values():
        if len(group) < 2:
            # Unique image: nothing to report.
            continue
        # Apply the "shortest slug is the original" rule to the whole group,
        # including the very first pair of identical photos.
        # NOTE(review): presumably copies carry a suffixed, longer slug --
        # confirm this heuristic matches how slugs are generated.
        original = min(group, key=lambda candidate: len(candidate.slug))
        copies = [candidate for candidate in group
                  if candidate is not original]
        duplicates.append([original, copies])

    # Hashes are only needed for grouping, so callers get the groups alone.
    return duplicates