# This file is part of photo21
# Copyright (C) 2022 Amicale des élèves de l'ENS Paris-Saclay
# SPDX-License-Identifier: GPL-3.0-or-later

import hashlib

from django.core.management.base import BaseCommand, CommandError
from photologue.models import Gallery

# Number of bytes read per iteration when hashing an image file, so that a
# large image is never loaded into memory in one piece.
_HASH_CHUNK_SIZE = 1024 * 1024


class Command(BaseCommand):
    """List, and optionally delete, duplicated photos inside galleries.

    Two photos of the same gallery are duplicates when the SHA-256 digests
    of their image files are equal (see ``find_duplicate``).
    """

    help = "List all duplicate for chosen galleries"

    def add_arguments(self, parser):
        """Register the ``--slugs``, ``-a/--all`` and ``-d/--delete`` options."""
        parser.add_argument(
            "--slugs",
            nargs="+",
            help="Try to find duplicate in the selected galleries",
            default=[],
        )
        parser.add_argument(
            "-a",
            "--all",
            action="store_true",
            help="Try to find duplicate in all galleries, overide any slugs given",
        )
        parser.add_argument("-d", "--delete", action="store_true")

    def handle(self, *args, **options):
        """Report the duplicates of every selected gallery.

        With ``--all`` every gallery is inspected and ``--slugs`` is ignored;
        otherwise only the galleries named by ``--slugs`` are inspected. With
        ``--delete`` every reported copy is deleted after being listed.

        Raises:
            CommandError: if a given slug matches no gallery.
        """
        # Collect all required galleries.
        if options["all"]:
            galleries = Gallery.objects.all()
        else:
            galleries = []
            for slug in options["slugs"]:
                matching = Gallery.objects.filter(slug=slug)
                if not matching:
                    raise CommandError(
                        f"Slug {slug} does not correspond to a "
                        "gallery in the database."
                    )
                galleries.extend(matching)

        for gallery in galleries:
            duplicates = find_duplicate(gallery)
            self.stdout.write(f"Gallery {gallery.slug}:")
            for original, copies in duplicates:
                # NOTE(review): only the header suppresses its line ending;
                # each copy slug is written with the default ending, so
                # presumably only the first copy shares the header line.
                self.stdout.write(f" {original.slug} is duplicated:", ending="")
                for copy in copies:
                    self.stdout.write(f" {copy.slug}")

            # Delete the copies only when explicitly requested.
            if options["delete"]:
                self.stdout.write(f" Deleting duplicate in {gallery.slug} :")
                for _original, copies in duplicates:
                    for copy in copies:
                        self.stdout.write(f" Deleting {copy.slug}...")
                        copy.delete()


def _image_digest(photo):
    """Return the SHA-256 digest of the image file attached to *photo*."""
    hasher = hashlib.sha256()
    with photo.image.open("rb") as image_file:
        # Feed the hash chunk by chunk; the resulting digest is the same as
        # hashing the whole content at once.
        for chunk in iter(lambda: image_file.read(_HASH_CHUNK_SIZE), b""):
            hasher.update(chunk)
    return hasher.digest()


def find_duplicate(gallery):
    """Group the photos of *gallery* whose image files are byte-identical.

    Photos are compared through the SHA-256 digest of their image file.
    Within a group, the photo with the shortest slug is considered the
    original; when several photos share that shortest length, the first one
    encountered is kept. The same rule is applied to every photo of a group,
    including the first repeated one.

    Args:
        gallery: object whose ``photos.all()`` yields photos exposing
            ``slug`` and ``image``.

    Returns:
        A list of ``[original, copies]`` entries, one per group that contains
        at least one copy. ``copies`` is a list of photos. Hashes are not
        returned because callers have no use for them.
    """
    # digest -> [current original, [copies found so far]]
    groups = {}
    for photo in gallery.photos.all():
        digest = _image_digest(photo)
        entry = groups.get(digest)
        if entry is None:
            # First photo seen with this content.
            groups[digest] = [photo, []]
        elif len(photo.slug) < len(entry[0].slug):
            # Strictly shorter slug: this photo becomes the original and the
            # previous original is demoted to a copy.
            entry[1].append(entry[0])
            entry[0] = photo
        else:
            entry[1].append(photo)
    # Keep only the groups that actually contain copies.
    return [entry for entry in groups.values() if entry[1]]