83 lines
2.9 KiB
Python
83 lines
2.9 KiB
Python
# This file is part of photo21
|
|
# Copyright (C) 2022 Amicale des élèves de l'ENS Paris-Saclay
|
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
|
|
import hashlib
|
|
|
|
from django.core.management.base import BaseCommand, CommandError
|
|
from photologue.models import Gallery
|
|
|
|
|
|
class Command(BaseCommand):
    """Report, and optionally delete, the duplicate photos of galleries.

    Galleries are chosen with ``--slugs`` or ``--all``. For each gallery the
    groups returned by ``find_duplicate`` are printed; with ``--delete`` every
    photo listed as a copy is deleted while the original of each group is kept.
    """

    help = "List all duplicate for chosen galleries"

    def add_arguments(self, parser):
        """Register the ``--slugs``, ``--all`` and ``--delete`` options."""
        parser.add_argument(
            "--slugs",
            nargs="+",
            help="Try to find duplicate in the selected galleries",
            default=[],
        )
        parser.add_argument(
            "-a",
            "--all",
            action="store_true",
            help="Try to find duplicate in all galleries, overide any slugs given",
        )
        # Deletion is destructive, so the flag must be documented in --help.
        parser.add_argument(
            "-d",
            "--delete",
            action="store_true",
            help="Delete every duplicate found, keeping the original of each group",
        )

    def handle(self, *args, **options):
        """Print the duplicates of each selected gallery, deleting them on request.

        Raises:
            CommandError: if one of the given slugs matches no gallery. This
                is raised before anything is printed or deleted.
        """
        for gallery in self._select_galleries(options):
            duplicates = find_duplicate(gallery)

            # Report: one line per original, followed by its copies.
            self.stdout.write(f"Gallery {gallery.slug}:")
            for original, copies in duplicates:
                self.stdout.write(f" {original.slug} is duplicated:", ending="")
                for copy in copies:
                    self.stdout.write(f" {copy.slug}")

            # Delete the copies only when --delete was given.
            if not options["delete"]:
                continue
            self.stdout.write(f" Deleting duplicate in {gallery.slug} :")
            for _original, copies in duplicates:
                for copy in copies:
                    self.stdout.write(f" Deleting {copy.slug}...")
                    copy.delete()

    def _select_galleries(self, options):
        """Return the galleries to scan: all of them, or those named by --slugs."""
        if options["all"]:
            return Gallery.objects.all()

        galleries = []
        for slug in options["slugs"]:
            matches = Gallery.objects.filter(slug=slug)
            if not matches:
                raise CommandError(
                    f"Slug {slug} does not correspond to a "
                    "gallery in the database."
                )
            galleries += matches
        return galleries
|
|
|
|
|
|
def find_duplicate(gallery):
    """Group the photos of ``gallery`` whose image files are byte-identical.

    Photos are compared by the SHA-256 digest of their image content. Within
    each group of identical photos, the one with the shortest slug is the
    "original"; when slug lengths tie, the photo that is already the original
    keeps that role. All the other photos of the group are its copies.

    Args:
        gallery: object whose ``photos.all()`` yields photos exposing ``slug``
            and ``image.open("rb")``.

    Returns:
        A list of ``[original, copies]`` pairs, one per group that contains
        at least one duplicate, ordered by when each group's first duplicate
        was met. ``copies`` is a non-empty list of photos.
    """
    # Read images in fixed-size chunks so a large file is never held in memory.
    chunk_size = 64 * 1024

    # First photo met for each digest.
    first_seen = {}
    # digest -> [original, [copies]], created when a second photo shares it.
    duplicates = {}

    for photo in gallery.photos.all():
        hasher = hashlib.sha256()
        with photo.image.open("rb") as f:
            for chunk in iter(lambda: f.read(chunk_size), b""):
                hasher.update(chunk)
        digest = hasher.digest()

        if digest not in first_seen:
            # Content never met before: not a duplicate (so far).
            first_seen[digest] = photo
            continue

        group = duplicates.setdefault(digest, [first_seen[digest], []])
        original, copies = group
        if len(photo.slug) < len(original.slug):
            # The new photo has the shorter slug: it becomes the original and
            # the previous original is demoted to a copy. This also applies to
            # the very first duplicate of a group.
            copies.append(original)
            group[0] = photo
        else:
            copies.append(photo)

    # Only the groups matter to callers; the digests themselves are not useful.
    return list(duplicates.values())
|