# Provenance: recovered from a git patch.
#   Commit:  b347ac76c19d861c31a94654580009a13617a903
#   Author:  aeltheos
#   Date:    Fri, 12 Nov 2021 21:40:00 +0100
#   Subject: [PATCH] finished find duplicate function
#   File:    photologue_custom/management/commands/duplicate.py
"""Management command scaffolding and helper for finding duplicate photos."""

import argparse
import hashlib

from django.core.management.base import BaseCommand


class Command(BaseCommand):
    """Management command stub; argument parsing and handling are not implemented yet."""

    help = 'List all duplicate for chosen galleries'

    def add_arguments(self, parser):
        """Register command-line arguments (none are defined yet)."""
        pass

    def handle(self, *args, **options):
        """Run the command (currently a no-op)."""
        pass


def _photo_digest(photo):
    """Return the SHA-256 digest (bytes) of the content of ``photo.image``.

    The file is opened explicitly so the read always starts at the beginning,
    hashed chunk by chunk so a large image is never held in memory as a whole,
    and closed afterwards so scanning a big gallery does not leak open files.
    """
    image = photo.image
    image.open("rb")
    try:
        hasher = hashlib.sha256()
        for chunk in image.chunks():
            hasher.update(chunk)
    finally:
        image.close()
    return hasher.digest()


def find_duplicate(gallery):
    """Group the photos of ``gallery`` whose image files are byte-identical.

    Photos are compared by the SHA-256 digest of their image content. Within
    a group of identical photos, the one with the shortest slug is treated as
    the original; when two slugs have the same length, the photo seen later
    takes over as the original.

    Args:
        gallery: object exposing ``photos.all()``, an iterable of photos that
            each have a ``slug`` string and an ``image`` file attribute.

    Returns:
        A ``dict.values()`` view with one ``[original, [duplicates]]`` list
        per group that contains at least one duplicate. Photos without any
        duplicate do not appear.
    """
    # digest -> photo currently considered the original for that content
    originals = {}
    # digest -> [original, [duplicates]]
    duplicates = {}

    for photo in gallery.photos.all():
        digest = _photo_digest(photo)

        if digest not in originals:
            # First time this content is seen: not a duplicate (so far).
            originals[digest] = photo
            continue

        current = originals[digest]
        if len(photo.slug) > len(current.slug):
            # Longer slug: the new photo is the copy, the original stays.
            keeper, copy = current, photo
        else:
            # Shorter (or equal) slug: the new photo becomes the original
            # and the previous original is demoted to a duplicate.
            keeper, copy = photo, current
            originals[digest] = photo

        group = duplicates.get(digest)
        if group is None:
            duplicates[digest] = [keeper, [copy]]
        else:
            group[0] = keeper
            group[1].append(copy)

    # Only the groups matter to callers; the digests are no longer needed.
    return duplicates.values()