"""Management command listing, and optionally deleting, duplicate gallery photos.

File: photologue_custom/management/commands/duplicate.py

Two photos are considered duplicates when the SHA-256 digests of their image
files are equal.
"""
import hashlib
from collections import defaultdict

from django.core.management.base import BaseCommand, CommandError

from photologue_custom.models import Gallery


class Command(BaseCommand):
    """Report photos sharing the same image content inside selected galleries."""

    help = 'List all duplicate for chosen galleries'

    def add_arguments(self, parser):
        """Register the ``--slugs``, ``--all`` and ``--delete`` options."""
        parser.add_argument(
            '--slugs', nargs='+', default=[],
            help='Try to find duplicate in the selected galleries')
        parser.add_argument(
            '-a', '--all', action='store_true',
            help='Try to find duplicate in all galleries, overide any slugs given')
        parser.add_argument(
            '-d', '--delete', action='store_true',
            help='Delete every duplicate found, keeping one original per group')

    def handle(self, *args, **options):
        """List the duplicates of each selected gallery and delete them on request.

        Raises:
            CommandError: if a requested slug matches no gallery, or if
                neither ``--all`` nor ``--slugs`` selects any gallery.
        """
        for gallery in self._collect_galleries(options):
            duplicates = find_duplicate(gallery)
            self.stdout.write('Gallery {} :'.format(gallery.slug))
            for original, copies in duplicates:
                self.stdout.write(
                    '   {} has following duplicate(s) :'.format(original.slug))
                for copy in copies:
                    self.stdout.write('      {}'.format(copy.slug))

            if not options['delete']:
                continue

            self.stdout.write(
                '   Deleting duplicate in {} :'.format(gallery.slug))
            for _original, copies in duplicates:
                for copy in copies:
                    self.stdout.write(
                        '      Deleting {}...'.format(copy.slug))
                    copy.delete()

    def _collect_galleries(self, options):
        """Return the galleries selected by the command-line options."""
        # --all takes precedence over any slug given on the command line.
        if options['all']:
            return Gallery.objects.all()

        galleries = []
        for slug in options['slugs']:
            # One filtered query per slug instead of scanning every gallery.
            gallery = Gallery.objects.filter(slug=slug).first()
            if gallery is None:
                raise CommandError(
                    'Slug {} does not correspond to a gallery in the database.'.format(slug))
            galleries.append(gallery)

        if not galleries:
            raise CommandError('No gallery selected: use --slugs or --all.')
        return galleries


def _image_digest(photo):
    """Return the SHA-256 digest of ``photo.image``, read chunk by chunk."""
    digest = hashlib.sha256()
    image = photo.image
    image.open('rb')
    try:
        # Chunked reads keep memory bounded for large images.
        for chunk in image.chunks():
            digest.update(chunk)
    finally:
        # Always release the handle so large galleries do not exhaust
        # the process's file descriptors.
        image.close()
    return digest.digest()


def find_duplicate(gallery):
    """Group the photos of ``gallery`` that share the same image content.

    Args:
        gallery: object whose ``photos.all()`` yields photos exposing an
            ``image`` file field and a ``slug`` string.

    Returns:
        A list of ``[original, copies]`` pairs, one per group of identical
        images. ``original`` is the photo with the shortest slug (the first
        one encountered wins a tie) and ``copies`` lists the other photos of
        the group in iteration order.
    """
    photos_by_digest = defaultdict(list)
    for photo in gallery.photos.all():
        photos_by_digest[_image_digest(photo)].append(photo)

    duplicates = []
    for photos in photos_by_digest.values():
        if len(photos) < 2:
            # A digest seen only once is not a duplicate.
            continue
        # min() returns the first minimal element, so ties keep the
        # earliest photo as the original.
        original = min(photos, key=lambda candidate: len(candidate.slug))
        copies = [photo for photo in photos if photo is not original]
        duplicates.append([original, copies])
    return duplicates