Merge branch 'duplicate' into 'master'
Merge duplicate deletion command into master. See merge request bde/photo21!17
This commit is contained in:
commit
b99e8755e3
1 changed files with 71 additions and 0 deletions
71
photologue_custom/management/commands/duplicate.py
Normal file
71
photologue_custom/management/commands/duplicate.py
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
from django.core.management.base import BaseCommand, CommandError
|
||||
from photologue_custom.models import Gallery
|
||||
|
||||
import hashlib
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Report, and optionally delete, duplicate photos inside galleries.

    Galleries are selected either explicitly with ``--slugs`` or globally
    with ``--all``. For every selected gallery the duplicates returned by
    ``find_duplicate`` are listed on stdout; with ``--delete`` each copy is
    then removed through its ``delete()`` method.
    """

    help = 'List all duplicate for chosen galleries'

    def add_arguments(self, parser):
        """Register the ``--slugs``, ``--all`` and ``--delete`` options."""
        parser.add_argument(
            '--slugs', nargs='+', default=[],
            help='Try to find duplicate in the selected galleries')
        parser.add_argument(
            '-a', '--all', action='store_true',
            help='Try to find duplicate in all galleries, overide any slugs given')
        parser.add_argument(
            '-d', '--delete', action='store_true',
            help='Delete the duplicates that were found')

    def handle(self, *args, **options):
        """List the duplicates of each selected gallery and delete on request.

        Raises:
            CommandError: if a given slug matches no gallery, or if no
                gallery was selected at all.
        """
        for gallery in self._select_galleries(options):
            # Materialise once: the result is walked twice when deleting.
            duplicates = list(find_duplicate(gallery))
            self.stdout.write('Gallery {} :'.format(gallery.slug))
            for original, copies in duplicates:
                self.stdout.write(
                    ' {} has following duplicate(s) :'.format(original.slug))
                for copy in copies:
                    self.stdout.write(' {}'.format(copy.slug))

            # Deletion happens only after the full report for this gallery.
            if options['delete']:
                self.stdout.write(
                    ' Deleting duplicate in {} :'.format(gallery.slug))
                for _original, copies in duplicates:
                    for copy in copies:
                        self.stdout.write(
                            ' Deleting {}...'.format(copy.slug))
                        copy.delete()

    def _select_galleries(self, options):
        """Return the galleries to scan for the parsed command options."""
        if options['all']:
            # --all overrides any slug given on the command line.
            return Gallery.objects.all()

        galleries = []
        for slug in options['slugs']:
            # Let the database do the lookup instead of scanning every
            # gallery once per slug.
            gallery = Gallery.objects.filter(slug=slug).first()
            if gallery is None:
                raise CommandError(
                    'Slug {} does not correspond to a gallery in the database.'.format(slug))
            galleries.append(gallery)

        if not galleries:
            raise CommandError('No gallery selected: use --slugs or --all.')
        return galleries
|
||||
|
||||
|
||||
def find_duplicate(gallery):
    """Find photos of ``gallery`` whose image files have identical content.

    Photos are compared through the SHA-256 digest of their image data.
    Within each group of identical photos, the one with the shortest slug is
    treated as the original (ties go to the photo encountered first) and all
    the others are reported as its copies.

    Args:
        gallery: object whose ``photos.all()`` yields photos exposing a
            ``slug`` string and an ``image`` file with ``read()`` and
            ``close()``.

    Returns:
        A view over ``[original, copies]`` lists, one per group that holds
        at least two photos; ``copies`` keeps the encounter order.
    """
    # Group photos by content digest, in encounter order.
    photos_by_digest = {}
    for photo in gallery.photos.all():
        image = photo.image
        try:
            digest = hashlib.sha256(image.read()).digest()
        finally:
            # Release the handle right away: one open file per photo would
            # otherwise accumulate over the whole gallery.
            image.close()
        photos_by_digest.setdefault(digest, []).append(photo)

    # Dict of all found duplicates {digest: [original, [copies]]}
    duplicate = {}
    for digest, photos in photos_by_digest.items():
        if len(photos) < 2:
            continue
        # min() returns the first minimal element, so equal-length slugs
        # keep the earliest photo as the original.
        original = min(photos, key=lambda candidate: len(candidate.slug))
        copies = [candidate for candidate in photos if candidate is not original]
        duplicate[digest] = [original, copies]

    # Return only the values because the digests are of no use to callers
    return duplicate.values()
|
||||
Loading…
Add table
Add a link
Reference in a new issue