gnm_deliverables/management/commands/duplicate_scanner.py (32 lines of code) (raw):

import csv
import logging
from pprint import pprint

from django.core.management.base import BaseCommand
from django.db.models import Count

from gnm_deliverables.choices import DELIVERABLE_ASSET_STATUSES_DICT
from gnm_deliverables.models import DeliverableAsset

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """
    Management command to generate a CSV file of duplicate deliverable assets.

    A "duplicate" here means two or more DeliverableAsset rows that share the
    same ``absolute_path``. Every such row is printed to the console and
    written to the CSV report. The header row is always written, so a run
    that finds no duplicates still produces a (header-only) report file.
    """
    help = 'Generate a CSV file of duplicate deliverable assets'

    # Column titles of the report, in the same order as the values produced
    # by _asset_to_row().
    CSV_HEADER = ["Id.", "Path", "Filename", "Type", "Size", "Version", "Job Id.", "Online Item Id.",
                  "Duration in Seconds", "Atom Id.", "Status"]

    def add_arguments(self, parser):
        """Register the ``--output`` option (report path, default ``report.csv``)."""
        parser.add_argument("--output", type=str, default="report.csv", help="Location to output a CSV report")

    @staticmethod
    def _asset_to_row(asset):
        """Return the CSV row for one asset, ordered to match CSV_HEADER."""
        return [
            asset.id,
            asset.absolute_path,
            asset.filename,
            asset.type_string,
            asset.size,
            asset.version,
            asset.job_id,
            asset.online_item_id,
            asset.duration_seconds,
            asset.atom_id,
            # .get() yields None for a status missing from the dict; the csv
            # writer renders None as an empty field.
            DELIVERABLE_ASSET_STATUSES_DICT.get(asset.status),
        ]

    def handle(self, *args, **options):
        """
        Find assets that share an absolute path and report them.

        Reads ``options["output"]`` as the path of the CSV file to create
        (an existing file is overwritten). Details of each duplicate are also
        printed to standard output; "No duplicates" is printed when the query
        returns nothing.
        """
        # Echo the parsed options so the settings of this run appear in the output.
        pprint(options)
        output_file_path = options["output"]

        # Group by absolute_path and keep only the paths used by more than one
        # row. The empty order_by() clears any ordering so that only
        # absolute_path takes part in the grouping.
        paths_with_issue = (
            DeliverableAsset.objects
            .values('absolute_path')
            .annotate(Count('id'))
            .order_by()
            .filter(id__count__gt=1)
        )

        # newline="" is required by the csv module: the writer emits its own
        # line terminators, and without it platforms that translate newlines
        # end up with a blank line after every row.
        with open(output_file_path, "w", newline="") as f:
            writer = csv.writer(f, dialect=csv.excel)
            writer.writerow(self.CSV_HEADER)

            # Truthiness evaluates (and caches) the queryset, so the loop
            # below does not run the grouping query a second time.
            if not paths_with_issue:
                print("No duplicates")
                return

            for path in paths_with_issue:
                duplicated_path = path['absolute_path']
                print ("\n Duplicates found with path: {0}".format(duplicated_path))
                assets_with_issue = DeliverableAsset.objects.filter(absolute_path=duplicated_path)
                for asset in assets_with_issue:
                    print("\n Id.: {0}".format(asset.id))
                    print("Name: {0}".format(asset.filename))
                    print("Size: {0}".format(asset.size))
                    print("Version: {0}".format(asset.version))
                    writer.writerow(self._asset_to_row(asset))