# Extracted function: _process_jenkins_run()
#
# from services/jenkins-run-statistics/statistics.py


def _process_jenkins_run(cloudwatch, jenkins_run, metric_dimensions):
    """
    Process a single Jenkins run and record metrics accordingly
    :param jenkins_run:
    :return: True if we should continue or False if job should no longer be crawled, e.g. due to running jobs
    """
    def process_stage(jenkins_node):
        """
        Process the Jenkins node that is being considered a stage
        :param jenkins_node: Jenkins node
        :return: New stage name
        """
        # The nodes are always in the correct order, so we can use that fact to preserve the
        # information about the stage we are currently in during parallel steps.
        current_stage = jenkins_node.display_name
        stage_metric_dimensions = dict(node_metric_dimensions)
        stage_metric_dimensions['Stage'] = current_stage
        aws_utils.publish_cloudwatch_metric(
            cloudwatch=cloudwatch, metric_name='Stage Duration',
            metric_namespace=CLOUDWATCH_METRIC_NAMESPACE, value=jenkins_node.duration_ms / 1000,
            unix_timestamp=unix_timestamp, dimensions=stage_metric_dimensions, unit='Seconds')
        logging.info('= STAGE %s took %s',
                     current_stage, str(timedelta(milliseconds=jenkins_node.duration_ms)))
        return current_stage

    def process_parallel(jenkins_node):
        """
        Process the Jenkins node that is being considered a parallel node
        :param jenkins_node:
        :return:
        """
        # Determine duration of each parallel-entry by making the sum of all steps. This is
        # necessary because durationInMillis contains garbage for these nodes. Thanks, Jenkins!
        steps = jenkins_node.get_steps()
        if not steps:
            logging.error('No steps available')
            return

        parallel_duration_ms = 0
        for step in steps:
            parallel_duration_ms += step.duration_ms

        step_metric_dimensions = dict(node_metric_dimensions)
        step_metric_dimensions['Stage'] = current_stage
        step_metric_dimensions['Step'] = jenkins_node.display_name
        aws_utils.publish_cloudwatch_metric(
            cloudwatch=cloudwatch, metric_name='Step Duration', unit='Seconds',
            value=int(parallel_duration_ms / 1000), unix_timestamp=unix_timestamp,
            metric_namespace=CLOUDWATCH_METRIC_NAMESPACE, dimensions=step_metric_dimensions)

        logging.info('== STEP %s ran for %s',
                     jenkins_node.display_name, str(timedelta(milliseconds=parallel_duration_ms)))

    metadata = jenkins_run.retrieve_metadata(tree_filter_string='duration,building,timestamp,result')

    if metadata and metadata['building']:
        logging.info('%s is still running, skipping...', jenkins_run)
        return False

    # Make sure to not return eagerly because the DynamoDB entry creation has to happen to mark the run as processed

    if not metadata:
        logging.debug('Run %s does not exist, skipping...', jenkins_run)
    else:
        total_duration_ms = metadata['duration']
        unix_timestamp = metadata['timestamp'] / 1000

        time_diff = time.time() - unix_timestamp
        if time_diff >= MAXIMUM_LOOKBACK_TIMEFRAME_SECONDS:
            logging.info('Run %s is from %d days ago, skipping since its more than two weeks',
                         jenkins_run, int(time_diff/60/60/24))
        else:
            run_metric_dimensions = dict(metric_dimensions)
            run_metric_dimensions['Result'] = metadata['result']
            aws_utils.publish_cloudwatch_metric(cloudwatch=cloudwatch, metric_namespace=CLOUDWATCH_METRIC_NAMESPACE,
                                                metric_name='Total Run Duration', unix_timestamp=unix_timestamp,
                                                dimensions=run_metric_dimensions, unit='Seconds',
                                                value=total_duration_ms/1000)
            logging.info('Run %s has been running for %s', jenkins_run, str(timedelta(milliseconds=total_duration_ms)))

            nodes = jenkins_run.retrieve_nodes()

            if not nodes:
                logging.debug('Run %s has no child stages', jenkins_run)
            else:
                current_stage = 'Unknown stage'
                for jenkins_node in nodes:
                    node_metric_dimensions = dict(metric_dimensions)
                    if jenkins_node.result:  # This is none if the stage has not been reached
                        # Make sure to differentiate metrics by whether the step was successful or not. Otherwise,
                        # time measurements would be off since some jobs did not run until the end.
                        node_metric_dimensions['Result'] = jenkins_node.result
                        unix_timestamp = jenkins_node.start_timestamp

                        if jenkins_node.type == 'STAGE':
                            current_stage = process_stage(jenkins_node)
                        elif jenkins_node.type == 'PARALLEL':
                            process_parallel(jenkins_node)
                        else:
                            logging.error('Unknown stage: %s for %s', jenkins_node.type, jenkins_node)

    return True