def get_info_for_rj()

in pygenie/adapter/genie_3.py


    def get_info_for_rj(self, job_id, job=False, request=False,
                        applications=False, cluster=False, command=False,
                        execution=False, output=False, timeout=30, *args, **kwargs):
        """
        Get information for RunningJob object.
        """

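        # With no individual section requested, fetch every section.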
        get_all = all([not job,
                       not request,
                       not applications,
                       not cluster,
                       not command,
                       not execution,
                       not output])

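        # An adapter-level disable_timeout flag overrides the per-call timeout.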
        timeout = None if self.disable_timeout else timeout

        ret = dict()

        if job or get_all:
            data = self.get(job_id, timeout=timeout)

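            # Build UI-facing links (output browser, job page) from the API's
            # HAL self link (urlparse is imported at module level, not shown here).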
            link = data.get('_links', {}).get('self', {}).get('href')
            link_parts = urlparse(link)
            output_link = '{scheme}://{netloc}/output/{job_id}/output' \
                .format(scheme=link_parts.scheme,
                        netloc=link_parts.netloc,
                        job_id=data.get('id'))
            job_link = '{scheme}://{netloc}/jobs?id={job_id}&rowId={job_id}' \
                .format(scheme=link_parts.scheme,
                        netloc=link_parts.netloc,
                        job_id=data.get('id'))

            ret['archive_location'] = data.get('archiveLocation')
            ret['attachments'] = None
            ret['command_args'] = data.get('commandArgs')
            ret['command_name'] = data.get('commandName')
            ret['cluster_name'] = data.get('clusterName')
            ret['created'] = data.get('created')
            ret['description'] = data.get('description')
            ret['finished'] = data.get('finished')
            ret['genie_grouping'] = data.get('grouping')
            ret['genie_grouping_instance'] = data.get('groupingInstance')
            ret['id'] = data.get('id')
            ret['job_link'] = job_link
            ret['json_link'] = link
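            # Genie kills a job via DELETE on the job's self link, so the
            # same URL doubles as the kill URI.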
            ret['kill_uri'] = link
            ret['metadata'] = data.get('metadata') or dict()
            ret['name'] = data.get('name')
            ret['output_uri'] = output_link
            ret['started'] = data.get('started')
            ret['status'] = data.get('status')
            ret['status_msg'] = data.get('statusMsg')
            ret['tags'] = data.get('tags')
            ret['updated'] = data.get('updated')
            ret['user'] = data.get('user')
            ret['version'] = data.get('version')

        if request or get_all:
            request_data = self.get(job_id, path='request', timeout=timeout)

            ret['disable_archive'] = request_data.get('disableLogArchival')
            ret['email'] = request_data.get('email')
            ret['file_dependencies'] = request_data.get('dependencies')
            ret['group'] = request_data.get('group')
            ret['request_data'] = request_data

        if applications or get_all:
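            # if_not_found is the fallback returned when the sub-resource
            # lookup 404s (e.g. no applications attached to the job).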
            application_data = self.get(job_id,
                                        path='applications',
                                        if_not_found=list(),
                                        timeout=timeout)

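            # Note: despite the key name, this is a comma-separated list of
            # application IDs.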
            ret['application_name'] = ','.join(a.get('id') for a in application_data)

        if cluster or get_all:
            cluster_data = self.get(job_id,
                                    path='cluster',
                                    if_not_found=dict(),
                                    timeout=timeout)

            ret['cluster_id'] = cluster_data.get('id')
            ret['cluster_name'] = cluster_data.get('name')

        if command or get_all:
            command_data = self.get(job_id,
                                    path='command',
                                    if_not_found=dict(),
                                    timeout=timeout)

            ret['command_id'] = command_data.get('id')
            ret['command_name'] = command_data.get('name')
            ret['command_data'] = command_data

        if execution or get_all:
            execution_data = self.get(job_id,
                                      path='execution',
                                      if_not_found=dict(),
                                      timeout=timeout)

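            # hostName here is the Genie node that executed the job.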
            ret['client_host'] = execution_data.get('hostName')

        if output or get_all:
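            # Request JSON explicitly; without the Accept header the output
            # endpoint serves an HTML directory listing.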
            output_data = self.get(job_id,
                                   path='output',
                                   if_not_found=dict(),
                                   timeout=timeout,
                                   headers={'Accept': 'application/json'})

            ret['output_data'] = output_data
            output_files = output_data.get('files') or []
            # Map known output file names to the size keys exposed to callers.
            size_keys = {'stderr': 'stderr_size',
                         'stdout': 'stdout_size',
                         'spark.log': 'spark_log_size'}
            for entry in output_files:
                key = size_keys.get(entry.get('name'))
                if key is not None:
                    # TODO: Should the default size be None to signify unknown vs 0 byte file?
                    ret[key] = entry.get('size') or 0
        return ret
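
Example usage (a minimal sketch; the adapter class name Genie3Adapter is
taken from the module path, and its no-argument construction plus the job
id are illustrative assumptions):

    adapter = Genie3Adapter()

    # Fetch only the command and execution sections for one job.
    info = adapter.get_info_for_rj('job-1234', command=True, execution=True)
    print(info.get('command_name'), info.get('client_host'))

    # With no section flags set, every section is fetched in one call.
    full_info = adapter.get_info_for_rj('job-1234')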