in pygenie/adapter/genie_3.py
def get_info_for_rj(self, job_id, job=False, request=False,
applications=False, cluster=False, command=False,
execution=False, output=False, timeout=30, *args, **kwargs):
"""
Get information for RunningJob object.
"""
get_all = not any([job, request, applications, cluster, command,
                   execution, output])
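# A disabled adapter-level timeout (self.disable_timeout) overrides the
# per-call timeout.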
timeout = None if self.disable_timeout else timeout
ret = dict()
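# Job section: core metadata plus UI and output links derived from the
# job's '_links.self.href'.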
if job or get_all:
data = self.get(job_id, timeout=timeout)
link = data.get('_links', {}).get('self', {}).get('href')
link_parts = urlparse(link)
output_link = '{scheme}://{netloc}/output/{job_id}/output' \
.format(scheme=link_parts.scheme,
netloc=link_parts.netloc,
job_id=data.get('id'))
job_link = '{scheme}://{netloc}/jobs?id={job_id}&rowId={job_id}' \
.format(scheme=link_parts.scheme,
netloc=link_parts.netloc,
job_id=data.get('id'))
ret['archive_location'] = data.get('archiveLocation')
ret['attachments'] = None
ret['command_args'] = data.get('commandArgs')
ret['command_name'] = data.get('commandName')
ret['cluster_name'] = data.get('clusterName')
ret['created'] = data.get('created')
ret['description'] = data.get('description')
ret['finished'] = data.get('finished')
ret['genie_grouping'] = data.get('grouping')
ret['genie_grouping_instance'] = data.get('groupingInstance')
ret['id'] = data.get('id')
ret['job_link'] = job_link
ret['json_link'] = link
ret['kill_uri'] = link
ret['metadata'] = data.get('metadata') or dict()
ret['name'] = data.get('name')
ret['output_uri'] = output_link
ret['started'] = data.get('started')
ret['status'] = data.get('status')
ret['status_msg'] = data.get('statusMsg')
ret['tags'] = data.get('tags')
ret['updated'] = data.get('updated')
ret['user'] = data.get('user')
ret['version'] = data.get('version')
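# Request section: details of the original job request.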
if request or get_all:
request_data = self.get(job_id, path='request', timeout=timeout)
ret['disable_archive'] = request_data.get('disableLogArchival')
ret['email'] = request_data.get('email')
ret['file_dependencies'] = request_data.get('dependencies')
ret['group'] = request_data.get('group')
ret['request_data'] = request_data
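# Applications section: note that 'application_name' carries the
# comma-joined application ids.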
if applications or get_all:
application_data = self.get(job_id,
path='applications',
if_not_found=list(),
timeout=timeout)
ret['application_name'] = ','.join(a.get('id') for a in application_data)
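# Cluster section: id and name of the cluster that ran the job.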
if cluster or get_all:
cluster_data = self.get(job_id,
path='cluster',
if_not_found=dict(),
timeout=timeout)
ret['cluster_id'] = cluster_data.get('id')
ret['cluster_name'] = cluster_data.get('name')
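# Command section: id, name, and full record of the command used.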
if command or get_all:
command_data = self.get(job_id,
path='command',
if_not_found=dict(),
timeout=timeout)
ret['command_id'] = command_data.get('id')
ret['command_name'] = command_data.get('name')
ret['command_data'] = command_data
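# Execution section: the execution host, exposed as 'client_host'.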
if execution or get_all:
execution_data = self.get(job_id,
path='execution',
if_not_found=dict(),
timeout=timeout)
ret['client_host'] = execution_data.get('hostName')
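# Output section: fetch the output manifest as JSON and record the
# sizes of well-known files (stderr, stdout, spark.log).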
if output or get_all:
output_data = self.get(job_id,
path='output',
if_not_found=dict(),
timeout=timeout,
headers={'Accept': 'application/json'})
ret['output_data'] = output_data
output_files = output_data.get('files') or []
# TODO: Should the default size be None to signify unknown vs 0 byte file?
size_keys = {'stderr': 'stderr_size',
             'stdout': 'stdout_size',
             'spark.log': 'spark_log_size'}
for entry in output_files:
    key = size_keys.get(entry.get('name'))
    if key is not None:
        ret[key] = entry.get('size') or 0
return ret
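# Illustrative usage (a sketch; the job id is hypothetical and this
# assumes an initialized adapter instance exposing this method):
#   info = adapter.get_info_for_rj('1234-abcd', job=True, output=True)
#   print(info['status'], info['output_uri'])
# With no section flags set, every section is fetched in turn.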