in 5-app-infra/3-artifact-publish/docker/cdmc/tag_engine_api/extensions/query_cookbook/summarize_sql/main.py [0:0]
def _strip_list_marker(token):
    """Remove a leading list-numbering prefix from *token*.

    A prefix is a run of ASCII digits followed by either whitespace or a
    single punctuation character (for example ``1.``, ``10)``, ``3 ``).
    Tokens that start with digits but continue directly with a letter, a
    digit-like character or an underscore (for example ``2023_sales``) are
    returned unchanged, because the digits are then part of the name rather
    than a list marker.

    Returns the token without the prefix, stripped of surrounding
    whitespace; this may be the empty string when the token was only a
    marker (for example ``1.``).
    """
    rest = token.lstrip('0123456789')
    if rest == token or not rest:
        # No leading ASCII digits, or nothing but digits: leave untouched.
        return token
    if rest[0].isspace():
        return rest.strip()
    if not (rest[0].isalnum() or rest[0] == '_'):
        # Exactly one separator character (".", ")", ":", "-", ...) is dropped.
        return rest[1:].strip()
    return token


def _iter_tokens(raw):
    """Yield the cleaned, non-empty tokens contained in one raw prediction.

    The text is split into lines and each line into ``|``-separated cells.
    A line that starts with ``|`` is skipped when it is the first or second
    line (presumably the header and separator rows of a markdown-style
    table -- confirm against the model output format). Empty cells and
    purely numeric cells are dropped, and list-numbering prefixes are
    removed from the remaining cells.
    """
    for line_number, line in enumerate(raw.split('\n'), start=1):
        if line_number < 3 and line.startswith('|'):
            continue
        for cell in line.split('|'):
            token = cell.strip()
            if not token or token.isnumeric():
                continue
            if token[0].isdigit():
                token = _strip_list_marker(token)
                if not token:
                    # The cell held only a list marker such as "1.".
                    continue
            yield token


def format_predictions(raw_predictions, operation):
    """Turn raw prediction strings into a small HTML snippet.

    Args:
        raw_predictions: iterable of strings; each one is split on newlines
            and on ``|`` to obtain individual tokens.
        operation: name of the operation being summarized. When it equals
            ``'JOIN'``, a token starting with ``'on '`` is appended to the
            previously collected token instead of being stored separately.
            The value is also forwarded to ``remove_false_positives``.

    Returns:
        ``''`` when no token survives the filtering, ``'None'`` when
        ``remove_false_positives`` returns an empty collection, otherwise
        the remaining predictions joined with ``<br>`` and wrapped in
        ``<html>...</html>``.
    """
    print('format_predictions')
    print('input:', raw_predictions)
    print('count:', len(raw_predictions))

    filtered_predictions = []

    for raw in raw_predictions:
        for token in _iter_tokens(raw):
            if operation == 'JOIN' and filtered_predictions and token.startswith('on '):
                # Glue the join condition onto the entry it belongs to.
                filtered_predictions[-1] = filtered_predictions[-1] + ' ' + token
            else:
                filtered_predictions.append(token)

    if not filtered_predictions:
        return ''

    # De-duplicate before filtering. NOTE(review): a set has no stable order,
    # so unless remove_false_positives sorts its result the order of entries
    # in the returned HTML may vary between runs -- confirm this is acceptable.
    unique_predictions = set(filtered_predictions)
    final_predictions = remove_false_positives(unique_predictions, operation)

    if len(final_predictions) == 0:
        return 'None'

    # NOTE(review): tokens are inserted without HTML escaping; verify that
    # the consumer of this string tolerates characters such as "<" and ">".
    return '<html>' + '<br>'.join(final_predictions) + '</html>'