def format_predictions()

in 5-app-infra/3-artifact-publish/docker/cdmc/tag_engine_api/extensions/query_cookbook/summarize_sql/main.py [0:0]


def format_predictions(raw_predictions, operation):
    """Collect tokens from raw prediction strings and render them as HTML.

    Every raw prediction is split on newlines, every line on ``|``, and each
    resulting cell is stripped. Empty cells and purely numeric cells are
    dropped, a leading enumeration marker (``1.``, ``2)``, ``10 `` ...) is
    removed, and the surviving tokens are de-duplicated and passed through
    ``remove_false_positives`` before being joined with ``<br>``.

    Args:
        raw_predictions: Sized iterable of strings to parse.
        operation: Operation name. When it equals ``'JOIN'``, a token that
            starts with ``'on '`` is glued onto the previously collected
            token instead of being stored on its own. The value is also
            forwarded unchanged to ``remove_false_positives``.

    Returns:
        ``''`` when no token survives filtering, ``'None'`` when
        ``remove_false_positives`` returns an empty collection, otherwise the
        tokens joined by ``<br>`` and wrapped in ``<html>...</html>``.
    """
    # Function-scope import so the block does not depend on the file header.
    import re

    print('format_predictions')
    print('input:', raw_predictions)
    print('count:', len(raw_predictions))

    # A leading enumeration marker: one or more digits followed either by a
    # delimiter ('.', ')', ':' or '-') or by whitespace. Requiring the
    # delimiter keeps identifiers that merely start with a digit intact and
    # handles multi-digit numbering such as "10. foo".
    list_marker = re.compile(r'^\d+(?:[.):-]\s*|\s+)')

    filtered_predictions = []

    for raw in raw_predictions:
        for line_number, line in enumerate(raw.split('\n'), start=1):
            # The first two lines are skipped when they start with a pipe
            # (presumably a table header and its separator row).
            if line_number < 3 and line.startswith('|'):
                continue

            for cell in line.split('|'):
                token = cell.strip()

                if token == '' or token.isnumeric():
                    continue

                if token[0].isdigit():
                    token = list_marker.sub('', token, count=1).strip()
                    # A cell that held only a marker (e.g. "1.") has nothing
                    # left; do not record an empty prediction.
                    if token == '':
                        continue

                if (operation == 'JOIN' and filtered_predictions
                        and token.startswith('on ')):
                    # Re-attach the join condition to the token it belongs to.
                    filtered_predictions[-1] = filtered_predictions[-1] + ' ' + token
                else:
                    filtered_predictions.append(token)

    if not filtered_predictions:
        return ''

    # NOTE(review): the set drops the order in which tokens were found; the
    # order of the rendered output depends on what remove_false_positives
    # returns.
    unique_predictions = set(filtered_predictions)
    final_predictions = remove_false_positives(unique_predictions, operation)

    if len(final_predictions) == 0:
        return 'None'

    return '<html>' + '<br>'.join(final_predictions) + '</html>'