def cleanup()

in inference/etl.py [0:0]


# Placeholder substituted for every URL. It must consist of letters only,
# because the non-letter stripping step runs after the substitution.
_URL_TOKEN = "urls"


def _clean_text(text):
    """Normalize one string: flatten newlines, mask URLs, strip symbols, lowercase."""
    text = text.replace("\n", " ")
    text = re.sub(r"http://\S+|https://\S+", _URL_TOKEN, text)
    # Keep only ASCII letters, apostrophes and spaces; lowercase once at the end
    # (the character class is case-insensitive, so the order does not matter).
    return re.sub(r"[^A-Za-z' ]+", "", text).lower()


def cleanup(ip, column=None):
    """Clean raw text for the model, at training or at inference time.

    Every string goes through the same steps: newlines become spaces,
    ``http://`` / ``https://`` URLs are replaced by the placeholder
    ``"urls"``, every character other than ASCII letters, apostrophes and
    spaces is removed, and the result is lowercased.

    Both modes share one code path, so the text seen at inference is
    normalized exactly like the text used for training (previously the two
    paths used different URL placeholders).

    Args:
        ip: A pandas DataFrame when ``column`` is given (training), otherwise
            an iterable of strings (inference).
        column: Label of the DataFrame column holding the text. ``None``
            (the default) selects inference mode. Falsy labels such as ``0``
            are valid column labels.

    Returns:
        Training mode: the same DataFrame object, with ``column`` overwritten
        in place by the cleaned text. Inference mode: a new list of cleaned
        strings; the input is not modified.
    """
    if column is not None:
        print("training ETL")
        # Element-wise map over the single column: one pass instead of three
        # row-wise applies, and it also works on an empty frame.
        ip[column] = ip[column].map(_clean_text)
        return ip

    print("inference ETL")
    return [_clean_text(text) for text in ip]