def get_ers_data()

in src/entity_relation_scorer.py [0:0]


def get_ers_data(input_data_df):
    """Collect entity/relation/score rows from every extractor into one DataFrame.

    Runs the keyword, domain, path-info, tag and topic extractor helpers on
    ``input_data_df`` in that order. After each extractor finishes, the number
    of rows it produced and a preview of its first five rows are logged at
    INFO level.

    Args:
        input_data_df: Input data handed unchanged to each extractor helper.
            NOTE(review): presumably a pandas DataFrame containing the columns
            named in the helper calls below -- confirm against the helpers.

    Returns:
        A ``pd.DataFrame`` with the columns ``entity``, ``entity_type``,
        ``relation``, ``url_hash`` and ``score``, holding the keyword rows
        first, then domain, path-info, tag and topic rows.
    """
    combined_rows = []

    # Keyword entities.
    keyword_rows = list(_extract_keyword_entities_rltn_score(
        input_data_df, 'keyword_data', 'keyword', 'refers_to', 'url_hash', 'use_count'))
    keyword_count = len(keyword_rows)
    logger.info(f" Number of keyword entities = {keyword_count}")
    logger.info(keyword_rows[:5])
    combined_rows.extend(keyword_rows)

    # Domain entities.
    domain_rows = list(_extract_domain_entities_rltn_score(
        input_data_df, 'host', 'contains', 'url_hash', 'domain_frecency'))
    domain_count = len(domain_rows)
    logger.info(f"\n Number of domain entities = {domain_count}")
    logger.info(domain_rows[:5])
    combined_rows.extend(domain_rows)

    # Path-info entities (the extractor is given a constant 1.0).
    path_info_rows = list(_extract_path_info_entities_rltn_score(
        input_data_df, 'path_info', 'parses_to', 'url_hash', 1.0))
    path_info_count = len(path_info_rows)
    logger.info(f"\n Number of path info entities = {path_info_count}")
    logger.info(path_info_rows[:5])
    combined_rows.extend(path_info_rows)

    # Tag entities (the extractor is given a constant 1.0).
    tag_rows = list(_extract_tags_entities_rltn_score(
        input_data_df, 'tags', 'tagged_has', 'url_hash', 1.0))
    tag_count = len(tag_rows)
    logger.info(f"\n Number of tag entities = {tag_count}")
    logger.info(tag_rows[:5])
    combined_rows.extend(tag_rows)

    # Topic entities (the extractor is given a constant 1.0).
    topic_rows = list(_extract_topics_entities_rltn_score(
        input_data_df, 'topics', 'belongs_to', 'url_hash', 1.0))
    topic_count = len(topic_rows)
    logger.info(f"\n Number of topic entities = {topic_count}")
    logger.info(topic_rows[:5])
    combined_rows.extend(topic_rows)

    output_columns = ['entity', 'entity_type', 'relation', 'url_hash', 'score']
    return pd.DataFrame(combined_rows, columns=output_columns)