in reference/src/main/python/similar.py [0:0]
def setup(records_file):
    """Initialise the module-level ``config`` and ``vocab`` globals.

    Always creates a fresh ``Config``, configures root logging at DEBUG
    level, seeds ``random`` with ``config.SEED`` and makes sure
    ``options.working_dir`` exists.

    Args:
        records_file: Path to a records file, or ``None``.
            * ``None`` -- ``vocab`` is obtained via ``Vocab.load()`` and
              nothing else is done.
            * otherwise -- ``vocab`` is obtained via ``Vocab.load(True)``,
              the records file is featurized into
              ``<working_dir>/<config.FEATURES_FILE>``, the vocab is dumped,
              and ``counter_vectorize`` is run from the features file into
              ``<working_dir>/<config.TFIDF_FILE>``.

    Returns:
        None. Results are published through the ``config`` / ``vocab``
        globals and the files written under the working directory.
    """
    global config, vocab

    config = Config()
    logging.basicConfig(level=logging.DEBUG)
    random.seed(config.SEED)

    work_dir = options.working_dir
    os.makedirs(work_dir, exist_ok=True)

    # No records file: just load the vocab and stop.
    if records_file is None:
        vocab = Vocab.load()
        return

    # NOTE(review): the meaning of the positional ``True`` is defined by
    # Vocab.load, which is not visible here -- confirm against that method.
    vocab = Vocab.load(True)

    # Step 1: featurize the records, then dump the vocab.
    features_path = os.path.join(work_dir, config.FEATURES_FILE)
    featurize_records_file(records_file, features_path)
    vocab.dump()
    logging.info("Done featurizing.")

    # Step 2: build the counter matrix from the features file.
    tfidf_path = os.path.join(work_dir, config.TFIDF_FILE)
    counter_vectorize(features_path, tfidf_path)
    logging.info("Done computing counter matrix.")