def upload()

in clutrr/utils/data_backend.py [0:0]


    def upload(self, data_path, db='gold'):
        """
        Read a csv file and insert its rows into the collection named by `db`.

        Each row gets a counter initialised to 0 (`used` when `db` is 'gold',
        `reviewed` for any other value) and a `relation_length` field holding
        the number of sentences `nltk.sent_tokenize` finds in the row's
        `story`. Rows whose `id` is already stored in the target collection
        are skipped; the remaining rows are inserted in one `insert_many`
        call. Progress is reported with `print`.

        :param data_path: path of the csv file, handed to `self._read_csv`
        :param db: name of the attribute on `self` that holds the target
            collection (resolved with `getattr`); defaults to 'gold'
        :return: None
        """
        print("Reading {}".format(data_path))
        # NOTE(review): _read_csv is expected to return a pandas DataFrame
        # (it must support to_dict(orient='records')) -- confirm.
        data = self._read_csv(data_path)
        records = data.to_dict(orient='records')
        num_records = len(records)
        print("Number of records found : {}".format(num_records))
        # 'gold' records carry a usage counter, every other db a review counter
        counter_key = 'used' if db == 'gold' else 'reviewed'
        for rec in records:
            rec[counter_key] = 0
            rec['relation_length'] = len(nltk.sent_tokenize(rec['story']))
        # presumably a PyMongo collection (find / insert_many are used below)
        mdb = getattr(self, db)
        # Prune the records which are already present in the database.
        # Cursor.count() no longer exists in PyMongo 4, and one query per
        # record is needlessly slow, so the stored ids are fetched in batches
        # with `$in`. Batching keeps each query document small.
        ids = [rec['id'] for rec in records]
        existing_ids = set()
        batch_size = 1000
        for start in range(0, len(ids), batch_size):
            batch = ids[start:start + batch_size]
            cursor = mdb.find({'id': {'$in': batch}}, {'id': 1, '_id': 0})
            existing_ids.update(doc['id'] for doc in cursor)
        records = [rec for rec in records if rec['id'] not in existing_ids]
        print("Number of records already in db : {}".format(num_records - len(records)))
        # insert_many rejects an empty list, so only call it when needed
        if records:
            mdb.insert_many(records)
        print("Inserted {} records in db {}".format(len(records), db))