data_annotation_platform/import/import_datasets.py (81 lines of code) (raw):
import sys
import json
import requests
import re
import pandas as pd
from pathlib import Path
class AnnotateChange:
    """One univariate time series in the AnnotateChange import format.

    The instance attributes are the fields of the import document: the
    upload script in this file serialises ``__dict__`` directly, so both
    the attribute names and the order in which they are assigned matter.

    Args:
        name: Identifier of the dataset; also used as its long name.
        data: Sequence of observations for the single variable ``V1``.
    """

    def __init__(self, name, data):
        n_points = len(data)
        # The only variable of the series; ``raw`` keeps a reference to
        # the caller's sequence rather than a copy.
        value_column = {'label': 'V1', 'type': 'float', 'raw': data}

        self.n_dim = 1
        self.name = name
        self.longname = name
        self.series = [value_column]
        self.n_obs = n_points
        # Time axis is simply the positional index 0 .. n_obs - 1.
        self.time = {'index': list(range(n_points))}
class Upload:
    """Manages login and uploading of datasets.

    All requests go through one ``requests.Session`` so that whatever the
    server sets during :meth:`login` is reused by :meth:`upload`.
    """

    def __init__(self, base_url, username, password):
        """Store the connection details and open an HTTP session.

        Args:
            base_url: Root URL of the server, with or without a trailing
                slash.
            username: Account name sent with the login form.
            password: Password sent with the login form.
        """
        self.base_url = base_url
        self.username = username
        self.password = password
        # Filled in by login(); upload() refuses to run while it is None.
        self.token = None
        # Initialize the session
        self.session = requests.Session()

    def _url(self, path):
        """Return ``path`` appended to ``base_url`` with a '/' in between."""
        base = self.base_url
        # Plain concatenation would yield 'http://hostauth/login' when the
        # caller omits the trailing slash.
        if not base.endswith('/'):
            base += '/'
        return base + path

    def login(self):
        """Fetch the CSRF token from the login page and sign in.

        Returns:
            True when the response to the login form contains
            'Welcome to'; False otherwise, including when the login page
            carries no CSRF token field.
        """
        login_url = self._url('auth/login')
        # Get the csrf_token
        r = self.session.get(login_url)
        match = re.search(r'"csrf_token" type="hidden" value="([^"]+)"',
                          r.text)
        if match is None:
            # Wrong URL or an error page: report a failed login instead of
            # raising AttributeError on ``None.group``.
            return False
        self.token = match.group(1)
        # Login
        payload = {
            'csrf_token': self.token,
            'username': self.username,
            'password': self.password,
            'submit': 'Sign+In',
        }
        r = self.session.post(login_url, data=payload)
        return 'Welcome to' in r.text

    def upload(self, data):
        """Upload one dataset as a JSON file through the admin form.

        Args:
            data: JSON-serialisable object; it is dumped with
                ``json.dumps`` and sent as the file ``test.json``.

        Returns:
            True when the server answers with a success status and the
            page contains no ``help-block`` message. False when no login
            token is available, when the HTTP status is 4xx/5xx, or when
            the page contains a ``help-block`` message (which is printed).
        """
        if self.token is None:
            print('Not logged in: call login() before upload().')
            return False
        payload = {
            'csrf_token': self.token,
            'submit': 'Upload',
        }
        files = {
            'file_': ('test.json', json.dumps(data), 'application/json'),
        }
        r = self.session.post(self._url('admin/add'),
                              files=files, data=payload)
        if not r.ok:
            # An error page has no help-block and would otherwise be
            # reported as a successful upload.
            print('Upload failed with HTTP status', r.status_code)
            return False
        m = re.search(r'"help-block">([^<]+)<', r.text)
        if m is not None:
            print(m.group(1))
            return False
        return True
def main(argv=None):
    """Upload the series of a CSV file to an AnnotateChange server.

    The CSV file must have an ``id`` column identifying the series each
    row belongs to and a ``value`` column with the observations.

    Args:
        argv: Command line as a list
            ``[prog, csv_file, base_url, username, password]``;
            defaults to ``sys.argv``.

    Returns:
        Process exit status: 0 on success, 1 on a usage error, a CSV file
        without the required columns, a failed login or a failed upload.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 5:
        prog = argv[0] if argv else 'import_datasets.py'
        print('Usage:\npython {} <*-.csv> <base url> <username> <password>'
              .format(prog))
        # A wrong invocation is a failure, so do not report status 0.
        return 1

    # Read the CSV
    csv_file = argv[1]
    data = pd.read_csv(csv_file)
    missing = [col for col in ('id', 'value') if col not in data.columns]
    if missing:
        print('CSV file {} is missing required column(s): {}'
              .format(csv_file, ', '.join(missing)))
        return 1

    # Login to AnnotateChange
    up = Upload(argv[2], argv[3], argv[4])
    if not up.login():
        print('Unable to login.')
        return 1

    # Dataset names are '<lower-cased file stem>_<id>'.
    stem = Path(csv_file).with_suffix('').name.lower()
    for dataset in data['id'].unique():
        # Convert to the AnnotateChange object
        df = data[data['id'] == dataset]
        name = '{}_{}'.format(stem, dataset)
        series = AnnotateChange(name, df['value'].to_list())
        # Upload the dataset
        if not up.upload(series.__dict__):
            print('Unable to upload:', name)
            return 1
        print('Uploaded:', name)
        # NOTE(review): this unconditional break stops after the first id,
        # so only one dataset per CSV file is ever uploaded. It looks like
        # a debugging leftover - confirm the intent before removing it.
        break
    return 0


if __name__ == '__main__':
    sys.exit(main())