in data/procdata.py [0:0]
def load(csv_file, settings):
    """
    Load one CSV data file and split it into devices, treatments, times and observations.

    Args:
        csv_file (string): Local file name (joined onto settings.data_dir) that is to be
            loaded, with headers, consisting of:
                Content     Device, e.g. R33S32_Y81C76
                Colony      (blank)
                Well Col    int, 1 to 12
                Well Row    letter, A to H
                Content     Condition, e.g. C6=<float> or C12=<float> or EtOH=<float>
            and then for each of EYFP, ECFP, mRFP1 and OD, 100 readings at different times
            and then second line is "timesall": time of each col except for the first 5
        settings: Object supplying the attributes read here: data_dir, devices, device_map,
            conditions, signals and dtype ("float32" or "float64").
    Returns:
        None if no row of the file belongs to one of settings.devices (flag value meaning
        the dataset doesn't exist in this file). Otherwise a tuple of:
            devices: int array, settings.device_map applied to the device of each kept row
            treatments: array of the condition values of each kept row, cast to settings.dtype
            times: array of the "timesall" entries under the OD columns, cast to settings.dtype
            observations: array indexed by kept row, then by signal (in settings.signals
                order), then by reading, cast to settings.dtype
        devices, treatments and observations all describe the same rows, in the same order.
    Raises:
        ValueError: if settings.dtype is neither "float32" nor "float64".
    """
    data_path = os.path.join(settings.data_dir, csv_file)
    loaded = pd.read_csv(data_path, sep=",", na_filter=False)
    timesall = loaded.iloc[0, 5:]  # times of the observations
    obs_rows = loaded.iloc[1:, :]  # observation rows

    # Rows we want to keep are those whose first ("Content") value is in the "devices" list.
    wanted = np.isin(obs_rows.iloc[:, 0], settings.devices)
    rows = obs_rows.iloc[wanted, :]

    # List of OrderedDicts, each with keys C6 or C12 (i.e. the two "content" columns above)
    # and float values.
    treatment_values = [process_condition(cond) for cond in rows.iloc[:, 4]]
    if not treatment_values:
        return None  # flag value to indicate the dataset doesn't exist in this file

    # As treatment_values, but each OrderedDict additionally has the keys that the others
    # have, with value 0.0.
    expanded = expand_conditions(treatment_values, settings.conditions)
    # Filter out time-series that have nonzero values for unspecified conditions
    locs, filtered = find_conditions(expanded, settings.conditions)
    treatments = np.array([list(cond.values()) for cond in filtered])

    # Select the surviving rows once, so that devices and observations are taken from
    # exactly the rows that "treatments" describes and stay aligned with it.
    selected = rows.iloc[locs, :]
    devices = np.array([settings.device_map[dev] for dev in selected.iloc[:, 0]], dtype=int)

    # Collect the time-series observations
    X = selected.iloc[:, 5:]
    # Repeated column names carry a ".<n>" suffix; strip it to recover the base header.
    headers = np.array([name.split(".")[0] for name in X.columns.values])
    header_signals = np.array([extract_signal(h) for h in headers])
    x_values = [
        [row.iloc[header_signals == signal].values for signal in settings.signals]
        for _, row in X.iterrows()
    ]
    observations = np.array(x_values)
    # The time axis is read from the columns of the OD signal.
    times = timesall.iloc[header_signals == "OD"].values

    if settings.dtype == "float32":
        float_type = np.float32
    elif settings.dtype == "float64":
        float_type = np.float64
    else:
        raise ValueError("Unknown dtype %s" % settings.dtype)

    return (
        devices,
        treatments.astype(float_type),
        times.astype(float_type),
        observations.astype(float_type),
    )