def load()

in data/procdata.py [0:0]


def load(csv_file, settings):
    """
    Load the time-series for the configured devices from a plate-reader CSV file.

    Args:
        csv_file (string): Local file name that is to be loaded, with headers, consisting of:
            Content   Device, e.g. R33S32_Y81C76
            Colony    (blank)
            Well Col  int, 1 to 12
            Well Row  letter, A to H
            Content   Condition, e.g. C6=<float> or C12=<float> or EtOH=<float>
            and then for each of EYFP, ECFP, mRFP1 and OD, 100 readings at different times
            and then second line is "timesall": time of each col except for the first 5
        settings: Object providing the attributes read here:
            data_dir    directory that csv_file is resolved against
            devices     device names (first "Content" column) whose rows are kept
            device_map  mapping from device name to integer device index
            conditions  conditions passed to expand_conditions / find_conditions
            signals     signal names, in the order they appear in the observations
            dtype       "float32" or "float64"
    Returns:
        None if no row of the file belongs to one of settings.devices. Otherwise a tuple
        (devices, treatments, times, observations), where the first axis of devices,
        treatments and observations runs over the same selected rows:
            devices       int array with the device index of each selected row
            treatments    array with the condition values of each selected row
            times         times of the columns whose signal is "OD"
            observations  array indexed by [row][signal][time]; this assumes every signal
                          has the same number of readings (TODO confirm against the data files)
        treatments, times and observations are cast to settings.dtype.
    Raises:
        ValueError: If settings.dtype is neither "float32" nor "float64".
        KeyError: If a selected device is missing from settings.device_map.
    """
    float_types = {"float32": np.float32, "float64": np.float64}

    data_path = os.path.join(settings.data_dir, csv_file)
    # na_filter=False keeps blank cells as empty strings instead of NaN.
    loaded = pd.read_csv(data_path, sep=",", na_filter=False)
    timesall = loaded.iloc[0, 5:]  # times of the observations
    obs_rows = loaded.iloc[1:, :]  # observation rows
    # Rows we want to keep are those whose first ("Content") value is in the "devices" list.
    rows = obs_rows.iloc[np.isin(obs_rows.iloc[:, 0], settings.devices), :]

    # List of OrderedDicts, each with keys C6 or C12 (i.e. the two "content" columns above)
    # and float values.
    treatment_values = [process_condition(cond) for cond in rows.iloc[:, 4]]
    if not treatment_values:
        return None  # flag value to indicate the dataset doesn't exist in this file

    # As treatment_values, but each OrderedDict additionally has the keys that the others have, with value 0.0.
    expanded = expand_conditions(treatment_values, settings.conditions)

    # Filter out time-series that have nonzero values for unspecified conditions
    locs, filtered = find_conditions(expanded, settings.conditions)
    treatments = np.array([list(cond.values()) for cond in filtered])

    # Build the devices from the *filtered* rows so that they stay aligned, row for row,
    # with treatments and observations (which are also restricted to locs).
    devices = np.array([settings.device_map[dev] for dev in rows.iloc[locs, 0]], dtype=int)

    # Collect the time-series observations
    X = rows.iloc[locs, 5:]
    # pandas de-duplicates repeated column names by appending ".<n>"; strip that suffix.
    headers = np.array([v.split(".")[0] for v in X.columns.values])
    header_signals = np.array([extract_signal(h) for h in headers])
    x_values = [
        [row.iloc[header_signals == signal].values for signal in settings.signals]
        for _, row in X.iterrows()
    ]
    observations = np.array(x_values)
    times = timesall.iloc[header_signals == "OD"].values

    float_type = float_types.get(settings.dtype)
    if float_type is None:
        raise ValueError("Unknown dtype %s" % settings.dtype)
    return (
        devices,
        treatments.astype(float_type),
        times.astype(float_type),
        observations.astype(float_type),
    )