def map_csv_column_names_to_parameters()

in src/loading_manifest/csv_to_json.py [0:0]


def map_csv_column_names_to_parameters(csv_file, parameters_object):
    """Map each parameter to the CSV column(s) whose header matches it.

    Only the first row of ``csv_file`` is read; every header cell is
    stripped and lower-cased before matching.  A parameter's lookup key is
    the parameter string with ``parameter_start_delimiter`` /
    ``parameter_end_delimiter`` (module-level names defined outside this
    function) sliced off, then stripped and lower-cased.

    Args:
        csv_file: Path of the CSV file whose header row is inspected.
        parameters_object: Mapping of delimited parameter string to an
            iterable of 3-item records.  Only the third item of each record
            is used here: its length is the array nesting depth of that
            occurrence (0 means the parameter is not inside an array).

    Returns:
        A dict keyed by the parameter string.

        * Depth 0: the value is the zero-based index of the column named
          exactly like the key.  The parameter is omitted when no such
          column exists.
        * Depth ``d`` > 0: the value is a list of
          ``[i_0, ..., i_(d-1), column_index]`` lists, one per header named
          ``<key>_<n_0>_..._<n_(d-1)>`` (each ``n`` a positive integer
          without leading zeros), where ``i_k == n_k - 1``.  Entries are
          ordered by their index tuple; the list is empty when no header
          matches.

    Raises:
        Exception: If a header that a parameter resolves to occurs more than
            once in the CSV, or if a parameter that already has a mapping
            is encountered again inside an array.
    """
    # The csv module asks for newline='' so it can do its own newline
    # handling (matters for quoted header cells containing line breaks).
    with open(csv_file, mode='r', newline='') as infile:
        header = next(csv.reader(infile), [])
    column_names = [name.strip().lower() for name in header]

    # Index the header once: first position of every name, plus the set of
    # names that occur more than once.
    first_index = {}
    duplicated = set()
    for position, name in enumerate(column_names):
        if name in first_index:
            duplicated.add(name)
        else:
            first_index[name] = position

    def get_column_index(col_name):
        """Return the column index of ``col_name`` or -1; reject duplicates."""
        if col_name in duplicated:
            raise Exception('Duplicate parameter found in csv file:', col_name)
        return first_index.get(col_name, -1)

    map_parameter_column = {}
    for parameter, parameter_recs in parameters_object.items():
        inner = parameter[len(parameter_start_delimiter):-len(parameter_end_delimiter)]
        parameter_key = inner.strip().lower()
        for parameter_rec in parameter_recs:
            _, _, root_list = parameter_rec
            d_array = len(root_list)

            if d_array == 0:
                # Parameter outside any array: resolve it once, keep the
                # first successful mapping.
                if map_parameter_column.get(parameter) is None:
                    csv_index = get_column_index(parameter_key)
                    if csv_index >= 0:
                        map_parameter_column[parameter] = csv_index
                continue

            # Parameter inside an array: it may be mapped only once.
            if map_parameter_column.get(parameter) is not None:
                raise Exception('Duplicate array parameter not allowed:', parameter)
            parameter_column = []
            map_parameter_column[parameter] = parameter_column

            # One "_<positive int>" suffix per nesting level.
            array_pattern = re.compile(
                re.escape(parameter_key) + '_[1-9][0-9]*' * d_array)
            suffix_start = len(parameter_key) + 1

            # Collect only the headers that really exist instead of probing
            # every combination up to the largest index seen.  The pattern
            # forbids leading zeros, so index tuple <-> header name is
            # one-to-one.
            matched = {}
            for name in first_index:
                if array_pattern.fullmatch(name):
                    numbers = name[suffix_start:].split('_')
                    matched[tuple(int(n) - 1 for n in numbers)] = name

            # Ascending tuple order == odometer order (last index fastest).
            for indexes in sorted(matched):
                csv_index = get_column_index(matched[indexes])
                parameter_column.append(list(indexes) + [csv_index])

    return map_parameter_column