def create_data_schema_from_dir()

in src/lookoutequipment/schema.py [0:0]


def create_data_schema_from_dir(root_dir):
    """
    Generates a data schema compatible for Lookout for Equipment from a local
    directory

    Each immediate sub-directory of ``root_dir`` is treated as one component
    (subsystem). The first row of every file found below that sub-directory
    is read with pandas to collect its column names. The first column of each
    file is assumed to hold the timestamp: it is dropped and replaced by a
    single leading ``Timestamp`` entry, the remaining columns are the tags of
    the component. Jupyter ``.ipynb_checkpoints`` folders are ignored at every
    level, and directories and files are visited in sorted order so that the
    generated schema is reproducible across runs and platforms.

    Parameters:
        root_dir (string):
            a path pointing to the root directory where all the CSV files are
            located (one sub-directory per subsystem)

    Returns:
        string:
            a JSON-formatted string ready to be used as a schema for a Lookout
            for Equipment dataset (this is the value returned by
            create_data_schema for the collected component/tags mapping)
    """
    ignored_dir = '.ipynb_checkpoints'

    # Only the immediate children of the root directory are subsystems: take
    # the first entry yielded by os.walk instead of recursing, which would
    # also collect the names of nested folders. The default keeps the "no
    # component" behavior when root_dir does not exist or cannot be listed.
    _, top_level_dirs, _ = next(os.walk(root_dir), (root_dir, [], []))
    components = sorted(d for d in top_level_dirs if d != ignored_dir)

    # Maps each component name to ['Timestamp', tag_1, tag_2, ...]:
    component_fields_map = dict()
    for subsystem in components:
        subsystem_tags = ['Timestamp']
        for root, dirs, files in os.walk(os.path.join(root_dir, subsystem)):
            # Pruning dirs in place stops os.walk from descending into
            # checkpoint folders, whose CSV copies would duplicate the tags:
            dirs[:] = sorted(d for d in dirs if d != ignored_dir)

            for file in sorted(files):
                fname = os.path.join(root, file)

                # Only the header is needed, a single row is enough:
                header_df = pd.read_csv(fname, nrows=1)

                # Skips the first column (timestamp), already accounted for:
                subsystem_tags.extend(header_df.columns.tolist()[1:])

        component_fields_map[subsystem] = subsystem_tags

    return create_data_schema(component_fields_map)