in src/lookoutequipment/schema.py [0:0]
def create_data_schema_from_dir(root_dir):
    """
    Generates a data schema compatible for Lookout for Equipment from a local
    directory

    Every immediate subdirectory of ``root_dir`` is treated as one component
    (subsystem). For each file found under a component directory, only the
    beginning of the file is read to obtain its column names; the first
    column is skipped (presumably the timestamp column - confirm against the
    data layout) and the remaining names are appended to the component's tag
    list, which always starts with ``'Timestamp'``. Directories named
    ``.ipynb_checkpoints`` are ignored at every level.

    Parameters:
        root_dir (string):
            a path pointing to the root directory where all the CSV files are
            located

    Returns:
        string:
            the value returned by ``create_data_schema`` for the collected
            component -> tags mapping (described by the original author as a
            JSON-formatted string ready to be used as a schema for a Lookout
            for Equipment dataset)
    """
    ignored_dir = '.ipynb_checkpoints'

    # Only the first level of root_dir holds components: every name is later
    # resolved as <root_dir>/<name>, so directories nested deeper must not be
    # collected here. A missing root_dir yields no components at all.
    _, top_level_dirs, _ = next(os.walk(root_dir), (root_dir, [], []))

    # os.walk lists entries in arbitrary order; sorting keeps the generated
    # schema reproducible from one run / filesystem to the next.
    components = sorted(name for name in top_level_dirs if name != ignored_dir)

    component_fields_map = {}
    for subsystem in components:
        subsystem_tags = ['Timestamp']
        for root, dirs, files in os.walk(os.path.join(root_dir, subsystem)):
            # Prune in place so os.walk neither descends into notebook
            # checkpoint folders nor visits subfolders in arbitrary order.
            dirs[:] = sorted(name for name in dirs if name != ignored_dir)

            for file in sorted(files):
                fname = os.path.join(root, file)

                # A single row is enough to obtain the column names:
                current_subsystem_df = pd.read_csv(fname, nrows=1)
                subsystem_tags.extend(current_subsystem_df.columns.tolist()[1:])

        component_fields_map[subsystem] = subsystem_tags

    return create_data_schema(component_fields_map)