def create_jsonl_and_mltable_files()

in cli/foundation-models/system/finetune/video-multi-object-tracking/prepare_data.py [0:0]


def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir):
    """Convert the MOT dataset to JSONL annotations and write the MLTable folders.

    Three sibling folders of ``dataset_dir`` are created if missing
    (``training-mltable-folder``, ``validation-mltable-folder`` and
    ``testing-mltable-folder``). The MOT annotations are converted to COCO
    format, the train and validation COCO files are converted to JSONL inside
    their MLTable folder, and an MLTable file is saved in each of the three
    folders.

    :param uri_folder_data_path: Path to the data folder; used as the prefix of
        the image base URL. A trailing "/" is appended when it is missing.
    :param dataset_dir: Path to the dataset folder
    """
    # Split label -> (MLTable folder relative to dataset_dir, JSONL file name).
    # Each file name is declared once so the converter output and the MLTable
    # definition cannot drift apart.
    splits = {
        "train": ("../training-mltable-folder", "train_annotations.jsonl"),
        "validation": (
            "../validation-mltable-folder",
            "validation_annotations.jsonl",
        ),
        "testing": ("../testing-mltable-folder", "testing_annotations.jsonl"),
    }
    # Splits converted to JSONL here -> COCO file read from the annotations
    # folder. Presumably these files are produced by mot2coco_converter when
    # split_train=True -- TODO confirm against the converter.
    coco_files = {
        "train": "half-train_cocoformat.json",
        "validation": "half-val_cocoformat.json",
    }

    # We'll write each JSONL file within its related MLTable folder
    mltable_dirs = {
        split: os.path.join(dataset_dir, folder)
        for split, (folder, _) in splits.items()
    }

    # First, let's create the folders if they don't exist
    for mltable_dir in mltable_dirs.values():
        os.makedirs(mltable_dir, exist_ok=True)

    print("Creating jsonl files")

    # Second, convert the MOT annotations to COCO format
    print("convert MOT format to COCO format")
    annotations_dir = f"{dataset_dir}/annotations"
    mot2coco_converter(
        argparse.Namespace(
            input=dataset_dir,
            output=annotations_dir,
            convert_det=True,
            split_train=True,
        )
    )

    # Add the separator only when the caller did not supply it, so a path
    # without a trailing "/" no longer produces "...foldertrain". An empty
    # prefix is left untouched.
    url_prefix = str(uri_folder_data_path)
    if url_prefix and not url_prefix.endswith("/"):
        url_prefix += "/"
    # Both splits use the "train" base URL; presumably the validation half is
    # carved out of the train data (split_train=True) -- TODO confirm.
    images_base_url = f"{url_prefix}train"

    # Third, convert the COCO files to jsonl inside their MLTable folder
    print("Converting COCO video format to jsonl")
    for split, coco_file in coco_files.items():
        cocovid2jsonl_converter(
            argparse.Namespace(
                input_cocovid_file_path=f"{annotations_dir}/{coco_file}",
                output_dir=mltable_dirs[split],
                output_file_name=splits[split][1],
                task_type="ObjectTracking",
                base_url=images_base_url,
            )
        )

    # Finally, create and save one MLTable per split.
    # NOTE(review): nothing in this function writes testing_annotations.jsonl,
    # so the testing MLTable references a file that must come from elsewhere
    # -- confirm this is intended.
    for split, (_, jsonl_name) in splits.items():
        print(f"create and save {split} mltable")
        mltable_file_contents = create_ml_table_file(jsonl_name)
        save_ml_table_file(mltable_dirs[split], mltable_file_contents)