in community-content/vertex_model_garden/model_oss/data_converter/data_converter_main.py [0:0]
def main(_) -> None:
  """Runs the data converter selected by the objective and input-type flags.

  Logs the requested configuration, converts the split-ratio and shard flag
  values to numbers, validates them via `common_lib`, and then hands off to
  the converter that matches the (objective, input file type) pair.

  Args:
    _: Unused positional argument.

  Raises:
    NotImplementedError: If no converter is wired up for the requested
      combination of objective and input file type.
  """
  # Read each flag once, in the same order the log line reports them.
  input_path = _INPUT_FILE_PATH.value
  file_type = _INPUT_FILE_TYPE.value
  raw_split_ratio = _SPLIT_RATIO.value
  objective = _OBJECTIVE.value
  raw_num_shard = _NUM_SHARD.value
  output_dir = _OUTPUT_DIR.value

  logging.info(
      (
          'Start data converter on: %s (type: %s) with split: %s for %s'
          ' (shard=%s), and output to %s.'
      ),
      input_path,
      file_type,
      raw_split_ratio,
      objective,
      raw_num_shard,
      output_dir,
  )

  # Both conversions happen before either validation call.
  split_ratio = [float(ratio) for ratio in raw_split_ratio]
  num_shard = [int(count) for count in raw_num_shard]
  common_lib.check_split_ratio(split_ratio)
  common_lib.check_num_shard(num_shard)

  if objective == constants.OBJECTIVE_IMAGE_OBJECT_DETECTION:
    if file_type == constants.INPUT_FILE_TYPE_CSV:
      data_converter_iod_lib.convert_csv_to_tfrecord(
          input_path, output_dir, split_ratio, num_shard
      )
      return
    if file_type == constants.INPUT_FILE_TYPE_JSONL:
      data_converter_iod_lib.convert_jsonl_to_tfrecord(
          input_path, output_dir, split_ratio, num_shard
      )
      return
    if file_type == constants.INPUT_FILE_TYPE_COCO_JSON:
      data_converter_iod_lib.convert_coco_json_to_tfrecord(
          input_path, output_dir, split_ratio, num_shard
      )
      return

  if objective == constants.OBJECTIVE_IMAGE_SEGMENTATION:
    # NOTE(review): this branch does not inspect the input file type; every
    # segmentation request goes to the COCO JSON builder. Confirm intended.
    data_converter_isg_lib.beam_build_tfrecord_from_coco_json(
        input_path, output_dir, split_ratio, num_shard
    )
    return

  if objective == constants.OBJECTIVE_IMAGE_CLASSIFICATION:
    if file_type == constants.INPUT_FILE_TYPE_CSV:
      data_converter_icn_lib.convert_csv_to_tfrecord(
          input_path, output_dir, split_ratio, num_shard
      )
      return
    if file_type == constants.INPUT_FILE_TYPE_JSONL:
      data_converter_icn_lib.convert_jsonl_to_tfrecord(
          input_path, output_dir, split_ratio, num_shard
      )
      return

  if objective == constants.OBJECTIVE_VIDEO_CLASSIFICATION:
    # The video converters additionally take the output FPS flag.
    if file_type == constants.INPUT_FILE_TYPE_CSV:
      data_converter_vcn_lib.convert_csv_to_tfrecord(
          input_path, output_dir, _OUTPUT_FPS.value, split_ratio, num_shard
      )
      return
    if file_type == constants.INPUT_FILE_TYPE_JSONL:
      data_converter_vcn_lib.convert_jsonl_to_tfrecord(
          input_path, output_dir, _OUTPUT_FPS.value, split_ratio, num_shard
      )
      return

  # No converter matched the requested objective / file type pair.
  raise NotImplementedError(
      f'File format {file_type} is not supported for {objective}.'
  )