def validate_json_data()

in LogicApps-AI-RAG-Demo/TokenizeDocFunction/function_app.py [0:0]


def validate_json_data(json_data):
    """Validate the chunking options in *json_data*, filling in defaults.

    The mapping is updated in place. Any of the following keys that is
    absent is set to its default before being validated:

    * ``chunkSize`` -> 4000
    * ``chunkOverlap`` -> 200
    * ``splittingStrategy`` -> "RECURSIVE"
    * ``secondarySplittingStrategy`` -> "RECURSIVE"

    Strategy names are matched case-insensitively; the stored value keeps
    whatever casing the caller supplied. Keys are processed in the order
    listed above, so when validation fails the keys handled up to and
    including the failing one have already been written back.

    Args:
        json_data: Mutable mapping of options (supports ``.get`` and item
            assignment).

    Returns:
        None. The result is the mutation of *json_data*.

    Raises:
        ValueError: If ``chunkSize`` is not a number greater than 1, if
            ``chunkOverlap`` is not a number greater than or equal to 0,
            or if either strategy is not a string naming a supported
            splitter.
    """
    chunk_size = json_data.get("chunkSize", 4000)
    json_data["chunkSize"] = chunk_size
    try:
        size_too_small = chunk_size <= 1
    except TypeError as err:
        # Non-numeric values (e.g. a string or an explicit null) cannot be
        # compared with an int; report them as a validation failure rather
        # than letting the TypeError escape.
        raise ValueError("Chunk size should be greater than 1.") from err
    if size_too_small:
        raise ValueError("Chunk size should be greater than 1.")

    chunk_overlap = json_data.get("chunkOverlap", 200)
    json_data["chunkOverlap"] = chunk_overlap
    try:
        overlap_negative = chunk_overlap < 0
    except TypeError as err:
        raise ValueError("Chunk overlap should be 0 or greater.") from err
    if overlap_negative:
        raise ValueError("Chunk overlap should be 0 or greater.")

    valid_primary_splitters = {"RECURSIVE", "TOKEN", "MARKUP", "HTML"}
    primary = json_data.get("splittingStrategy", "RECURSIVE")
    json_data["splittingStrategy"] = primary
    # The isinstance guard keeps a non-string value (null, number, list)
    # from raising AttributeError on .upper().
    if not isinstance(primary, str) or primary.upper() not in valid_primary_splitters:
        raise ValueError("Invalid primary splitter value.")

    valid_secondary_splitters = {"RECURSIVE", "TOKEN"}
    secondary = json_data.get("secondarySplittingStrategy", "RECURSIVE")
    json_data["secondarySplittingStrategy"] = secondary
    if not isinstance(secondary, str) or secondary.upper() not in valid_secondary_splitters:
        raise ValueError("Invalid secondary splitter value.")