in LogicApps-AI-RAG-Demo/TokenizeDocFunction/function_app.py [0:0]
def validate_json_data(json_data):
    """Fill in default chunking options on *json_data* and validate them.

    The mapping is modified in place: each of the four option keys below is
    set to its default when absent, then checked. Checks run in the order
    listed, so when one fails the keys after it have not been defaulted yet.

    Keys and defaults:
        chunkSize (4000): number greater than 1.
        chunkOverlap (200): number greater than or equal to 0.
        splittingStrategy ("RECURSIVE"): one of RECURSIVE, TOKEN, MARKUP,
            HTML, compared case-insensitively.
        secondarySplittingStrategy ("RECURSIVE"): one of RECURSIVE, TOKEN,
            compared case-insensitively.

    Strategy values are only upper-cased for the comparison; the stored value
    keeps whatever casing the caller supplied.

    Args:
        json_data: Mutable mapping holding the request options.

    Returns:
        None. Results are communicated by mutating *json_data*.

    Raises:
        ValueError: If any option has the wrong type (including an explicit
            null) or is outside its allowed range / set of names.
    """
    json_data["chunkSize"] = json_data.get("chunkSize", 4000)
    # Type-check before comparing: None or a string would otherwise escape
    # as TypeError instead of the ValueError this validator reports.
    _require_number(json_data["chunkSize"], "Chunk size should be a number.")
    if json_data["chunkSize"] <= 1:
        raise ValueError("Chunk size should be greater than 1.")

    json_data["chunkOverlap"] = json_data.get("chunkOverlap", 200)
    _require_number(
        json_data["chunkOverlap"], "Chunk overlap should be a number."
    )
    if json_data["chunkOverlap"] < 0:
        raise ValueError("Chunk overlap should be 0 or greater.")

    valid_primary_splitters = {"RECURSIVE", "TOKEN", "MARKUP", "HTML"}
    json_data["splittingStrategy"] = json_data.get(
        "splittingStrategy", "RECURSIVE"
    )
    _require_splitter(
        json_data["splittingStrategy"],
        valid_primary_splitters,
        "Invalid primary splitter value.",
    )

    valid_secondary_splitters = {"RECURSIVE", "TOKEN"}
    json_data["secondarySplittingStrategy"] = json_data.get(
        "secondarySplittingStrategy", "RECURSIVE"
    )
    _require_splitter(
        json_data["secondarySplittingStrategy"],
        valid_secondary_splitters,
        "Invalid secondary splitter value.",
    )


def _require_number(value, message):
    """Raise ValueError(message) unless *value* is an int or float."""
    # bool is a subclass of int; reject it so True/False are not silently
    # accepted as 1/0.
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        raise ValueError(message)


def _require_splitter(value, allowed, message):
    """Raise ValueError(message) unless *value* names a member of *allowed*."""
    # A non-string has no .upper(); treat it as an invalid choice rather than
    # letting AttributeError escape.
    if not isinstance(value, str) or value.upper() not in allowed:
        raise ValueError(message)