3_optimization-design-ptn/03_prompt-optimization/promptwizard/glue/common/utils/file.py (71 lines of code) (raw):

import json
from os.path import join
from typing import Any, Dict, Iterator, List, Optional

import yaml

from ..exceptions import GlueValidaionException


def yaml_to_dict(file_path: str) -> Dict:
    """
    Read the yaml file at `file_path` and parse it with `yaml.safe_load`.

    :param file_path: Path of the yaml file to read.
    :return: Parsed content of the file. Whatever `yaml.safe_load` produces is
        returned as-is, so an empty file yields None.
    :raises GlueValidaionException: If the file content is not valid yaml.
    """
    # Read explicitly as UTF-8 so parsing does not depend on the platform's default locale.
    with open(file_path, encoding="utf-8") as yaml_file:
        yaml_string = yaml_file.read()

    try:
        # convert yaml string to dict
        parsed_dict = yaml.safe_load(yaml_string)
    except yaml.YAMLError as e:
        # YAMLError is the base class of ScannerError, ParserError, etc., so every
        # kind of yaml syntax problem is reported through the same exception type.
        raise GlueValidaionException(
            f"There could be some syntax error in yaml written in {file_path}", e
        ) from e
    return parsed_dict


def yaml_to_class(yaml_file_path: str, cls: type, default_yaml_file_path: Optional[str] = None):
    """
    Read yaml file present at path `yaml_file_path`, convert it to dictionary using pyyaml's
    standard methods. Then convert this dictionary to class object of class given as `cls`.
    Top-level keys that are missing in `yaml_file_path` are filled with the values found in
    `default_yaml_file_path` (when it is given).

    :param yaml_file_path: Path of the user's yaml file. If falsy, `default_yaml_file_path`
        is read instead.
    :param cls: Class that is instantiated as `cls(**parsed_yaml)`.
    :param default_yaml_file_path: Optional path of a yaml file holding default values.
    :return: Instance of `cls`.
    :raises GlueValidaionException: If yaml is malformed, or if `cls` raises TypeError for the
        resulting keyword arguments.
    """
    if not yaml_file_path:
        yaml_file_path = default_yaml_file_path

    # An empty yaml document parses to None; treat it as "no arguments provided".
    custom_args = yaml_to_dict(yaml_file_path) or {}

    if default_yaml_file_path:
        # If user has not provided all the required arguments, fill them with defaults
        default_args = yaml_to_dict(default_yaml_file_path) or {}
        for key, value in default_args.items():
            custom_args.setdefault(key, value)

    try:
        yaml_as_class = cls(**custom_args)
    except TypeError as e:
        raise GlueValidaionException(
            f"Exception while converting yaml file at {yaml_file_path} "
            f"to class {cls.__name__}: ",
            e,
        ) from e

    return yaml_as_class


def read_jsonl(file_path: str) -> List:
    """
    Load a whole .jsonl file into memory.
    This function should be used when size of jsonl file is not too big.

    :param file_path: Path of the .jsonl file.
    :return: All json objects in .jsonl file as a list. Blank lines are skipped.
    :raises json.JSONDecodeError: If a non-blank line is not valid json.
    """
    jsonl_list = []
    with open(file_path, "r", encoding="utf-8") as fileobj:
        for line in fileobj:
            single_row = line.strip()
            if not single_row:
                # Blank (e.g. trailing) lines carry no record; json.loads("") would raise.
                continue
            jsonl_list.append(json.loads(single_row))
    return jsonl_list


def read_jsonl_row(file_path: str) -> Iterator[Any]:
    """
    Lazily iterate over the json objects of a .jsonl file.

    Lines that cannot be parsed are reported with `print` and skipped; blank lines are
    skipped silently.

    :param file_path: Path of the .jsonl file.
    :return: Generator yielding one parsed json object per line. One at a time.
    """
    with open(file_path, "r", encoding="utf-8") as fileobj:
        for line in fileobj:
            single_row = line.strip()
            if not single_row:
                continue
            try:
                json_object = json.loads(single_row)
            except json.JSONDecodeError as e:
                print(f"Error while reading jsonl file at {file_path}. Error: {e}")
                continue
            yield json_object


def append_as_jsonl(file_path: str, args_to_log: Dict):
    """
    Append `args_to_log` as a single json line at the end of the file.

    :param file_path: Path of the .jsonl file; opened in append mode.
    :param args_to_log: Dictionary to serialize. Values that json cannot serialize
        natively are converted with `str`.
    :return: None
    """
    json_str = json.dumps(args_to_log, default=str)
    with open(file_path, "a", encoding="utf-8") as fileobj:
        fileobj.write(json_str + "\n")


def save_jsonlist(file_path: str, json_list: List, mode: str = "a"):
    """
    Write every object of `json_list` to `file_path`, one json document per line.

    :param json_list: List of json objects
    :param file_path: File location to which we shall save content of json_list list, in jsonl format.
    :param mode: Write mode
    :return: None
    """
    # ensure_ascii=False emits raw non-ASCII characters, so the file must be opened with an
    # encoding able to represent them; the platform default (e.g. cp1252) may not be.
    with open(file_path, mode, encoding="utf-8") as file_obj:
        for json_obj in json_list:
            json_str = json.dumps(json_obj, default=str, ensure_ascii=False)
            file_obj.write(json_str + "\n")


def str_list_to_dir_path(str_list: List[str]) -> str:
    """
    Return a string which is directory path formed out of concatenating given strings in list `str_list`

    e.g.
    str_list=["dir_1", "sub_dir_1"]
    return "dir_1/sub_dir_1" (or "dir_1\\sub_dir_1" where the path separator is a backslash)

    :param str_list: Path components, in order.
    :return: Components combined with `os.path.join`; empty string for an empty list.
    """
    if not str_list:
        return ""

    path = ""
    for dir_name in str_list:
        path = join(path, dir_name)

    return path