def ingest_json_files()

in 4-mmrag_tooluse/ingest.py [0:0]


def ingest_json_files(json_folder_path, db_path):
    """
    Ingest JSON files into the SQLite database.
    """
    # Connect to the SQLite3 database
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()

    # Create tables
    create_tables(cursor)

    # Loop over all JSON files in the specified folder
    for filename in os.listdir(json_folder_path):
        if filename.endswith(".json"):
            file_path = os.path.join(json_folder_path, filename)

            # Load the JSON data
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            title = data.get("title")
            report_date = data.get("report_date")
            records = data.get("data", [])

            if title == "Free_Cash_Flow_Less_Equipment_Finance_Leases":
                for record in records:
                    cursor.execute('''
                    INSERT INTO Free_Cash_Flow_Less_Equipment_Finance_Leases (
                        report_date,
                        quarter,
                        operating_cash_flow,
                        purchases_of_property_and_equipment,
                        equipment_acquired_under_finance_leases,
                        principal_repayments_of_other_finance_leases,
                        principal_repayments_of_financing_obligations,
                        free_cash_flow_less_equipment_finance_leases
                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                    ''', (
                        report_date,
                        record.get("quarter"),
                        record.get("operating_cash_flow"),
                        record.get("purchases_of_property_and_equipment"),
                        record.get("equipment_acquired_under_finance_leases"),
                        record.get(
                            "principal_repayments_of_other_finance_leases"),
                        record.get(
                            "principal_repayments_of_financing_obligations"),
                        record.get(
                            "free_cash_flow_less_equipment_finance_leases")
                    ))
            elif title == "Free_Cash_Flow_Less_Principal_Repayments":
                for record in records:
                    cursor.execute('''
                    INSERT INTO Free_Cash_Flow_Less_Principal_Repayments (
                        report_date,
                        quarter,
                        operating_cash_flow,
                        purchases_of_property_and_equipment,
                        principal_repayments_of_finance_leases,
                        principal_repayments_of_financing_obligations,
                        free_cash_flow_less_principal_repayments
                    ) VALUES (?, ?, ?, ?, ?, ?, ?)
                    ''', (
                        report_date,
                        record.get("quarter"),
                        record.get("operating_cash_flow"),
                        record.get("purchases_of_property_and_equipment"),
                        record.get("principal_repayments_of_finance_leases"),
                        record.get(
                            "principal_repayments_of_financing_obligations"),
                        record.get("free_cash_flow_less_principal_repayments")
                    ))
            elif title == "Free_Cash_Flow_Reconciliation":
                for record in records:
                    cursor.execute('''
                    INSERT INTO Free_Cash_Flow_Reconciliation (
                        report_date,
                        quarter,
                        operating_cash_flow,
                        purchases_of_property_and_equipment,
                        free_cash_flow
                    ) VALUES (?, ?, ?, ?, ?)
                    ''', (
                        report_date,
                        record.get("quarter"),
                        record.get("operating_cash_flow"),
                        record.get("purchases_of_property_and_equipment"),
                        record.get("free_cash_flow")
                    ))
            else:
                print(
                    f"Unknown title '{title}' in file '{filename}'. Skipping.")

    # Commit changes and close connection
    conn.commit()
    conn.close()